Merged in release/async (pull request #43)

ENCOA-255: GPT was grouping parts by sections instead of by parts, and the reading passages had not been updated to use text.content in place of the old context field

Approved-by: Tiago Ribeiro
This commit is contained in:
carlos.mesquita
2024-12-04 09:18:03 +00:00
committed by Tiago Ribeiro
10 changed files with 83 additions and 57 deletions

View File

@@ -54,7 +54,6 @@ async def import_level(
):
return await level_controller.upload_level(exercises, solutions)
@level_router.post(
'/custom/',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]

View File

@@ -47,6 +47,7 @@ class ReadingExerciseType(str, Enum):
trueFalse = "trueFalse"
paragraphMatch = "paragraphMatch"
ideaMatch = "ideaMatch"
multipleChoice = "multipleChoice"
class ListeningExerciseType(str, Enum):

View File

@@ -47,10 +47,13 @@ class FillBlanksExercise(BaseModel):
Exercise = Union[MultipleChoiceExercise, FillBlanksExercise]
class Text(BaseModel):
content: str
title: str
class Part(BaseModel):
exercises: List[Exercise]
context: Optional[str] = Field(default=None)
text: Optional[Text] = Field(default=None)
class Exam(BaseModel):

View File

@@ -5,7 +5,7 @@ from pydantic import ValidationError
from app.dtos.exams.level import (
MultipleChoiceExercise,
FillBlanksExercise,
Part, Exam
Part, Exam, Text
)
from app.dtos.sheet import Sheet, Option, MultipleChoiceQuestion, FillBlanksWord
@@ -17,7 +17,7 @@ class LevelMapper:
parts = []
for part in response['parts']:
part_exercises = part['exercises']
context = part.get('context', None)
text = part.get('text', None)
exercises = []
for exercise in part_exercises:
@@ -32,8 +32,13 @@ class LevelMapper:
exercises.append(exercise_model)
part_kwargs = {"exercises": exercises}
if context is not None:
part_kwargs["context"] = context
if text is not None and text.get('content', None):
title = text.get('title', 'Untitled')
if title == '':
title = 'Untitled'
part_kwargs["text"] = Text(title=title, content=text['content'])
else:
part_kwargs["text"] = None
part_model = Part(**part_kwargs)
parts.append(part_model)

View File

@@ -35,7 +35,7 @@ class UploadLevelModule:
#completion: Coroutine[Any, Any, Exam] = (
# self._png_completion(path_id) if file_has_images else self._html_completion(path_id)
#)
response = await self._html_completion(path_id)
response = await self._html_completion(path_id, solutions is not None)
FileHelper.remove_directory(f'./tmp/{path_id}')
@@ -57,7 +57,10 @@ class UploadLevelModule:
return {
"parts": [
{
"context": "<this attribute is optional you may exclude it if not required>",
"text": {
"content": "<this attribute is mandatory if there is a text passage else this 'text' field is omitted>",
"title": "<this attribute is optional you may exclude it if not required>",
},
"exercises": [
self._multiple_choice_html(),
self._passage_blank_space_html()
@@ -66,16 +69,26 @@ class UploadLevelModule:
]
}
async def _html_completion(self, path_id: str) -> Exam:
async def _html_completion(self, path_id: str, solutions_provided: bool) -> Exam:
async with aiofiles.open(f'./tmp/{path_id}/exercises.html', 'r', encoding='utf-8') as f:
html = await f.read()
solutions = []
if solutions_provided:
async with aiofiles.open(f'./tmp/{path_id}/solutions.html', 'r', encoding='utf-8') as f:
solutions_html = await f.read()
solutions.append({
"role": "user",
"content": f'The solutions to the question sheet are the following:\n\n{solutions_html}'
})
return await self._llm.pydantic_prediction(
[self._gpt_instructions_html(),
{
"role": "user",
"content": html
}
},
*solutions
],
LevelMapper.map_to_exam_model,
str(self._level_json_schema())
@@ -86,7 +99,7 @@ class UploadLevelModule:
"role": "system",
"content": (
'You are GPT Scraper and your job is to clean dirty html into clean usable JSON formatted data.'
'Your current task is to scrape html english questions sheets.\n\n'
'Your current task is to scrape html english questions sheets and structure them into parts NOT sections.\n\n'
'In the question sheet you will only see 4 types of question:\n'
'- blank space multiple choice\n'
@@ -111,23 +124,26 @@ class UploadLevelModule:
'out the best paragraph separation possible.'
'You will place all the information in a single JSON: '
'{"parts": [{"exercises": [{...}], "context": ""}]}\n '
'{"parts": [{"exercises": [{...}], "text": {"title": "", "content": ""} ]}\n '
'Where {...} are the exercises templates for each part of a question sheet and the optional field '
'context.'
'IMPORTANT: The question sheet may be divided by sections but you need to only consider the parts, '
'so that you can group the exercises by the parts that are in the html, this is crucial since only '
'reading passage multiple choice require context and if the context is included in parts where it '
'is not required the UI will be messed up. Some make sure to correctly group the exercises by parts.\n'
'text, which contains the reading passages that are required in order to solve the part questions, '
'(if there are passages) place them in text.content and if there is a title place it in text.title '
'else omit the title field.\n'
'IMPORTANT: As stated earlier your job is to structure the questions into PARTS not SECTION, this means '
'that if there is for example: Section 1, Part 1 and Part 2, Section 2, Part 1 and Part 2, you MUST '
'place in the parts array 4 parts NOT 2 parts with the exercises of both parts! If there are no sections '
'and only Parts then group them by parts, and when I say parts I mean it in the fucking literal sense of the'
' word Part x which is in the html. '
'You must strictly adhere to this instruction, do not mistake sections for parts!\n'
'The templates for the exercises are the following:\n'
'- blank space multiple choice, underline multiple choice and reading passage multiple choice: '
f'{self._multiple_choice_html()}\n'
f'- reading passage blank space multiple choice: {self._passage_blank_space_html()}\n'
'IMPORTANT: For the reading passage multiple choice the context field must be set with the reading '
'passages without paragraphs or line numbers, with 2 newlines between paragraphs, for the other '
'exercises exclude the context field.'
'IMPORTANT: The text.content field must be set with the reading passages of a part (if there is one)'
'without paragraphs or line numbers, with 2 newlines between paragraphs.'
)
}
@@ -135,30 +151,19 @@ class UploadLevelModule:
def _multiple_choice_html():
return {
"type": "multipleChoice",
"prompt": "Select the appropriate option.",
"prompt": "<general instructions for this section>",
"questions": [
{
"id": "<the question id>",
"prompt": "<the question>",
"solution": "<the option id solution>",
"id": "<question number as string>",
"prompt": "<question text>",
"options": [
{
"id": "A",
"text": "<the a option>"
},
{
"id": "B",
"text": "<the b option>"
},
{
"id": "C",
"text": "<the c option>"
},
{
"id": "D",
"text": "<the d option>"
"id": "<A/B/C/D>",
"text": "<option text>"
}
]
],
"solution": "<correct option letter>",
"variant": "text"
}
]
}
@@ -171,17 +176,17 @@ class UploadLevelModule:
"prompt": "Click a blank to select the appropriate word for it.",
"text": (
"<The whole text for the exercise with replacements for blank spaces and their "
"ids with {{<question id>}} with 2 newlines between paragraphs>"
"ids with {{<question id/number>}} with 2 newlines between paragraphs>"
),
"solutions": [
{
"id": "<question id>",
"id": "<question number>",
"solution": "<the option that holds the solution>"
}
],
"words": [
{
"id": "<question id>",
"id": "<question number>",
"options": {
"A": "<a option>",
"B": "<b option>",
@@ -205,7 +210,7 @@ class UploadLevelModule:
self._multiple_choice_png(),
{"type": "blanksPassage", "text": (
"<The whole text for the exercise with replacements for blank spaces and their "
"ids with {{<question id>}} with 2 newlines between paragraphs>"
"ids with {{<question number>}} with 2 newlines between paragraphs>"
)},
{"type": "passage", "context": (
"<reading passages without paragraphs or line numbers, with 2 newlines between paragraphs>"

View File

@@ -14,11 +14,10 @@ from app.configs.constants import (
)
from app.helpers import FileHelper
from .import_listening import ImportListeningModule
from .multiple_choice import MultipleChoice
from .write_blank_forms import WriteBlankForms
from .write_blanks import WriteBlanks
from .write_blank_notes import WriteBlankNotes
from ..shared import TrueFalse
from ..shared import TrueFalse, MultipleChoice
class ListeningService(IListeningService):
@@ -128,7 +127,7 @@ class ListeningService(IListeningService):
if req_exercise.type == "multipleChoice" or req_exercise.type == "multipleChoice3Options":
n_options = 4 if req_exercise.type == "multipleChoice" else 3
question = await self._multiple_choice.gen_multiple_choice(
dialog_type, text, req_exercise.quantity, start_id, difficulty, n_options
text, req_exercise.quantity, start_id, difficulty, n_options
)
self._logger.info(f"Added multiple choice: {question}")
return question

View File

@@ -10,7 +10,7 @@ from app.services.abc import IReadingService, ILLMService
from .fill_blanks import FillBlanks
from .idea_match import IdeaMatch
from .paragraph_match import ParagraphMatch
from ..shared import TrueFalse
from ..shared import TrueFalse, MultipleChoice
from .import_reading import ImportReadingModule
from .write_blanks import WriteBlanks
@@ -24,6 +24,7 @@ class ReadingService(IReadingService):
self._paragraph_match = ParagraphMatch(llm)
self._true_false = TrueFalse(llm)
self._write_blanks = WriteBlanks(llm)
self._multiple_choice = MultipleChoice(llm)
self._logger = getLogger(__name__)
self._import = ImportReadingModule(llm)
@@ -119,6 +120,12 @@ class ReadingService(IReadingService):
question["variant"] = "ideaMatch"
self._logger.info(f"Added idea match: {question}")
return question
elif req_exercise.type == "multipleChoice":
question = await self._multiple_choice.gen_multiple_choice(
text, req_exercise.quantity, start_id, difficulty, 4
)
self._logger.info(f"Added multiple choice: {question}")
return question
async def generate_reading_exercises(self, dto: ReadingDTO):
exercise_tasks = []

View File

@@ -98,7 +98,11 @@ class ImportReadingModule:
]
}
],
"text": "<numbered questions with format in square brackets: [<question text>{{<question number>}}\\\\n] notice how there is a double backslash before the n -> I want an escaped newline in your output> ",
"text": (
"<numbered questions with format in square brackets: [<question text>{{<question number>}}\\\\n] "
"- notice how there the question number inside {{}} -> the text MUST always contain the question number in that format "
"- and notice how there is a double backslash before the n -> I want an escaped newline in your output> "
),
"type": "writeBlanks",
"prompt": "<specific instructions for this exercise section>"
}
@@ -192,13 +196,14 @@ class ImportReadingModule:
+ (
"Solutions were not provided - analyze the passage carefully to determine correct answers."
if not solutions else
"Use the provided solutions to fill in all answer fields accurately."
"Use the provided solutions to fill in all answer fields accurately, if word answers have all letters "
"uppercase convert them to lowercase before assigning them."
)
+
"Pay extra attention to fillblanks exercises the solution and option wording must match in case!"
"There can't be options in lowercase and solutions in uppercase!"
"Pay extra attention to fillblanks exercises the solution and option wording must match in case! "
"There can't be options in lowercase and solutions in uppercase! "
"Also PAY ATTENTION TO SECTIONS, these most likely indicate parts, and in each section/part there "
"should be a text, if there isn't a title for it choose a reasonable one based on its contents."
"should be a text, if there isn't a title for it choose a reasonable one based on its contents. "
)
return {

View File

@@ -1,5 +1,7 @@
from .true_false import TrueFalse
from .multiple_choice import MultipleChoice
__all__ = [
"TrueFalse"
]
"TrueFalse",
"MultipleChoice"
]

View File

@@ -11,7 +11,7 @@ class MultipleChoice:
self._llm = llm
async def gen_multiple_choice(
self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str, n_options: int = 4
self, text: str, quantity: int, start_id: int, difficulty: str, n_options: int = 4
):
messages = [
{
@@ -27,7 +27,7 @@ class MultipleChoice:
"role": "user",
"content": (
f'Generate {quantity} {difficulty} difficulty multiple choice questions of {n_options} '
f'options for this {dialog_type}:\n"' + text + '"')
f'options for this text:\n"' + text + '"')
}
]