Merged in release/async (pull request #43)
ENCOA-255: GPT was grouping parts by sections, and the reading passages had not been updated to use text.content instead of the old context field. Approved-by: Tiago Ribeiro
This commit is contained in:
@@ -54,7 +54,6 @@ async def import_level(
|
||||
):
|
||||
return await level_controller.upload_level(exercises, solutions)
|
||||
|
||||
|
||||
@level_router.post(
|
||||
'/custom/',
|
||||
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
|
||||
|
||||
@@ -47,6 +47,7 @@ class ReadingExerciseType(str, Enum):
|
||||
trueFalse = "trueFalse"
|
||||
paragraphMatch = "paragraphMatch"
|
||||
ideaMatch = "ideaMatch"
|
||||
multipleChoice = "multipleChoice"
|
||||
|
||||
|
||||
class ListeningExerciseType(str, Enum):
|
||||
|
||||
@@ -47,10 +47,13 @@ class FillBlanksExercise(BaseModel):
|
||||
|
||||
Exercise = Union[MultipleChoiceExercise, FillBlanksExercise]
|
||||
|
||||
class Text(BaseModel):
|
||||
content: str
|
||||
title: str
|
||||
|
||||
class Part(BaseModel):
|
||||
exercises: List[Exercise]
|
||||
context: Optional[str] = Field(default=None)
|
||||
text: Optional[Text] = Field(default=None)
|
||||
|
||||
|
||||
class Exam(BaseModel):
|
||||
|
||||
@@ -5,7 +5,7 @@ from pydantic import ValidationError
|
||||
from app.dtos.exams.level import (
|
||||
MultipleChoiceExercise,
|
||||
FillBlanksExercise,
|
||||
Part, Exam
|
||||
Part, Exam, Text
|
||||
)
|
||||
from app.dtos.sheet import Sheet, Option, MultipleChoiceQuestion, FillBlanksWord
|
||||
|
||||
@@ -17,7 +17,7 @@ class LevelMapper:
|
||||
parts = []
|
||||
for part in response['parts']:
|
||||
part_exercises = part['exercises']
|
||||
context = part.get('context', None)
|
||||
text = part.get('text', None)
|
||||
|
||||
exercises = []
|
||||
for exercise in part_exercises:
|
||||
@@ -32,8 +32,13 @@ class LevelMapper:
|
||||
exercises.append(exercise_model)
|
||||
|
||||
part_kwargs = {"exercises": exercises}
|
||||
if context is not None:
|
||||
part_kwargs["context"] = context
|
||||
if text is not None and text.get('content', None):
|
||||
title = text.get('title', 'Untitled')
|
||||
if title == '':
|
||||
title = 'Untitled'
|
||||
part_kwargs["text"] = Text(title=title, content=text['content'])
|
||||
else:
|
||||
part_kwargs["text"] = None
|
||||
|
||||
part_model = Part(**part_kwargs)
|
||||
parts.append(part_model)
|
||||
|
||||
@@ -35,7 +35,7 @@ class UploadLevelModule:
|
||||
#completion: Coroutine[Any, Any, Exam] = (
|
||||
# self._png_completion(path_id) if file_has_images else self._html_completion(path_id)
|
||||
#)
|
||||
response = await self._html_completion(path_id)
|
||||
response = await self._html_completion(path_id, solutions is not None)
|
||||
|
||||
FileHelper.remove_directory(f'./tmp/{path_id}')
|
||||
|
||||
@@ -57,7 +57,10 @@ class UploadLevelModule:
|
||||
return {
|
||||
"parts": [
|
||||
{
|
||||
"context": "<this attribute is optional you may exclude it if not required>",
|
||||
"text": {
|
||||
"content": "<this attribute is mandatory if there is a text passage else this 'text' field is omitted>",
|
||||
"title": "<this attribute is optional you may exclude it if not required>",
|
||||
},
|
||||
"exercises": [
|
||||
self._multiple_choice_html(),
|
||||
self._passage_blank_space_html()
|
||||
@@ -66,16 +69,26 @@ class UploadLevelModule:
|
||||
]
|
||||
}
|
||||
|
||||
async def _html_completion(self, path_id: str) -> Exam:
|
||||
async def _html_completion(self, path_id: str, solutions_provided: bool) -> Exam:
|
||||
async with aiofiles.open(f'./tmp/{path_id}/exercises.html', 'r', encoding='utf-8') as f:
|
||||
html = await f.read()
|
||||
|
||||
solutions = []
|
||||
if solutions_provided:
|
||||
async with aiofiles.open(f'./tmp/{path_id}/solutions.html', 'r', encoding='utf-8') as f:
|
||||
solutions_html = await f.read()
|
||||
solutions.append({
|
||||
"role": "user",
|
||||
"content": f'The solutions to the question sheet are the following:\n\n{solutions_html}'
|
||||
})
|
||||
|
||||
return await self._llm.pydantic_prediction(
|
||||
[self._gpt_instructions_html(),
|
||||
{
|
||||
"role": "user",
|
||||
"content": html
|
||||
}
|
||||
},
|
||||
*solutions
|
||||
],
|
||||
LevelMapper.map_to_exam_model,
|
||||
str(self._level_json_schema())
|
||||
@@ -86,7 +99,7 @@ class UploadLevelModule:
|
||||
"role": "system",
|
||||
"content": (
|
||||
'You are GPT Scraper and your job is to clean dirty html into clean usable JSON formatted data.'
|
||||
'Your current task is to scrape html english questions sheets.\n\n'
|
||||
'Your current task is to scrape html english questions sheets and structure them into parts NOT sections.\n\n'
|
||||
|
||||
'In the question sheet you will only see 4 types of question:\n'
|
||||
'- blank space multiple choice\n'
|
||||
@@ -111,23 +124,26 @@ class UploadLevelModule:
|
||||
'out the best paragraph separation possible.'
|
||||
|
||||
'You will place all the information in a single JSON: '
|
||||
'{"parts": [{"exercises": [{...}], "context": ""}]}\n '
|
||||
'{"parts": [{"exercises": [{...}], "text": {"title": "", "content": ""} ]}\n '
|
||||
'Where {...} are the exercises templates for each part of a question sheet and the optional field '
|
||||
'context.'
|
||||
|
||||
'IMPORTANT: The question sheet may be divided by sections but you need to only consider the parts, '
|
||||
'so that you can group the exercises by the parts that are in the html, this is crucial since only '
|
||||
'reading passage multiple choice require context and if the context is included in parts where it '
|
||||
'is not required the UI will be messed up. Some make sure to correctly group the exercises by parts.\n'
|
||||
'text, which contains the reading passages that are required in order to solve the part questions, '
|
||||
'(if there are passages) place them in text.content and if there is a title place it in text.title '
|
||||
'else omit the title field.\n'
|
||||
|
||||
'IMPORTANT: As stated earlier your job is to structure the questions into PARTS not SECTION, this means '
|
||||
'that if there is for example: Section 1, Part 1 and Part 2, Section 2, Part 1 and Part 2, you MUST '
|
||||
'place in the parts array 4 parts NOT 2 parts with the exercises of both parts! If there are no sections '
|
||||
'and only Parts then group them by parts, and when I say parts I mean it in the fucking literal sense of the'
|
||||
' word Part x which is in the html. '
|
||||
'You must strictly adhere to this instruction, do not mistake sections for parts!\n'
|
||||
|
||||
'The templates for the exercises are the following:\n'
|
||||
'- blank space multiple choice, underline multiple choice and reading passage multiple choice: '
|
||||
f'{self._multiple_choice_html()}\n'
|
||||
f'- reading passage blank space multiple choice: {self._passage_blank_space_html()}\n'
|
||||
|
||||
'IMPORTANT: For the reading passage multiple choice the context field must be set with the reading '
|
||||
'passages without paragraphs or line numbers, with 2 newlines between paragraphs, for the other '
|
||||
'exercises exclude the context field.'
|
||||
'IMPORTANT: The text.content field must be set with the reading passages of a part (if there is one)'
|
||||
'without paragraphs or line numbers, with 2 newlines between paragraphs.'
|
||||
)
|
||||
}
|
||||
|
||||
@@ -135,30 +151,19 @@ class UploadLevelModule:
|
||||
def _multiple_choice_html():
|
||||
return {
|
||||
"type": "multipleChoice",
|
||||
"prompt": "Select the appropriate option.",
|
||||
"prompt": "<general instructions for this section>",
|
||||
"questions": [
|
||||
{
|
||||
"id": "<the question id>",
|
||||
"prompt": "<the question>",
|
||||
"solution": "<the option id solution>",
|
||||
"id": "<question number as string>",
|
||||
"prompt": "<question text>",
|
||||
"options": [
|
||||
{
|
||||
"id": "A",
|
||||
"text": "<the a option>"
|
||||
},
|
||||
{
|
||||
"id": "B",
|
||||
"text": "<the b option>"
|
||||
},
|
||||
{
|
||||
"id": "C",
|
||||
"text": "<the c option>"
|
||||
},
|
||||
{
|
||||
"id": "D",
|
||||
"text": "<the d option>"
|
||||
"id": "<A/B/C/D>",
|
||||
"text": "<option text>"
|
||||
}
|
||||
]
|
||||
],
|
||||
"solution": "<correct option letter>",
|
||||
"variant": "text"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -171,17 +176,17 @@ class UploadLevelModule:
|
||||
"prompt": "Click a blank to select the appropriate word for it.",
|
||||
"text": (
|
||||
"<The whole text for the exercise with replacements for blank spaces and their "
|
||||
"ids with {{<question id>}} with 2 newlines between paragraphs>"
|
||||
"ids with {{<question id/number>}} with 2 newlines between paragraphs>"
|
||||
),
|
||||
"solutions": [
|
||||
{
|
||||
"id": "<question id>",
|
||||
"id": "<question number>",
|
||||
"solution": "<the option that holds the solution>"
|
||||
}
|
||||
],
|
||||
"words": [
|
||||
{
|
||||
"id": "<question id>",
|
||||
"id": "<question number>",
|
||||
"options": {
|
||||
"A": "<a option>",
|
||||
"B": "<b option>",
|
||||
@@ -205,7 +210,7 @@ class UploadLevelModule:
|
||||
self._multiple_choice_png(),
|
||||
{"type": "blanksPassage", "text": (
|
||||
"<The whole text for the exercise with replacements for blank spaces and their "
|
||||
"ids with {{<question id>}} with 2 newlines between paragraphs>"
|
||||
"ids with {{<question number>}} with 2 newlines between paragraphs>"
|
||||
)},
|
||||
{"type": "passage", "context": (
|
||||
"<reading passages without paragraphs or line numbers, with 2 newlines between paragraphs>"
|
||||
|
||||
@@ -14,11 +14,10 @@ from app.configs.constants import (
|
||||
)
|
||||
from app.helpers import FileHelper
|
||||
from .import_listening import ImportListeningModule
|
||||
from .multiple_choice import MultipleChoice
|
||||
from .write_blank_forms import WriteBlankForms
|
||||
from .write_blanks import WriteBlanks
|
||||
from .write_blank_notes import WriteBlankNotes
|
||||
from ..shared import TrueFalse
|
||||
from ..shared import TrueFalse, MultipleChoice
|
||||
|
||||
|
||||
class ListeningService(IListeningService):
|
||||
@@ -128,7 +127,7 @@ class ListeningService(IListeningService):
|
||||
if req_exercise.type == "multipleChoice" or req_exercise.type == "multipleChoice3Options":
|
||||
n_options = 4 if req_exercise.type == "multipleChoice" else 3
|
||||
question = await self._multiple_choice.gen_multiple_choice(
|
||||
dialog_type, text, req_exercise.quantity, start_id, difficulty, n_options
|
||||
text, req_exercise.quantity, start_id, difficulty, n_options
|
||||
)
|
||||
self._logger.info(f"Added multiple choice: {question}")
|
||||
return question
|
||||
|
||||
@@ -10,7 +10,7 @@ from app.services.abc import IReadingService, ILLMService
|
||||
from .fill_blanks import FillBlanks
|
||||
from .idea_match import IdeaMatch
|
||||
from .paragraph_match import ParagraphMatch
|
||||
from ..shared import TrueFalse
|
||||
from ..shared import TrueFalse, MultipleChoice
|
||||
from .import_reading import ImportReadingModule
|
||||
from .write_blanks import WriteBlanks
|
||||
|
||||
@@ -24,6 +24,7 @@ class ReadingService(IReadingService):
|
||||
self._paragraph_match = ParagraphMatch(llm)
|
||||
self._true_false = TrueFalse(llm)
|
||||
self._write_blanks = WriteBlanks(llm)
|
||||
self._multiple_choice = MultipleChoice(llm)
|
||||
self._logger = getLogger(__name__)
|
||||
self._import = ImportReadingModule(llm)
|
||||
|
||||
@@ -119,6 +120,12 @@ class ReadingService(IReadingService):
|
||||
question["variant"] = "ideaMatch"
|
||||
self._logger.info(f"Added idea match: {question}")
|
||||
return question
|
||||
elif req_exercise.type == "multipleChoice":
|
||||
question = await self._multiple_choice.gen_multiple_choice(
|
||||
text, req_exercise.quantity, start_id, difficulty, 4
|
||||
)
|
||||
self._logger.info(f"Added multiple choice: {question}")
|
||||
return question
|
||||
|
||||
async def generate_reading_exercises(self, dto: ReadingDTO):
|
||||
exercise_tasks = []
|
||||
|
||||
@@ -98,7 +98,11 @@ class ImportReadingModule:
|
||||
]
|
||||
}
|
||||
],
|
||||
"text": "<numbered questions with format in square brackets: [<question text>{{<question number>}}\\\\n] notice how there is a double backslash before the n -> I want an escaped newline in your output> ",
|
||||
"text": (
|
||||
"<numbered questions with format in square brackets: [<question text>{{<question number>}}\\\\n] "
|
||||
"- notice how there the question number inside {{}} -> the text MUST always contain the question number in that format "
|
||||
"- and notice how there is a double backslash before the n -> I want an escaped newline in your output> "
|
||||
),
|
||||
"type": "writeBlanks",
|
||||
"prompt": "<specific instructions for this exercise section>"
|
||||
}
|
||||
@@ -192,13 +196,14 @@ class ImportReadingModule:
|
||||
+ (
|
||||
"Solutions were not provided - analyze the passage carefully to determine correct answers."
|
||||
if not solutions else
|
||||
"Use the provided solutions to fill in all answer fields accurately."
|
||||
"Use the provided solutions to fill in all answer fields accurately, if word answers have all letters "
|
||||
"uppercase convert them to lowercase before assigning them."
|
||||
)
|
||||
+
|
||||
"Pay extra attention to fillblanks exercises the solution and option wording must match in case!"
|
||||
"There can't be options in lowercase and solutions in uppercase!"
|
||||
"Pay extra attention to fillblanks exercises the solution and option wording must match in case! "
|
||||
"There can't be options in lowercase and solutions in uppercase! "
|
||||
"Also PAY ATTENTION TO SECTIONS, these most likely indicate parts, and in each section/part there "
|
||||
"should be a text, if there isn't a title for it choose a reasonable one based on its contents."
|
||||
"should be a text, if there isn't a title for it choose a reasonable one based on its contents. "
|
||||
)
|
||||
|
||||
return {
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from .true_false import TrueFalse
|
||||
from .multiple_choice import MultipleChoice
|
||||
|
||||
__all__ = [
|
||||
"TrueFalse"
|
||||
]
|
||||
"TrueFalse",
|
||||
"MultipleChoice"
|
||||
]
|
||||
|
||||
@@ -11,7 +11,7 @@ class MultipleChoice:
|
||||
self._llm = llm
|
||||
|
||||
async def gen_multiple_choice(
|
||||
self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str, n_options: int = 4
|
||||
self, text: str, quantity: int, start_id: int, difficulty: str, n_options: int = 4
|
||||
):
|
||||
messages = [
|
||||
{
|
||||
@@ -27,7 +27,7 @@ class MultipleChoice:
|
||||
"role": "user",
|
||||
"content": (
|
||||
f'Generate {quantity} {difficulty} difficulty multiple choice questions of {n_options} '
|
||||
f'options for this {dialog_type}:\n"' + text + '"')
|
||||
f'options for this text:\n"' + text + '"')
|
||||
|
||||
}
|
||||
]
|
||||
Reference in New Issue
Block a user