diff --git a/app/api/level.py b/app/api/level.py index 2bf9cd6..d46049c 100644 --- a/app/api/level.py +++ b/app/api/level.py @@ -54,7 +54,6 @@ async def import_level( ): return await level_controller.upload_level(exercises, solutions) - @level_router.post( '/custom/', dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))] diff --git a/app/configs/constants.py b/app/configs/constants.py index b7be474..56b9a54 100644 --- a/app/configs/constants.py +++ b/app/configs/constants.py @@ -47,6 +47,7 @@ class ReadingExerciseType(str, Enum): trueFalse = "trueFalse" paragraphMatch = "paragraphMatch" ideaMatch = "ideaMatch" + multipleChoice = "multipleChoice" class ListeningExerciseType(str, Enum): diff --git a/app/dtos/exams/level.py b/app/dtos/exams/level.py index 92c217b..f5618cf 100644 --- a/app/dtos/exams/level.py +++ b/app/dtos/exams/level.py @@ -47,10 +47,13 @@ class FillBlanksExercise(BaseModel): Exercise = Union[MultipleChoiceExercise, FillBlanksExercise] +class Text(BaseModel): + content: str + title: str class Part(BaseModel): exercises: List[Exercise] - context: Optional[str] = Field(default=None) + text: Optional[Text] = Field(default=None) class Exam(BaseModel): diff --git a/app/mappers/level.py b/app/mappers/level.py index b009ddd..2863f73 100644 --- a/app/mappers/level.py +++ b/app/mappers/level.py @@ -5,7 +5,7 @@ from pydantic import ValidationError from app.dtos.exams.level import ( MultipleChoiceExercise, FillBlanksExercise, - Part, Exam + Part, Exam, Text ) from app.dtos.sheet import Sheet, Option, MultipleChoiceQuestion, FillBlanksWord @@ -17,7 +17,7 @@ class LevelMapper: parts = [] for part in response['parts']: part_exercises = part['exercises'] - context = part.get('context', None) + text = part.get('text', None) exercises = [] for exercise in part_exercises: @@ -32,8 +32,13 @@ class LevelMapper: exercises.append(exercise_model) part_kwargs = {"exercises": exercises} - if context is not None: - part_kwargs["context"] = context + if text is not None and text.get('content', None): + title = text.get('title', 'Untitled') + if title == '': + title = 'Untitled' + part_kwargs["text"] = Text(title=title, content=text['content']) + else: + part_kwargs["text"] = None part_model = Part(**part_kwargs) parts.append(part_model) diff --git a/app/services/impl/exam/level/upload.py b/app/services/impl/exam/level/upload.py index 06f2363..365d0bf 100644 --- a/app/services/impl/exam/level/upload.py +++ b/app/services/impl/exam/level/upload.py @@ -35,7 +35,7 @@ class UploadLevelModule: #completion: Coroutine[Any, Any, Exam] = ( # self._png_completion(path_id) if file_has_images else self._html_completion(path_id) #) - response = await self._html_completion(path_id) + response = await self._html_completion(path_id, solutions is not None) FileHelper.remove_directory(f'./tmp/{path_id}') @@ -57,7 +57,10 @@ class UploadLevelModule: return { "parts": [ { - "context": "", + "text": { + "content": "", + "title": "", + }, "exercises": [ self._multiple_choice_html(), self._passage_blank_space_html() @@ -66,16 +69,26 @@ class UploadLevelModule: ] } - async def _html_completion(self, path_id: str) -> Exam: + async def _html_completion(self, path_id: str, solutions_provided: bool) -> Exam: async with aiofiles.open(f'./tmp/{path_id}/exercises.html', 'r', encoding='utf-8') as f: html = await f.read() + solutions = [] + if solutions_provided: + async with aiofiles.open(f'./tmp/{path_id}/solutions.html', 'r', encoding='utf-8') as f: + solutions_html = await f.read() + solutions.append({ + "role": "user", + "content": f'The solutions to the question sheet are the following:\n\n{solutions_html}' + }) + return await self._llm.pydantic_prediction( [self._gpt_instructions_html(), { "role": "user", "content": html - } + }, + *solutions ], LevelMapper.map_to_exam_model, str(self._level_json_schema()) @@ -86,7 +99,7 @@ class UploadLevelModule: "role": "system", "content": ( 'You are GPT Scraper and your job is to clean dirty html into clean usable JSON formatted data.' - 'Your current task is to scrape html english questions sheets.\n\n' + 'Your current task is to scrape html english questions sheets and structure them into parts NOT sections.\n\n' 'In the question sheet you will only see 4 types of question:\n' '- blank space multiple choice\n' @@ -111,23 +124,26 @@ class UploadLevelModule: 'out the best paragraph separation possible.' 'You will place all the information in a single JSON: ' - '{"parts": [{"exercises": [{...}], "context": ""}]}\n ' + '{"parts": [{"exercises": [{...}], "text": {"title": "", "content": ""} ]}\n ' 'Where {...} are the exercises templates for each part of a question sheet and the optional field ' - 'context.' - - 'IMPORTANT: The question sheet may be divided by sections but you need to only consider the parts, ' - 'so that you can group the exercises by the parts that are in the html, this is crucial since only ' - 'reading passage multiple choice require context and if the context is included in parts where it ' - 'is not required the UI will be messed up. Some make sure to correctly group the exercises by parts.\n' + 'text, which contains the reading passages that are required in order to solve the part questions, ' + '(if there are passages) place them in text.content and if there is a title place it in text.title ' + 'else omit the title field.\n' + + 'IMPORTANT: As stated earlier your job is to structure the questions into PARTS not SECTION, this means ' + 'that if there is for example: Section 1, Part 1 and Part 2, Section 2, Part 1 and Part 2, you MUST ' + 'place in the parts array 4 parts NOT 2 parts with the exercises of both parts! If there are no sections ' + 'and only Parts then group them by parts, and when I say parts I mean it in the fucking literal sense of the' + ' word Part x which is in the html. ' + 'You must strictly adhere to this instruction, do not mistake sections for parts!\n' 'The templates for the exercises are the following:\n' '- blank space multiple choice, underline multiple choice and reading passage multiple choice: ' f'{self._multiple_choice_html()}\n' f'- reading passage blank space multiple choice: {self._passage_blank_space_html()}\n' - 'IMPORTANT: For the reading passage multiple choice the context field must be set with the reading ' - 'passages without paragraphs or line numbers, with 2 newlines between paragraphs, for the other ' - 'exercises exclude the context field.' + 'IMPORTANT: The text.content field must be set with the reading passages of a part (if there is one)' + 'without paragraphs or line numbers, with 2 newlines between paragraphs.' ) } @@ -135,30 +151,19 @@ class UploadLevelModule: def _multiple_choice_html(): return { "type": "multipleChoice", - "prompt": "Select the appropriate option.", + "prompt": "", "questions": [ { - "id": "", - "prompt": "", - "solution": "", + "id": "", + "prompt": "", "options": [ { - "id": "A", - "text": "" - }, - { - "id": "B", - "text": "" - }, - { - "id": "C", - "text": "" - }, - { - "id": "D", - "text": "" + "id": "", + "text": "", "B": "", @@ -205,7 +210,7 @@ class UploadLevelModule: self._multiple_choice_png(), {"type": "blanksPassage", "text": ( "}} with 2 newlines between paragraphs>" + "ids with {{}} with 2 newlines between paragraphs>" )}, {"type": "passage", "context": ( "" diff --git a/app/services/impl/exam/listening/__init__.py b/app/services/impl/exam/listening/__init__.py index f71afde..fe319be 100644 --- a/app/services/impl/exam/listening/__init__.py +++ b/app/services/impl/exam/listening/__init__.py @@ -14,11 +14,10 @@ from app.configs.constants import ( ) from app.helpers import FileHelper from .import_listening import ImportListeningModule -from .multiple_choice import MultipleChoice from .write_blank_forms import WriteBlankForms from .write_blanks import WriteBlanks from .write_blank_notes import WriteBlankNotes -from ..shared import TrueFalse +from ..shared import TrueFalse, MultipleChoice class ListeningService(IListeningService): @@ -128,7 +127,7 @@ class ListeningService(IListeningService): if req_exercise.type == "multipleChoice" or req_exercise.type == "multipleChoice3Options": n_options = 4 if req_exercise.type == "multipleChoice" else 3 question = await self._multiple_choice.gen_multiple_choice( - dialog_type, text, req_exercise.quantity, start_id, difficulty, n_options + text, req_exercise.quantity, start_id, difficulty, n_options ) self._logger.info(f"Added multiple choice: {question}") return question diff --git a/app/services/impl/exam/reading/__init__.py b/app/services/impl/exam/reading/__init__.py index bd1d86a..57a4312 100644 --- a/app/services/impl/exam/reading/__init__.py +++ b/app/services/impl/exam/reading/__init__.py @@ -10,7 +10,7 @@ from app.services.abc import IReadingService, ILLMService from .fill_blanks import FillBlanks from .idea_match import IdeaMatch from .paragraph_match import ParagraphMatch -from ..shared import TrueFalse +from ..shared import TrueFalse, MultipleChoice from .import_reading import ImportReadingModule from .write_blanks import WriteBlanks @@ -24,6 +24,7 @@ class ReadingService(IReadingService): self._paragraph_match = ParagraphMatch(llm) self._true_false = TrueFalse(llm) self._write_blanks = WriteBlanks(llm) + self._multiple_choice = MultipleChoice(llm) self._logger = getLogger(__name__) self._import = ImportReadingModule(llm) @@ -119,6 +120,12 @@ class ReadingService(IReadingService): question["variant"] = "ideaMatch" self._logger.info(f"Added idea match: {question}") return question + elif req_exercise.type == "multipleChoice": + question = await self._multiple_choice.gen_multiple_choice( + text, req_exercise.quantity, start_id, difficulty, 4 + ) + self._logger.info(f"Added multiple choice: {question}") + return question async def generate_reading_exercises(self, dto: ReadingDTO): exercise_tasks = [] diff --git a/app/services/impl/exam/reading/import_reading.py b/app/services/impl/exam/reading/import_reading.py index 32eca34..2888e05 100644 --- a/app/services/impl/exam/reading/import_reading.py +++ b/app/services/impl/exam/reading/import_reading.py @@ -98,7 +98,11 @@ class ImportReadingModule: ] } ], - "text": "{{}}\\\\n] notice how there is a double backslash before the n -> I want an escaped newline in your output> ", + "text": ( + "{{}}\\\\n] " + "- notice how there the question number inside {{}} -> the text MUST always contain the question number in that format " + "- and notice how there is a double backslash before the n -> I want an escaped newline in your output> " + ), "type": "writeBlanks", "prompt": "" } @@ -192,13 +196,14 @@ class ImportReadingModule: + ( "Solutions were not provided - analyze the passage carefully to determine correct answers." if not solutions else - "Use the provided solutions to fill in all answer fields accurately." + "Use the provided solutions to fill in all answer fields accurately, if word answers have all letters " + "uppercase convert them to lowercase before assigning them." ) + - "Pay extra attention to fillblanks exercises the solution and option wording must match in case!" - "There can't be options in lowercase and solutions in uppercase!" + "Pay extra attention to fillblanks exercises the solution and option wording must match in case! " + "There can't be options in lowercase and solutions in uppercase! " "Also PAY ATTENTION TO SECTIONS, these most likely indicate parts, and in each section/part there " - "should be a text, if there isn't a title for it choose a reasonable one based on its contents." + "should be a text, if there isn't a title for it choose a reasonable one based on its contents. " ) return { diff --git a/app/services/impl/exam/shared/__init__.py b/app/services/impl/exam/shared/__init__.py index 5778d99..cda6463 100644 --- a/app/services/impl/exam/shared/__init__.py +++ b/app/services/impl/exam/shared/__init__.py @@ -1,5 +1,7 @@ from .true_false import TrueFalse +from .multiple_choice import MultipleChoice __all__ = [ - "TrueFalse" -] \ No newline at end of file + "TrueFalse", + "MultipleChoice" +] diff --git a/app/services/impl/exam/listening/multiple_choice.py b/app/services/impl/exam/shared/multiple_choice.py similarity index 89% rename from app/services/impl/exam/listening/multiple_choice.py rename to app/services/impl/exam/shared/multiple_choice.py index 7e3211e..5418331 100644 --- a/app/services/impl/exam/listening/multiple_choice.py +++ b/app/services/impl/exam/shared/multiple_choice.py @@ -11,7 +11,7 @@ class MultipleChoice: self._llm = llm async def gen_multiple_choice( - self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str, n_options: int = 4 + self, text: str, quantity: int, start_id: int, difficulty: str, n_options: int = 4 ): messages = [ { @@ -27,7 +27,7 @@ class MultipleChoice: "role": "user", "content": ( f'Generate {quantity} {difficulty} difficulty multiple choice questions of {n_options} ' - f'options for this {dialog_type}:\n"' + text + '"') + f'options for this text:\n"' + text + '"') } ]