Changes to endpoints so they allow fetching only the context first and then the exercises, as well as some general tidying up

This commit is contained in:
Carlos-Mesquita
2024-11-04 23:31:48 +00:00
parent 2a032c5aba
commit 84ed2f2f6a
83 changed files with 4229 additions and 1843 deletions

View File

@@ -0,0 +1,131 @@
from logging import getLogger
from fastapi import UploadFile
from app.configs.constants import GPTModels, FieldsAndExercises, TemperatureSettings
from app.dtos.reading import ReadingDTO
from app.helpers import ExercisesHelper
from app.services.abc import IReadingService, ILLMService
from .fill_blanks import FillBlanks
from .idea_match import IdeaMatch
from .paragraph_match import ParagraphMatch
from .true_false import TrueFalse
from .import_reading import ImportReadingModule
from .write_blanks import WriteBlanks
class ReadingService(IReadingService):
    """Facade for the reading section: generates passages and exercises via the
    LLM, and imports existing exams from uploaded files."""

    def __init__(self, llm: ILLMService):
        self._llm = llm
        # One generator per supported exercise type, all sharing the same LLM client.
        self._fill_blanks = FillBlanks(llm)
        self._idea_match = IdeaMatch(llm)
        self._paragraph_match = ParagraphMatch(llm)
        self._true_false = TrueFalse(llm)
        self._write_blanks = WriteBlanks(llm)
        self._logger = getLogger(__name__)
        self._import = ImportReadingModule(llm)

    async def import_exam(self, exercises: UploadFile, solutions: UploadFile | None = None):
        """Parse an uploaded exam sheet (and optional solutions sheet) into a structured exam."""
        return await self._import.import_from_file(exercises, solutions)

    async def generate_reading_passage(self, part: int, topic: str, word_count: int = 800):
        """Generate an IELTS reading passage for *part* on *topic*.

        Returns the LLM's JSON with the fields declared in GEN_TEXT_FIELDS
        (title/text). NOTE: valid parts are 1-3; any other value raises
        KeyError on the difficulty-guidance lookup below.
        """
        # Per-part difficulty guidance; keys are strings because the lookup is
        # done on str(part).
        part_system_message = {
            "1": 'The generated text should be fairly easy to understand and have multiple paragraphs.',
            "2": 'The generated text should be fairly hard to understand and have multiple paragraphs.',
            "3": (
                'The generated text should be very hard to understand and include different points, theories, '
                'subtle differences of opinions from people, correctly sourced to the person who said it, '
                'over the specified topic and have multiple paragraphs.'
            )
        }
        messages = [
            {
                "role": "system",
                "content": (
                    'You are a helpful assistant designed to output JSON on this format: '
                    '{"title": "title of the text", "text": "generated text"}')
            },
            {
                "role": "user",
                "content": (
                    f'Generate an extensive text for IELTS Reading Passage {part}, of at least {word_count} words, '
                    f'on the topic of "{topic}". The passage should offer a substantial amount of '
                    'information, analysis, or narrative relevant to the chosen subject matter. This text '
                    'passage aims to serve as the primary reading section of an IELTS test, providing an '
                    'in-depth and comprehensive exploration of the topic. Make sure that the generated text '
                    'does not contain forbidden subjects in muslim countries.'
                )
            },
            {
                "role": "system",
                "content": part_system_message[str(part)]
            }
        ]
        if part == 3:
            # Part 3 passages additionally cite real excerpts and sources.
            messages.append({
                "role": "user",
                "content": "Use real text excerpts on your generated passage and cite the sources."
            })
        return await self._llm.prediction(
            GPTModels.GPT_4_O,
            messages,
            FieldsAndExercises.GEN_TEXT_FIELDS,
            TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )

    async def generate_reading_exercises(self, dto: ReadingDTO):
        """Generate every exercise requested in *dto* against dto.text.

        Question ids are numbered consecutively across exercises: each request
        advances start_id by its quantity, whether or not an exercise was
        actually added (so numbering stays aligned with the request).
        Returns {"exercises": [...]} in request order.
        """
        exercises = []
        start_id = 1
        for req_exercise in dto.exercises:
            if req_exercise.type == "fillBlanks":
                question = await self._fill_blanks.gen_summary_fill_blanks_exercise(
                    dto.text, req_exercise.quantity, start_id, dto.difficulty, req_exercise.num_random_words
                )
                exercises.append(question)
                self._logger.info("Added fill blanks: %s", question)
            elif req_exercise.type == "trueFalse":
                question = await self._true_false.gen_true_false_not_given_exercise(
                    dto.text, req_exercise.quantity, start_id, dto.difficulty
                )
                exercises.append(question)
                self._logger.info("Added trueFalse: %s", question)
            elif req_exercise.type == "writeBlanks":
                question = await self._write_blanks.gen_write_blanks_exercise(
                    dto.text, req_exercise.quantity, start_id, dto.difficulty, req_exercise.max_words
                )
                # Only keep the exercise when every answer respects the word
                # limit; otherwise emit a placeholder so positions are kept.
                if ExercisesHelper.answer_word_limit_ok(question):
                    exercises.append(question)
                    self._logger.info("Added write blanks: %s", question)
                else:
                    exercises.append({})
                    self._logger.info("Did not add write blanks because it did not respect word limit")
            elif req_exercise.type == "paragraphMatch":
                question = await self._paragraph_match.gen_paragraph_match_exercise(
                    dto.text, req_exercise.quantity, start_id
                )
                exercises.append(question)
                self._logger.info("Added paragraph match: %s", question)
            elif req_exercise.type == "ideaMatch":
                question = await self._idea_match.gen_idea_match_exercise(
                    dto.text, req_exercise.quantity, start_id
                )
                # Both paragraph- and idea-match share the matchSentences type;
                # the variant flag lets the frontend tell them apart.
                question["variant"] = "ideaMatch"
                exercises.append(question)
                self._logger.info("Added idea match: %s", question)
            # Advance the numbering even for unknown/skipped types so ids stay
            # aligned with the requested quantities.
            start_id += req_exercise.quantity
        return {
            "exercises": exercises
        }

View File

@@ -0,0 +1,73 @@
import uuid
from app.configs.constants import GPTModels, TemperatureSettings
from app.helpers import ExercisesHelper
from app.services.abc import ILLMService
class FillBlanks:
    """Builds summary fill-in-the-blanks exercises: summarize the passage,
    blank out LLM-selected words, and offer a padded, shuffled word bank."""

    def __init__(self, llm: ILLMService):
        self._llm = llm

    async def gen_summary_fill_blanks_exercise(
        self, text: str, quantity: int, start_id, difficulty, num_random_words: int = 1
    ):
        """Return a fillBlanks exercise dict built from a summary of *text*.

        start_id seeds the blank numbering; num_random_words is how many decoy
        words get mixed into the word bank.
        """
        # Step 1: summarize the passage.
        summary_messages = [
            {
                "role": "system",
                "content": (
                    'You are a helpful assistant designed to output JSON on this format: { "summary": "summary" }'
                )
            },
            {
                "role": "user",
                "content": f'Summarize this text: "{text}"'
            }
        ]
        summary_reply = await self._llm.prediction(
            GPTModels.GPT_4_O, summary_messages, ["summary"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )
        summary = summary_reply["summary"]
        # Step 2: pick the words that will become blanks.
        word_messages = [
            {
                "role": "system",
                "content": (
                    'You are a helpful assistant designed to output JSON on this format: '
                    '{"words": ["word_1", "word_2"] }'
                )
            },
            {
                "role": "user",
                "content": (
                    f'Select {quantity} {difficulty} difficulty words, it must be words and not expressions, '
                    f'from this:\n{summary}'
                )
            }
        ]
        word_reply = await self._llm.prediction(
            GPTModels.GPT_4_O, word_messages, ["words"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )
        chosen_words = word_reply["words"]
        # Step 3: assemble the exercise. The helper-call order matters because
        # add_random_words_and_shuffle may mutate the word list in place.
        blanked_summary = ExercisesHelper.replace_first_occurrences_with_placeholders(
            summary, chosen_words, start_id
        )
        word_bank = ExercisesHelper.add_random_words_and_shuffle(chosen_words, num_random_words)
        answer_key = ExercisesHelper.fillblanks_build_solutions_array(chosen_words, start_id)
        return {
            "allowRepetition": True,
            "id": str(uuid.uuid4()),
            "prompt": (
                "Complete the summary below. Write the letter of the corresponding word(s) for it.\\nThere are "
                "more words than spaces so you will not use them all. You may use any of the words more than once."
            ),
            "solutions": answer_key,
            "text": blanked_summary,
            "type": "fillBlanks",
            "words": word_bank
        }

View File

@@ -0,0 +1,46 @@
import uuid
from app.configs.constants import GPTModels, TemperatureSettings
from app.helpers import ExercisesHelper
from app.services.abc import ILLMService
class IdeaMatch:
    """Builds author/idea matching exercises from a passage."""

    def __init__(self, llm: ILLMService):
        self._llm = llm

    async def gen_idea_match_exercise(self, text: str, quantity: int, start_id: int):
        """Extract attributed ideas/opinions from *text* and shape them into a
        matchSentences exercise numbered from start_id."""
        system_message = {
            "role": "system",
            "content": (
                'You are a helpful assistant designed to output JSON on this format: '
                '{"ideas": [ '
                '{"idea": "some idea or opinion", "from": "person, institution whose idea or opinion this is"}, '
                '{"idea": "some other idea or opinion", "from": "person, institution whose idea or opinion this is"}'
                ']}'
            )
        }
        user_message = {
            "role": "user",
            "content": (
                f'From the text extract {quantity} ideas, theories, opinions and who they are from. '
                f'The text: {text}'
            )
        }
        reply = await self._llm.prediction(
            GPTModels.GPT_4_O, [system_message, user_message], ["ideas"],
            TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )
        extracted_ideas = reply["ideas"]
        return {
            "id": str(uuid.uuid4()),
            "allowRepetition": False,
            "options": ExercisesHelper.build_options(extracted_ideas),
            "prompt": "Choose the correct author for the ideas/opinions from the list of authors below.",
            "sentences": ExercisesHelper.build_sentences(extracted_ideas, start_id),
            "type": "matchSentences"
        }

View File

@@ -0,0 +1,190 @@
from logging import getLogger
from typing import Dict, Any
from uuid import uuid4
import aiofiles
from fastapi import UploadFile
from app.helpers import FileHelper
from app.mappers.reading import ReadingMapper
from app.services.abc import ILLMService
from app.dtos.exams.reading import Exam
class ImportReadingModule:
    """Imports an IELTS reading exam from uploaded files: converts them to
    HTML, then asks the LLM to structure the content into the exam schema."""

    def __init__(self, openai: ILLMService):
        self._logger = getLogger(__name__)
        self._llm = openai

    async def import_from_file(
        self, exercises: UploadFile, solutions: UploadFile = None
    ) -> Dict[str, Any] | None:
        """Save the uploads under a unique tmp directory, convert them to HTML,
        extract the structured exam via the LLM, then clean up.

        Returns the exam as a dict (None-valued fields dropped), or None when
        extraction produced nothing.
        """
        path_id = str(uuid4())
        ext, _ = await FileHelper.save_upload(exercises, "exercises", path_id)
        FileHelper.convert_file_to_html(f'./tmp/{path_id}/exercises.{ext}', f'./tmp/{path_id}/exercises.html')
        if solutions:
            ext, _ = await FileHelper.save_upload(solutions, "solutions", path_id)
            FileHelper.convert_file_to_html(f'./tmp/{path_id}/solutions.{ext}', f'./tmp/{path_id}/solutions.html')
        response = await self._get_reading_parts(path_id, solutions is not None)
        # Always clean up the temporary conversion artifacts.
        FileHelper.remove_directory(f'./tmp/{path_id}')
        if response:
            return response.model_dump(exclude_none=True)
        return None

    async def _get_reading_parts(self, path_id: str, solutions: bool = False) -> Exam:
        """Read the converted HTML file(s) and ask the LLM for the structured exam."""
        async with aiofiles.open(f'./tmp/{path_id}/exercises.html', 'r', encoding='utf-8') as f:
            exercises_html = await f.read()
        messages = [
            # BUG FIX: previously called self._instructions() without the flag,
            # so the prompt always claimed no solutions were supplied even when
            # a solutions sheet was attached below.
            self._instructions(solutions),
            {
                "role": "user",
                "content": f"Exam question sheet:\n\n{exercises_html}"
            }
        ]
        if solutions:
            async with aiofiles.open(f'./tmp/{path_id}/solutions.html', 'r', encoding='utf-8') as f:
                solutions_html = await f.read()
            messages.append({
                "role": "user",
                "content": f"Solutions:\n\n{solutions_html}"
            })
        return await self._llm.pydantic_prediction(
            messages,
            ReadingMapper.map_to_exam_model,
            str(self._reading_json_schema())
        )

    def _reading_json_schema(self):
        """Full exam template with one example of each exercise schema filled in."""
        schema = self._reading_exam_template()
        schema["parts"][0]["exercises"] = [
            self._write_blanks(),
            self._fill_blanks(),
            self._match_sentences(),
            self._true_false()
        ]
        # BUG FIX: the template was built but never returned, so callers got
        # None and the LLM prompt embedded the literal string "None".
        return schema

    @staticmethod
    def _reading_exam_template():
        """Skeleton of the whole exam document (parts filled in elsewhere)."""
        return {
            "minTimer": "<number of minutes as int not string>",
            "parts": [
                {
                    "text": {
                        "title": "<title of the passage>",
                        "content": "<the text of the passage>",
                    },
                    "exercises": []
                }
            ]
        }

    @staticmethod
    def _write_blanks():
        """Schema template for short-answer (writeBlanks) exercises."""
        return {
            "maxWords": "<number of max words return the int value not string>",
            "solutions": [
                {
                    "id": "<number of the question as string>",
                    "solution": [
                        "<at least one solution can have alternative solutions (that dont exceed maxWords)>"
                    ]
                },
            ],
            "text": "<all the questions formatted in this way: <question>{{<id>}}\\n<question2>{{<id2>}}\\n >",
            "type": "writeBlanks"
        }

    @staticmethod
    def _match_sentences():
        """Schema template for heading/idea matching (matchSentences) exercises."""
        return {
            "options": [
                {
                    "id": "<uppercase letter that identifies a paragraph>",
                    "sentence": "<either a heading or an idea>"
                }
            ],
            "sentences": [
                {
                    "id": "<the question id not the option id>",
                    "solution": "<id in options>",
                    "sentence": "<heading or an idea>",
                }
            ],
            "type": "matchSentences",
            "variant": "<heading OR ideaMatch (try to figure it out via the exercises instructions)>"
        }

    @staticmethod
    def _true_false():
        """Schema template for true/false/not-given exercises."""
        return {
            "questions": [
                {
                    "prompt": "<question>",
                    "solution": "<can only be one of these [\"true\", \"false\", \"not_given\"]>",
                    "id": "<the question id>"
                }
            ],
            "type": "trueFalse"
        }

    @staticmethod
    def _fill_blanks():
        """Schema template for fill-in-the-blanks exercises with a word bank."""
        return {
            "solutions": [
                {
                    "id": "<blank id>",
                    "solution": "<word>"
                }
            ],
            "text": "<section of text with blanks denoted by {{<blank id>}}>",
            "type": "fillBlanks",
            "words": [
                {
                    "letter": "<uppercase letter that ids the words (may not be included and if not start at A)>",
                    "word": "<word>"
                }
            ]
        }

    def _instructions(self, solutions: bool = False):
        """System prompt telling the LLM how to structure the exam JSON.

        *solutions* indicates whether a solutions sheet is also supplied.
        """
        solutions_str = " and its solutions" if solutions else ""
        # BUG FIX: the two concatenated literals below used to join as
        # "...information andall the solutions..." (missing space).
        tail = (
            "The solutions were not supplied so you will have to solve them. Do your utmost to get all the information and "
            "all the solutions right!"
            if not solutions else
            "Do your utmost to correctly identify the sections, its exercises and respective solutions"
        )
        return {
            "role": "system",
            "content": (
                f"You will receive html pertaining to an english exam question sheet{solutions_str}. Your job is to "
                f"structure the data into a single json with this template: {self._reading_exam_template()}\n"
                "You will need find out how many parts the exam has a correctly place its exercises. You will "
                "encounter 4 types of exercises:\n"
                " - \"writeBlanks\": short answer questions that have a answer word limit, generally two or three\n"
                " - \"matchSentences\": a sentence needs to be matched with a paragraph\n"
                " - \"trueFalse\": questions that its answers can only be true false or not given\n"
                " - \"fillBlanks\": a text that has blank spaces on a section of text and a word bank which "
                "contains the solutions and sometimes random words to throw off the students\n"
                "These 4 types of exercises will need to be placed in the correct json template inside each part, "
                "the templates are as follows:\n "
                f"writeBlanks: {self._write_blanks()}\n"
                f"matchSentences: {self._match_sentences()}\n"
                f"trueFalse: {self._true_false()}\n"
                f"fillBlanks: {self._fill_blanks()}\n\n"
                f"{tail}"
            )
        }

View File

@@ -0,0 +1,63 @@
import random
import uuid
from app.configs.constants import GPTModels, TemperatureSettings
from app.helpers import ExercisesHelper
from app.services.abc import ILLMService
class ParagraphMatch:
    """Builds heading-to-paragraph matching exercises from a passage."""

    def __init__(self, llm: ILLMService):
        self._llm = llm

    async def gen_paragraph_match_exercise(self, text: str, quantity: int, start_id: int):
        """Letter the paragraphs of *text*, have the LLM write one heading per
        paragraph, then pair shuffled headings against their paragraph letters."""
        paragraphs = ExercisesHelper.assign_letters_to_paragraphs(text)
        conversation = [
            {
                "role": "system",
                "content": (
                    'You are a helpful assistant designed to output JSON on this format: '
                    '{"headings": [ {"heading": "first paragraph heading"}, {"heading": "second paragraph heading"}]}'
                )
            },
            {
                "role": "user",
                "content": (
                    'For every paragraph of the list generate a minimum 5 word heading for it. '
                    f'The paragraphs are these: {str(paragraphs)}'
                )
            }
        ]
        reply = await self._llm.prediction(
            GPTModels.GPT_4_O, conversation, ["headings"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )
        generated_headings = reply["headings"]
        options = []
        for idx, para in enumerate(paragraphs):
            # Attach each generated heading to its paragraph, in order.
            para["heading"] = generated_headings[idx]["heading"]
            options.append({
                "id": para["letter"],
                "sentence": para["paragraph"]
            })
        # Shuffle so the heading order doesn't simply mirror the paragraph order.
        random.shuffle(paragraphs)
        sentences = [
            {
                "id": number,
                "sentence": para["heading"],
                "solution": para["letter"]
            }
            for number, para in enumerate(paragraphs, start=start_id)
        ]
        return {
            "id": str(uuid.uuid4()),
            "allowRepetition": False,
            "options": options,
            "prompt": "Choose the correct heading for paragraphs from the list of headings below.",
            "sentences": sentences[:quantity],
            "type": "matchSentences"
        }

View File

@@ -0,0 +1,49 @@
import uuid
from app.configs.constants import GPTModels, TemperatureSettings
from app.helpers import ExercisesHelper
from app.services.abc import ILLMService
class TrueFalse:
    """Builds true/false/not-given exercises from a passage."""

    def __init__(self, llm: ILLMService):
        self._llm = llm

    async def gen_true_false_not_given_exercise(self, text: str, quantity: int, start_id: int, difficulty: str):
        """Ask the LLM for *quantity* statements about *text*, trim any excess,
        and number the survivors consecutively from start_id."""
        conversation = [
            {
                "role": "system",
                "content": (
                    'You are a helpful assistant designed to output JSON on this format: '
                    '{"prompts":[{"prompt": "statement_1", "solution": "true/false/not_given"}, '
                    '{"prompt": "statement_2", "solution": "true/false/not_given"}]}')
            },
            {
                "role": "user",
                "content": (
                    f'Generate {str(quantity)} {difficulty} difficulty statements based on the provided text. '
                    'Ensure that your statements accurately represent information or inferences from the text, and '
                    'provide a variety of responses, including, at least one of each True, False, and Not Given, '
                    f'as appropriate.\n\nReference text:\n\n {text}'
                )
            }
        ]
        reply = await self._llm.prediction(
            GPTModels.GPT_4_O, conversation, ["prompts"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )
        statements = reply["prompts"]
        surplus = len(statements) - quantity
        if surplus > 0:
            # The LLM sometimes over-delivers; drop the extras.
            statements = ExercisesHelper.remove_excess_questions(statements, surplus)
        for number, statement in enumerate(statements, start=start_id):
            statement["id"] = str(number)
        return {
            "id": str(uuid.uuid4()),
            "prompt": "Do the following statements agree with the information given in the Reading Passage?",
            "questions": statements,
            "type": "trueFalse"
        }

View File

@@ -0,0 +1,44 @@
import uuid
from app.configs.constants import GPTModels, TemperatureSettings
from app.helpers import ExercisesHelper
from app.services.abc import ILLMService
class WriteBlanks:
    """Builds short-answer ("write blanks") exercises from a passage."""

    def __init__(self, llm: ILLMService):
        self._llm = llm

    async def gen_write_blanks_exercise(self, text: str, quantity: int, start_id: int, difficulty: str, max_words: int = 3):
        """Ask the LLM for short-answer questions whose answers fit within
        *max_words* words, and shape them into a writeBlanks exercise."""
        conversation = [
            {
                "role": "system",
                "content": (
                    'You are a helpful assistant designed to output JSON on this format: '
                    '{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}'
                )
            },
            {
                "role": "user",
                "content": (
                    f'Generate {str(quantity)} {difficulty} difficulty short answer questions, and the '
                    f'possible answers, must have maximum {max_words} words per answer, about this text:\n"{text}"'
                )
            }
        ]
        reply = await self._llm.prediction(
            GPTModels.GPT_4_O, conversation, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )
        # Keep at most the requested number of questions.
        selected_questions = reply["questions"][:quantity]
        return {
            "id": str(uuid.uuid4()),
            "maxWords": max_words,
            "prompt": f"Choose no more than {max_words} words and/or a number from the passage for each answer.",
            "solutions": ExercisesHelper.build_write_blanks_solutions(selected_questions, start_id),
            "text": ExercisesHelper.build_write_blanks_text(selected_questions, start_id),
            "type": "writeBlanks"
        }