Brushed up the backend, added writing task 1 academic prompt gen and grading ENCOA-274

This commit is contained in:
Carlos-Mesquita
2024-12-10 22:24:40 +00:00
parent 68cab80851
commit 6982068864
167 changed files with 1411 additions and 1229 deletions

View File

@@ -0,0 +1,147 @@
import asyncio
from logging import getLogger
from fastapi import UploadFile
from ielts_be.configs.constants import GPTModels, FieldsAndExercises, TemperatureSettings
from ielts_be.dtos.reading import ReadingDTO
from ielts_be.helpers import ExercisesHelper
from ielts_be.services import IReadingService, ILLMService
from .fill_blanks import FillBlanks
from .idea_match import IdeaMatch
from .paragraph_match import ParagraphMatch
from ..shared import TrueFalse, MultipleChoice
from .import_reading import ImportReadingModule
from .write_blanks import WriteBlanks
class ReadingService(IReadingService):
    """Orchestrates IELTS reading passage generation, exercise generation and
    exam imports behind the IReadingService interface.

    Each supported exercise type is delegated to a dedicated generator module
    constructed with the shared LLM service.
    """

    def __init__(self, llm: ILLMService):
        self._llm = llm
        # One generator module per supported exercise type.
        self._fill_blanks = FillBlanks(llm)
        self._idea_match = IdeaMatch(llm)
        self._paragraph_match = ParagraphMatch(llm)
        self._true_false = TrueFalse(llm)
        self._write_blanks = WriteBlanks(llm)
        self._multiple_choice = MultipleChoice(llm)
        self._logger = getLogger(__name__)
        self._import = ImportReadingModule(llm)

    async def import_exam(self, exercises: UploadFile, solutions: UploadFile = None):
        """Import a reading exam (and optional solutions sheet) from uploaded files."""
        return await self._import.import_from_file(exercises, solutions)

    async def generate_reading_passage(self, part: int, topic: str, word_count: int = 800):
        """Generate a reading passage for IELTS reading part 1, 2 or 3.

        :param part: reading part number; controls the difficulty instruction.
            Raises KeyError if ``part`` is not 1, 2 or 3.
        :param topic: subject the passage should cover.
        :param word_count: minimum word count requested from the model.
        :return: LLM prediction result (JSON with "title" and "text" fields).
        """
        # Difficulty/style instruction keyed by part number.
        part_system_message = {
            "1": 'The generated text should be fairly easy to understand and have multiple paragraphs.',
            "2": 'The generated text should be fairly hard to understand and have multiple paragraphs.',
            "3": (
                'The generated text should be very hard to understand and include different points, theories, '
                'subtle differences of opinions from people, correctly sourced to the person who said it, '
                'over the specified topic and have multiple paragraphs.'
            )
        }
        messages = [
            {
                "role": "system",
                "content": (
                    'You are a helpful assistant designed to output JSON on this format: '
                    '{"title": "title of the text", "text": "generated text"}')
            },
            {
                "role": "user",
                "content": (
                    f'Generate an extensive text for IELTS Reading Passage {part}, of at least {word_count} words, '
                    f'on the topic of "{topic}". The passage should offer a substantial amount of '
                    'information, analysis, or narrative relevant to the chosen subject matter. This text '
                    'passage aims to serve as the primary reading section of an IELTS test, providing an '
                    'in-depth and comprehensive exploration of the topic. Make sure that the generated text '
                    'does not contain forbidden subjects in muslim countries.'
                )
            },
            {
                "role": "system",
                "content": part_system_message[str(part)]
            }
        ]
        # Part 3 passages additionally require sourced real-world excerpts.
        if part == 3:
            messages.append({
                "role": "user",
                "content": "Use real text excerpts on your generated passage and cite the sources."
            })
        return await self._llm.prediction(
            GPTModels.GPT_4_O,
            messages,
            FieldsAndExercises.GEN_TEXT_FIELDS,
            TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )

    async def _generate_single_exercise(self, req_exercise, text: str, start_id: int, difficulty: str) -> dict:
        """Generate one exercise of the requested type over ``text``.

        Returns the generated exercise dict, or ``{}`` when the exercise was
        rejected (word-limit violation) or the type is unknown.
        """
        if req_exercise.type == "fillBlanks":
            question = await self._fill_blanks.gen_summary_fill_blanks_exercise(
                text, req_exercise.quantity, start_id, difficulty, req_exercise.num_random_words
            )
            self._logger.info(f"Added fill blanks: {question}")
            return question
        elif req_exercise.type == "trueFalse":
            question = await self._true_false.gen_true_false_not_given_exercise(
                text, req_exercise.quantity, start_id, difficulty, "reading"
            )
            self._logger.info(f"Added trueFalse: {question}")
            return question
        elif req_exercise.type == "writeBlanks":
            question = await self._write_blanks.gen_write_blanks_exercise(
                text, req_exercise.quantity, start_id, difficulty, req_exercise.max_words
            )
            if ExercisesHelper.answer_word_limit_ok(question):
                self._logger.info(f"Added write blanks: {question}")
                return question
            else:
                self._logger.info("Did not add write blanks because it did not respect word limit")
                return {}
        elif req_exercise.type == "paragraphMatch":
            question = await self._paragraph_match.gen_paragraph_match_exercise(
                text, req_exercise.quantity, start_id
            )
            self._logger.info(f"Added paragraph match: {question}")
            return question
        elif req_exercise.type == "ideaMatch":
            question = await self._idea_match.gen_idea_match_exercise(
                text, req_exercise.quantity, start_id
            )
            question["variant"] = "ideaMatch"
            self._logger.info(f"Added idea match: {question}")
            return question
        elif req_exercise.type == "multipleChoice":
            question = await self._multiple_choice.gen_multiple_choice(
                text, req_exercise.quantity, start_id, difficulty, 4
            )
            self._logger.info(f"Added multiple choice: {question}")
            return question
        else:
            # Fix: previously an unknown type fell off the elif chain and
            # returned None, which leaked into the gathered exercises list.
            self._logger.warning(f"Unknown exercise type requested: {req_exercise.type}")
            return {}

    async def generate_reading_exercises(self, dto: ReadingDTO):
        """Generate all requested exercises concurrently.

        Question ids are assigned sequentially: each exercise's ``start_id``
        is offset by the quantities of the exercises requested before it.
        """
        exercise_tasks = []
        start_id = 1
        for req_exercise in dto.exercises:
            exercise_tasks.append(
                self._generate_single_exercise(
                    req_exercise,
                    dto.text,
                    start_id,
                    dto.difficulty
                )
            )
            start_id += req_exercise.quantity
        return {
            "exercises": await asyncio.gather(*exercise_tasks)
        }

View File

@@ -0,0 +1,73 @@
import uuid
from ielts_be.configs.constants import GPTModels, TemperatureSettings
from ielts_be.helpers import ExercisesHelper
from ielts_be.services import ILLMService
class FillBlanks:
    """Builds summary fill-in-the-blanks exercises from a reading passage."""

    def __init__(self, llm: ILLMService):
        self._llm = llm

    async def gen_summary_fill_blanks_exercise(
        self, text: str, quantity: int, start_id, difficulty, num_random_words: int = 1
    ):
        """Summarize the passage, blank out selected words and assemble the exercise.

        Two LLM calls: one to summarize the text, one to pick the words that
        become blanks. Helper routines then gap the summary, build the word
        bank (with distractors) and produce the answer key.
        """
        # Step 1: summarize the passage.
        summary_request = [
            {
                "role": "system",
                "content": (
                    'You are a helpful assistant designed to output JSON on this format: { "summary": "summary" }'
                )
            },
            {
                "role": "user",
                "content": f'Summarize this text: "{text}"'
            }
        ]
        response = await self._llm.prediction(
            GPTModels.GPT_4_O, summary_request, ["summary"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )
        # Step 2: choose the words to remove from the summary.
        word_request = [
            {
                "role": "system",
                "content": (
                    'You are a helpful assistant designed to output JSON on this format: '
                    '{"words": ["word_1", "word_2"] }'
                )
            },
            {
                "role": "user",
                "content": (
                    f'Select {quantity} {difficulty} difficulty words, it must be words and not expressions, '
                    f'from this:\n{response["summary"]}'
                )
            }
        ]
        words_response = await self._llm.prediction(
            GPTModels.GPT_4_O, word_request, ["words"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )
        chosen_words = words_response["words"]
        response["words"] = chosen_words
        # Gap the summary, then build the shuffled option bank and solutions.
        gapped_summary = ExercisesHelper.replace_first_occurrences_with_placeholders(
            response["summary"], chosen_words, start_id
        )
        word_bank = ExercisesHelper.add_random_words_and_shuffle(chosen_words, num_random_words)
        answer_key = ExercisesHelper.fillblanks_build_solutions_array(chosen_words, start_id)
        return {
            "allowRepetition": True,
            "id": str(uuid.uuid4()),
            "prompt": (
                "Complete the summary below. Write the letter of the corresponding word(s) for it.\\nThere are "
                "more words than spaces so you will not use them all. You may use any of the words more than once."
            ),
            "solutions": answer_key,
            "text": gapped_summary,
            "type": "fillBlanks",
            "words": word_bank
        }

View File

@@ -0,0 +1,46 @@
import uuid
from ielts_be.configs.constants import GPTModels, TemperatureSettings
from ielts_be.helpers import ExercisesHelper
from ielts_be.services import ILLMService
class IdeaMatch:
    """Builds idea/opinion-to-author matching exercises from a passage."""

    def __init__(self, llm: ILLMService):
        self._llm = llm

    async def gen_idea_match_exercise(self, text: str, quantity: int, start_id: int):
        """Extract ideas/opinions and their authors, then build a matchSentences exercise."""
        request = [
            {
                "role": "system",
                "content": (
                    'You are a helpful assistant designed to output JSON on this format: '
                    '{"ideas": [ '
                    '{"idea": "some idea or opinion", "from": "person, institution whose idea or opinion this is"}, '
                    '{"idea": "some other idea or opinion", "from": "person, institution whose idea or opinion this is"}'
                    ']}'
                )
            },
            {
                "role": "user",
                "content": (
                    f'From the text extract {quantity} ideas, theories, opinions and who they are from. '
                    f'The text: {text}'
                )
            }
        ]
        reply = await self._llm.prediction(
            GPTModels.GPT_4_O, request, ["ideas"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )
        extracted = reply["ideas"]
        # Options are the authors; sentences are the ideas to be matched.
        exercise = {
            "id": str(uuid.uuid4()),
            "allowRepetition": False,
            "options": ExercisesHelper.build_options(extracted),
            "prompt": "Choose the correct author for the ideas/opinions from the list of authors below.",
            "sentences": ExercisesHelper.build_sentences(extracted, start_id),
            "type": "matchSentences"
        }
        return exercise

View File

@@ -0,0 +1,237 @@
from logging import getLogger
from typing import Dict, Any
from uuid import uuid4
import aiofiles
from fastapi import UploadFile
from ielts_be.helpers import FileHelper
from ielts_be.mappers.reading import ReadingMapper
from ielts_be.services import ILLMService
from ielts_be.dtos.exams.reading import Exam
class ImportReadingModule:
    """Imports a reading exam from uploaded files.

    Uploaded documents are converted to HTML in a per-request temp directory,
    then handed to the LLM together with a JSON schema so the exam can be
    parsed into the project's Exam model.
    """

    def __init__(self, openai: ILLMService):
        self._logger = getLogger(__name__)
        self._llm = openai

    async def import_from_file(
        self, exercises: UploadFile, solutions: UploadFile = None
    ) -> Dict[str, Any] | None:
        """Convert the uploaded exam (and optional solutions) and parse it.

        :param exercises: the exam question sheet upload.
        :param solutions: optional answer-sheet upload.
        :return: the structured exam as a plain dict, or None when parsing
            produced no usable result.
        """
        path_id = str(uuid4())
        # Fix: cleanup used to be skipped when conversion or LLM parsing
        # raised, leaking ./tmp/<path_id>. try/finally guarantees removal.
        try:
            ext, _ = await FileHelper.save_upload(exercises, "exercises", path_id)
            FileHelper.convert_file_to_html(f'./tmp/{path_id}/exercises.{ext}', f'./tmp/{path_id}/exercises.html')
            if solutions:
                ext, _ = await FileHelper.save_upload(solutions, "solutions", path_id)
                FileHelper.convert_file_to_html(f'./tmp/{path_id}/solutions.{ext}', f'./tmp/{path_id}/solutions.html')
            response = await self._get_reading_parts(path_id, solutions is not None)
        finally:
            FileHelper.remove_directory(f'./tmp/{path_id}')
        if response:
            return response.model_dump(exclude_none=True)
        return None

    async def _get_reading_parts(self, path_id: str, solutions: bool = False) -> Exam:
        """Read the converted HTML file(s) and ask the LLM to structure the exam.

        :param path_id: temp-directory id where the HTML files were written.
        :param solutions: whether a solutions HTML file is also available.
        """
        async with aiofiles.open(f'./tmp/{path_id}/exercises.html', 'r', encoding='utf-8') as f:
            exercises_html = await f.read()
        messages = [
            self._instructions(solutions),
            {
                "role": "user",
                "content": f"Exam question sheet:\n\n{exercises_html}"
            }
        ]
        if solutions:
            async with aiofiles.open(f'./tmp/{path_id}/solutions.html', 'r', encoding='utf-8') as f:
                solutions_html = await f.read()
            messages.append({
                "role": "user",
                "content": f"Solutions:\n\n{solutions_html}"
            })
        return await self._llm.pydantic_prediction(
            messages,
            ReadingMapper.map_to_exam_model,
            str(self._reading_json_schema())
        )

    def _reading_json_schema(self):
        """Assemble the full exam schema: the exam template with one example of
        every supported exercise template attached to the first part."""
        json = self._reading_exam_template()
        json["parts"][0]["exercises"] = [
            self._write_blanks(),
            self._fill_blanks(),
            self._match_sentences(),
            self._true_false(),
            self._multiple_choice()
        ]
        return json

    @staticmethod
    def _reading_exam_template():
        """Top-level exam template: timer plus parts with text and exercises."""
        return {
            "minTimer": "<integer representing minutes allowed for the exam>",
            "parts": [
                {
                    "text": {
                        "title": "<title of the reading passage>",
                        "content": "<full text content of the reading passage>",
                    },
                    "exercises": []
                }
            ]
        }

    @staticmethod
    def _write_blanks():
        """Template for short-answer (writeBlanks) exercises."""
        return {
            "maxWords": "<integer max words allowed per answer>",
            "solutions": [
                {
                    "id": "<question number as string>",
                    "solution": [
                        "<acceptable answer(s) within maxWords limit>"
                    ]
                }
            ],
            # Fix: "notice how there the question number inside" was garbled
            # instruction grammar.
            "text": (
                "<numbered questions with format in square brackets: [<question text>{{<question number>}}\\\\n] "
                "- notice how the question number is inside {{}} -> the text MUST always contain the question number in that format "
                "- and notice how there is a double backslash before the n -> I want an escaped newline in your output> "
            ),
            "type": "writeBlanks",
            "prompt": "<specific instructions for this exercise section>"
        }

    @staticmethod
    def _match_sentences():
        """Template for heading/idea matching (matchSentences) exercises."""
        return {
            "options": [
                {
                    "id": "<paragraph letter A-F>",
                    "sentence": "<THIS NEEDS TO BE A PARAGRAPH OF THE SECTION TEXT>"
                }
            ],
            "sentences": [
                {
                    "id": "<question number as string>",
                    "solution": "<matching paragraph letter>",
                    # Fix: typo "AND IDEA" -> "AN IDEA".
                    "sentence": "<A SHORT SENTENCE THAT CONVEYS AN IDEA OR HEADING>"
                }
            ],
            "type": "matchSentences",
            "variant": "<heading OR ideaMatch (try to figure it out via the exercises instructions)>",
            "prompt": "<specific instructions for this exercise section>"
        }

    @staticmethod
    def _true_false():
        """Template for True/False/Not Given exercises."""
        return {
            "questions": [
                {
                    "id": "<question number>",
                    "prompt": "<statement to evaluate>",
                    "solution": "<one of: true, false, not_given>",
                }
            ],
            "type": "trueFalse",
            "prompt": "<specific instructions including T/F/NG marking scheme>"
        }

    @staticmethod
    def _multiple_choice():
        """Template for multiple-choice exercises."""
        return {
            "questions": [
                {
                    "id": "<question number>",
                    "prompt": "<question text>",
                    "options": [
                        {
                            "id": "<A, B, or C>",
                            "text": "<option text>"
                        }
                    ],
                    "solution": "<correct option letter>",
                    "variant": "text"
                }
            ],
            "type": "multipleChoice",
            "prompt": "<specific instructions for this exercise section>"
        }

    @staticmethod
    def _fill_blanks():
        """Template for word-bank fill-in-the-blanks exercises."""
        return {
            "solutions": [
                {
                    "id": "<blank number>",
                    "solution": "<correct word>"
                }
            ],
            "text": "<text passage with blanks marked as {{<blank number>}}>",
            "type": "fillBlanks",
            "words": [
                {
                    "letter": "<word identifier letter>",
                    "word": "<word from word bank>"
                }
            ],
            "prompt": "<specific instructions for this exercise section>"
        }

    def _instructions(self, solutions=False):
        """Build the system message guiding the LLM's parsing of the exam.

        :param solutions: whether a solutions sheet accompanies the exam.
        """
        solutions_str = " and its solutions" if solutions else ""
        # Fix: the conditional sentence used to be concatenated directly onto
        # "Pay extra attention..." with no separator, producing e.g.
        # "...determine correct answers.Pay extra attention...".
        tail = (
            "Parse the exam carefully and identify:\n"
            "1. Time limit from instructions\n"
            "2. Reading passage title and full content\n"
            "3. All exercise sections and their specific instructions\n"
            "4. Question numbering and grouping\n"
            "5. Word limits and formatting requirements\n"
            "6. Specific marking schemes (e.g., T/F/NG)\n\n"
            + (
                "Solutions were not provided - analyze the passage carefully to determine correct answers."
                if not solutions else
                "Use the provided solutions to fill in all answer fields accurately, if word answers have all letters "
                "uppercase convert them to lowercase before assigning them."
            )
            + "\n\n"
            "Pay extra attention to fillblanks exercises: the solution and option wording must match in case! "
            "There can't be options in lowercase and solutions in uppercase! "
            "Also PAY ATTENTION TO SECTIONS, these most likely indicate parts, and in each section/part there "
            "should be a text, if there isn't a title for it choose a reasonable one based on its contents. "
        )
        return {
            "role": "system",
            "content": (
                f"You are processing an English reading comprehension exam{solutions_str}. Structure the data according "
                f"to this json template: {self._reading_exam_template()}\n\n"
                "The exam contains these exercise types:\n"
                "1. \"writeBlanks\": Short answer questions with strict word limits\n"
                "2. \"matchSentences\": Match headings or ideas with paragraphs, the sentences field\n"
                "3. \"trueFalse\": Evaluate statements as True/False/Not Given\n"
                "4. \"fillBlanks\": Complete text using provided word bank\n"
                "5. \"multipleChoice\": Select correct option from choices\n\n"
                "Exercise templates:\n"
                f"writeBlanks: {self._write_blanks()}\n"
                f"matchSentences: {self._match_sentences()}\n"
                f"trueFalse: {self._true_false()}\n"
                f"fillBlanks: {self._fill_blanks()}\n"
                f"multipleChoice: {self._multiple_choice()}\n\n"
                "Important details to capture:\n"
                "- Exercise section instructions and constraints\n"
                "- Question numbering and grouping\n"
                "- Word limits and formatting requirements\n"
                "- Marking schemes and answer formats\n\n"
                f"{tail}"
            )
        }

View File

@@ -0,0 +1,63 @@
import random
import uuid
from ielts_be.configs.constants import GPTModels, TemperatureSettings
from ielts_be.helpers import ExercisesHelper
from ielts_be.services import ILLMService
class ParagraphMatch:
    """Builds paragraph-heading matching exercises from a reading passage."""

    def __init__(self, llm: ILLMService):
        self._llm = llm

    async def gen_paragraph_match_exercise(self, text: str, quantity: int, start_id: int):
        """Generate headings for each paragraph and build a matchSentences exercise.

        Options are the lettered paragraphs in original order; the heading
        sentences are shuffled and truncated to ``quantity``.
        """
        lettered = ExercisesHelper.assign_letters_to_paragraphs(text)
        request = [
            {
                "role": "system",
                "content": (
                    'You are a helpful assistant designed to output JSON on this format: '
                    '{"headings": [ {"heading": "first paragraph heading"}, {"heading": "second paragraph heading"}]}'
                )
            },
            {
                "role": "user",
                "content": (
                    'For every paragraph of the list generate a minimum 5 word heading for it. '
                    f'The paragraphs are these: {str(lettered)}'
                )
            }
        ]
        reply = await self._llm.prediction(
            GPTModels.GPT_4_O, request, ["headings"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )
        generated = reply["headings"]
        # Attach each generated heading to its paragraph and collect the
        # (letter, paragraph text) options in original order.
        options = []
        for idx, entry in enumerate(lettered):
            entry["heading"] = generated[idx]["heading"]
            options.append({
                "id": entry["letter"],
                "sentence": entry["paragraph"]
            })
        # Shuffle so heading order doesn't reveal paragraph order.
        random.shuffle(lettered)
        sentences = [
            {
                "id": qid,
                "sentence": entry["heading"],
                "solution": entry["letter"]
            }
            for qid, entry in enumerate(lettered, start=start_id)
        ]
        return {
            "id": str(uuid.uuid4()),
            "allowRepetition": False,
            "options": options,
            "prompt": "Choose the correct heading for paragraphs from the list of headings below.",
            "sentences": sentences[:quantity],
            "type": "matchSentences"
        }

View File

@@ -0,0 +1,44 @@
import uuid
from ielts_be.configs.constants import GPTModels, TemperatureSettings
from ielts_be.helpers import ExercisesHelper
from ielts_be.services import ILLMService
class WriteBlanks:
    """Builds short-answer (write-in-the-blanks) exercises from a passage."""

    def __init__(self, llm: ILLMService):
        self._llm = llm

    async def gen_write_blanks_exercise(self, text: str, quantity: int, start_id: int, difficulty: str, max_words: int = 3):
        """Generate short-answer questions whose answers fit within ``max_words`` words."""
        request = [
            {
                "role": "system",
                "content": (
                    'You are a helpful assistant designed to output JSON on this format: '
                    '{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}'
                )
            },
            {
                "role": "user",
                "content": (
                    f'Generate {str(quantity)} {difficulty} difficulty short answer questions, and the '
                    f'possible answers, must have maximum {max_words} words per answer, about this text:\n"{text}"'
                )
            }
        ]
        reply = await self._llm.prediction(
            GPTModels.GPT_4_O, request, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )
        # The model may return more questions than requested; keep only `quantity`.
        selected = reply["questions"][:quantity]
        return {
            "id": str(uuid.uuid4()),
            "maxWords": max_words,
            "prompt": f"Choose no more than {max_words} words and/or a number from the passage for each answer.",
            "solutions": ExercisesHelper.build_write_blanks_solutions(selected, start_id),
            "text": ExercisesHelper.build_write_blanks_text(selected, start_id),
            "type": "writeBlanks"
        }