Brushed up the backend, added writing task 1 academic prompt gen and grading ENCOA-274

This commit is contained in:
Carlos-Mesquita
2024-12-10 22:24:40 +00:00
parent 68cab80851
commit 6982068864
167 changed files with 1411 additions and 1229 deletions

View File

@@ -0,0 +1,11 @@
from .multiple_choice import MultipleChoice
from .blank_space import BlankSpace
from .passage_utas import PassageUtas
from .fill_blanks import FillBlanks
# Public API of this package: the exercise generator classes imported above.
__all__ = [
"MultipleChoice",
"BlankSpace",
"PassageUtas",
"FillBlanks"
]

View File

@@ -0,0 +1,44 @@
import random
from ielts_be.configs.constants import EducationalContent, GPTModels, TemperatureSettings
from ielts_be.services import ILLMService
class BlankSpace:
    """Asks the LLM for a text in which some words are replaced by id placeholders."""

    def __init__(self, llm: ILLMService, mc_variants: dict):
        self._mc_variants = mc_variants
        self._llm = llm

    async def gen_blank_space_text_utas(
        self, quantity: int, start_id: int, size: int, topic=None
    ):
        """Generate a blank-space text exercise.

        Args:
            quantity: How many words the model is asked to blank out.
            start_id: Id requested for the first blank; the prompt asks the
                model to increment it for each following blank.
            size: Minimum number of words requested for the generated text.
            topic: Subject of the text. When falsy, a random entry of
                ``EducationalContent.MTI_TOPICS`` is used.

        Returns:
            The value stored under the ``"question"`` key of the LLM response.
        """
        if not topic:
            topic = random.choice(EducationalContent.MTI_TOPICS)

        output_format = self._mc_variants["blank_space_text"]

        # Only the first fragment is an f-string; the "{{id}}" fragment is a
        # plain literal, so the double braces are sent to the model verbatim.
        replace_instruction = "".join([
            f'From the generated text choose {quantity} words (cannot be sequential words) to replace ',
            'once with {{id}} where id starts on ',
            str(start_id),
            ' and is incremented for each word. ',
            'The ids must be ordered throughout the text and the words must be replaced only once. ',
            'Put the removed words and respective ids on the words array of the json in the correct order.',
        ])

        system_message = {
            "role": "system",
            "content": f'You are a helpful assistant designed to output JSON on this format: {output_format}'
        }
        text_request = {
            "role": "user",
            "content": f'Generate a text of at least {size} words about the topic {topic}.'
        }
        blanks_request = {"role": "user", "content": replace_instruction}

        response = await self._llm.prediction(
            GPTModels.GPT_4_O,
            [system_message, text_request, blanks_request],
            ["question"],
            TemperatureSettings.GEN_QUESTION_TEMPERATURE,
        )
        return response["question"]

View File

@@ -0,0 +1,73 @@
import random
from ielts_be.configs.constants import GPTModels, TemperatureSettings, EducationalContent
from ielts_be.services import ILLMService
class FillBlanks:
    """Generates multiple-choice fill-in-the-blanks exercises through the LLM service."""

    def __init__(self, llm: ILLMService):
        self._llm = llm

    async def gen_fill_blanks(
        self, start_id: int, quantity: int, size: int = 300, topic=None
    ):
        """Generate a fill-the-blanks exercise with four options per blank.

        Args:
            start_id: Id requested for the first blank; the prompt asks the
                model to increment it for each following blank.
            quantity: Exact number of words the model is asked to blank out.
            size: Minimum number of words requested for the generated text.
            topic: Subject of the text. When falsy, a random entry of
                ``EducationalContent.MTI_TOPICS`` is used.

        Returns:
            The LLM response dict extended with the ``type``, ``variant`` and
            ``prompt`` keys (these override same-named keys of the response).
        """
        if not topic:
            topic = random.choice(EducationalContent.MTI_TOPICS)

        # The leftover debug print() calls for quantity/start_id were removed:
        # they wrote to stdout on every request.
        messages = [
            {
                "role": "system",
                "content": f'You are a helpful assistant designed to output JSON on this format: {self._fill_blanks_mc_template()}'
            },
            {
                "role": "user",
                "content": f'Generate a text of at least {size} words about the topic {topic}.'
            },
            {
                "role": "user",
                # Only the first fragment is an f-string; "{{id}}" lives in a
                # plain literal, so the double braces reach the model verbatim.
                "content": (
                    f'From the generated text choose exactly {quantity} words (cannot be sequential words) replace '
                    'each with {{id}} (starting from ' + str(start_id) + ' and incrementing), then generate a '
                    'JSON object containing: the modified text, a solutions array with each word\'s correct '
                    'letter (A-D), and a words array containing each id with four options where one is '
                    'the original word (matching the solution) and three are plausible but incorrect '
                    'alternatives that maintain grammatical consistency. '
                    'You cannot use repeated words!'  # TODO: Solve this after
                )
            }
        ]

        # No required response fields are passed here (empty list).
        question = await self._llm.prediction(
            GPTModels.GPT_4_O, messages, [], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )

        return {
            **question,
            "type": "fillBlanks",
            "variant": "mc",
            "prompt": "Click a blank to select the appropriate word for it.",
        }

    @staticmethod
    def _fill_blanks_mc_template():
        """Return the JSON shape embedded in the system prompt as the expected output."""
        return {
            "text": "",
            "solutions": [
                {
                    "id": "",
                    "solution": "<A,B,C or D>"
                }
            ],
            "words": [
                {
                    "id": "",
                    "options": {
                        "A": "",
                        "B": "",
                        "C": "",
                        "D": ""
                    }
                }
            ]
        }

View File

@@ -0,0 +1,84 @@
from ielts_be.configs.constants import GPTModels, TemperatureSettings
from ielts_be.helpers import ExercisesHelper
from ielts_be.services import ILLMService
class MultipleChoice:
    """Generates grammar-oriented multiple choice questions through the LLM service."""

    def __init__(self, llm: ILLMService, mc_variants: dict):
        self._llm = llm
        self._mc_variants = mc_variants

    async def gen_multiple_choice(
        self, mc_variant: str, quantity: int, start_id: int = 1
    ):
        """Generate ``quantity`` multiple choice questions of the given variant.

        Args:
            mc_variant: Key into the configured ``mc_variants`` templates.
                ``"blank_space"`` and ``"underline"`` additionally alter the prompt.
            quantity: Number of questions requested from the model.
            start_id: Value forwarded to ``ExercisesHelper.fix_exercise_ids``.

        Returns:
            The result of ``ExercisesHelper.fix_exercise_ids`` applied to the
            LLM response.

        Raises:
            KeyError: If ``mc_variant`` is not a key of the configured templates.
        """
        mc_template = self._mc_variants[mc_variant]
        blank_mod = " blank space " if mc_variant == "blank_space" else " "

        gen_multiple_choice_for_text: str = (
            'Generate {quantity} multiple choice{blank}questions of 4 options for an english level exam, some easy '
            'questions, some intermediate questions and some advanced questions. Ensure that the questions cover '
            'a range of topics such as verb tense, subject-verb agreement, pronoun usage, sentence structure, and '
            'punctuation. Make sure every question only has 1 correct answer.'
        )

        messages = [
            {
                "role": "system",
                "content": (
                    f'You are a helpful assistant designed to output JSON on this format: {mc_template}'
                )
            },
            {
                "role": "user",
                "content": gen_multiple_choice_for_text.format(quantity=str(quantity), blank=blank_mod)
            }
        ]

        if mc_variant == "underline":
            # The underline variant needs a worked example so the model marks
            # the candidate words with <u> tags in the prompt.
            messages.append({
                "role": "user",
                "content": (
                    'The type of multiple choice in the prompt has wrong words or group of words and the options '
                    'are to find the wrong word or group of words that are underlined in the prompt. \nExample:\n'
                    'Prompt: "I <u>complain</u> about my boss <u>all the time</u>, but my colleagues <u>thinks</u> '
                    'the boss <u>is</u> nice."\n'
                    'Options:\na: "complain"\nb: "all the time"\nc: "thinks"\nd: "is"'
                )
            })

        questions = await self._llm.prediction(
            GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )
        # NOTE: the number of returned questions is not checked against
        # `quantity` here. The unreachable string of old retry/de-duplication
        # code that used to follow this return was removed.
        return ExercisesHelper.fix_exercise_ids(questions, start_id)

View File

@@ -0,0 +1,91 @@
from typing import Optional
from ielts_be.configs.constants import GPTModels, TemperatureSettings
from ielts_be.helpers import ExercisesHelper
from ielts_be.services import ILLMService, IReadingService
class PassageUtas:
    """Generates a reading passage together with multiple choice questions about it."""

    def __init__(self, llm: ILLMService, reading_service: IReadingService, mc_variants: dict):
        self._llm = llm
        self._reading_service = reading_service
        self._mc_variants = mc_variants

    async def gen_reading_passage_utas(
        self, start_id, mc_quantity: int, topic: Optional[str], word_size: Optional[int]  # sa_quantity: int,
    ):
        """Generate a passage and its multiple choice exercise.

        Args:
            start_id: Id forwarded to the multiple choice generator.
            mc_quantity: Number of multiple choice questions to generate.
            topic: Topic forwarded to the reading service.
            word_size: Word count forwarded to the reading service.

        Returns:
            The multiple choice exercise dict (with ``type`` set to
            ``"multipleChoice"``) extended with a ``passage`` entry holding the
            passage ``content`` and ``title``, and ``mcVariant`` set to
            ``"passageUtas"``.
        """
        passage = await self._reading_service.generate_reading_passage(1, topic, word_size)
        mc_exercises = await self._gen_text_multiple_choice_utas(passage["text"], start_id, mc_quantity)
        mc_exercises["type"] = "multipleChoice"

        # Short answer exercises are currently not part of the result;
        # _gen_short_answer_utas is not called from here.
        return {
            **mc_exercises,
            "passage": {
                "content": passage["text"],
                "title": passage["title"]
            },
            "mcVariant": "passageUtas"
        }

    async def _gen_short_answer_utas(self, text: str, start_id: int, sa_quantity: int):
        """Ask the LLM for ``sa_quantity`` short answer questions about ``text``.

        Returns:
            The value stored under the ``"questions"`` key of the LLM response.
        """
        json_format = {"questions": [{"id": 1, "question": "question", "possible_answers": ["answer_1", "answer_2"]}]}

        messages = [
            {
                "role": "system",
                "content": f'You are a helpful assistant designed to output JSON on this format: {json_format}'
            },
            {
                "role": "user",
                "content": (
                    f'Generate {sa_quantity} short answer questions, and the possible answers, must have '
                    f'maximum 3 words per answer, about this text:\n"{text}"'
                )
            },
            {
                "role": "user",
                "content": f'The id starts at {start_id}.'
            }
        ]

        question = await self._llm.prediction(
            GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )
        return question["questions"]

    async def _gen_text_multiple_choice_utas(self, text: str, start_id: int, mc_quantity: int):
        """Ask the LLM for ``mc_quantity`` multiple choice questions about ``text``.

        The request is repeated until the response contains exactly
        ``mc_quantity`` questions.

        Returns:
            The response after ``ExercisesHelper.fix_exercise_ids``, with its
            ``"questions"`` passed through
            ``ExercisesHelper.randomize_mc_options_order``.
        """
        json_template = self._mc_variants["text_mc_utas"]

        messages = [
            {
                "role": "system",
                "content": f'You are a helpful assistant designed to output JSON on this format: {json_template}'
            },
            {
                "role": "user",
                "content": f'Generate {mc_quantity} multiple choice questions of 4 options for this text:\n{text}'
            },
            {
                "role": "user",
                "content": 'Make sure every question only has 1 correct answer.'
            }
        ]

        question = await self._llm.prediction(
            GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )

        if len(question["questions"]) != mc_quantity:
            # Retry with keyword arguments: the previous positional call passed
            # mc_quantity and start_id in swapped order.
            # NOTE(review): the retry has no attempt limit.
            return await self._gen_text_multiple_choice_utas(
                text, start_id=start_id, mc_quantity=mc_quantity
            )

        response = ExercisesHelper.fix_exercise_ids(question, start_id)
        response["questions"] = ExercisesHelper.randomize_mc_options_order(response["questions"])
        return response