import random
|
|
import uuid
|
|
from queue import Queue
|
|
from typing import List
|
|
|
|
from app.services.abc import IReadingService, ILLMService
|
|
from app.configs.constants import QuestionType, TemperatureSettings, FieldsAndExercises, GPTModels
|
|
from app.helpers import ExercisesHelper
|
|
|
|
|
|
class ReadingService(IReadingService):
    """Generate IELTS reading passages and their exercises through an LLM.

    The IELTS reading section has three passages whose questions are numbered
    continuously (1-13, 14-26, 27-40), so each passage carries a fixed
    question type and a fixed starting question id.
    """

    def __init__(self, llm: ILLMService):
        self._llm = llm
        # Static layout of the reading section: question ids must stay
        # continuous across the three passages, hence the hard-coded start ids.
        self._passages = {
            "passage_1": {
                "question_type": QuestionType.READING_PASSAGE_1,
                "start_id": 1
            },
            "passage_2": {
                "question_type": QuestionType.READING_PASSAGE_2,
                "start_id": 14
            },
            "passage_3": {
                "question_type": QuestionType.READING_PASSAGE_3,
                "start_id": 27
            }
        }

    async def gen_reading_passage(
        self,
        passage_id: int,
        topic: str,
        req_exercises: List[str],
        number_of_exercises_q: Queue,
        difficulty: str
    ):
        """Build one complete reading passage together with its exercises.

        Args:
            passage_id: 1, 2 or 3 — selects the question type and start id.
            topic: subject the passage text is generated about.
            req_exercises: exercise type keys ("fillBlanks", "trueFalse",
                "writeBlanks", "paragraphMatch"), in order.
            number_of_exercises_q: queue yielding the question count for each
                requested exercise, in the same order as ``req_exercises``.
            difficulty: difficulty label forwarded to the generation prompts.

        Returns:
            dict with "exercises", "text" ({"content", "title"}) and
            "difficulty".
        """
        _passage = self._passages[f'passage_{passage_id}']

        # Drain the per-exercise quantities exactly once.  The previous
        # implementation read from the queue inside every (recursive) retry,
        # so a failed attempt consumed counts intended for later passages —
        # or blocked forever once the queue was empty.
        quantities = [number_of_exercises_q.get() for _ in req_exercises]

        # Iterative retry loop instead of unbounded recursion: identical
        # semantics, but no stack growth on repeated LLM failures.
        # NOTE(review): there is still no upper bound on retries — confirm
        # whether a max-attempts cap would be acceptable for callers.
        while True:
            passage = await self.generate_reading_passage(_passage["question_type"], topic)

            if passage == "":
                # Empty LLM response: regenerate the passage from scratch.
                continue

            exercises = await self._generate_reading_exercises(
                passage["text"], req_exercises, quantities, _passage["start_id"], difficulty
            )
            if ExercisesHelper.contains_empty_dict(exercises):
                # At least one exercise failed validation: retry everything.
                continue

            return {
                "exercises": exercises,
                "text": {
                    "content": passage["text"],
                    "title": passage["title"]
                },
                "difficulty": difficulty
            }

    async def generate_reading_passage(self, q_type: QuestionType, topic: str):
        """Ask the LLM for a long reading passage as ``{"title", "text"}``.

        Returns the parsed LLM response, or ``""`` when the model produced
        nothing usable (the caller treats that as a signal to retry).
        """
        messages = [
            {
                "role": "system",
                "content": (
                    'You are a helpful assistant designed to output JSON on this format: '
                    '{"title": "title of the text", "text": "generated text"}')
            },
            {
                "role": "user",
                "content": (
                    f'Generate an extensive text for IELTS {q_type.value}, of at least 1500 words, '
                    f'on the topic of "{topic}". The passage should offer a substantial amount of '
                    'information, analysis, or narrative relevant to the chosen subject matter. This text '
                    'passage aims to serve as the primary reading section of an IELTS test, providing an '
                    'in-depth and comprehensive exploration of the topic. Make sure that the generated text '
                    'does not contain forbidden subjects in muslim countries.'
                )
            }
        ]

        return await self._llm.prediction(
            GPTModels.GPT_4_O,
            messages,
            FieldsAndExercises.GEN_TEXT_FIELDS,
            TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )

    async def _generate_reading_exercises(
        self, passage: str, req_exercises: list, number_of_exercises_q, start_id, difficulty
    ):
        """Generate every requested exercise for *passage*.

        ``number_of_exercises_q`` may be a ``Queue`` (legacy behaviour: one
        item is consumed per exercise) or an already-drained sequence of
        counts, which makes the call safely repeatable on retries.

        A failed exercise is appended as ``{}`` so the caller can detect it
        with ``ExercisesHelper.contains_empty_dict``.
        """
        if isinstance(number_of_exercises_q, Queue):
            quantities = [number_of_exercises_q.get() for _ in req_exercises]
        else:
            quantities = list(number_of_exercises_q)

        exercises = []
        for req_exercise, number_of_exercises in zip(req_exercises, quantities):
            if req_exercise == "fillBlanks":
                question = await self._gen_summary_fill_blanks_exercise(
                    passage, number_of_exercises, start_id, difficulty
                )
                exercises.append(question)
                print("Added fill blanks: " + str(question))
            elif req_exercise == "trueFalse":
                question = await self._gen_true_false_not_given_exercise(
                    passage, number_of_exercises, start_id, difficulty
                )
                exercises.append(question)
                print("Added trueFalse: " + str(question))
            elif req_exercise == "writeBlanks":
                question = await self._gen_write_blanks_exercise(
                    passage, number_of_exercises, start_id, difficulty
                )
                if ExercisesHelper.answer_word_limit_ok(question):
                    exercises.append(question)
                    print("Added write blanks: " + str(question))
                else:
                    # Empty dict marks a failed exercise; the caller retries.
                    exercises.append({})
                    print("Did not add write blanks because it did not respect word limit")
            elif req_exercise == "paragraphMatch":
                question = await self._gen_paragraph_match_exercise(passage, number_of_exercises, start_id)
                exercises.append(question)
                print("Added paragraph match: " + str(question))

            # Question ids are continuous across exercises: advance the base
            # id even for unrecognised exercise keys (matches prior behaviour).
            start_id = start_id + number_of_exercises

        return exercises

    async def _gen_summary_fill_blanks_exercise(self, text: str, quantity: int, start_id, difficulty):
        """Build a summary-completion ("fillBlanks") exercise.

        Summarizes *text*, blanks out *quantity* words of the requested
        difficulty, and pads the option list with distractor words.
        """
        messages = [
            {
                "role": "system",
                "content": (
                    'You are a helpful assistant designed to output JSON on this format: '
                    '{ "summary": "summary", "words": ["word_1", "word_2"] }')
            },
            {
                "role": "user",
                "content": (
                    f'Summarize this text: "{text}"'
                )
            },
            {
                "role": "user",
                "content": (
                    f'Select {quantity} {difficulty} difficulty words, it must be words and not '
                    'expressions, from the summary.'
                )
            }
        ]

        # NOTE(review): the validated-fields list names only "summary", yet
        # "words" is read below — confirm the LLM service also guarantees it.
        response = await self._llm.prediction(
            GPTModels.GPT_4_O, messages, ["summary"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )

        replaced_summary = ExercisesHelper.replace_first_occurrences_with_placeholders(
            response["summary"], response["words"], start_id
        )
        # Five extra distractors so there are more options than blanks.
        options_words = ExercisesHelper.add_random_words_and_shuffle(response["words"], 5)
        solutions = ExercisesHelper.fillblanks_build_solutions_array(response["words"], start_id)

        return {
            "allowRepetition": True,
            "id": str(uuid.uuid4()),
            # NOTE(review): "\\n" renders as a literal backslash-n in the
            # prompt string — confirm the client expects an escaped newline.
            "prompt": (
                "Complete the summary below. Click a blank to select the corresponding word(s) for it.\\nThere are "
                "more words than spaces so you will not use them all. You may use any of the words more than once."
            ),
            "solutions": solutions,
            "text": replaced_summary,
            "type": "fillBlanks",
            "words": options_words
        }

    async def _gen_true_false_not_given_exercise(self, text: str, quantity: int, start_id, difficulty):
        """Build a True/False/Not Given ("trueFalse") exercise for *text*."""
        messages = [
            {
                "role": "system",
                "content": (
                    'You are a helpful assistant designed to output JSON on this format: '
                    '{"prompts":[{"prompt": "statement_1", "solution": "true/false/not_given"}, '
                    '{"prompt": "statement_2", "solution": "true/false/not_given"}]}')
            },
            {
                "role": "user",
                "content": (
                    f'Generate {quantity} {difficulty} difficulty statements based on the provided text. '
                    'Ensure that your statements accurately represent information or inferences from the text, and '
                    'provide a variety of responses, including, at least one of each True, False, and Not Given, '
                    f'as appropriate.\n\nReference text:\n\n {text}'
                )
            }
        ]

        response = await self._llm.prediction(
            GPTModels.GPT_4_O, messages, ["prompts"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )
        questions = response["prompts"]

        # The model may over-generate; trim back to the requested quantity.
        if len(questions) > quantity:
            questions = ExercisesHelper.remove_excess_questions(questions, len(questions) - quantity)

        # Ids continue the passage-wide numbering started at start_id.
        for i, question in enumerate(questions, start=start_id):
            question["id"] = str(i)

        return {
            "id": str(uuid.uuid4()),
            "prompt": "Do the following statements agree with the information given in the Reading Passage?",
            "questions": questions,
            "type": "trueFalse"
        }

    async def _gen_write_blanks_exercise(self, text: str, quantity: int, start_id, difficulty):
        """Build a short-answer ("writeBlanks") exercise (max 3 words/answer)."""
        messages = [
            {
                "role": "system",
                "content": (
                    'You are a helpful assistant designed to output JSON on this format: '
                    '{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}')
            },
            {
                "role": "user",
                "content": (
                    f'Generate {quantity} {difficulty} difficulty short answer questions, and the '
                    f'possible answers, must have maximum 3 words per answer, about this text:\n"{text}"'
                )
            }
        ]

        response = await self._llm.prediction(
            GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )
        # Trim any over-generated questions to the requested quantity.
        questions = response["questions"][:quantity]

        return {
            "id": str(uuid.uuid4()),
            "maxWords": 3,
            "prompt": "Choose no more than three words and/or a number from the passage for each answer.",
            "solutions": ExercisesHelper.build_write_blanks_solutions(questions, start_id),
            "text": ExercisesHelper.build_write_blanks_text(questions, start_id),
            "type": "writeBlanks"
        }

    async def _gen_paragraph_match_exercise(self, text: str, quantity: int, start_id):
        """Build a heading-to-paragraph matching ("matchSentences") exercise."""
        paragraphs = ExercisesHelper.assign_letters_to_paragraphs(text)
        messages = [
            {
                "role": "system",
                "content": (
                    'You are a helpful assistant designed to output JSON on this format: '
                    '{"headings": [ {"heading": "first paragraph heading"}, {"heading": "second paragraph heading"}]}')
            },
            {
                "role": "user",
                "content": (
                    'For every paragraph of the list generate a minimum 5 word heading for it. '
                    f'The paragraphs are these: {str(paragraphs)}'
                )
            }
        ]

        response = await self._llm.prediction(
            GPTModels.GPT_4_O, messages, ["headings"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )
        headings = response["headings"]

        options = []
        for i, paragraph in enumerate(paragraphs):
            heading = headings[i]
            # The system prompt requests {"heading": "..."} objects; the old
            # code stored the whole dict, so "sentence" below became a dict
            # instead of the heading string.  Tolerate bare strings as well.
            paragraph["heading"] = heading["heading"] if isinstance(heading, dict) else heading
            options.append({
                "id": paragraph["letter"],
                "sentence": paragraph["paragraph"]
            })

        # Shuffle so the heading order does not mirror the paragraph order.
        random.shuffle(paragraphs)
        sentences = []
        for i, paragraph in enumerate(paragraphs, start=start_id):
            sentences.append({
                "id": i,
                "sentence": paragraph["heading"],
                "solution": paragraph["letter"]
            })

        return {
            "id": str(uuid.uuid4()),
            "allowRepetition": False,
            "options": options,
            "prompt": "Choose the correct heading for paragraphs from the list of headings below.",
            "sentences": sentences[:quantity],
            "type": "matchSentences"
        }