Updated this to the latest version of develop and got rid of most of the duplication. Some packages might be missing from the toml; needs testing.

This commit is contained in:
Carlos Mesquita
2024-08-30 02:35:11 +01:00
parent 3cf9fa5cba
commit f92a803d96
73 changed files with 3642 additions and 2703 deletions

View File

@@ -12,42 +12,25 @@ class ReadingService(IReadingService):
def __init__(self, llm: ILLMService):
self._llm = llm
self._passages = {
"passage_1": {
"question_type": QuestionType.READING_PASSAGE_1,
"start_id": 1
},
"passage_2": {
"question_type": QuestionType.READING_PASSAGE_2,
"start_id": 14
},
"passage_3": {
"question_type": QuestionType.READING_PASSAGE_3,
"start_id": 27
}
}
async def gen_reading_passage(
self,
passage_id: int,
part: int,
topic: str,
req_exercises: List[str],
number_of_exercises_q: Queue,
difficulty: str
difficulty: str,
start_id: int
):
_passage = self._passages[f'passage_{str(passage_id)}']
passage = await self.generate_reading_passage(_passage["question_type"], topic)
if passage == "":
return await self.gen_reading_passage(passage_id, topic, req_exercises, number_of_exercises_q, difficulty)
start_id = _passage["start_id"]
passage = await self.generate_reading_passage(part, topic)
exercises = await self._generate_reading_exercises(
passage["text"], req_exercises, number_of_exercises_q, start_id, difficulty
)
if ExercisesHelper.contains_empty_dict(exercises):
return await self.gen_reading_passage(passage_id, topic, req_exercises, number_of_exercises_q, difficulty)
return await self.gen_reading_passage(
part, topic, req_exercises, number_of_exercises_q, difficulty, start_id
)
return {
"exercises": exercises,
@@ -58,7 +41,17 @@ class ReadingService(IReadingService):
"difficulty": difficulty
}
async def generate_reading_passage(self, q_type: QuestionType, topic: str):
async def generate_reading_passage(self, part: int, topic: str, word_count: int = 800):
part_system_message = {
"1": 'The generated text should be fairly easy to understand and have multiple paragraphs.',
"2": 'The generated text should be fairly hard to understand and have multiple paragraphs.',
"3": (
'The generated text should be very hard to understand and include different points, theories, '
'subtle differences of opinions from people, correctly sourced to the person who said it, '
'over the specified topic and have multiple paragraphs.'
)
}
messages = [
{
"role": "system",
@@ -69,17 +62,26 @@ class ReadingService(IReadingService):
{
"role": "user",
"content": (
f'Generate an extensive text for IELTS {q_type.value}, of at least 1500 words, '
f'on the topic of "{topic}". The passage should offer a substantial amount of '
'information, analysis, or narrative relevant to the chosen subject matter. This text '
'passage aims to serve as the primary reading section of an IELTS test, providing an '
'in-depth and comprehensive exploration of the topic. Make sure that the generated text '
'does not contain forbidden subjects in muslim countries.'
f'Generate an extensive text for IELTS Reading Passage {part}, of at least {word_count} words, '
f'on the topic of "{topic}". The passage should offer a substantial amount of '
'information, analysis, or narrative relevant to the chosen subject matter. This text '
'passage aims to serve as the primary reading section of an IELTS test, providing an '
'in-depth and comprehensive exploration of the topic. Make sure that the generated text '
'does not contain forbidden subjects in muslim countries.'
)
},
{
"role": "system",
"content": part_system_message[str(part)]
}
]
if part == 3:
messages.append({
"role": "user",
"content": "Use real text excerpts on you generated passage and cite the sources."
})
return await self._llm.prediction(
GPTModels.GPT_4_O,
messages,
@@ -95,11 +97,15 @@ class ReadingService(IReadingService):
number_of_exercises = number_of_exercises_q.get()
if req_exercise == "fillBlanks":
question = await self._gen_summary_fill_blanks_exercise(passage, number_of_exercises, start_id, difficulty)
question = await self._gen_summary_fill_blanks_exercise(
passage, number_of_exercises, start_id, difficulty
)
exercises.append(question)
print("Added fill blanks: " + str(question))
elif req_exercise == "trueFalse":
question = await self._gen_true_false_not_given_exercise(passage, number_of_exercises, start_id, difficulty)
question = await self._gen_true_false_not_given_exercise(
passage, number_of_exercises, start_id, difficulty
)
exercises.append(question)
print("Added trueFalse: " + str(question))
elif req_exercise == "writeBlanks":
@@ -114,32 +120,28 @@ class ReadingService(IReadingService):
question = await self._gen_paragraph_match_exercise(passage, number_of_exercises, start_id)
exercises.append(question)
print("Added paragraph match: " + str(question))
elif req_exercise == "ideaMatch":
question = await self._gen_idea_match_exercise(passage, number_of_exercises, start_id)
exercises.append(question)
print("Added idea match: " + str(question))
start_id = start_id + number_of_exercises
return exercises
async def _gen_summary_fill_blanks_exercise(self, text: str, quantity: int, start_id, difficulty):
async def _gen_summary_fill_blanks_exercise(
self, text: str, quantity: int, start_id, difficulty, num_random_words: int = 1
):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{ "summary": "summary", "words": ["word_1", "word_2"] }')
'You are a helpful assistant designed to output JSON on this format: { "summary": "summary" }'
)
},
{
"role": "user",
"content": (
f'Summarize this text: "{text}"'
)
},
{
"role": "user",
"content": (
f'Select {str(quantity)} {difficulty} difficulty words, it must be words and not '
'expressions, from the summary.'
)
"content": f'Summarize this text: "{text}"'
}
]
@@ -148,22 +150,45 @@ class ReadingService(IReadingService):
GPTModels.GPT_4_O, messages, ["summary"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
replaced_summary = ExercisesHelper.replace_first_occurrences_with_placeholders(response["summary"], response["words"], start_id)
options_words = ExercisesHelper.add_random_words_and_shuffle(response["words"], 5)
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"words": ["word_1", "word_2"] }'
)
},
{
"role": "user",
"content": (
f'Select {quantity} {difficulty} difficulty words, it must be words and not expressions, '
f'from this:\n{response["summary"]}'
)
}
]
words_response = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["words"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
response["words"] = words_response["words"]
replaced_summary = ExercisesHelper.replace_first_occurrences_with_placeholders(
response["summary"], response["words"], start_id
)
options_words = ExercisesHelper.add_random_words_and_shuffle(response["words"], num_random_words)
solutions = ExercisesHelper.fillblanks_build_solutions_array(response["words"], start_id)
return {
"allowRepetition": True,
"id": str(uuid.uuid4()),
"prompt": (
"Complete the summary below. Click a blank to select the corresponding word(s) for it.\\nThere are "
"Complete the summary below. Write the letter of the corresponding word(s) for it.\\nThere are "
"more words than spaces so you will not use them all. You may use any of the words more than once."
),
"solutions": solutions,
"text": replaced_summary,
"type": "fillBlanks",
"words": options_words
}
async def _gen_true_false_not_given_exercise(self, text: str, quantity: int, start_id, difficulty):
@@ -210,7 +235,8 @@ class ReadingService(IReadingService):
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}')
'{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}'
)
},
{
"role": "user",
@@ -243,7 +269,8 @@ class ReadingService(IReadingService):
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"headings": [ {"heading": "first paragraph heading"}, {"heading": "second paragraph heading"}]}')
'{"headings": [ {"heading": "first paragraph heading"}, {"heading": "second paragraph heading"}]}'
)
},
{
"role": "user",
@@ -262,7 +289,7 @@ class ReadingService(IReadingService):
options = []
for i, paragraph in enumerate(paragraphs, start=0):
paragraph["heading"] = headings[i]
paragraph["heading"] = headings[i]["heading"]
options.append({
"id": paragraph["letter"],
"sentence": paragraph["paragraph"]
@@ -285,3 +312,38 @@ class ReadingService(IReadingService):
"sentences": sentences[:quantity],
"type": "matchSentences"
}
async def _gen_idea_match_exercise(self, text: str, quantity: int, start_id):
    """Build a "match the idea to its author" exercise from a passage.

    Asks the LLM to extract ideas/opinions from ``text`` together with who
    they are attributed to, then shapes the result into a
    ``matchSentences`` exercise payload.

    Args:
        text: Passage the ideas are extracted from.
        quantity: Number of ideas requested from the model; also the
            maximum number of ideas kept in the exercise.
        start_id: Forwarded to ``ExercisesHelper.build_sentences``
            (presumably the id of the first question -- confirm against
            the helper).

    Returns:
        A dict with the keys ``id``, ``allowRepetition``, ``options``,
        ``prompt``, ``sentences`` and ``type``.
    """
    messages = [
        {
            "role": "system",
            "content": (
                'You are a helpful assistant designed to output JSON on this format: '
                '{"ideas": [ '
                '{"idea": "some idea or opinion", "from": "person, institution whose idea or opinion this is"}, '
                '{"idea": "some other idea or opinion", "from": "person, institution whose idea or opinion this is"}'
                ']}'
            )
        },
        {
            "role": "user",
            "content": (
                f'From the text extract {quantity} ideas, theories, opinions and who they are from. '
                f'The text: {text}'
            )
        }
    ]

    response = await self._llm.prediction(
        GPTModels.GPT_4_O, messages, ["ideas"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
    )

    # The model may return more ideas than requested. The exercise
    # generation loop advances start_id by the requested count only, so any
    # surplus would spill into the next exercise's id range. Cap it here,
    # the same way the other match exercise slices its sentences.
    ideas = response["ideas"][:quantity]

    return {
        "id": str(uuid.uuid4()),
        "allowRepetition": False,
        "options": ExercisesHelper.build_options(ideas),
        "prompt": "Choose the correct author for the ideas/opinions from the list of authors below.",
        "sentences": ExercisesHelper.build_sentences(ideas, start_id),
        "type": "matchSentences"
    }