Brushed up the backend, added writing task 1 academic prompt gen and grading ENCOA-274
This commit is contained in:
210
ielts_be/services/impl/exam/level/__init__.py
Normal file
210
ielts_be/services/impl/exam/level/__init__.py
Normal file
@@ -0,0 +1,210 @@
|
||||
from asyncio import gather
|
||||
from typing import Dict, Optional
|
||||
from uuid import uuid4
|
||||
|
||||
from fastapi import UploadFile
|
||||
|
||||
import random
|
||||
|
||||
from ielts_be.configs.constants import EducationalContent
|
||||
from ielts_be.dtos.level import LevelExercisesDTO
|
||||
from ielts_be.repositories import IDocumentStore
|
||||
from ielts_be.services import (
|
||||
ILevelService, ILLMService, IReadingService,
|
||||
IWritingService, IListeningService, ISpeakingService
|
||||
)
|
||||
from .exercises import MultipleChoice, BlankSpace, PassageUtas, FillBlanks
|
||||
from .full_exams import CustomLevelModule, LevelUtas
|
||||
from .upload import UploadLevelModule
|
||||
|
||||
|
||||
class LevelService(ILevelService):
    """Facade over the level-exam generators.

    The service owns one generator per exercise family (multiple choice,
    blank-space text, passage + questions, fill-in-the-blanks), the two
    full-exam builders and the upload module, and forwards every public call
    to the matching collaborator.
    """

    # Exercise types served by the multiple-choice generator, mapped to
    # (template variant key, prompt attached to the generated exercise).
    _MC_EXERCISES = {
        "mcBlank": (
            "blank_space",
            "Choose the correct word or group of words that completes the sentences.",
        ),
        "mcUnderline": (
            "underline",
            "Choose the underlined word or group of words that is not correct.",
        ),
    }

    def __init__(
        self,
        llm: ILLMService,
        document_store: IDocumentStore,
        mc_variants: Dict,
        reading_service: IReadingService,
        writing_service: IWritingService,
        speaking_service: ISpeakingService,
        listening_service: IListeningService
    ):
        """Wire up the exercise generators and full-exam builders.

        Args:
            llm: LLM client handed to every generator.
            document_store: Kept on the instance; not read by the methods of this class.
            mc_variants: JSON templates keyed by multiple-choice variant name.
            reading_service: Used by the passage generator and the custom exam builder.
            writing_service: Used by the custom exam builder.
            speaking_service: Used by the custom exam builder.
            listening_service: Used by the custom exam builder.
        """
        self._llm = llm
        self._document_store = document_store
        self._reading_service = reading_service
        self._upload_module = UploadLevelModule(llm)
        self._mc_variants = mc_variants

        self._mc = MultipleChoice(llm, mc_variants)
        self._blank_space = BlankSpace(llm, mc_variants)
        self._passage_utas = PassageUtas(llm, reading_service, mc_variants)
        self._fill_blanks = FillBlanks(llm)

        # The full-exam builders receive this service itself so they can call
        # back into the gen_* pass-through methods defined below.
        self._level_utas = LevelUtas(llm, self, mc_variants)
        self._custom = CustomLevelModule(
            llm, self, reading_service, listening_service, writing_service, speaking_service
        )

    async def upload_level(self, upload: UploadFile, solutions: Optional[UploadFile] = None) -> Dict:
        """Build a level exam from an uploaded file (and optional solutions file)."""
        return await self._upload_module.generate_level_from_file(upload, solutions)

    async def _generate_exercise(self, req_exercise, start_id):
        """Generate one exercise for a single entry of a ``LevelExercisesDTO``.

        Args:
            req_exercise: Request entry; ``type``, ``quantity``, ``topic`` and
                ``text_size`` are read from it.
            start_id: Id assigned to the first question of this exercise.

        Returns:
            The dict produced by the matching generator, with ``prompt`` (and,
            for the multiple-choice types, ``variant`` and ``type``) set here.

        Raises:
            ValueError: If ``req_exercise.type`` is not a supported exercise type.
        """
        exercise_type = req_exercise.type

        if exercise_type in self._MC_EXERCISES:
            variant, prompt = self._MC_EXERCISES[exercise_type]
            questions = await self._mc.gen_multiple_choice(variant, req_exercise.quantity, start_id)
            questions["variant"] = exercise_type
            questions["type"] = "multipleChoice"
            questions["prompt"] = prompt
            return questions

        if exercise_type == "passageUtas":
            # Fall back to a random topic when the request does not name one.
            topic = req_exercise.topic or random.choice(EducationalContent.TOPICS)
            exercise = await self._passage_utas.gen_reading_passage_utas(
                start_id,
                req_exercise.quantity,
                topic,
                req_exercise.text_size
            )
            exercise["prompt"] = "Read the text and answer the questions below."
            return exercise

        if exercise_type == "fillBlanksMC":
            exercise = await self._fill_blanks.gen_fill_blanks(
                start_id,
                req_exercise.quantity,
                req_exercise.text_size,
                req_exercise.topic
            )
            exercise["prompt"] = "Read the text below and choose the correct word for each space."
            return exercise

        # Previously an unknown type fell through and returned None, which only
        # surfaced later as "'NoneType' object is not a mapping".
        raise ValueError(f"Unsupported level exercise type: {exercise_type!r}")

    async def generate_exercises(self, dto: LevelExercisesDTO):
        """Generate every requested exercise concurrently.

        Question ids are contiguous across exercises: each exercise starts where
        the previous one ended, based on the requested quantities.

        Returns:
            ``{"exercises": [...]}`` where each exercise dict is given a fresh
            uuid4 ``id`` unless the generated dict already carries an ``id`` key.
        """
        start_ids = []
        next_id = 1
        for req_exercise in dto.exercises:
            start_ids.append(next_id)
            next_id += req_exercise.quantity

        generated = await gather(*(
            self._generate_exercise(req_exercise, start_id)
            for req_exercise, start_id in zip(dto.exercises, start_ids)
        ))

        return {"exercises": [{'id': str(uuid4()), **exercise} for exercise in generated]}

    # The gen_* methods below are thin pass-throughs kept for the modules that
    # still call the generators through the service; it is unclear whether those
    # callers are still meant to be used.
    async def gen_multiple_choice(
        self, mc_variant: str, quantity: int, start_id: int = 1, utas: bool = False, all_exams=None
    ):
        """Forward to the multiple-choice generator.

        ``utas`` and ``all_exams`` are accepted and ignored: callers such as
        ``CustomLevelModule`` still pass them, and rejecting them would raise
        ``TypeError`` before any question is generated.
        """
        return await self._mc.gen_multiple_choice(mc_variant, quantity, start_id)

    async def gen_reading_passage_utas(
        self, start_id, mc_quantity: int, topic: Optional[str] = None, *, word_size: Optional[int] = None
    ):
        """Forward to the passage generator.

        ``word_size`` is keyword-only so existing positional callers keep their
        meaning. It is always forwarded because the passage generator takes it
        as a fourth argument.
        NOTE(review): a ``None`` topic / word_size is handed straight to the
        reading service; confirm it supplies its own defaults.
        """
        return await self._passage_utas.gen_reading_passage_utas(start_id, mc_quantity, topic, word_size)

    async def gen_blank_space_text_utas(
        self, quantity: int, start_id: int, size: int, topic: Optional[str] = None
    ):
        """Forward to the blank-space generator, which picks a random topic when none is given."""
        return await self._blank_space.gen_blank_space_text_utas(quantity, start_id, size, topic)

    async def get_level_exam(
            self, number_of_exercises: int = 25, min_timer: int = 25, diagnostic: bool = False
    ) -> Dict:
        """Not implemented in this service; currently returns ``None``."""
        pass

    async def get_level_utas(self):
        """Build the full UTAS level exam."""
        return await self._level_utas.get_level_utas()

    async def get_custom_level(self, data: Dict):
        """Build a custom level exam from the request payload ``data``."""
        return await self._custom.get_custom_level(data)
|
||||
"""
|
||||
async def _generate_single_multiple_choice(self, mc_variant: str = "normal"):
|
||||
mc_template = self._mc_variants[mc_variant]["questions"][0]
|
||||
blank_mod = " blank space " if mc_variant == "blank_space" else " "
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": (
|
||||
f'You are a helpful assistant designed to output JSON on this format: {mc_template}'
|
||||
)
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": (
|
||||
f'Generate 1 multiple choice {blank_mod} question of 4 options for an english level exam, '
|
||||
f'it can be easy, intermediate or advanced.'
|
||||
)
|
||||
|
||||
}
|
||||
]
|
||||
|
||||
if mc_variant == "underline":
|
||||
messages.append({
|
||||
"role": "user",
|
||||
"content": (
|
||||
'The type of multiple choice in the prompt has wrong words or group of words and the options '
|
||||
'are to find the wrong word or group of words that are underlined in the prompt. \nExample:\n'
|
||||
'Prompt: "I <u>complain</u> about my boss <u>all the time</u>, but my colleagues <u>thinks</u> '
|
||||
'the boss <u>is</u> nice."\n'
|
||||
'Options:\na: "complain"\nb: "all the time"\nc: "thinks"\nd: "is"'
|
||||
)
|
||||
})
|
||||
|
||||
question = await self._llm.prediction(
|
||||
GPTModels.GPT_4_O, messages, ["options"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
|
||||
)
|
||||
|
||||
return question
|
||||
"""
|
||||
"""
|
||||
async def _replace_exercise_if_exists(
|
||||
self, all_exams, current_exercise, current_exam, seen_keys, mc_variant: str, utas: bool = False
|
||||
):
|
||||
# Extracting relevant fields for comparison
|
||||
key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options'])))
|
||||
# Check if the key is in the set
|
||||
if key in seen_keys:
|
||||
return await self._replace_exercise_if_exists(
|
||||
all_exams, await self._generate_single_multiple_choice(mc_variant), current_exam, seen_keys,
|
||||
mc_variant, utas
|
||||
)
|
||||
else:
|
||||
seen_keys.add(key)
|
||||
|
||||
if not utas:
|
||||
for exam in all_exams:
|
||||
exam_dict = exam.to_dict()
|
||||
if len(exam_dict.get("parts", [])) > 0:
|
||||
exercise_dict = exam_dict.get("parts", [])[0]
|
||||
if len(exercise_dict.get("exercises", [])) > 0:
|
||||
if any(
|
||||
exercise["prompt"] == current_exercise["prompt"] and
|
||||
any(exercise["options"][0]["text"] == current_option["text"] for current_option in
|
||||
current_exercise["options"])
|
||||
for exercise in exercise_dict.get("exercises", [])[0]["questions"]
|
||||
):
|
||||
return await self._replace_exercise_if_exists(
|
||||
all_exams, await self._generate_single_multiple_choice(mc_variant), current_exam,
|
||||
seen_keys, mc_variant, utas
|
||||
)
|
||||
else:
|
||||
for exam in all_exams:
|
||||
if any(
|
||||
exercise["prompt"] == current_exercise["prompt"] and
|
||||
any(exercise["options"][0]["text"] == current_option["text"] for current_option in
|
||||
current_exercise["options"])
|
||||
for exercise in exam.get("questions", [])
|
||||
):
|
||||
return await self._replace_exercise_if_exists(
|
||||
all_exams, await self._generate_single_multiple_choice(mc_variant), current_exam,
|
||||
seen_keys, mc_variant, utas
|
||||
)
|
||||
return current_exercise, seen_keys
|
||||
"""
|
||||
11
ielts_be/services/impl/exam/level/exercises/__init__.py
Normal file
11
ielts_be/services/impl/exam/level/exercises/__init__.py
Normal file
@@ -0,0 +1,11 @@
|
||||
from .multiple_choice import MultipleChoice
|
||||
from .blank_space import BlankSpace
|
||||
from .passage_utas import PassageUtas
|
||||
from .fill_blanks import FillBlanks
|
||||
|
||||
__all__ = [
|
||||
"MultipleChoice",
|
||||
"BlankSpace",
|
||||
"PassageUtas",
|
||||
"FillBlanks"
|
||||
]
|
||||
44
ielts_be/services/impl/exam/level/exercises/blank_space.py
Normal file
44
ielts_be/services/impl/exam/level/exercises/blank_space.py
Normal file
@@ -0,0 +1,44 @@
|
||||
import random
|
||||
|
||||
from ielts_be.configs.constants import EducationalContent, GPTModels, TemperatureSettings
|
||||
from ielts_be.services import ILLMService
|
||||
|
||||
|
||||
class BlankSpace:
    """Generates "blank space text" exercises: a text with numbered gaps plus the removed words."""

    def __init__(self, llm: ILLMService, mc_variants: dict):
        """Keep the LLM client and the JSON templates (``blank_space_text`` is read from them)."""
        self._llm = llm
        self._mc_variants = mc_variants

    async def gen_blank_space_text_utas(
        self, quantity: int, start_id: int, size: int, topic=None
    ):
        """Ask the LLM for a text of at least ``size`` words with ``quantity`` words blanked out.

        Args:
            quantity: Number of words to replace with placeholders.
            start_id: Id used for the first placeholder; later ones increment.
            size: Minimum length of the generated text, in words.
            topic: Subject of the text; a random ``MTI_TOPICS`` entry when falsy.

        Returns:
            The ``"question"`` entry of the LLM response.
        """
        chosen_topic = topic if topic else random.choice(EducationalContent.MTI_TOPICS)
        output_format = self._mc_variants["blank_space_text"]

        # The double braces are sent to the model verbatim as the gap marker.
        placeholder = '{{id}}'

        system_prompt = f'You are a helpful assistant designed to output JSON on this format: {output_format}'
        text_prompt = f'Generate a text of at least {size} words about the topic {chosen_topic}.'
        blanks_prompt = (
            f'From the generated text choose {quantity} words (cannot be sequential words) to replace '
            f'once with {placeholder} where id starts on {start_id!s} and is incremented for each word. '
            'The ids must be ordered throughout the text and the words must be replaced only once. '
            'Put the removed words and respective ids on the words array of the json in the correct order.'
        )

        conversation = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": text_prompt},
            {"role": "user", "content": blanks_prompt},
        ]

        response = await self._llm.prediction(
            GPTModels.GPT_4_O, conversation, ["question"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )
        return response["question"]
|
||||
73
ielts_be/services/impl/exam/level/exercises/fill_blanks.py
Normal file
73
ielts_be/services/impl/exam/level/exercises/fill_blanks.py
Normal file
@@ -0,0 +1,73 @@
|
||||
import random
|
||||
|
||||
from ielts_be.configs.constants import GPTModels, TemperatureSettings, EducationalContent
|
||||
from ielts_be.services import ILLMService
|
||||
|
||||
|
||||
class FillBlanks:
    """Generates multiple-choice fill-in-the-blanks exercises through the LLM service."""

    def __init__(self, llm: ILLMService):
        """Keep the LLM client used for generation."""
        self._llm = llm

    async def gen_fill_blanks(
        self, start_id: int, quantity: int, size: int = 300, topic=None
    ):
        """Ask the LLM for a gapped text with four options per gap.

        Args:
            start_id: Id used for the first gap; later gaps increment.
            quantity: Number of words to blank out.
            size: Minimum length of the generated text, in words.
            topic: Subject of the text; a random ``MTI_TOPICS`` entry when falsy.

        Returns:
            The LLM response dict extended with ``type``, ``variant`` and
            ``prompt`` (these three keys override same-named keys of the response).
        """
        if not topic:
            topic = random.choice(EducationalContent.MTI_TOPICS)

        messages = [
            {
                "role": "system",
                "content": f'You are a helpful assistant designed to output JSON on this format: {self._fill_blanks_mc_template()}'
            },
            {
                "role": "user",
                "content": f'Generate a text of at least {size} words about the topic {topic}.'
            },
            {
                "role": "user",
                "content": (
                    f'From the generated text choose exactly {quantity} words (cannot be sequential words) replace '
                    # '{{id}}' sits in a plain (non-f) literal, so the braces reach the model verbatim.
                    'each with {{id}} (starting from ' + str(start_id) + ' and incrementing), then generate a '
                    'JSON object containing: the modified text, a solutions array with each word\'s correct '
                    'letter (A-D), and a words array containing each id with four options where one is '
                    'the original word (matching the solution) and three are plausible but incorrect '
                    'alternatives that maintain grammatical consistency. '
                    'You cannot use repeated words!'  # TODO: Solve this after
                )
            }
        ]

        # No response fields are listed for checking here (empty list).
        question = await self._llm.prediction(
            GPTModels.GPT_4_O, messages, [], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )
        return {
            **question,
            "type": "fillBlanks",
            "variant": "mc",
            "prompt": "Click a blank to select the appropriate word for it.",
        }

    @staticmethod
    def _fill_blanks_mc_template():
        """Return the JSON shape the model is asked to produce (embedded in the system prompt)."""
        return {
            "text": "",
            "solutions": [
                {
                    "id": "",
                    "solution": "<A,B,C or D>"
                }
            ],
            "words": [
                {
                    "id": "",
                    "options": {
                        "A": "",
                        "B": "",
                        "C": "",
                        "D": ""
                    }
                }
            ]
        }
|
||||
@@ -0,0 +1,84 @@
|
||||
from ielts_be.configs.constants import GPTModels, TemperatureSettings
|
||||
from ielts_be.helpers import ExercisesHelper
|
||||
from ielts_be.services import ILLMService
|
||||
|
||||
|
||||
class MultipleChoice:
    """Generates stand-alone multiple-choice grammar questions through the LLM service."""

    def __init__(self, llm: ILLMService, mc_variants: dict):
        """Keep the LLM client and the per-variant JSON templates."""
        self._llm = llm
        self._mc_variants = mc_variants

    async def gen_multiple_choice(
        self, mc_variant: str, quantity: int, start_id: int = 1
    ):
        """Generate ``quantity`` four-option questions of the given variant.

        Args:
            mc_variant: Key into the templates; ``"blank_space"`` and
                ``"underline"`` additionally change the wording of the request.
            quantity: Number of questions to ask for.
            start_id: Id given to the first question when ids are renumbered.

        Returns:
            The LLM response after ``ExercisesHelper.fix_exercise_ids``.
        """
        output_format = self._mc_variants[mc_variant]

        if mc_variant == "blank_space":
            question_kind = " blank space "
        else:
            question_kind = " "

        request = (
            f'Generate {quantity!s} multiple choice{question_kind}questions of 4 options for an english level exam, some easy '
            'questions, some intermediate questions and some advanced questions. Ensure that the questions cover '
            'a range of topics such as verb tense, subject-verb agreement, pronoun usage, sentence structure, and '
            'punctuation. Make sure every question only has 1 correct answer.'
        )

        conversation = [
            {
                "role": "system",
                "content": f'You are a helpful assistant designed to output JSON on this format: {output_format}',
            },
            {"role": "user", "content": request},
        ]

        if mc_variant == "underline":
            # Extra instruction with a worked example of the "find the wrong underlined part" format.
            underline_hint = (
                'The type of multiple choice in the prompt has wrong words or group of words and the options '
                'are to find the wrong word or group of words that are underlined in the prompt. \nExample:\n'
                'Prompt: "I <u>complain</u> about my boss <u>all the time</u>, but my colleagues <u>thinks</u> '
                'the boss <u>is</u> nice."\n'
                'Options:\na: "complain"\nb: "all the time"\nc: "thinks"\nd: "is"'
            )
            conversation.append({"role": "user", "content": underline_hint})

        generated = await self._llm.prediction(
            GPTModels.GPT_4_O, conversation, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )
        return ExercisesHelper.fix_exercise_ids(generated, start_id)
|
||||
|
||||
"""
|
||||
if len(question["questions"]) != quantity:
|
||||
return await self.gen_multiple_choice(mc_variant, quantity, start_id, utas=utas, all_exams=all_exams)
|
||||
else:
|
||||
if not utas:
|
||||
all_exams = await self._document_store.get_all("level")
|
||||
seen_keys = set()
|
||||
for i in range(len(question["questions"])):
|
||||
question["questions"][i], seen_keys = await self._replace_exercise_if_exists(
|
||||
all_exams, question["questions"][i], question, seen_keys, mc_variant, utas
|
||||
)
|
||||
return {
|
||||
"id": str(uuid.uuid4()),
|
||||
"prompt": "Select the appropriate option.",
|
||||
"questions": ExercisesHelper.fix_exercise_ids(question, start_id)["questions"],
|
||||
"type": "multipleChoice",
|
||||
}
|
||||
else:
|
||||
if all_exams is not None:
|
||||
seen_keys = set()
|
||||
for i in range(len(question["questions"])):
|
||||
question["questions"][i], seen_keys = await self._replace_exercise_if_exists(
|
||||
all_exams, question["questions"][i], question, seen_keys, mc_variant, utas
|
||||
)
|
||||
response = ExercisesHelper.fix_exercise_ids(question, start_id)
|
||||
response["questions"] = ExercisesHelper.randomize_mc_options_order(response["questions"])
|
||||
return response
|
||||
"""
|
||||
|
||||
|
||||
91
ielts_be/services/impl/exam/level/exercises/passage_utas.py
Normal file
91
ielts_be/services/impl/exam/level/exercises/passage_utas.py
Normal file
@@ -0,0 +1,91 @@
|
||||
from typing import Optional
|
||||
|
||||
from ielts_be.configs.constants import GPTModels, TemperatureSettings
|
||||
from ielts_be.helpers import ExercisesHelper
|
||||
from ielts_be.services import ILLMService, IReadingService
|
||||
|
||||
|
||||
class PassageUtas:
    """Generates a reading passage together with multiple-choice questions about it."""

    def __init__(self, llm: ILLMService, reading_service: IReadingService, mc_variants: dict):
        """Keep the LLM client, the reading service (passage source) and the JSON templates."""
        self._llm = llm
        self._reading_service = reading_service
        self._mc_variants = mc_variants

    async def gen_reading_passage_utas(
        self, start_id, mc_quantity: int, topic: Optional[str], word_size: Optional[int] = None  # sa_quantity: int,
    ):
        """Generate a passage and ``mc_quantity`` multiple-choice questions on it.

        Args:
            start_id: Id given to the first question.
            mc_quantity: Number of multiple-choice questions to generate.
            topic: Topic forwarded to the reading service.
            word_size: Passage length forwarded to the reading service. Defaults
                to ``None`` so callers that only pass three arguments keep working.
                NOTE(review): confirm the reading service accepts ``None`` here.

        Returns:
            The multiple-choice exercise dict (``type`` set to ``"multipleChoice"``)
            extended with ``passage`` (``content`` and ``title``) and ``mcVariant``.
        """
        passage = await self._reading_service.generate_reading_passage(1, topic, word_size)
        mc_exercises = await self._gen_text_multiple_choice_utas(passage["text"], start_id, mc_quantity)
        mc_exercises["type"] = "multipleChoice"

        # A previous layout nested the result as
        #   exercises: {"shortAnswer": short_answer, "multipleChoice": mc_exercises}
        # The multiple-choice exercise is now returned flattened instead.
        return {
            **mc_exercises,
            "passage": {
                "content": passage["text"],
                "title": passage["title"]
            },
            "mcVariant": "passageUtas"
        }

    async def _gen_short_answer_utas(self, text: str, start_id: int, sa_quantity: int):
        """Generate ``sa_quantity`` short-answer questions about ``text``.

        Returns the ``"questions"`` list of the LLM response. Not called by the
        other methods of this class.
        """
        json_format = {"questions": [{"id": 1, "question": "question", "possible_answers": ["answer_1", "answer_2"]}]}

        messages = [
            {
                "role": "system",
                "content": f'You are a helpful assistant designed to output JSON on this format: {json_format}'
            },
            {
                "role": "user",
                "content": (
                    f'Generate {sa_quantity} short answer questions, and the possible answers, must have '
                    f'maximum 3 words per answer, about this text:\n"{text}"'
                )
            },
            {
                "role": "user",
                "content": f'The id starts at {start_id}.'
            }
        ]

        question = await self._llm.prediction(
            GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )

        return question["questions"]

    async def _gen_text_multiple_choice_utas(self, text: str, start_id: int, mc_quantity: int):
        """Generate exactly ``mc_quantity`` four-option questions about ``text``.

        The request is repeated until the model returns the requested number of
        questions; ids are then renumbered from ``start_id`` and the option
        order of every question is shuffled.

        NOTE(review): the retry is unbounded; a model that never returns the
        right count ends in ``RecursionError`` after many LLM calls.
        """
        json_template = self._mc_variants["text_mc_utas"]

        messages = [
            {
                "role": "system",
                "content": f'You are a helpful assistant designed to output JSON on this format: {json_template}'
            },
            {
                "role": "user",
                "content": f'Generate {mc_quantity} multiple choice questions of 4 options for this text:\n{text}'
            },
            {
                "role": "user",
                "content": 'Make sure every question only has 1 correct answer.'
            }
        ]

        question = await self._llm.prediction(
            GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )

        if len(question["questions"]) != mc_quantity:
            # Keyword arguments on purpose: the retry used to pass
            # (text, mc_quantity, start_id), swapping the two integers.
            return await self._gen_text_multiple_choice_utas(
                text, start_id=start_id, mc_quantity=mc_quantity
            )

        response = ExercisesHelper.fix_exercise_ids(question, start_id)
        response["questions"] = ExercisesHelper.randomize_mc_options_order(response["questions"])
        return response
|
||||
7
ielts_be/services/impl/exam/level/full_exams/__init__.py
Normal file
7
ielts_be/services/impl/exam/level/full_exams/__init__.py
Normal file
@@ -0,0 +1,7 @@
|
||||
from .custom import CustomLevelModule
|
||||
from .level_utas import LevelUtas
|
||||
|
||||
__all__ = [
|
||||
"CustomLevelModule",
|
||||
"LevelUtas"
|
||||
]
|
||||
335
ielts_be/services/impl/exam/level/full_exams/custom.py
Normal file
335
ielts_be/services/impl/exam/level/full_exams/custom.py
Normal file
@@ -0,0 +1,335 @@
|
||||
import queue
|
||||
import random
|
||||
|
||||
from typing import Dict
|
||||
|
||||
from ielts_be.configs.constants import CustomLevelExerciseTypes, EducationalContent
|
||||
from ielts_be.services import (
|
||||
ILLMService, ILevelService, IReadingService,
|
||||
IWritingService, IListeningService, ISpeakingService
|
||||
)
|
||||
|
||||
|
||||
class CustomLevelModule:
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
llm: ILLMService,
|
||||
level: ILevelService,
|
||||
reading: IReadingService,
|
||||
listening: IListeningService,
|
||||
writing: IWritingService,
|
||||
speaking: ISpeakingService
|
||||
):
|
||||
self._llm = llm
|
||||
self._level = level
|
||||
self._reading = reading
|
||||
self._listening = listening
|
||||
self._writing = writing
|
||||
self._speaking = speaking
|
||||
|
||||
# TODO: I've changed this to retrieve the args from the body request and not request query args
|
||||
async def get_custom_level(self, data: Dict):
|
||||
nr_exercises = int(data.get('nr_exercises'))
|
||||
|
||||
exercise_id = 1
|
||||
response = {
|
||||
"exercises": {},
|
||||
"module": "level"
|
||||
}
|
||||
for i in range(1, nr_exercises + 1, 1):
|
||||
exercise_type = data.get(f'exercise_{i}_type')
|
||||
exercise_difficulty = data.get(f'exercise_{i}_difficulty', random.choice(['easy', 'medium', 'hard']))
|
||||
exercise_qty = int(data.get(f'exercise_{i}_qty', -1))
|
||||
exercise_topic = data.get(f'exercise_{i}_topic', random.choice(EducationalContent.TOPICS))
|
||||
exercise_topic_2 = data.get(f'exercise_{i}_topic_2', random.choice(EducationalContent.TOPICS))
|
||||
exercise_text_size = int(data.get(f'exercise_{i}_text_size', 700))
|
||||
exercise_sa_qty = int(data.get(f'exercise_{i}_sa_qty', -1))
|
||||
exercise_mc_qty = int(data.get(f'exercise_{i}_mc_qty', -1))
|
||||
exercise_mc3_qty = int(data.get(f'exercise_{i}_mc3_qty', -1))
|
||||
exercise_fillblanks_qty = int(data.get(f'exercise_{i}_fillblanks_qty', -1))
|
||||
exercise_writeblanks_qty = int(data.get(f'exercise_{i}_writeblanks_qty', -1))
|
||||
exercise_writeblanksquestions_qty = int(data.get(f'exercise_{i}_writeblanksquestions_qty', -1))
|
||||
exercise_writeblanksfill_qty = int(data.get(f'exercise_{i}_writeblanksfill_qty', -1))
|
||||
exercise_writeblanksform_qty = int(data.get(f'exercise_{i}_writeblanksform_qty', -1))
|
||||
exercise_truefalse_qty = int(data.get(f'exercise_{i}_truefalse_qty', -1))
|
||||
exercise_paragraphmatch_qty = int(data.get(f'exercise_{i}_paragraphmatch_qty', -1))
|
||||
exercise_ideamatch_qty = int(data.get(f'exercise_{i}_ideamatch_qty', -1))
|
||||
|
||||
if exercise_type == CustomLevelExerciseTypes.MULTIPLE_CHOICE_4.value:
|
||||
response["exercises"][f"exercise_{i}"] = {}
|
||||
response["exercises"][f"exercise_{i}"]["questions"] = []
|
||||
response["exercises"][f"exercise_{i}"]["type"] = "multipleChoice"
|
||||
while exercise_qty > 0:
|
||||
if exercise_qty - 15 > 0:
|
||||
qty = 15
|
||||
else:
|
||||
qty = exercise_qty
|
||||
|
||||
mc_response = await self._level.gen_multiple_choice(
|
||||
"normal", qty, exercise_id, utas=True,
|
||||
all_exams=response["exercises"][f"exercise_{i}"]["questions"]
|
||||
)
|
||||
response["exercises"][f"exercise_{i}"]["questions"].extend(mc_response["questions"])
|
||||
exercise_id = exercise_id + qty
|
||||
exercise_qty = exercise_qty - qty
|
||||
|
||||
elif exercise_type == CustomLevelExerciseTypes.MULTIPLE_CHOICE_BLANK_SPACE.value:
|
||||
response["exercises"][f"exercise_{i}"] = {}
|
||||
response["exercises"][f"exercise_{i}"]["questions"] = []
|
||||
response["exercises"][f"exercise_{i}"]["type"] = "multipleChoice"
|
||||
while exercise_qty > 0:
|
||||
if exercise_qty - 15 > 0:
|
||||
qty = 15
|
||||
else:
|
||||
qty = exercise_qty
|
||||
|
||||
mc_response = await self._level.gen_multiple_choice(
|
||||
"blank_space", qty, exercise_id, utas=True,
|
||||
all_exams=response["exercises"][f"exercise_{i}"]["questions"]
|
||||
)
|
||||
response["exercises"][f"exercise_{i}"]["questions"].extend(mc_response["questions"])
|
||||
|
||||
exercise_id = exercise_id + qty
|
||||
exercise_qty = exercise_qty - qty
|
||||
|
||||
elif exercise_type == CustomLevelExerciseTypes.MULTIPLE_CHOICE_UNDERLINED.value:
|
||||
response["exercises"][f"exercise_{i}"] = {}
|
||||
response["exercises"][f"exercise_{i}"]["questions"] = []
|
||||
response["exercises"][f"exercise_{i}"]["type"] = "multipleChoice"
|
||||
while exercise_qty > 0:
|
||||
if exercise_qty - 15 > 0:
|
||||
qty = 15
|
||||
else:
|
||||
qty = exercise_qty
|
||||
|
||||
mc_response = await self._level.gen_multiple_choice(
|
||||
"underline", qty, exercise_id, utas=True,
|
||||
all_exams=response["exercises"][f"exercise_{i}"]["questions"]
|
||||
)
|
||||
response["exercises"][f"exercise_{i}"]["questions"].extend(mc_response["questions"])
|
||||
|
||||
exercise_id = exercise_id + qty
|
||||
exercise_qty = exercise_qty - qty
|
||||
|
||||
elif exercise_type == CustomLevelExerciseTypes.BLANK_SPACE_TEXT.value:
|
||||
response["exercises"][f"exercise_{i}"] = await self._level.gen_blank_space_text_utas(
|
||||
exercise_qty, exercise_id, exercise_text_size
|
||||
)
|
||||
response["exercises"][f"exercise_{i}"]["type"] = "blankSpaceText"
|
||||
exercise_id = exercise_id + exercise_qty
|
||||
elif exercise_type == CustomLevelExerciseTypes.READING_PASSAGE_UTAS.value:
|
||||
response["exercises"][f"exercise_{i}"] = await self._level.gen_reading_passage_utas(
|
||||
exercise_id, exercise_sa_qty, exercise_mc_qty, exercise_topic
|
||||
)
|
||||
response["exercises"][f"exercise_{i}"]["type"] = "readingExercises"
|
||||
exercise_id = exercise_id + exercise_qty
|
||||
elif exercise_type == CustomLevelExerciseTypes.WRITING_LETTER.value:
|
||||
response["exercises"][f"exercise_{i}"] = await self._writing.get_writing_task_general_question(
|
||||
1, exercise_topic, exercise_difficulty
|
||||
)
|
||||
response["exercises"][f"exercise_{i}"]["type"] = "writing"
|
||||
exercise_id = exercise_id + 1
|
||||
elif exercise_type == CustomLevelExerciseTypes.WRITING_2.value:
|
||||
response["exercises"][f"exercise_{i}"] = await self._writing.get_writing_task_general_question(
|
||||
2, exercise_topic, exercise_difficulty
|
||||
)
|
||||
response["exercises"][f"exercise_{i}"]["type"] = "writing"
|
||||
exercise_id = exercise_id + 1
|
||||
elif exercise_type == CustomLevelExerciseTypes.SPEAKING_1.value:
|
||||
response["exercises"][f"exercise_{i}"] = await self._speaking.get_speaking_part(
|
||||
1, exercise_topic, exercise_difficulty, exercise_topic_2
|
||||
)
|
||||
response["exercises"][f"exercise_{i}"]["type"] = "interactiveSpeaking"
|
||||
exercise_id = exercise_id + 1
|
||||
elif exercise_type == CustomLevelExerciseTypes.SPEAKING_2.value:
|
||||
response["exercises"][f"exercise_{i}"] = await self._speaking.get_speaking_part(
|
||||
2, exercise_topic, exercise_difficulty
|
||||
)
|
||||
response["exercises"][f"exercise_{i}"]["type"] = "speaking"
|
||||
exercise_id = exercise_id + 1
|
||||
elif exercise_type == CustomLevelExerciseTypes.SPEAKING_3.value:
|
||||
response["exercises"][f"exercise_{i}"] = await self._speaking.get_speaking_part(
|
||||
3, exercise_topic, exercise_difficulty
|
||||
)
|
||||
response["exercises"][f"exercise_{i}"]["type"] = "interactiveSpeaking"
|
||||
exercise_id = exercise_id + 1
|
||||
elif exercise_type == CustomLevelExerciseTypes.READING_1.value:
|
||||
exercises = []
|
||||
exercise_qty_q = queue.Queue()
|
||||
total_qty = 0
|
||||
if exercise_fillblanks_qty != -1:
|
||||
exercises.append('fillBlanks')
|
||||
exercise_qty_q.put(exercise_fillblanks_qty)
|
||||
total_qty = total_qty + exercise_fillblanks_qty
|
||||
if exercise_writeblanks_qty != -1:
|
||||
exercises.append('writeBlanks')
|
||||
exercise_qty_q.put(exercise_writeblanks_qty)
|
||||
total_qty = total_qty + exercise_writeblanks_qty
|
||||
if exercise_truefalse_qty != -1:
|
||||
exercises.append('trueFalse')
|
||||
exercise_qty_q.put(exercise_truefalse_qty)
|
||||
total_qty = total_qty + exercise_truefalse_qty
|
||||
if exercise_paragraphmatch_qty != -1:
|
||||
exercises.append('paragraphMatch')
|
||||
exercise_qty_q.put(exercise_paragraphmatch_qty)
|
||||
total_qty = total_qty + exercise_paragraphmatch_qty
|
||||
|
||||
response["exercises"][f"exercise_{i}"] = await self._reading.gen_reading_passage(
|
||||
1, exercise_topic, exercises, exercise_qty_q, exercise_difficulty, exercise_id
|
||||
)
|
||||
response["exercises"][f"exercise_{i}"]["type"] = "reading"
|
||||
|
||||
exercise_id = exercise_id + total_qty
|
||||
elif exercise_type == CustomLevelExerciseTypes.READING_2.value:
|
||||
exercises = []
|
||||
exercise_qty_q = queue.Queue()
|
||||
total_qty = 0
|
||||
if exercise_fillblanks_qty != -1:
|
||||
exercises.append('fillBlanks')
|
||||
exercise_qty_q.put(exercise_fillblanks_qty)
|
||||
total_qty = total_qty + exercise_fillblanks_qty
|
||||
if exercise_writeblanks_qty != -1:
|
||||
exercises.append('writeBlanks')
|
||||
exercise_qty_q.put(exercise_writeblanks_qty)
|
||||
total_qty = total_qty + exercise_writeblanks_qty
|
||||
if exercise_truefalse_qty != -1:
|
||||
exercises.append('trueFalse')
|
||||
exercise_qty_q.put(exercise_truefalse_qty)
|
||||
total_qty = total_qty + exercise_truefalse_qty
|
||||
if exercise_paragraphmatch_qty != -1:
|
||||
exercises.append('paragraphMatch')
|
||||
exercise_qty_q.put(exercise_paragraphmatch_qty)
|
||||
total_qty = total_qty + exercise_paragraphmatch_qty
|
||||
|
||||
response["exercises"][f"exercise_{i}"] = await self._reading.gen_reading_passage(
|
||||
2, exercise_topic, exercises, exercise_qty_q, exercise_difficulty, exercise_id
|
||||
)
|
||||
response["exercises"][f"exercise_{i}"]["type"] = "reading"
|
||||
|
||||
exercise_id = exercise_id + total_qty
|
||||
elif exercise_type == CustomLevelExerciseTypes.READING_3.value:
|
||||
exercises = []
|
||||
exercise_qty_q = queue.Queue()
|
||||
total_qty = 0
|
||||
if exercise_fillblanks_qty != -1:
|
||||
exercises.append('fillBlanks')
|
||||
exercise_qty_q.put(exercise_fillblanks_qty)
|
||||
total_qty = total_qty + exercise_fillblanks_qty
|
||||
if exercise_writeblanks_qty != -1:
|
||||
exercises.append('writeBlanks')
|
||||
exercise_qty_q.put(exercise_writeblanks_qty)
|
||||
total_qty = total_qty + exercise_writeblanks_qty
|
||||
if exercise_truefalse_qty != -1:
|
||||
exercises.append('trueFalse')
|
||||
exercise_qty_q.put(exercise_truefalse_qty)
|
||||
total_qty = total_qty + exercise_truefalse_qty
|
||||
if exercise_paragraphmatch_qty != -1:
|
||||
exercises.append('paragraphMatch')
|
||||
exercise_qty_q.put(exercise_paragraphmatch_qty)
|
||||
total_qty = total_qty + exercise_paragraphmatch_qty
|
||||
if exercise_ideamatch_qty != -1:
|
||||
exercises.append('ideaMatch')
|
||||
exercise_qty_q.put(exercise_ideamatch_qty)
|
||||
total_qty = total_qty + exercise_ideamatch_qty
|
||||
|
||||
response["exercises"][f"exercise_{i}"] = await self._reading.gen_reading_passage(
|
||||
3, exercise_topic, exercises, exercise_qty_q, exercise_id, exercise_difficulty
|
||||
)
|
||||
response["exercises"][f"exercise_{i}"]["type"] = "reading"
|
||||
|
||||
exercise_id = exercise_id + total_qty
|
||||
elif exercise_type == CustomLevelExerciseTypes.LISTENING_1.value:
|
||||
exercises = []
|
||||
exercise_qty_q = queue.Queue()
|
||||
total_qty = 0
|
||||
if exercise_mc_qty != -1:
|
||||
exercises.append('multipleChoice')
|
||||
exercise_qty_q.put(exercise_mc_qty)
|
||||
total_qty = total_qty + exercise_mc_qty
|
||||
if exercise_writeblanksquestions_qty != -1:
|
||||
exercises.append('writeBlanksQuestions')
|
||||
exercise_qty_q.put(exercise_writeblanksquestions_qty)
|
||||
total_qty = total_qty + exercise_writeblanksquestions_qty
|
||||
if exercise_writeblanksfill_qty != -1:
|
||||
exercises.append('writeBlanksFill')
|
||||
exercise_qty_q.put(exercise_writeblanksfill_qty)
|
||||
total_qty = total_qty + exercise_writeblanksfill_qty
|
||||
if exercise_writeblanksform_qty != -1:
|
||||
exercises.append('writeBlanksForm')
|
||||
exercise_qty_q.put(exercise_writeblanksform_qty)
|
||||
total_qty = total_qty + exercise_writeblanksform_qty
|
||||
|
||||
response["exercises"][f"exercise_{i}"] = await self._listening.get_listening_question(
|
||||
1, exercise_topic, exercises, exercise_difficulty, exercise_qty_q, exercise_id
|
||||
)
|
||||
response["exercises"][f"exercise_{i}"]["type"] = "listening"
|
||||
|
||||
exercise_id = exercise_id + total_qty
|
||||
elif exercise_type == CustomLevelExerciseTypes.LISTENING_2.value:
|
||||
exercises = []
|
||||
exercise_qty_q = queue.Queue()
|
||||
total_qty = 0
|
||||
if exercise_mc_qty != -1:
|
||||
exercises.append('multipleChoice')
|
||||
exercise_qty_q.put(exercise_mc_qty)
|
||||
total_qty = total_qty + exercise_mc_qty
|
||||
if exercise_writeblanksquestions_qty != -1:
|
||||
exercises.append('writeBlanksQuestions')
|
||||
exercise_qty_q.put(exercise_writeblanksquestions_qty)
|
||||
total_qty = total_qty + exercise_writeblanksquestions_qty
|
||||
|
||||
response["exercises"][f"exercise_{i}"] = await self._listening.get_listening_question(
|
||||
2, exercise_topic, exercises, exercise_difficulty, exercise_qty_q, exercise_id
|
||||
)
|
||||
response["exercises"][f"exercise_{i}"]["type"] = "listening"
|
||||
|
||||
exercise_id = exercise_id + total_qty
|
||||
elif exercise_type == CustomLevelExerciseTypes.LISTENING_3.value:
|
||||
exercises = []
|
||||
exercise_qty_q = queue.Queue()
|
||||
total_qty = 0
|
||||
if exercise_mc3_qty != -1:
|
||||
exercises.append('multipleChoice3Options')
|
||||
exercise_qty_q.put(exercise_mc3_qty)
|
||||
total_qty = total_qty + exercise_mc3_qty
|
||||
if exercise_writeblanksquestions_qty != -1:
|
||||
exercises.append('writeBlanksQuestions')
|
||||
exercise_qty_q.put(exercise_writeblanksquestions_qty)
|
||||
total_qty = total_qty + exercise_writeblanksquestions_qty
|
||||
|
||||
response["exercises"][f"exercise_{i}"] = await self._listening.get_listening_question(
|
||||
3, exercise_topic, exercises, exercise_difficulty, exercise_qty_q, exercise_id
|
||||
)
|
||||
response["exercises"][f"exercise_{i}"]["type"] = "listening"
|
||||
|
||||
exercise_id = exercise_id + total_qty
|
||||
elif exercise_type == CustomLevelExerciseTypes.LISTENING_4.value:
|
||||
exercises = []
|
||||
exercise_qty_q = queue.Queue()
|
||||
total_qty = 0
|
||||
if exercise_mc_qty != -1:
|
||||
exercises.append('multipleChoice')
|
||||
exercise_qty_q.put(exercise_mc_qty)
|
||||
total_qty = total_qty + exercise_mc_qty
|
||||
if exercise_writeblanksquestions_qty != -1:
|
||||
exercises.append('writeBlanksQuestions')
|
||||
exercise_qty_q.put(exercise_writeblanksquestions_qty)
|
||||
total_qty = total_qty + exercise_writeblanksquestions_qty
|
||||
if exercise_writeblanksfill_qty != -1:
|
||||
exercises.append('writeBlanksFill')
|
||||
exercise_qty_q.put(exercise_writeblanksfill_qty)
|
||||
total_qty = total_qty + exercise_writeblanksfill_qty
|
||||
if exercise_writeblanksform_qty != -1:
|
||||
exercises.append('writeBlanksForm')
|
||||
exercise_qty_q.put(exercise_writeblanksform_qty)
|
||||
total_qty = total_qty + exercise_writeblanksform_qty
|
||||
|
||||
response["exercises"][f"exercise_{i}"] = await self._listening.get_listening_question(
|
||||
4, exercise_topic, exercises, exercise_difficulty, exercise_qty_q, exercise_id
|
||||
)
|
||||
response["exercises"][f"exercise_{i}"]["type"] = "listening"
|
||||
|
||||
exercise_id = exercise_id + total_qty
|
||||
|
||||
return response
|
||||
119
ielts_be/services/impl/exam/level/full_exams/level_utas.py
Normal file
119
ielts_be/services/impl/exam/level/full_exams/level_utas.py
Normal file
@@ -0,0 +1,119 @@
|
||||
import json
import logging
import uuid

from ielts_be.services import ILLMService
|
||||
|
||||
|
||||
class LevelUtas:
    """Assembles the five-part UTAS level exam.

    All content is produced by the generators exposed on ``level_service``
    (``gen_multiple_choice``, ``gen_blank_space_text_utas`` and
    ``gen_reading_passage_utas``); this class only sequences the calls and
    wraps the results in the section envelopes returned to the caller.
    """

    def __init__(self, llm: ILLMService, level_service, mc_variants: dict):
        self._llm = llm
        self._mc_variants = mc_variants
        self._level_service = level_service
        # Generated payloads are logged at DEBUG instead of being printed to stdout.
        self._logger = logging.getLogger(__name__)

    @staticmethod
    def _section(prompt: str, exercise_type: str, part: int) -> dict:
        """Return an empty section envelope with a fresh uuid; ``questions`` is filled in later."""
        return {
            "id": str(uuid.uuid4()),
            "prompt": prompt,
            "questions": None,
            "type": exercise_type,
            "part": part
        }

    async def get_level_utas(self, diagnostic: bool = False, min_timer: int = 25):
        """Generate the full UTAS level exam.

        :param diagnostic: echoed back as ``isDiagnostic`` in the result.
        :param min_timer: echoed back as ``minTimer`` in the result.
        :return: dict with the keys ``exercises`` (the five sections), ``isDiagnostic``,
            ``minTimer`` and ``module`` (always ``"level"``).
        """
        mc = self._section(
            "Choose the correct word or group of words that completes the sentences.", "multipleChoice", 1
        )
        umc = self._section(
            "Choose the underlined word or group of words that is not correct.", "multipleChoice", 2
        )
        bs_1 = self._section("Read the text and write the correct word for each space.", "blankSpaceText", 3)
        bs_2 = self._section("Read the text and write the correct word for each space.", "blankSpaceText", 4)
        reading = self._section("Read the text and answer the questions below.", "readingExercises", 5)

        # Every multiple choice batch generated so far is handed to the next call through
        # ``all_exams`` (presumably so the generator can avoid repeating questions - confirm
        # against gen_multiple_choice).
        all_mc_questions = []

        # Blank space multiple choice: three batches of 15 questions with ids starting
        # at 1, 16 and 31, merged into a single 45 question section.
        mc_exercises = []
        for start_id in (1, 16, 31):
            batch = await self._level_service.gen_multiple_choice(
                "blank_space", 15, start_id, utas=True, all_exams=all_mc_questions
            )
            self._logger.debug("UTAS blank space batch starting at id %s: %s", start_id, batch)
            all_mc_questions.append(batch)
            mc_exercises.extend(batch['questions'])
        mc["questions"] = mc_exercises

        # Underlined multiple choice
        # NOTE(review): unlike the section above, the whole generator result is stored here
        # rather than its 'questions' entry - confirm this is what the consumer expects.
        underlined_mc = await self._level_service.gen_multiple_choice(
            "underline", 15, 46, utas=True, all_exams=all_mc_questions
        )
        self._logger.debug("UTAS underlined multiple choice: %s", underlined_mc)
        umc["questions"] = underlined_mc

        # Blank space text 1
        blank_space_text_1 = await self._level_service.gen_blank_space_text_utas(12, 61, 250)
        self._logger.debug("UTAS blank space text 1: %s", blank_space_text_1)
        bs_1["questions"] = blank_space_text_1

        # Blank space text 2
        blank_space_text_2 = await self._level_service.gen_blank_space_text_utas(14, 73, 350)
        self._logger.debug("UTAS blank space text 2: %s", blank_space_text_2)
        bs_2["questions"] = blank_space_text_2

        # Reading text
        reading_text = await self._level_service.gen_reading_passage_utas(87, 10, 4)
        self._logger.debug("UTAS reading text: %s", reading_text)
        reading["questions"] = reading_text

        return {
            "exercises": {
                "blankSpaceMultipleChoice": mc,
                "underlinedMultipleChoice": umc,
                "blankSpaceText1": bs_1,
                "blankSpaceText2": bs_2,
                "readingExercises": reading,
            },
            "isDiagnostic": diagnostic,
            "minTimer": min_timer,
            "module": "level"
        }
|
||||
137
ielts_be/services/impl/exam/level/mc_variants.json
Normal file
137
ielts_be/services/impl/exam/level/mc_variants.json
Normal file
@@ -0,0 +1,137 @@
|
||||
{
|
||||
"normal": {
|
||||
"questions": [
|
||||
{
|
||||
"id": "9",
|
||||
"options": [
|
||||
{
|
||||
"id": "A",
|
||||
"text": "And"
|
||||
},
|
||||
{
|
||||
"id": "B",
|
||||
"text": "Cat"
|
||||
},
|
||||
{
|
||||
"id": "C",
|
||||
"text": "Happy"
|
||||
},
|
||||
{
|
||||
"id": "D",
|
||||
"text": "Jump"
|
||||
}
|
||||
],
|
||||
"prompt": "Which of the following is a conjunction?",
|
||||
"solution": "A",
|
||||
"variant": "text"
|
||||
}
|
||||
]
|
||||
},
|
||||
"blank_space": {
|
||||
"questions": [
|
||||
{
|
||||
"id": "9",
|
||||
"options": [
|
||||
{
|
||||
"id": "A",
|
||||
"text": "This"
|
||||
},
|
||||
{
|
||||
"id": "B",
|
||||
"text": "Those"
|
||||
},
|
||||
{
|
||||
"id": "C",
|
||||
"text": "These"
|
||||
},
|
||||
{
|
||||
"id": "D",
|
||||
"text": "That"
|
||||
}
|
||||
],
|
||||
"prompt": "_____ man there is very kind.",
|
||||
"solution": "D",
|
||||
"variant": "text"
|
||||
}
|
||||
]
|
||||
},
|
||||
"underline": {
|
||||
"questions": [
|
||||
{
|
||||
"id": "9",
|
||||
"options": [
|
||||
{
|
||||
"id": "A",
|
||||
"text": "was"
|
||||
},
|
||||
{
|
||||
"id": "B",
|
||||
"text": "for work"
|
||||
},
|
||||
{
|
||||
"id": "C",
|
||||
"text": "because"
|
||||
},
|
||||
{
|
||||
"id": "D",
|
||||
"text": "could"
|
||||
}
|
||||
],
|
||||
"prompt": "I <u>was</u> late <u>for work</u> yesterday <u>because</u> I <u>could</u> start my car.",
|
||||
"solution": "D",
|
||||
"variant": "text"
|
||||
}
|
||||
]
|
||||
},
|
||||
"blank_space_text": {
|
||||
"question": {
|
||||
"words": [
|
||||
{
|
||||
"id": "1",
|
||||
"text": "a"
|
||||
},
|
||||
{
|
||||
"id": "2",
|
||||
"text": "b"
|
||||
},
|
||||
{
|
||||
"id": "3",
|
||||
"text": "c"
|
||||
},
|
||||
{
|
||||
"id": "4",
|
||||
"text": "d"
|
||||
}
|
||||
],
|
||||
"text": "text"
|
||||
}
|
||||
},
|
||||
"text_mc_utas": {
|
||||
"questions": [
|
||||
{
|
||||
"id": "9",
|
||||
"options": [
|
||||
{
|
||||
"id": "A",
|
||||
"text": "a"
|
||||
},
|
||||
{
|
||||
"id": "B",
|
||||
"text": "b"
|
||||
},
|
||||
{
|
||||
"id": "C",
|
||||
"text": "c"
|
||||
},
|
||||
{
|
||||
"id": "D",
|
||||
"text": "d"
|
||||
}
|
||||
],
|
||||
"prompt": "prompt",
|
||||
"solution": "A",
|
||||
"variant": "text"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
338
ielts_be/services/impl/exam/level/upload.py
Normal file
338
ielts_be/services/impl/exam/level/upload.py
Normal file
@@ -0,0 +1,338 @@
|
||||
from uuid import uuid4
|
||||
|
||||
import aiofiles
|
||||
import os
|
||||
from logging import getLogger
|
||||
|
||||
from typing import Dict, Any, Optional
|
||||
|
||||
import pdfplumber
|
||||
from fastapi import UploadFile
|
||||
|
||||
from ielts_be.services import ILLMService
|
||||
from ielts_be.helpers import FileHelper
|
||||
from ielts_be.mappers import LevelMapper
|
||||
|
||||
from ielts_be.dtos.exams.level import Exam
|
||||
from ielts_be.dtos.sheet import Sheet
|
||||
from ielts_be.utils import suppress_loggers
|
||||
|
||||
|
||||
class UploadLevelModule:
    """Converts an uploaded level question sheet into structured exam JSON through the LLM.

    The upload is stored and converted to HTML under ``./tmp/<uuid>/``, sent to the LLM
    together with the JSON templates defined below, and the ids of the parsed result are
    renumbered with :meth:`fix_ids`. The PNG/OCR helpers are kept in the class but are
    not called by :meth:`generate_level_from_file`.
    """

    def __init__(self, openai: ILLMService):
        self._logger = getLogger(__name__)
        self._llm = openai

    async def generate_level_from_file(
            self, exercises: UploadFile, solutions: Optional[UploadFile]
    ) -> Dict[str, Any] | None:
        """Parse an uploaded question sheet (and optional solutions sheet) into exam JSON.

        :param exercises: the uploaded question sheet.
        :param solutions: optional uploaded solutions sheet; when given it is sent to the
            LLM as an additional user message.
        :return: the exam as a dict (``None`` fields dropped, ids renumbered), or ``None``
            when the LLM call yields a falsy response.
        """
        path_id = str(uuid4())
        work_dir = f'./tmp/{path_id}'
        # One test decides both whether the solutions file is written and whether it is read back.
        has_solutions = solutions is not None

        try:
            ext, _ = await FileHelper.save_upload(exercises, "exercises", path_id)
            FileHelper.convert_file_to_html(f'{work_dir}/exercises.{ext}', f'{work_dir}/exercises.html')

            if has_solutions:
                ext, _ = await FileHelper.save_upload(solutions, "solutions", path_id)
                FileHelper.convert_file_to_html(f'{work_dir}/solutions.{ext}', f'{work_dir}/solutions.html')

            response = await self._html_completion(path_id, has_solutions)
        finally:
            # Drop the scratch directory even when saving, conversion or the LLM call fails.
            # The existence check keeps a failed cleanup of a directory that was never
            # created from masking the original error.
            if os.path.isdir(work_dir):
                FileHelper.remove_directory(work_dir)

        if response:
            return self.fix_ids(response.model_dump(exclude_none=True))
        return None

    @staticmethod
    @suppress_loggers()
    def _check_pdf_for_images(pdf_path: str) -> bool:
        """Return True as soon as any page of the PDF at ``pdf_path`` reports images."""
        with pdfplumber.open(pdf_path) as pdf:
            for page in pdf.pages:
                if page.images:
                    return True
        return False

    def _level_json_schema(self):
        """Return the template of the expected exam JSON; callers pass it on as ``str(...)``."""
        return {
            "parts": [
                {
                    "text": {
                        "content": "<this attribute is mandatory if there is a text passage else this 'text' field is omitted>",
                        "title": "<this attribute is optional you may exclude it if not required>",
                    },
                    "exercises": [
                        self._multiple_choice_html(),
                        self._passage_blank_space_html()
                    ]
                }
            ]
        }

    async def _html_completion(self, path_id: str, solutions_provided: bool) -> Exam:
        """Send the converted HTML (plus the solutions HTML when provided) to the LLM.

        :param path_id: name of the scratch directory under ``./tmp`` holding the HTML files.
        :param solutions_provided: whether ``solutions.html`` exists and must be attached.
        :return: whatever ``pydantic_prediction`` yields for ``LevelMapper.map_to_exam_model``.
        """
        async with aiofiles.open(f'./tmp/{path_id}/exercises.html', 'r', encoding='utf-8') as f:
            html = await f.read()

        solutions = []
        if solutions_provided:
            async with aiofiles.open(f'./tmp/{path_id}/solutions.html', 'r', encoding='utf-8') as f:
                solutions_html = await f.read()
            solutions.append({
                "role": "user",
                "content": f'The solutions to the question sheet are the following:\n\n{solutions_html}'
            })

        return await self._llm.pydantic_prediction(
            [self._gpt_instructions_html(),
             {
                 "role": "user",
                 "content": html
             },
             *solutions
             ],
            LevelMapper.map_to_exam_model,
            str(self._level_json_schema())
        )

    def _gpt_instructions_html(self):
        """Build the system message that tells the LLM how to turn the sheet HTML into exam JSON."""
        # NOTE(review): the emphasis wording in the "PARTS not SECTION" paragraph contains
        # profanity that is sent verbatim to the LLM provider - consider rewording.
        return {
            "role": "system",
            "content": (
                'You are GPT Scraper and your job is to clean dirty html into clean usable JSON formatted data. '
                'Your current task is to scrape html english questions sheets and structure them into parts NOT sections.\n\n'

                'In the question sheet you will only see 4 types of question:\n'
                '- blank space multiple choice\n'
                '- underline multiple choice\n'
                '- reading passage blank space multiple choice\n'
                '- reading passage multiple choice\n\n'

                'For the first two types of questions the template is the same but the question prompts differ, '
                'whilst in the blank space multiple choice you must include in the prompt the blank spaces with '
                'multiple "_", in the underline you must include in the prompt the <u></u> to '
                'indicate the underline and the options a, b, c, d must be the ordered underlines in the prompt.\n\n'

                'For the reading passage exercise you must handle the formatting of the passages. If it is a '
                'reading passage with blank spaces you will see blanks represented with (question id) followed by a '
                'line and your job is to replace the brackets with the question id and line with "{{question id}}" '
                'with 2 newlines between paragraphs. For the reading passages without blanks you must remove '
                'any numbers that may be there to specify paragraph numbers or line numbers, and place 2 newlines '
                'between paragraphs.\n\n'

                'IMPORTANT: Note that for the reading passages, the html might not reflect the actual paragraph '
                'structure, don\'t format the reading passages paragraphs only by the <p></p> tags, try to figure '
                'out the best paragraph separation possible.\n\n'

                'You will place all the information in a single JSON: '
                '{"parts": [{"exercises": [{...}], "text": {"title": "", "content": ""}}]}\n '
                'Where {...} are the exercises templates for each part of a question sheet and the optional field '
                'text, which contains the reading passages that are required in order to solve the part questions, '
                '(if there are passages) place them in text.content and if there is a title place it in text.title '
                'else omit the title field.\n'

                'IMPORTANT: As stated earlier your job is to structure the questions into PARTS not SECTION, this means '
                'that if there is for example: Section 1, Part 1 and Part 2, Section 2, Part 1 and Part 2, you MUST '
                'place in the parts array 4 parts NOT 2 parts with the exercises of both parts! If there are no sections '
                'and only Parts then group them by parts, and when I say parts I mean it in the fucking literal sense of the'
                ' word Part x which is in the html. '
                'You must strictly adhere to this instruction, do not mistake sections for parts!\n'

                'The templates for the exercises are the following:\n'
                '- blank space multiple choice, underline multiple choice and reading passage multiple choice: '
                f'{self._multiple_choice_html()}\n'
                f'- reading passage blank space multiple choice: {self._passage_blank_space_html()}\n'

                'IMPORTANT: The text.content field must be set with the reading passages of a part (if there is one) '
                'without paragraphs or line numbers, with 2 newlines between paragraphs.'
            )
        }

    @staticmethod
    def _multiple_choice_html():
        """Return a fresh template dict describing a multiple choice exercise."""
        return {
            "type": "multipleChoice",
            "prompt": "<general instructions for this section>",
            "questions": [
                {
                    "id": "<question number as string>",
                    "prompt": "<question text>",
                    "options": [
                        {
                            "id": "<A/B/C/D>",
                            "text": "<option text>"
                        }
                    ],
                    "solution": "<correct option letter>",
                    "variant": "text"
                }
            ]
        }

    @staticmethod
    def _passage_blank_space_html():
        """Return a fresh template dict describing a passage fill-the-blanks (mc variant) exercise."""
        return {
            "type": "fillBlanks",
            "variant": "mc",
            "prompt": "Click a blank to select the appropriate word for it.",
            "text": (
                "<The whole text for the exercise with replacements for blank spaces and their "
                "ids with {{<question id/number>}} with 2 newlines between paragraphs>"
            ),
            "solutions": [
                {
                    "id": "<question number>",
                    "solution": "<the option that holds the solution>"
                }
            ],
            "words": [
                {
                    "id": "<question number>",
                    "options": {
                        "A": "<a option>",
                        "B": "<b option>",
                        "C": "<c option>",
                        "D": "<d option>"
                    }
                }
            ]
        }

    async def _png_completion(self, path_id: str) -> Exam:
        """Scan the sheet page by page from PNG renders and merge the batches into an exam.

        Each batch holds a page together with the following one (the last page is sent
        alone), so every page except the first and last is seen twice.
        """
        FileHelper.pdf_to_png(path_id)

        tmp_files = os.listdir(f'./tmp/{path_id}')
        pages = [f for f in tmp_files if f.startswith('page-') and f.endswith('.png')]
        # Order numerically by the N in "page-N.png"; a plain string sort would put 10 before 2.
        pages.sort(key=lambda f: int(f.split('-')[1].split('.')[0]))

        json_schema = {
            "components": [
                {"type": "part", "part": "<name or number of the part>"},
                self._multiple_choice_png(),
                {"type": "blanksPassage", "text": (
                    "<The whole text for the exercise with replacements for blank spaces and their "
                    "ids with {{<question number>}} with 2 newlines between paragraphs>"
                )},
                {"type": "passage", "context": (
                    "<reading passages without paragraphs or line numbers, with 2 newlines between paragraphs>"
                )},
                self._passage_blank_space_png()
            ]
        }

        components = []
        for index, _ in enumerate(pages):
            # Current page plus the next one; the slice shrinks to one page at the end.
            batch = pages[index:index + 2]

            sheet = await self._png_batch(path_id, batch, json_schema)
            sheet.batch = index + 1
            components.append(sheet.model_dump())

        batches = {"batches": components}

        return await self._batches_to_exam_completion(batches)

    async def _png_batch(self, path_id: str, files: list[str], json_schema) -> Sheet:
        """Ask the LLM to extract the sheet components from one batch of page images."""
        return await self._llm.pydantic_prediction(
            [self._gpt_instructions_png(),
             {
                 "role": "user",
                 "content": [
                     *FileHelper.b64_pngs(path_id, files)
                 ]
             }
             ],
            LevelMapper.map_to_sheet,
            str(json_schema)
        )

    def _gpt_instructions_png(self):
        """Build the system message that tells the LLM how to scan page images into components."""
        return {
            "role": "system",
            "content": (
                'You are GPT OCR and your job is to scan image text data and format it to JSON format. '
                'Your current task is to scan english questions sheets.\n\n'

                'You will place all the information in a single JSON: {"components": [{...}]} where {...} is a set of '
                'sheet components you will retrieve from the images, the components and their corresponding JSON '
                'templates are as follows:\n'

                '- Part, a standalone part or part of a section of the question sheet: '
                '{"type": "part", "part": "<name or number of the part>"}\n'

                '- Multiple Choice Question, there are three types of multiple choice questions that differ on '
                'the prompt field of the template: blanks, underlines and normal. '

                'In the blanks prompt you must leave 5 underscores to represent the blank space. '
                'In the underlines questions the objective is to pick the words that are incorrect in the given '
                'sentence, for these questions you must wrap the answer to the question with the html tag <u></u>, '
                'choose 3 other words to wrap in <u></u>, place them in the prompt field and use the underlined words '
                'in the order they appear in the question for the options A to D, disregard options that might be '
                'included underneath the underlines question and use the ones you wrapped in <u></u>. '
                'In normal you just leave the question as is. '

                f'The template for multiple choice questions is the following: {self._multiple_choice_png()}.\n'

                '- Reading Passages, there are two types of reading passages. Reading passages where you will see '
                'blanks represented by a (question id) followed by a line, you must format these types of reading '
                'passages to be only the text with the brackets that have the question id and line replaced with '
                '"{{question id}}", also place 2 newlines between paragraphs. For the reading passages without blanks '
                'you must remove any numbers that may be there to specify paragraph numbers or line numbers, '
                'and place 2 newlines between paragraphs. '

                'For the reading passages with blanks the template is: {"type": "blanksPassage", '
                '"text": "<The whole text for the exercise with replacements for blank spaces and their '
                'ids that are enclosed in brackets with {{<question id>}} also place 2 newlines between paragraphs>"}. '

                'For the reading passage without blanks is: {"type": "passage", "context": "<reading passages without '
                'paragraphs or line numbers, with 2 newlines between paragraphs>"}\n'

                '- Blanks Options, options for a blanks reading passage exercise, this type of component is a group of '
                'options with the question id and the options from a to d. The template is: '
                f'{self._passage_blank_space_png()}\n'

                'IMPORTANT: You must place the components in the order that they were given to you. If an exercise or '
                'reading passages are cut off don\'t include them in the JSON.'
            )
        }

    def _multiple_choice_png(self):
        """Return the single-question multiple choice template used for image scans (no solution)."""
        multiple_choice = self._multiple_choice_html()["questions"][0]
        multiple_choice["type"] = "multipleChoice"
        multiple_choice.pop("solution")
        return multiple_choice

    def _passage_blank_space_png(self):
        """Return the single blank-options template used for image scans."""
        passage_blank_space = self._passage_blank_space_html()["words"][0]
        passage_blank_space["type"] = "fillBlanks"
        return passage_blank_space

    async def _batches_to_exam_completion(self, batches: Dict[str, Any]) -> Exam:
        """Ask the LLM to merge the per-batch scan results into a single exam.

        The HTML system message is reused here; the batches are sent as ``str(batches)``.
        """
        return await self._llm.pydantic_prediction(
            [self._gpt_instructions_html(),
             {
                 "role": "user",
                 "content": str(batches)
             }
             ],
            LevelMapper.map_to_exam_model,
            str(self._level_json_schema())
        )

    @staticmethod
    def fix_ids(response):
        """Renumber question ids sequentially (starting at 1) across all parts, in place.

        Multiple choice questions receive one id each. For fillBlanks exercises the i-th
        entry of ``words`` and the i-th entry of ``solutions`` receive the same id, so
        ``solutions`` must have at least as many entries as ``words``.

        NOTE(review): the ``{{id}}`` placeholders inside a fillBlanks ``text`` are not
        rewritten here - confirm they still line up with the renumbered ids.

        :param response: exam dict with a ``parts`` list; each part needs an ``exercises`` list.
        :return: the same ``response`` object.
        """
        counter = 1
        for part in response["parts"]:
            for exercise in part["exercises"]:
                if exercise["type"] == "multipleChoice":
                    for question in exercise["questions"]:
                        question["id"] = counter
                        counter += 1
                if exercise["type"] == "fillBlanks":
                    for i in range(len(exercise["words"])):
                        exercise["words"][i]["id"] = counter
                        exercise["solutions"][i]["id"] = counter
                        counter += 1
        return response
|
||||
Reference in New Issue
Block a user