\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
index d56657a..6601cfb 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -1,4 +1,10 @@
-
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
index 94a25f7..35eb1dd 100644
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
@@ -1,6 +1,6 @@
-
+
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index efbac17..5c9b4e8 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -11,7 +11,17 @@ ENV APP_HOME /app
WORKDIR $APP_HOME
COPY . ./
-RUN apt update && apt install -y ffmpeg
+RUN apt update && apt install -y \
+ ffmpeg \
+ poppler-utils \
+ texlive-latex-base \
+ texlive-fonts-recommended \
+ texlive-latex-extra \
+ texlive-xetex \
+ pandoc \
+ librsvg2-bin \
+ && rm -rf /var/lib/apt/lists/*
+
# Install production dependencies.
RUN pip install --no-cache-dir -r requirements.txt
diff --git a/app.py b/app.py
index 684a422..64fe488 100644
--- a/app.py
+++ b/app.py
@@ -5,17 +5,23 @@ import firebase_admin
from firebase_admin import credentials
from flask import Flask, request
from flask_jwt_extended import JWTManager, jwt_required
+from sentence_transformers import SentenceTransformer
from helper.api_messages import *
from helper.exam_variant import ExamVariant
from helper.exercises import *
from helper.file_helper import delete_files_older_than_one_day
from helper.firebase_helper import *
+from helper.gpt_zero import GPTZero
from helper.heygen_api import create_video, create_videos_and_save_to_db
from helper.openai_interface import *
from helper.question_templates import *
from helper.speech_to_text_helper import *
from heygen.AvatarEnum import AvatarEnum
+from modules import GPT
+from modules.training_content import TrainingContentService, TrainingContentKnowledgeBase
+from modules.upload_level import UploadLevelService
+
load_dotenv()
@@ -30,6 +36,18 @@ FIREBASE_BUCKET = os.getenv('FIREBASE_BUCKET')
firebase_admin.initialize_app(cred)
+gpt_zero = GPTZero(os.getenv('GPT_ZERO_API_KEY'))
+
+# Training Content Dependencies
+embeddings = SentenceTransformer('all-MiniLM-L6-v2')
+kb = TrainingContentKnowledgeBase(embeddings)
+kb.load_indices_and_metadata()
+open_ai = GPT(OpenAI())
+firestore_client = firestore.client()
+tc_service = TrainingContentService(kb, open_ai, firestore_client)
+
+upload_level_service = UploadLevelService(open_ai)
+
thread_event = threading.Event()
# Configure logging
@@ -52,24 +70,7 @@ def get_listening_section_1_question():
req_exercises = request.args.getlist('exercises')
difficulty = request.args.get("difficulty", default=random.choice(difficulties))
- if (len(req_exercises) == 0):
- req_exercises = random.sample(LISTENING_EXERCISE_TYPES, 1)
-
- number_of_exercises_q = divide_number_into_parts(TOTAL_LISTENING_SECTION_1_EXERCISES, len(req_exercises))
-
- processed_conversation = generate_listening_1_conversation(topic)
-
- app.logger.info("Generated conversation: " + str(processed_conversation))
-
- start_id = 1
- exercises = generate_listening_conversation_exercises(parse_conversation(processed_conversation), req_exercises,
- number_of_exercises_q,
- start_id, difficulty)
- return {
- "exercises": exercises,
- "text": processed_conversation,
- "difficulty": difficulty
- }
+ return gen_listening_section_1(topic, difficulty, req_exercises)
except Exception as e:
return str(e)
@@ -84,22 +85,7 @@ def get_listening_section_2_question():
req_exercises = request.args.getlist('exercises')
difficulty = request.args.get("difficulty", default=random.choice(difficulties))
- if (len(req_exercises) == 0):
- req_exercises = random.sample(LISTENING_EXERCISE_TYPES, 2)
-
- number_of_exercises_q = divide_number_into_parts(TOTAL_LISTENING_SECTION_2_EXERCISES, len(req_exercises))
-
- monologue = generate_listening_2_monologue(topic)
-
- app.logger.info("Generated monologue: " + str(monologue))
- start_id = 11
- exercises = generate_listening_monologue_exercises(str(monologue), req_exercises, number_of_exercises_q,
- start_id, difficulty)
- return {
- "exercises": exercises,
- "text": monologue,
- "difficulty": difficulty
- }
+ return gen_listening_section_2(topic, difficulty, req_exercises)
except Exception as e:
return str(e)
@@ -114,24 +100,7 @@ def get_listening_section_3_question():
req_exercises = request.args.getlist('exercises')
difficulty = request.args.get("difficulty", default=random.choice(difficulties))
- if (len(req_exercises) == 0):
- req_exercises = random.sample(LISTENING_EXERCISE_TYPES, 1)
-
- number_of_exercises_q = divide_number_into_parts(TOTAL_LISTENING_SECTION_3_EXERCISES, len(req_exercises))
-
- processed_conversation = generate_listening_3_conversation(topic)
-
- app.logger.info("Generated conversation: " + str(processed_conversation))
-
- start_id = 21
- exercises = generate_listening_conversation_exercises(parse_conversation(processed_conversation), req_exercises,
- number_of_exercises_q,
- start_id, difficulty)
- return {
- "exercises": exercises,
- "text": processed_conversation,
- "difficulty": difficulty
- }
+ return gen_listening_section_3(topic, difficulty, req_exercises)
except Exception as e:
return str(e)
@@ -146,22 +115,7 @@ def get_listening_section_4_question():
req_exercises = request.args.getlist('exercises')
difficulty = request.args.get("difficulty", default=random.choice(difficulties))
- if (len(req_exercises) == 0):
- req_exercises = random.sample(LISTENING_EXERCISE_TYPES, 2)
-
- number_of_exercises_q = divide_number_into_parts(TOTAL_LISTENING_SECTION_4_EXERCISES, len(req_exercises))
-
- monologue = generate_listening_4_monologue(topic)
-
- app.logger.info("Generated monologue: " + str(monologue))
- start_id = 31
- exercises = generate_listening_monologue_exercises(str(monologue), req_exercises, number_of_exercises_q,
- start_id, difficulty)
- return {
- "exercises": exercises,
- "text": monologue,
- "difficulty": difficulty
- }
+ return gen_listening_section_4(topic, difficulty, req_exercises)
except Exception as e:
return str(e)
@@ -176,7 +130,7 @@ def save_listening():
difficulty = data.get('difficulty', random.choice(difficulties))
template = getListeningTemplate()
template['difficulty'] = difficulty
- id = str(uuid.uuid4())
+ id = data.get('id', str(uuid.uuid4()))
for i, part in enumerate(parts, start=0):
part_template = getListeningPartTemplate()
@@ -221,10 +175,22 @@ def grade_writing_task_1():
'comment': "The answer does not contain enough english words.",
'overall': 0,
'task_response': {
- 'Coherence and Cohesion': 0,
- 'Grammatical Range and Accuracy': 0,
- 'Lexical Resource': 0,
- 'Task Achievement': 0
+ 'Task Achievement': {
+ "grade": 0.0,
+ "comment": ""
+ },
+ 'Coherence and Cohesion': {
+ "grade": 0.0,
+ "comment": ""
+ },
+ 'Lexical Resource': {
+ "grade": 0.0,
+ "comment": ""
+ },
+ 'Grammatical Range and Accuracy': {
+ "grade": 0.0,
+ "comment": ""
+ }
}
}
elif not has_x_words(answer, 100):
@@ -232,42 +198,79 @@ def grade_writing_task_1():
'comment': "The answer is insufficient and too small to be graded.",
'overall': 0,
'task_response': {
- 'Coherence and Cohesion': 0,
- 'Grammatical Range and Accuracy': 0,
- 'Lexical Resource': 0,
- 'Task Achievement': 0
+ 'Task Achievement': {
+ "grade": 0.0,
+ "comment": ""
+ },
+ 'Coherence and Cohesion': {
+ "grade": 0.0,
+ "comment": ""
+ },
+ 'Lexical Resource': {
+ "grade": 0.0,
+ "comment": ""
+ },
+ 'Grammatical Range and Accuracy': {
+ "grade": 0.0,
+ "comment": ""
+ }
}
}
else:
+ json_format = {
+ "comment": "comment about student's response quality",
+ "overall": 0.0,
+ "task_response": {
+ "Task Achievement": {
+ "grade": 0.0,
+ "comment": "comment about Task Achievement of the student's response"
+ },
+ "Coherence and Cohesion": {
+ "grade": 0.0,
+ "comment": "comment about Coherence and Cohesion of the student's response"
+ },
+ "Lexical Resource": {
+ "grade": 0.0,
+ "comment": "comment about Lexical Resource of the student's response"
+ },
+ "Grammatical Range and Accuracy": {
+ "grade": 0.0,
+ "comment": "comment about Grammatical Range and Accuracy of the student's response"
+ }
+ }
+ }
+
messages = [
{
"role": "system",
- "content": ('You are a helpful assistant designed to output JSON on this format: '
- '{"perfect_answer": "example perfect answer", "comment": '
- '"comment about answer quality", "overall": 0.0, "task_response": '
- '{"Task Achievement": 0.0, "Coherence and Cohesion": 0.0, '
- '"Lexical Resource": 0.0, "Grammatical Range and Accuracy": 0.0 }')
+ "content": ('You are a helpful assistant designed to output JSON on this format: ' + str(
+ json_format))
},
{
"role": "user",
"content": ('Evaluate the given Writing Task 1 response based on the IELTS grading system, '
'ensuring a strict assessment that penalizes errors. Deduct points for deviations '
'from the task, and assign a score of 0 if the response fails to address the question. '
- 'Additionally, provide an exemplary answer with a minimum of 150 words, along with a '
- 'detailed commentary highlighting both strengths and weaknesses in the response. '
+ 'Additionally, provide a detailed commentary highlighting both strengths and '
+ 'weaknesses in the response. '
'\n Question: "' + question + '" \n Answer: "' + answer + '"')
},
{
"role": "user",
- "content": 'The perfect answer must have at least 150 words.'
+ "content": ('Refer to the parts of the letter as: "Greeting Opener", "bullet 1", "bullet 2", '
+ '"bullet 3", "closer (restate the purpose of the letter)", "closing greeting"')
}
]
token_count = count_total_tokens(messages)
response = make_openai_call(GPT_3_5_TURBO, messages, token_count,
["comment"],
GRADING_TEMPERATURE)
+ response["perfect_answer"] = get_perfect_answer(question, 150)["perfect_answer"]
response["overall"] = fix_writing_overall(response["overall"], response["task_response"])
response['fixed_text'] = get_fixed_text(answer)
+ ai_detection = gpt_zero.run_detection(answer)
+ if ai_detection is not None:
+ response['ai_detection'] = ai_detection
return response
except Exception as e:
return str(e)
@@ -279,36 +282,15 @@ def get_writing_task_1_general_question():
difficulty = request.args.get("difficulty", default=random.choice(difficulties))
topic = request.args.get("topic", default=random.choice(mti_topics))
try:
- messages = [
- {
- "role": "system",
- "content": ('You are a helpful assistant designed to output JSON on this format: '
- '{"prompt": "prompt content"}')
- },
- {
- "role": "user",
- "content": ('Craft a prompt for an IELTS Writing Task 1 General Training exercise that instructs the '
- 'student to compose a letter. The prompt should present a specific scenario or situation, '
- 'based on the topic of "' + topic + '", requiring the student to provide information, '
- 'advice, or instructions within the letter. '
- 'Make sure that the generated prompt is '
- 'of ' + difficulty + 'difficulty and does not contain '
- 'forbidden subjects in muslim '
- 'countries.')
- }
- ]
- token_count = count_total_tokens(messages)
- response = make_openai_call(GPT_3_5_TURBO, messages, token_count, "prompt",
- GEN_QUESTION_TEMPERATURE)
- return {
- "question": response["prompt"].strip(),
- "difficulty": difficulty,
- "topic": topic
- }
+ return gen_writing_task_1(topic, difficulty)
except Exception as e:
return str(e)
+def add_newline_before_hyphen(s):
+ return s.replace(" -", "\n-")
+
+
@app.route('/writing_task2', methods=['POST'])
@jwt_required()
def grade_writing_task_2():
@@ -321,10 +303,22 @@ def grade_writing_task_2():
'comment': "The answer does not contain enough english words.",
'overall': 0,
'task_response': {
- 'Coherence and Cohesion': 0,
- 'Grammatical Range and Accuracy': 0,
- 'Lexical Resource': 0,
- 'Task Achievement': 0
+ 'Task Achievement': {
+ "grade": 0.0,
+ "comment": ""
+ },
+ 'Coherence and Cohesion': {
+ "grade": 0.0,
+ "comment": ""
+ },
+ 'Lexical Resource': {
+ "grade": 0.0,
+ "comment": ""
+ },
+ 'Grammatical Range and Accuracy': {
+ "grade": 0.0,
+ "comment": ""
+ }
}
}
elif not has_x_words(answer, 180):
@@ -332,53 +326,88 @@ def grade_writing_task_2():
'comment': "The answer is insufficient and too small to be graded.",
'overall': 0,
'task_response': {
- 'Coherence and Cohesion': 0,
- 'Grammatical Range and Accuracy': 0,
- 'Lexical Resource': 0,
- 'Task Achievement': 0
+ 'Task Achievement': {
+ "grade": 0.0,
+ "comment": ""
+ },
+ 'Coherence and Cohesion': {
+ "grade": 0.0,
+ "comment": ""
+ },
+ 'Lexical Resource': {
+ "grade": 0.0,
+ "comment": ""
+ },
+ 'Grammatical Range and Accuracy': {
+ "grade": 0.0,
+ "comment": ""
+ }
}
}
else:
+ json_format = {
+ "comment": "comment about student's response quality",
+ "overall": 0.0,
+ "task_response": {
+ "Task Achievement": {
+ "grade": 0.0,
+ "comment": "comment about Task Achievement of the student's response"
+ },
+ "Coherence and Cohesion": {
+ "grade": 0.0,
+ "comment": "comment about Coherence and Cohesion of the student's response"
+ },
+ "Lexical Resource": {
+ "grade": 0.0,
+ "comment": "comment about Lexical Resource of the student's response"
+ },
+ "Grammatical Range and Accuracy": {
+ "grade": 0.0,
+ "comment": "comment about Grammatical Range and Accuracy of the student's response"
+ }
+ }
+ }
+
messages = [
{
"role": "system",
- "content": ('You are a helpful assistant designed to output JSON on this format: '
- '{"perfect_answer": "example perfect answer", "comment": '
- '"comment about answer quality", "overall": 0.0, "task_response": '
- '{"Task Achievement": 0.0, "Coherence and Cohesion": 0.0, '
- '"Lexical Resource": 0.0, "Grammatical Range and Accuracy": 0.0 }')
+ "content": ('You are a helpful assistant designed to output JSON on this format: ' + str(
+ json_format))
},
{
"role": "user",
"content": (
- 'Evaluate the given Writing Task 2 response based on the IELTS grading system, ensuring a '
- 'strict assessment that penalizes errors. Deduct points for deviations from the task, and '
- 'assign a score of 0 if the response fails to address the question. Additionally, provide an '
- 'exemplary answer with a minimum of 250 words, along with a detailed commentary highlighting '
- 'both strengths and weaknesses in the response.'
- '\n Question: "' + question + '" \n Answer: "' + answer + '"')
- },
- {
- "role": "user",
- "content": 'The perfect answer must have at least 250 words.'
+ 'Evaluate the given Writing Task 2 response based on the IELTS grading system, ensuring a '
+ 'strict assessment that penalizes errors. Deduct points for deviations from the task, and '
+ 'assign a score of 0 if the response fails to address the question. Additionally, provide'
+ ' a detailed commentary highlighting '
+ 'both strengths and weaknesses in the response.'
+ '\n Question: "' + question + '" \n Answer: "' + answer + '"')
}
]
token_count = count_total_tokens(messages)
response = make_openai_call(GPT_4_O, messages, token_count, ["comment"],
GEN_QUESTION_TEMPERATURE)
+ response["perfect_answer"] = get_perfect_answer(question, 250)["perfect_answer"]
response["overall"] = fix_writing_overall(response["overall"], response["task_response"])
response['fixed_text'] = get_fixed_text(answer)
+ ai_detection = gpt_zero.run_detection(answer)
+ if ai_detection is not None:
+ response['ai_detection'] = ai_detection
return response
except Exception as e:
return str(e)
def fix_writing_overall(overall: float, task_response: dict):
- if overall > max(task_response.values()) or overall < min(task_response.values()):
- total_sum = sum(task_response.values())
- average = total_sum / len(task_response.values())
+ grades = [category["grade"] for category in task_response.values()]
+
+ if overall > max(grades) or overall < min(grades):
+ total_sum = sum(grades)
+ average = total_sum / len(grades)
rounded_average = round(average, 0)
return rounded_average
+
return overall
@@ -388,28 +417,7 @@ def get_writing_task_2_general_question():
difficulty = request.args.get("difficulty", default=random.choice(difficulties))
topic = request.args.get("topic", default=random.choice(mti_topics))
try:
- messages = [
- {
- "role": "system",
- "content": ('You are a helpful assistant designed to output JSON on this format: '
- '{"prompt": "prompt content"}')
- },
- {
- "role": "user",
- "content": (
- 'Craft a comprehensive question of ' + difficulty + 'difficulty like the ones for IELTS Writing Task 2 General Training that directs the candidate '
- 'to delve into an in-depth analysis of contrasting perspectives on the topic of "' + topic + '". '
- 'The candidate should be asked to discuss the strengths and weaknesses of both viewpoints, provide evidence or '
- 'examples, and present a well-rounded argument before concluding with their personal opinion on the subject.')
- }
- ]
- token_count = count_total_tokens(messages)
- response = make_openai_call(GPT_4_O, messages, token_count, "prompt", GEN_QUESTION_TEMPERATURE)
- return {
- "question": response["prompt"].strip(),
- "difficulty": difficulty,
- "topic": topic
- }
+ return gen_writing_task_2(topic, difficulty)
except Exception as e:
return str(e)
@@ -419,48 +427,56 @@ def get_writing_task_2_general_question():
def grade_speaking_task_1():
request_id = uuid.uuid4()
delete_files_older_than_one_day(AUDIO_FILES_PATH)
- sound_file_name = AUDIO_FILES_PATH + str(uuid.uuid4())
logging.info("POST - speaking_task_1 - Received request to grade speaking task 1. "
"Use this id to track the logs: " + str(request_id) + " - Request data: " + str(request.get_json()))
try:
data = request.get_json()
- question = data.get('question')
- answer_firebase_path = data.get('answer')
-
- logging.info("POST - speaking_task_1 - " + str(request_id) + " - Downloading file " + answer_firebase_path)
- download_firebase_file(FIREBASE_BUCKET, answer_firebase_path, sound_file_name)
+ answers = data.get('answers')
+ text_answers = []
+ perfect_answers = []
logging.info("POST - speaking_task_1 - " + str(
- request_id) + " - Downloaded file " + answer_firebase_path + " to " + sound_file_name)
+ request_id) + " - Received " + str(len(answers)) + " total answers.")
- answer = speech_to_text(sound_file_name)
- logging.info("POST - speaking_task_1 - " + str(request_id) + " - Transcripted answer: " + answer)
+ for item in answers:
+ sound_file_name = AUDIO_FILES_PATH + str(uuid.uuid4())
- if has_x_words(answer, 20):
- messages = [
- {
- "role": "system",
- "content": (
- 'You are a helpful assistant designed to output JSON on this format: '
- '{"comment": "comment about answer quality", "overall": 0.0, '
- '"task_response": {"Fluency and Coherence": 0.0, "Lexical Resource": 0.0, '
- '"Grammatical Range and Accuracy": 0.0, "Pronunciation": 0.0}}')
- },
- {
- "role": "user",
- "content": (
- 'Evaluate the given Speaking Part 1 response based on the IELTS grading system, ensuring a '
- 'strict assessment that penalizes errors. Deduct points for deviations from the task, and '
- 'assign a score of 0 if the response fails to address the question. Additionally, provide '
- 'detailed commentary highlighting both strengths and weaknesses in the response.'
- '\n Question: "' + question + '" \n Answer: "' + answer + '"')
+ logging.info("POST - speaking_task_1 - " + str(request_id) + " - Downloading file " + item["answer"])
+ download_firebase_file(FIREBASE_BUCKET, item["answer"], sound_file_name)
+ logging.info("POST - speaking_task_1 - " + str(
+ request_id) + " - Downloaded file " + item["answer"] + " to " + sound_file_name)
+
+ answer_text = speech_to_text(sound_file_name)
+ logging.info("POST - speaking_task_1 - " + str(request_id) + " - Transcripted answer: " + answer_text)
+
+ text_answers.append(answer_text)
+ item["answer"] = answer_text
+ os.remove(sound_file_name)
+
+ if not has_x_words(answer_text, 20):
+ logging.info("POST - speaking_task_1 - " + str(
+ request_id) + " - The answer had less words than threshold 20 to be graded. Answer: " + answer_text)
+ return {
+ "comment": "The audio recorded does not contain enough english words to be graded.",
+ "overall": 0,
+ "task_response": {
+ "Fluency and Coherence": {
+ "grade": 0.0,
+ "comment": ""
+ },
+ "Lexical Resource": {
+ "grade": 0.0,
+ "comment": ""
+ },
+ "Grammatical Range and Accuracy": {
+ "grade": 0.0,
+ "comment": ""
+ },
+ "Pronunciation": {
+ "grade": 0.0,
+ "comment": ""
+ }
+ }
}
- ]
- token_count = count_total_tokens(messages)
-
- logging.info("POST - speaking_task_1 - " + str(request_id) + " - Requesting grading of the answer.")
- response = make_openai_call(GPT_3_5_TURBO, messages, token_count, ["comment"],
- GRADING_TEMPERATURE)
- logging.info("POST - speaking_task_1 - " + str(request_id) + " - Answer graded: " + str(response))
perfect_answer_messages = [
{
@@ -472,88 +488,119 @@ def grade_speaking_task_1():
"role": "user",
"content": (
'Provide a perfect answer according to ielts grading system to the following '
- 'Speaking Part 1 question: "' + question + '"')
+ 'Speaking Part 1 question: "' + item["question"] + '"')
+ },
+ {
+ "role": "user",
+ "content": 'The answer must be 2 or 3 sentences long.'
}
]
+
token_count = count_total_tokens(perfect_answer_messages)
-
- logging.info("POST - speaking_task_1 - " + str(request_id) + " - Requesting perfect answer.")
- response['perfect_answer'] = make_openai_call(GPT_3_5_TURBO,
- perfect_answer_messages,
- token_count,
- ["answer"],
- GEN_QUESTION_TEMPERATURE)["answer"]
logging.info("POST - speaking_task_1 - " + str(
- request_id) + " - Perfect answer: " + response['perfect_answer'])
+ request_id) + " - Requesting perfect answer for question: " + item["question"])
+ perfect_answers.append(make_openai_call(GPT_4_O,
+ perfect_answer_messages,
+ token_count,
+ ["answer"],
+ GEN_QUESTION_TEMPERATURE))
- response['transcript'] = answer
-
- logging.info("POST - speaking_task_1 - " + str(request_id) + " - Requesting fixed text.")
- response['fixed_text'] = get_speaking_corrections(answer)
- logging.info("POST - speaking_task_1 - " + str(request_id) + " - Fixed text: " + response['fixed_text'])
-
- if response["overall"] == "0.0" or response["overall"] == 0.0:
- response["overall"] = round((response["task_response"]["Fluency and Coherence"] +
- response["task_response"]["Lexical Resource"] + response["task_response"][
- "Grammatical Range and Accuracy"] + response["task_response"][
- "Pronunciation"]) / 4, 1)
-
- logging.info("POST - speaking_task_1 - " + str(request_id) + " - Final response: " + str(response))
- return response
- else:
- logging.info("POST - speaking_task_1 - " + str(
- request_id) + " - The answer had less words than threshold 20 to be graded. Answer: " + answer)
- return {
- "comment": "The audio recorded does not contain enough english words to be graded.",
- "overall": 0,
- "task_response": {
- "Fluency and Coherence": 0,
- "Lexical Resource": 0,
- "Grammatical Range and Accuracy": 0,
- "Pronunciation": 0
+ json_format = {
+ "comment": "comment about answers quality",
+ "overall": 0.0,
+ "task_response": {
+ "Fluency and Coherence": {
+ "grade": 0.0,
+ "comment": "comment about fluency and coherence"
+ },
+ "Lexical Resource": {
+ "grade": 0.0,
+ "comment": "comment about lexical resource"
+ },
+ "Grammatical Range and Accuracy": {
+ "grade": 0.0,
+ "comment": "comment about grammatical range and accuracy"
+ },
+ "Pronunciation": {
+ "grade": 0.0,
+ "comment": "comment about pronunciation on the transcribed answers"
}
}
+ }
+
+ logging.info("POST - speaking_task_1 - " + str(request_id) + " - Formatting answers and questions for prompt.")
+ formatted_text = ""
+ for i, entry in enumerate(answers, start=1):
+ formatted_text += f"**Question {i}:**\n{entry['question']}\n\n"
+ formatted_text += f"**Answer {i}:**\n{entry['answer']}\n\n"
+ logging.info("POST - speaking_task_1 - " + str(
+ request_id) + " - Formatted answers and questions for prompt: " + formatted_text)
+
+ grade_message = (
+ 'Evaluate the given Speaking Part 1 response based on the IELTS grading system, ensuring a '
+ 'strict assessment that penalizes errors. Deduct points for deviations from the task, and '
+ 'assign a score of 0 if the response fails to address the question. Additionally, provide '
+ 'detailed commentary highlighting both strengths and weaknesses in the response.'
+ "\n\n The questions and answers are: \n\n'" + formatted_text)
+
+ messages = [
+ {
+ "role": "system",
+ "content": (
+ 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format))
+ },
+ {
+ "role": "user",
+ "content": grade_message
+ },
+ {
+ "role": "user",
+ "content": 'Address the student as "you". If the answers are not 2 or 3 sentences long, warn the '
+ 'student that they should be.'
+ },
+ {
+ "role": "user",
+ "content": 'For pronunciations act as if you heard the answers and they were transcripted as you heard them.'
+ },
+ {
+ "role": "user",
+ "content": 'The comments must be long, detailed, justify the grading and suggest improvements.'
+ }
+ ]
+ token_count = count_total_tokens(messages)
+
+ logging.info("POST - speaking_task_1 - " + str(request_id) + " - Requesting grading of the answer.")
+ response = make_openai_call(GPT_4_O, messages, token_count, ["comment"],
+ GRADING_TEMPERATURE)
+ logging.info("POST - speaking_task_1 - " + str(request_id) + " - Answers graded: " + str(response))
+
+ logging.info("POST - speaking_task_1 - " + str(request_id) + " - Adding perfect answers to response.")
+ for i, answer in enumerate(perfect_answers, start=1):
+ response['perfect_answer_' + str(i)] = answer
+
+ logging.info("POST - speaking_task_1 - " + str(
+ request_id) + " - Adding transcript and fixed texts to response.")
+ for i, answer in enumerate(text_answers, start=1):
+ response['transcript_' + str(i)] = answer
+ response['fixed_text_' + str(i)] = get_speaking_corrections(answer)
+
+ response["overall"] = fix_speaking_overall(response["overall"], response["task_response"])
+
+ logging.info("POST - speaking_task_1 - " + str(request_id) + " - Final response: " + str(response))
+ return response
except Exception as e:
- os.remove(sound_file_name)
return str(e), 400
@app.route('/speaking_task_1', methods=['GET'])
@jwt_required()
def get_speaking_task_1_question():
- difficulty = request.args.get("difficulty", default=random.choice(difficulties))
- topic = request.args.get("topic", default=random.choice(mti_topics))
+ difficulty = request.args.get("difficulty", default="easy")
+ first_topic = request.args.get("first_topic", default=random.choice(mti_topics))
+ second_topic = request.args.get("second_topic", default=random.choice(mti_topics))
+
try:
- messages = [
- {
- "role": "system",
- "content": (
- 'You are a helpful assistant designed to output JSON on this format: '
- '{"topic": "topic", "question": "question"}')
- },
- {
- "role": "user",
- "content": (
- 'Craft a thought-provoking question of ' + difficulty + ' difficulty for IELTS Speaking Part 1 '
- 'that encourages candidates to delve deeply into '
- 'personal experiences, preferences, or insights on the topic '
- 'of "' + topic + '". Instruct the candidate '
- 'to offer not only detailed '
- 'descriptions but also provide '
- 'nuanced explanations, examples, '
- 'or anecdotes to enrich their response. '
- 'Make sure that the generated question '
- 'does not contain forbidden subjects in '
- 'muslim countries.')
- }
- ]
- token_count = count_total_tokens(messages)
- response = make_openai_call(GPT_4_O, messages, token_count, ["topic"],
- GEN_QUESTION_TEMPERATURE)
- response["type"] = 1
- response["difficulty"] = difficulty
- response["topic"] = topic
- return response
+ return gen_speaking_part_1(first_topic, second_topic, difficulty)
except Exception as e:
return str(e)
@@ -579,15 +626,38 @@ def grade_speaking_task_2():
answer = speech_to_text(sound_file_name)
logging.info("POST - speaking_task_2 - " + str(request_id) + " - Transcripted answer: " + answer)
+ json_format = {
+ "comment": "extensive comment about answer quality",
+ "overall": 0.0,
+ "task_response": {
+ "Fluency and Coherence": {
+ "grade": 0.0,
+ "comment": "extensive comment about fluency and coherence, use examples to justify the grade "
+ "awarded."
+ },
+ "Lexical Resource": {
+ "grade": 0.0,
+ "comment": "extensive comment about lexical resource, use examples to justify the grade awarded."
+ },
+ "Grammatical Range and Accuracy": {
+ "grade": 0.0,
+ "comment": "extensive comment about grammatical range and accuracy, use examples to justify the "
+ "grade awarded."
+ },
+ "Pronunciation": {
+ "grade": 0.0,
+ "comment": "extensive comment about pronunciation on the transcribed answer, use examples to "
+ "justify the grade awarded."
+ }
+ }
+ }
+
if has_x_words(answer, 20):
messages = [
{
"role": "system",
"content": (
- 'You are a helpful assistant designed to output JSON on this format: '
- '{"comment": "comment about answer quality", "overall": 0.0, '
- '"task_response": {"Fluency and Coherence": 0.0, "Lexical Resource": 0.0, '
- '"Grammatical Range and Accuracy": 0.0, "Pronunciation": 0.0}}')
+ 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format))
},
{
"role": "user",
@@ -597,13 +667,17 @@ def grade_speaking_task_2():
'assign a score of 0 if the response fails to address the question. Additionally, provide '
'detailed commentary highlighting both strengths and weaknesses in the response.'
'\n Question: "' + question + '" \n Answer: "' + answer + '"')
+ },
+ {
+ "role": "user",
+ "content": 'Address the student as "you"'
}
]
token_count = count_total_tokens(messages)
logging.info("POST - speaking_task_2 - " + str(request_id) + " - Requesting grading of the answer.")
- response = make_openai_call(GPT_3_5_TURBO, messages, token_count,["comment"],
- GRADING_TEMPERATURE)
+ response = make_openai_call(GPT_4_O, messages, token_count, ["comment"],
+ GRADING_TEMPERATURE)
logging.info("POST - speaking_task_2 - " + str(request_id) + " - Answer graded: " + str(response))
perfect_answer_messages = [
@@ -636,11 +710,7 @@ def grade_speaking_task_2():
response['fixed_text'] = get_speaking_corrections(answer)
logging.info("POST - speaking_task_2 - " + str(request_id) + " - Fixed text: " + response['fixed_text'])
- if response["overall"] == "0.0" or response["overall"] == 0.0:
- response["overall"] = round((response["task_response"]["Fluency and Coherence"] +
- response["task_response"]["Lexical Resource"] + response["task_response"][
- "Grammatical Range and Accuracy"] + response["task_response"][
- "Pronunciation"]) / 4, 1)
+ response["overall"] = fix_speaking_overall(response["overall"], response["task_response"])
logging.info("POST - speaking_task_2 - " + str(request_id) + " - Final response: " + str(response))
return response
@@ -651,10 +721,22 @@ def grade_speaking_task_2():
"comment": "The audio recorded does not contain enough english words to be graded.",
"overall": 0,
"task_response": {
- "Fluency and Coherence": 0,
- "Lexical Resource": 0,
- "Grammatical Range and Accuracy": 0,
- "Pronunciation": 0
+ "Fluency and Coherence": {
+ "grade": 0.0,
+ "comment": ""
+ },
+ "Lexical Resource": {
+ "grade": 0.0,
+ "comment": ""
+ },
+ "Grammatical Range and Accuracy": {
+ "grade": 0.0,
+ "comment": ""
+ },
+ "Pronunciation": {
+ "grade": 0.0,
+ "comment": ""
+ }
}
}
except Exception as e:
@@ -667,36 +749,9 @@ def grade_speaking_task_2():
def get_speaking_task_2_question():
difficulty = request.args.get("difficulty", default=random.choice(difficulties))
topic = request.args.get("topic", default=random.choice(mti_topics))
+
try:
- messages = [
- {
- "role": "system",
- "content": (
- 'You are a helpful assistant designed to output JSON on this format: '
- '{"topic": "topic", "question": "question", "prompts": ["prompt_1", "prompt_2", "prompt_3"]}')
- },
- {
- "role": "user",
- "content": (
- 'Create a question of ' + difficulty + ' difficulty for IELTS Speaking Part 2 '
- 'that encourages candidates to narrate a '
- 'personal experience or story related to the topic '
- 'of "' + topic + '". Include 3 prompts that '
- 'guide the candidate to describe '
- 'specific aspects of the experience, '
- 'such as details about the situation, '
- 'their actions, and the reasons it left a '
- 'lasting impression. Make sure that the '
- 'generated question does not contain '
- 'forbidden subjects in muslim countries.')
- }
- ]
- token_count = count_total_tokens(messages)
- response = make_openai_call(GPT_4_O, messages, token_count, GEN_FIELDS, GEN_QUESTION_TEMPERATURE)
- response["type"] = 2
- response["difficulty"] = difficulty
- response["topic"] = topic
- return response
+ return gen_speaking_part_2(topic, difficulty)
except Exception as e:
return str(e)
@@ -706,33 +761,9 @@ def get_speaking_task_2_question():
def get_speaking_task_3_question():
difficulty = request.args.get("difficulty", default=random.choice(difficulties))
topic = request.args.get("topic", default=random.choice(mti_topics))
- try:
- messages = [
- {
- "role": "system",
- "content": (
- 'You are a helpful assistant designed to output JSON on this format: '
- '{"topic": "topic", "questions": ["question", "question", "question"]}')
- },
- {
- "role": "user",
- "content": (
- 'Formulate a set of 3 questions of ' + difficulty + ' difficulty for IELTS Speaking Part 3 that encourage candidates to engage in a '
- 'meaningful discussion on the topic of "' + topic + '". Provide inquiries, ensuring '
- 'they explore various aspects, perspectives, and implications related to the topic.'
- 'Make sure that the generated question does not contain forbidden subjects in muslim countries.')
- }
- ]
- token_count = count_total_tokens(messages)
- response = make_openai_call(GPT_4_O, messages, token_count, GEN_FIELDS, GEN_QUESTION_TEMPERATURE)
- # Remove the numbers from the questions only if the string starts with a number
- response["questions"] = [re.sub(r"^\d+\.\s*", "", question) if re.match(r"^\d+\.", question) else question for
- question in response["questions"]]
- response["type"] = 3
- response["difficulty"] = difficulty
- response["topic"] = topic
- return response
+ try:
+ return gen_speaking_part_3(topic, difficulty)
except Exception as e:
return str(e)
@@ -772,10 +803,22 @@ def grade_speaking_task_3():
"comment": "The audio recorded does not contain enough english words to be graded.",
"overall": 0,
"task_response": {
- "Fluency and Coherence": 0,
- "Lexical Resource": 0,
- "Grammatical Range and Accuracy": 0,
- "Pronunciation": 0
+ "Fluency and Coherence": {
+ "grade": 0.0,
+ "comment": ""
+ },
+ "Lexical Resource": {
+ "grade": 0.0,
+ "comment": ""
+ },
+ "Grammatical Range and Accuracy": {
+ "grade": 0.0,
+ "comment": ""
+ },
+ "Pronunciation": {
+ "grade": 0.0,
+ "comment": ""
+ }
}
}
@@ -801,22 +844,28 @@ def grade_speaking_task_3():
["answer"],
GEN_QUESTION_TEMPERATURE))
- messages = [
- {
- "role": "system",
- "content": (
- 'You are a helpful assistant designed to output JSON on this format: '
- '{"comment": "comment about answer quality", "overall": 0.0, '
- '"task_response": {"Fluency and Coherence": 0.0, "Lexical Resource": 0.0, '
- '"Grammatical Range and Accuracy": 0.0, "Pronunciation": 0.0}}')
+ json_format = {
+ "comment": "extensive comment about answer quality",
+ "overall": 0.0,
+ "task_response": {
+ "Fluency and Coherence": {
+ "grade": 0.0,
+ "comment": "extensive comment about fluency and coherence, use examples to justify the grade awarded."
+ },
+ "Lexical Resource": {
+ "grade": 0.0,
+ "comment": "extensive comment about lexical resource, use examples to justify the grade awarded."
+ },
+ "Grammatical Range and Accuracy": {
+ "grade": 0.0,
+ "comment": "extensive comment about grammatical range and accuracy, use examples to justify the grade awarded."
+ },
+ "Pronunciation": {
+ "grade": 0.0,
+ "comment": "extensive comment about pronunciation on the transcribed answer, use examples to justify the grade awarded."
+ }
}
- ]
- message = (
- "Evaluate the given Speaking Part 3 response based on the IELTS grading system, ensuring a "
- "strict assessment that penalizes errors. Deduct points for deviations from the task, and "
- "assign a score of 0 if the response fails to address the question. Additionally, provide detailed "
- "commentary highlighting both strengths and weaknesses in the response."
- "\n\n The questions and answers are: \n\n'")
+ }
logging.info("POST - speaking_task_3 - " + str(request_id) + " - Formatting answers and questions for prompt.")
formatted_text = ""
@@ -826,17 +875,41 @@ def grade_speaking_task_3():
logging.info("POST - speaking_task_3 - " + str(
request_id) + " - Formatted answers and questions for prompt: " + formatted_text)
- message += formatted_text
+ grade_message = (
+ "Evaluate the given Speaking Part 3 response based on the IELTS grading system, ensuring a "
+ "strict assessment that penalizes errors. Deduct points for deviations from the task, and "
+ "assign a score of 0 if the response fails to address the question. Additionally, provide detailed "
+ "commentary highlighting both strengths and weaknesses in the response."
+ "\n\n The questions and answers are: \n\n'")
- messages.append({
- "role": "user",
- "content": message
- })
+ messages = [
+ {
+ "role": "system",
+ "content": (
+ 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format))
+ },
+ {
+ "role": "user",
+ "content": grade_message
+ },
+ {
+ "role": "user",
+ "content": 'Address the student as "you" and pay special attention to coherence between the answers.'
+ },
+ {
+ "role": "user",
+ "content": 'For pronunciations act as if you heard the answers and they were transcripted as you heard them.'
+ },
+ {
+ "role": "user",
+ "content": 'The comments must be long, detailed, justify the grading and suggest improvements.'
+ }
+ ]
token_count = count_total_tokens(messages)
logging.info("POST - speaking_task_3 - " + str(request_id) + " - Requesting grading of the answers.")
- response = make_openai_call(GPT_3_5_TURBO, messages, token_count, ["comment"], GRADING_TEMPERATURE)
+ response = make_openai_call(GPT_4_O, messages, token_count, ["comment"], GRADING_TEMPERATURE)
logging.info("POST - speaking_task_3 - " + str(request_id) + " - Answers graded: " + str(response))
logging.info("POST - speaking_task_3 - " + str(request_id) + " - Adding perfect answers to response.")
@@ -848,16 +921,25 @@ def grade_speaking_task_3():
for i, answer in enumerate(text_answers, start=1):
response['transcript_' + str(i)] = answer
response['fixed_text_' + str(i)] = get_speaking_corrections(answer)
- if response["overall"] == "0.0" or response["overall"] == 0.0:
- response["overall"] = round((response["task_response"]["Fluency and Coherence"] + response["task_response"][
- "Lexical Resource"] + response["task_response"]["Grammatical Range and Accuracy"] +
- response["task_response"]["Pronunciation"]) / 4, 1)
+ response["overall"] = fix_speaking_overall(response["overall"], response["task_response"])
logging.info("POST - speaking_task_3 - " + str(request_id) + " - Final response: " + str(response))
return response
except Exception as e:
return str(e), 400
+def fix_speaking_overall(overall: float, task_response: dict):
+    # The model sometimes returns the overall grade as a string (e.g. "0.0").
+    overall = float(overall)
+    grades = [category["grade"] for category in task_response.values()]
+
+    if overall > max(grades) or overall < min(grades):
+        average = sum(grades) / len(grades)
+        return round(average, 0)
+
+    return overall
+
+
@app.route('/speaking', methods=['POST'])
@jwt_required()
def save_speaking():
@@ -890,21 +972,109 @@ def save_speaking():
return str(e)
-@app.route("/speaking/generate_speaking_video", methods=['POST'])
+@app.route("/speaking/generate_video_1", methods=['POST'])
@jwt_required()
-def generate_speaking_video():
+def generate_video_1():
+ try:
+ data = request.get_json()
+ sp1_questions = []
+ avatar = data.get("avatar", random.choice(list(AvatarEnum)).value)
+
+ request_id = str(uuid.uuid4())
+ logging.info("POST - generate_video_1 - Received request to generate video 1. "
+ "Use this id to track the logs: " + str(request_id) + " - Request data: " + str(
+ request.get_json()))
+
+ id_to_name = {
+ "5912afa7c77c47d3883af3d874047aaf": "MATTHEW",
+ "9e58d96a383e4568a7f1e49df549e0e4": "VERA",
+ "d2cdd9c0379a4d06ae2afb6e5039bd0c": "EDWARD",
+ "045cb5dcd00042b3a1e4f3bc1c12176b": "TANYA",
+ "1ae1e5396cc444bfad332155fdb7a934": "KAYLA",
+ "0ee6aa7cc1084063a630ae514fccaa31": "JEROME",
+ "5772cff935844516ad7eeff21f839e43": "TYLER",
+
+ }
+
+ standard_questions = [
+ "Hello my name is " + id_to_name.get(avatar) + ", what is yours?",
+ "Do you work or do you study?"
+ ]
+ questions = standard_questions + data["questions"]
+ logging.info("POST - generate_video_1 - " + str(request_id) + " - Creating videos for speaking part 1.")
+ for question in questions:
+ logging.info("POST - generate_video_1 - " + str(request_id) + " - Creating video for question: " + question)
+ result = create_video(question, avatar)
+        logging.info("POST - generate_video_1 - " + str(request_id) + " - Video created: " + str(result))
+ if result is not None:
+ sound_file_path = VIDEO_FILES_PATH + result
+ firebase_file_path = FIREBASE_SPEAKING_VIDEO_FILES_PATH + result
+ logging.info(
+ "POST - generate_video_1 - " + str(
+ request_id) + " - Uploading video to firebase: " + firebase_file_path)
+ url = upload_file_firebase_get_url(FIREBASE_BUCKET, firebase_file_path, sound_file_path)
+ logging.info(
+ "POST - generate_video_1 - " + str(
+ request_id) + " - Uploaded video to firebase: " + url)
+ video = {
+ "text": question,
+ "video_path": firebase_file_path,
+ "video_url": url
+ }
+ sp1_questions.append(video)
+ else:
+ logging.error("POST - generate_video_1 - " + str(
+ request_id) + " - Failed to create video for part 1 question: " + question)
+
+ response = {
+ "prompts": sp1_questions,
+ "first_title": data["first_topic"],
+ "second_title": data["second_topic"],
+ "type": "interactiveSpeaking",
+ "id": uuid.uuid4()
+ }
+ logging.info(
+ "POST - generate_video_1 - " + str(
+ request_id) + " - Finished creating videos for speaking part 1: " + str(response))
+ return response
+ except Exception as e:
+ return str(e)
+
+
+@app.route("/speaking/generate_video_2", methods=['POST'])
+@jwt_required()
+def generate_video_2():
try:
data = request.get_json()
avatar = data.get("avatar", random.choice(list(AvatarEnum)).value)
prompts = data.get("prompts", [])
question = data.get("question")
- if len(prompts) > 0:
- question = question + " In your answer you should consider: " + " ".join(prompts)
- sp1_result = create_video(question, avatar)
- if sp1_result is not None:
- sound_file_path = VIDEO_FILES_PATH + sp1_result
- firebase_file_path = FIREBASE_SPEAKING_VIDEO_FILES_PATH + sp1_result
+ suffix = data.get("suffix", "")
+
+ # Removed as the examiner should not say what is on the card.
+ # question = question + " In your answer you should consider: " + " ".join(prompts) + suffix
+ question = question + "\nYou have 1 minute to take notes."
+
+ request_id = str(uuid.uuid4())
+ logging.info("POST - generate_video_2 - Received request to generate video 2. "
+ "Use this id to track the logs: " + str(request_id) + " - Request data: " + str(
+ request.get_json()))
+
+ logging.info("POST - generate_video_2 - " + str(request_id) + " - Creating video for speaking part 2.")
+ logging.info("POST - generate_video_2 - " + str(request_id) + " - Creating video for question: " + question)
+ result = create_video(question, avatar)
+        logging.info("POST - generate_video_2 - " + str(request_id) + " - Video created: " + str(result))
+
+ if result is not None:
+ sound_file_path = VIDEO_FILES_PATH + result
+ firebase_file_path = FIREBASE_SPEAKING_VIDEO_FILES_PATH + result
+ logging.info(
+ "POST - generate_video_2 - " + str(
+ request_id) + " - Uploading video to firebase: " + firebase_file_path)
url = upload_file_firebase_get_url(FIREBASE_BUCKET, firebase_file_path, sound_file_path)
+ logging.info(
+ "POST - generate_video_2 - " + str(
+ request_id) + " - Uploaded video to firebase: " + url)
sp1_video_path = firebase_file_path
sp1_video_url = url
@@ -915,31 +1085,47 @@ def generate_speaking_video():
"video_url": sp1_video_url,
"video_path": sp1_video_path,
"type": "speaking",
- "id": uuid.uuid4()
+ "id": uuid.uuid4(),
+ "suffix": suffix
}
else:
- app.logger.error("Failed to create video for part 1 question: " + data["question"])
- return str("Failed to create video for part 1 question: " + data["question"])
+ logging.error("POST - generate_video_2 - " + str(
+ request_id) + " - Failed to create video for part 2 question: " + question)
+ return str("Failed to create video for part 2 question: " + data["question"])
except Exception as e:
return str(e)
-@app.route("/speaking/generate_interactive_video", methods=['POST'])
+@app.route("/speaking/generate_video_3", methods=['POST'])
@jwt_required()
-def generate_interactive_video():
+def generate_video_3():
try:
data = request.get_json()
sp3_questions = []
avatar = data.get("avatar", random.choice(list(AvatarEnum)).value)
- app.logger.info('Creating videos for speaking part 3')
+ request_id = str(uuid.uuid4())
+ logging.info("POST - generate_video_3 - Received request to generate video 3. "
+ "Use this id to track the logs: " + str(request_id) + " - Request data: " + str(
+ request.get_json()))
+
+ logging.info("POST - generate_video_3 - " + str(request_id) + " - Creating videos for speaking part 3.")
for question in data["questions"]:
+ logging.info("POST - generate_video_3 - " + str(request_id) + " - Creating video for question: " + question)
result = create_video(question, avatar)
+            logging.info("POST - generate_video_3 - " + str(request_id) + " - Video created: " + str(result))
+
if result is not None:
sound_file_path = VIDEO_FILES_PATH + result
firebase_file_path = FIREBASE_SPEAKING_VIDEO_FILES_PATH + result
+ logging.info(
+ "POST - generate_video_3 - " + str(
+ request_id) + " - Uploading video to firebase: " + firebase_file_path)
url = upload_file_firebase_get_url(FIREBASE_BUCKET, firebase_file_path, sound_file_path)
+ logging.info(
+ "POST - generate_video_3 - " + str(
+ request_id) + " - Uploaded video to firebase: " + url)
video = {
"text": question,
"video_path": firebase_file_path,
@@ -947,14 +1133,19 @@ def generate_interactive_video():
}
sp3_questions.append(video)
else:
- app.app.logger.error("Failed to create video for part 3 question: " + question)
+ logging.error("POST - generate_video_3 - " + str(
+ request_id) + " - Failed to create video for part 3 question: " + question)
- return {
+ response = {
"prompts": sp3_questions,
"title": data["topic"],
"type": "interactiveSpeaking",
"id": uuid.uuid4()
}
+ logging.info(
+ "POST - generate_video_3 - " + str(
+ request_id) + " - Finished creating videos for speaking part 3: " + str(response))
+ return response
except Exception as e:
return str(e)
@@ -967,7 +1158,7 @@ def get_reading_passage_1_question():
topic = request.args.get('topic', default=random.choice(topics))
req_exercises = request.args.getlist('exercises')
difficulty = request.args.get("difficulty", default=random.choice(difficulties))
- return gen_reading_passage_1(topic, req_exercises, difficulty)
+ return gen_reading_passage_1(topic, difficulty, req_exercises)
except Exception as e:
return str(e)
@@ -980,7 +1171,7 @@ def get_reading_passage_2_question():
topic = request.args.get('topic', default=random.choice(topics))
req_exercises = request.args.getlist('exercises')
difficulty = request.args.get("difficulty", default=random.choice(difficulties))
- return gen_reading_passage_2(topic, req_exercises, difficulty)
+ return gen_reading_passage_2(topic, difficulty, req_exercises)
except Exception as e:
return str(e)
@@ -993,7 +1184,7 @@ def get_reading_passage_3_question():
topic = request.args.get('topic', default=random.choice(topics))
req_exercises = request.args.getlist('exercises')
difficulty = request.args.get("difficulty", default=random.choice(difficulties))
- return gen_reading_passage_3(topic, req_exercises, difficulty)
+ return gen_reading_passage_3(topic, difficulty, req_exercises)
except Exception as e:
return str(e)
@@ -1013,6 +1204,7 @@ def get_level_exam():
except Exception as e:
return str(e)
+
@app.route('/level_utas', methods=['GET'])
@jwt_required()
def get_level_utas():
@@ -1115,6 +1307,355 @@ def get_level_utas():
return str(e)
+from enum import Enum
+
+
+class CustomLevelExerciseTypes(Enum):
+ MULTIPLE_CHOICE_4 = "multiple_choice_4"
+ MULTIPLE_CHOICE_BLANK_SPACE = "multiple_choice_blank_space"
+ MULTIPLE_CHOICE_UNDERLINED = "multiple_choice_underlined"
+ BLANK_SPACE_TEXT = "blank_space_text"
+ READING_PASSAGE_UTAS = "reading_passage_utas"
+ WRITING_LETTER = "writing_letter"
+ WRITING_2 = "writing_2"
+ SPEAKING_1 = "speaking_1"
+ SPEAKING_2 = "speaking_2"
+ SPEAKING_3 = "speaking_3"
+ READING_1 = "reading_1"
+ READING_2 = "reading_2"
+ READING_3 = "reading_3"
+ LISTENING_1 = "listening_1"
+ LISTENING_2 = "listening_2"
+ LISTENING_3 = "listening_3"
+ LISTENING_4 = "listening_4"
+
+
+@app.route('/custom_level', methods=['GET'])
+@jwt_required()
+def get_custom_level():
+    nr_exercises = int(request.args.get('nr_exercises', 0))
+
+ exercise_id = 1
+ response = {
+ "exercises": {},
+ "module": "level"
+ }
+ for i in range(1, nr_exercises + 1, 1):
+ exercise_type = request.args.get('exercise_' + str(i) + '_type')
+ exercise_difficulty = request.args.get('exercise_' + str(i) + '_difficulty',
+ random.choice(['easy', 'medium', 'hard']))
+ exercise_qty = int(request.args.get('exercise_' + str(i) + '_qty', -1))
+ exercise_topic = request.args.get('exercise_' + str(i) + '_topic', random.choice(topics))
+ exercise_topic_2 = request.args.get('exercise_' + str(i) + '_topic_2', random.choice(topics))
+ exercise_text_size = int(request.args.get('exercise_' + str(i) + '_text_size', 700))
+ exercise_sa_qty = int(request.args.get('exercise_' + str(i) + '_sa_qty', -1))
+ exercise_mc_qty = int(request.args.get('exercise_' + str(i) + '_mc_qty', -1))
+ exercise_mc3_qty = int(request.args.get('exercise_' + str(i) + '_mc3_qty', -1))
+ exercise_fillblanks_qty = int(request.args.get('exercise_' + str(i) + '_fillblanks_qty', -1))
+ exercise_writeblanks_qty = int(request.args.get('exercise_' + str(i) + '_writeblanks_qty', -1))
+ exercise_writeblanksquestions_qty = int(
+ request.args.get('exercise_' + str(i) + '_writeblanksquestions_qty', -1))
+ exercise_writeblanksfill_qty = int(request.args.get('exercise_' + str(i) + '_writeblanksfill_qty', -1))
+ exercise_writeblanksform_qty = int(request.args.get('exercise_' + str(i) + '_writeblanksform_qty', -1))
+ exercise_truefalse_qty = int(request.args.get('exercise_' + str(i) + '_truefalse_qty', -1))
+ exercise_paragraphmatch_qty = int(request.args.get('exercise_' + str(i) + '_paragraphmatch_qty', -1))
+ exercise_ideamatch_qty = int(request.args.get('exercise_' + str(i) + '_ideamatch_qty', -1))
+
+ if exercise_type == CustomLevelExerciseTypes.MULTIPLE_CHOICE_4.value:
+ response["exercises"]["exercise_" + str(i)] = {}
+ response["exercises"]["exercise_" + str(i)]["questions"] = []
+ response["exercises"]["exercise_" + str(i)]["type"] = "multipleChoice"
+ while exercise_qty > 0:
+ if exercise_qty - 15 > 0:
+ qty = 15
+ else:
+ qty = exercise_qty
+
+ response["exercises"]["exercise_" + str(i)]["questions"].extend(
+ generate_level_mc(exercise_id, qty,
+ response["exercises"]["exercise_" + str(i)]["questions"])["questions"])
+ exercise_id = exercise_id + qty
+ exercise_qty = exercise_qty - qty
+
+ elif exercise_type == CustomLevelExerciseTypes.MULTIPLE_CHOICE_BLANK_SPACE.value:
+ response["exercises"]["exercise_" + str(i)] = {}
+ response["exercises"]["exercise_" + str(i)]["questions"] = []
+ response["exercises"]["exercise_" + str(i)]["type"] = "multipleChoice"
+ while exercise_qty > 0:
+ if exercise_qty - 15 > 0:
+ qty = 15
+ else:
+ qty = exercise_qty
+
+ response["exercises"]["exercise_" + str(i)]["questions"].extend(
+ gen_multiple_choice_blank_space_utas(qty, exercise_id,
+ response["exercises"]["exercise_" + str(i)]["questions"])[
+ "questions"])
+ exercise_id = exercise_id + qty
+ exercise_qty = exercise_qty - qty
+
+ elif exercise_type == CustomLevelExerciseTypes.MULTIPLE_CHOICE_UNDERLINED.value:
+ response["exercises"]["exercise_" + str(i)] = {}
+ response["exercises"]["exercise_" + str(i)]["questions"] = []
+ response["exercises"]["exercise_" + str(i)]["type"] = "multipleChoice"
+ while exercise_qty > 0:
+ if exercise_qty - 15 > 0:
+ qty = 15
+ else:
+ qty = exercise_qty
+
+ response["exercises"]["exercise_" + str(i)]["questions"].extend(
+ gen_multiple_choice_underlined_utas(qty, exercise_id,
+ response["exercises"]["exercise_" + str(i)]["questions"])[
+ "questions"])
+ exercise_id = exercise_id + qty
+ exercise_qty = exercise_qty - qty
+
+ elif exercise_type == CustomLevelExerciseTypes.BLANK_SPACE_TEXT.value:
+ response["exercises"]["exercise_" + str(i)] = gen_blank_space_text_utas(exercise_qty, exercise_id,
+ exercise_text_size)
+ response["exercises"]["exercise_" + str(i)]["type"] = "blankSpaceText"
+ exercise_id = exercise_id + exercise_qty
+ elif exercise_type == CustomLevelExerciseTypes.READING_PASSAGE_UTAS.value:
+ response["exercises"]["exercise_" + str(i)] = gen_reading_passage_utas(exercise_id, exercise_sa_qty,
+ exercise_mc_qty, exercise_topic)
+ response["exercises"]["exercise_" + str(i)]["type"] = "readingExercises"
+ exercise_id = exercise_id + exercise_qty
+ elif exercise_type == CustomLevelExerciseTypes.WRITING_LETTER.value:
+ response["exercises"]["exercise_" + str(i)] = gen_writing_task_1(exercise_topic, exercise_difficulty)
+ response["exercises"]["exercise_" + str(i)]["type"] = "writing"
+ exercise_id = exercise_id + 1
+ elif exercise_type == CustomLevelExerciseTypes.WRITING_2.value:
+ response["exercises"]["exercise_" + str(i)] = gen_writing_task_2(exercise_topic, exercise_difficulty)
+ response["exercises"]["exercise_" + str(i)]["type"] = "writing"
+ exercise_id = exercise_id + 1
+ elif exercise_type == CustomLevelExerciseTypes.SPEAKING_1.value:
+ response["exercises"]["exercise_" + str(i)] = (
+ gen_speaking_part_1(exercise_topic, exercise_topic_2, exercise_difficulty))
+ response["exercises"]["exercise_" + str(i)]["type"] = "interactiveSpeaking"
+ exercise_id = exercise_id + 1
+ elif exercise_type == CustomLevelExerciseTypes.SPEAKING_2.value:
+ response["exercises"]["exercise_" + str(i)] = gen_speaking_part_2(exercise_topic, exercise_difficulty)
+ response["exercises"]["exercise_" + str(i)]["type"] = "speaking"
+ exercise_id = exercise_id + 1
+ elif exercise_type == CustomLevelExerciseTypes.SPEAKING_3.value:
+ response["exercises"]["exercise_" + str(i)] = gen_speaking_part_3(exercise_topic, exercise_difficulty)
+ response["exercises"]["exercise_" + str(i)]["type"] = "interactiveSpeaking"
+ exercise_id = exercise_id + 1
+ elif exercise_type == CustomLevelExerciseTypes.READING_1.value:
+ exercises = []
+ exercise_qty_q = queue.Queue()
+ total_qty = 0
+ if exercise_fillblanks_qty != -1:
+ exercises.append('fillBlanks')
+ exercise_qty_q.put(exercise_fillblanks_qty)
+ total_qty = total_qty + exercise_fillblanks_qty
+ if exercise_writeblanks_qty != -1:
+ exercises.append('writeBlanks')
+ exercise_qty_q.put(exercise_writeblanks_qty)
+ total_qty = total_qty + exercise_writeblanks_qty
+ if exercise_truefalse_qty != -1:
+ exercises.append('trueFalse')
+ exercise_qty_q.put(exercise_truefalse_qty)
+ total_qty = total_qty + exercise_truefalse_qty
+ if exercise_paragraphmatch_qty != -1:
+ exercises.append('paragraphMatch')
+ exercise_qty_q.put(exercise_paragraphmatch_qty)
+ total_qty = total_qty + exercise_paragraphmatch_qty
+
+ response["exercises"]["exercise_" + str(i)] = gen_reading_passage_1(exercise_topic, exercise_difficulty,
+ exercises, exercise_qty_q, exercise_id)
+ response["exercises"]["exercise_" + str(i)]["type"] = "reading"
+
+ exercise_id = exercise_id + total_qty
+ elif exercise_type == CustomLevelExerciseTypes.READING_2.value:
+ exercises = []
+ exercise_qty_q = queue.Queue()
+ total_qty = 0
+ if exercise_fillblanks_qty != -1:
+ exercises.append('fillBlanks')
+ exercise_qty_q.put(exercise_fillblanks_qty)
+ total_qty = total_qty + exercise_fillblanks_qty
+ if exercise_writeblanks_qty != -1:
+ exercises.append('writeBlanks')
+ exercise_qty_q.put(exercise_writeblanks_qty)
+ total_qty = total_qty + exercise_writeblanks_qty
+ if exercise_truefalse_qty != -1:
+ exercises.append('trueFalse')
+ exercise_qty_q.put(exercise_truefalse_qty)
+ total_qty = total_qty + exercise_truefalse_qty
+ if exercise_paragraphmatch_qty != -1:
+ exercises.append('paragraphMatch')
+ exercise_qty_q.put(exercise_paragraphmatch_qty)
+ total_qty = total_qty + exercise_paragraphmatch_qty
+
+ response["exercises"]["exercise_" + str(i)] = gen_reading_passage_2(exercise_topic, exercise_difficulty,
+ exercises, exercise_qty_q, exercise_id)
+ response["exercises"]["exercise_" + str(i)]["type"] = "reading"
+
+ exercise_id = exercise_id + total_qty
+ elif exercise_type == CustomLevelExerciseTypes.READING_3.value:
+ exercises = []
+ exercise_qty_q = queue.Queue()
+ total_qty = 0
+ if exercise_fillblanks_qty != -1:
+ exercises.append('fillBlanks')
+ exercise_qty_q.put(exercise_fillblanks_qty)
+ total_qty = total_qty + exercise_fillblanks_qty
+ if exercise_writeblanks_qty != -1:
+ exercises.append('writeBlanks')
+ exercise_qty_q.put(exercise_writeblanks_qty)
+ total_qty = total_qty + exercise_writeblanks_qty
+ if exercise_truefalse_qty != -1:
+ exercises.append('trueFalse')
+ exercise_qty_q.put(exercise_truefalse_qty)
+ total_qty = total_qty + exercise_truefalse_qty
+ if exercise_paragraphmatch_qty != -1:
+ exercises.append('paragraphMatch')
+ exercise_qty_q.put(exercise_paragraphmatch_qty)
+ total_qty = total_qty + exercise_paragraphmatch_qty
+ if exercise_ideamatch_qty != -1:
+ exercises.append('ideaMatch')
+ exercise_qty_q.put(exercise_ideamatch_qty)
+ total_qty = total_qty + exercise_ideamatch_qty
+
+ response["exercises"]["exercise_" + str(i)] = gen_reading_passage_3(exercise_topic, exercise_difficulty,
+ exercises, exercise_qty_q, exercise_id)
+ response["exercises"]["exercise_" + str(i)]["type"] = "reading"
+
+ exercise_id = exercise_id + total_qty
+ elif exercise_type == CustomLevelExerciseTypes.LISTENING_1.value:
+ exercises = []
+ exercise_qty_q = queue.Queue()
+ total_qty = 0
+ if exercise_mc_qty != -1:
+ exercises.append('multipleChoice')
+ exercise_qty_q.put(exercise_mc_qty)
+ total_qty = total_qty + exercise_mc_qty
+ if exercise_writeblanksquestions_qty != -1:
+ exercises.append('writeBlanksQuestions')
+ exercise_qty_q.put(exercise_writeblanksquestions_qty)
+ total_qty = total_qty + exercise_writeblanksquestions_qty
+ if exercise_writeblanksfill_qty != -1:
+ exercises.append('writeBlanksFill')
+ exercise_qty_q.put(exercise_writeblanksfill_qty)
+ total_qty = total_qty + exercise_writeblanksfill_qty
+ if exercise_writeblanksform_qty != -1:
+ exercises.append('writeBlanksForm')
+ exercise_qty_q.put(exercise_writeblanksform_qty)
+ total_qty = total_qty + exercise_writeblanksform_qty
+
+ response["exercises"]["exercise_" + str(i)] = gen_listening_section_1(exercise_topic, exercise_difficulty,
+ exercises, exercise_qty_q,
+ exercise_id)
+ response["exercises"]["exercise_" + str(i)]["type"] = "listening"
+
+ exercise_id = exercise_id + total_qty
+ elif exercise_type == CustomLevelExerciseTypes.LISTENING_2.value:
+ exercises = []
+ exercise_qty_q = queue.Queue()
+ total_qty = 0
+ if exercise_mc_qty != -1:
+ exercises.append('multipleChoice')
+ exercise_qty_q.put(exercise_mc_qty)
+ total_qty = total_qty + exercise_mc_qty
+ if exercise_writeblanksquestions_qty != -1:
+ exercises.append('writeBlanksQuestions')
+ exercise_qty_q.put(exercise_writeblanksquestions_qty)
+ total_qty = total_qty + exercise_writeblanksquestions_qty
+
+ response["exercises"]["exercise_" + str(i)] = gen_listening_section_2(exercise_topic, exercise_difficulty,
+ exercises, exercise_qty_q,
+ exercise_id)
+ response["exercises"]["exercise_" + str(i)]["type"] = "listening"
+
+ exercise_id = exercise_id + total_qty
+ elif exercise_type == CustomLevelExerciseTypes.LISTENING_3.value:
+ exercises = []
+ exercise_qty_q = queue.Queue()
+ total_qty = 0
+ if exercise_mc3_qty != -1:
+ exercises.append('multipleChoice3Options')
+ exercise_qty_q.put(exercise_mc3_qty)
+ total_qty = total_qty + exercise_mc3_qty
+ if exercise_writeblanksquestions_qty != -1:
+ exercises.append('writeBlanksQuestions')
+ exercise_qty_q.put(exercise_writeblanksquestions_qty)
+ total_qty = total_qty + exercise_writeblanksquestions_qty
+
+ response["exercises"]["exercise_" + str(i)] = gen_listening_section_3(exercise_topic, exercise_difficulty,
+ exercises, exercise_qty_q,
+ exercise_id)
+ response["exercises"]["exercise_" + str(i)]["type"] = "listening"
+
+ exercise_id = exercise_id + total_qty
+ elif exercise_type == CustomLevelExerciseTypes.LISTENING_4.value:
+ exercises = []
+ exercise_qty_q = queue.Queue()
+ total_qty = 0
+ if exercise_mc_qty != -1:
+ exercises.append('multipleChoice')
+ exercise_qty_q.put(exercise_mc_qty)
+ total_qty = total_qty + exercise_mc_qty
+ if exercise_writeblanksquestions_qty != -1:
+ exercises.append('writeBlanksQuestions')
+ exercise_qty_q.put(exercise_writeblanksquestions_qty)
+ total_qty = total_qty + exercise_writeblanksquestions_qty
+ if exercise_writeblanksfill_qty != -1:
+ exercises.append('writeBlanksFill')
+ exercise_qty_q.put(exercise_writeblanksfill_qty)
+ total_qty = total_qty + exercise_writeblanksfill_qty
+ if exercise_writeblanksform_qty != -1:
+ exercises.append('writeBlanksForm')
+ exercise_qty_q.put(exercise_writeblanksform_qty)
+ total_qty = total_qty + exercise_writeblanksform_qty
+
+ response["exercises"]["exercise_" + str(i)] = gen_listening_section_4(exercise_topic, exercise_difficulty,
+ exercises, exercise_qty_q,
+ exercise_id)
+ response["exercises"]["exercise_" + str(i)]["type"] = "listening"
+
+ exercise_id = exercise_id + total_qty
+
+ return response
+
+
+@app.route('/grade_short_answers', methods=['POST'])
+@jwt_required()
+def grade_short_answers():
+ data = request.get_json()
+
+ json_format = {
+ "exercises": [
+ {
+ "id": 1,
+ "correct": True,
+ "correct_answer": " correct answer if wrong"
+ }
+ ]
+ }
+
+ try:
+ messages = [
+ {
+ "role": "system",
+ "content": (
+ 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format))
+ },
+ {
+ "role": "user",
+ "content": 'Grade these answers according to the text content and write a correct answer if they are '
+ 'wrong. Text, questions and answers:\n ' + str(data)
+
+ }
+ ]
+ token_count = count_total_tokens(messages)
+ response = make_openai_call(GPT_4_O, messages, token_count, GEN_FIELDS, GEN_QUESTION_TEMPERATURE)
+ return response
+ except Exception as e:
+ return str(e)
+
+
@app.route('/fetch_tips', methods=['POST'])
@jwt_required()
def fetch_answer_tips():
@@ -1150,5 +1691,29 @@ def grading_summary():
return str(e)
+@app.route('/training_content', methods=['POST'])
+@jwt_required()
+def training_content():
+ try:
+ data = request.get_json()
+ return tc_service.get_tips(data)
+ except Exception as e:
+ app.logger.error(str(e))
+ return str(e)
+
+
+# TODO: create a doc in firestore with a status and get its id, run this in a thread and modify the doc in firestore,
+# return the id right away, in generation view poll for the id
+@app.route('/upload_level', methods=['POST'])
+def upload_file():
+ if 'file' not in request.files:
+ return 'File wasn\'t uploaded', 400
+ file = request.files['file']
+ if file.filename == '':
+ return 'No selected file', 400
+ if file:
+ return upload_level_service.generate_level_from_file(file), 200
+
+
if __name__ == '__main__':
app.run()
diff --git a/faiss/ct_focus_tips_index.faiss b/faiss/ct_focus_tips_index.faiss
new file mode 100644
index 0000000..909571b
Binary files /dev/null and b/faiss/ct_focus_tips_index.faiss differ
diff --git a/faiss/language_for_writing_tips_index.faiss b/faiss/language_for_writing_tips_index.faiss
new file mode 100644
index 0000000..b9b254c
Binary files /dev/null and b/faiss/language_for_writing_tips_index.faiss differ
diff --git a/faiss/reading_skill_tips_index.faiss b/faiss/reading_skill_tips_index.faiss
new file mode 100644
index 0000000..7113625
Binary files /dev/null and b/faiss/reading_skill_tips_index.faiss differ
diff --git a/faiss/strategy_tips_index.faiss b/faiss/strategy_tips_index.faiss
new file mode 100644
index 0000000..8032155
Binary files /dev/null and b/faiss/strategy_tips_index.faiss differ
diff --git a/faiss/tips_metadata.pkl b/faiss/tips_metadata.pkl
new file mode 100644
index 0000000..ecb3614
Binary files /dev/null and b/faiss/tips_metadata.pkl differ
diff --git a/faiss/word_link_tips_index.faiss b/faiss/word_link_tips_index.faiss
new file mode 100644
index 0000000..b11fd5e
Binary files /dev/null and b/faiss/word_link_tips_index.faiss differ
diff --git a/faiss/word_partners_tips_index.faiss b/faiss/word_partners_tips_index.faiss
new file mode 100644
index 0000000..2f08b63
Binary files /dev/null and b/faiss/word_partners_tips_index.faiss differ
diff --git a/faiss/writing_skill_tips_index.faiss b/faiss/writing_skill_tips_index.faiss
new file mode 100644
index 0000000..fcae917
Binary files /dev/null and b/faiss/writing_skill_tips_index.faiss differ
diff --git a/helper/constants.py b/helper/constants.py
index c5f924c..fdd45e4 100644
--- a/helper/constants.py
+++ b/helper/constants.py
@@ -18,7 +18,13 @@ GEN_FIELDS = ['topic']
GEN_TEXT_FIELDS = ['title']
LISTENING_GEN_FIELDS = ['transcript', 'exercise']
READING_EXERCISE_TYPES = ['fillBlanks', 'writeBlanks', 'trueFalse', 'paragraphMatch']
+READING_3_EXERCISE_TYPES = ['fillBlanks', 'writeBlanks', 'trueFalse', 'paragraphMatch', 'ideaMatch']
LISTENING_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions', 'writeBlanksFill', 'writeBlanksForm']
+LISTENING_1_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions', 'writeBlanksFill', 'writeBlanksFill',
+ 'writeBlanksForm', 'writeBlanksForm', 'writeBlanksForm', 'writeBlanksForm']
+LISTENING_2_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions']
+LISTENING_3_EXERCISE_TYPES = ['multipleChoice3Options', 'writeBlanksQuestions']
+LISTENING_4_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions', 'writeBlanksFill', 'writeBlanksForm']
TOTAL_READING_PASSAGE_1_EXERCISES = 13
TOTAL_READING_PASSAGE_2_EXERCISES = 13
@@ -35,7 +41,7 @@ SPEAKING_MIN_TIMER_DEFAULT = 14
BLACKLISTED_WORDS = ["jesus", "sex", "gay", "lesbian", "homosexual", "god", "angel", "pornography", "beer", "wine",
"cocaine", "alcohol", "nudity", "lgbt", "casino", "gambling", "catholicism",
- "discrimination", "politics", "politic", "christianity", "islam", "christian", "christians",
+ "discrimination", "politic", "christianity", "islam", "christian", "christians",
"jews", "jew", "discrimination", "discriminatory"]
EN_US_VOICES = [
@@ -168,7 +174,6 @@ topics = [
"Space Exploration",
"Artificial Intelligence",
"Climate Change",
- "World Religions",
"The Human Brain",
"Renewable Energy",
"Cultural Diversity",
diff --git a/helper/exercises.py b/helper/exercises.py
index 1d05bee..b3f22c5 100644
--- a/helper/exercises.py
+++ b/helper/exercises.py
@@ -7,7 +7,6 @@ import uuid
import nltk
from wonderwords import RandomWord
-from helper.api_messages import QuestionType
from helper.constants import *
from helper.firebase_helper import get_all
from helper.openai_interface import make_openai_call, count_total_tokens
@@ -16,19 +15,19 @@ from helper.speech_to_text_helper import has_x_words
nltk.download('words')
-def gen_reading_passage_1(topic, req_exercises, difficulty):
+def gen_reading_passage_1(topic, difficulty, req_exercises, number_of_exercises_q=queue.Queue(), start_id=1):
if (len(req_exercises) == 0):
req_exercises = random.sample(READING_EXERCISE_TYPES, 2)
- number_of_exercises_q = divide_number_into_parts(TOTAL_READING_PASSAGE_1_EXERCISES, len(req_exercises))
+ if (number_of_exercises_q.empty()):
+ number_of_exercises_q = divide_number_into_parts(TOTAL_READING_PASSAGE_1_EXERCISES, len(req_exercises))
- passage = generate_reading_passage(QuestionType.READING_PASSAGE_1, topic)
+ passage = generate_reading_passage_1_text(topic)
if passage == "":
- return gen_reading_passage_1(topic, req_exercises, difficulty)
- start_id = 1
+ return gen_reading_passage_1(topic, difficulty, req_exercises, number_of_exercises_q, start_id)
exercises = generate_reading_exercises(passage["text"], req_exercises, number_of_exercises_q, start_id, difficulty)
if contains_empty_dict(exercises):
- return gen_reading_passage_1(topic, req_exercises, difficulty)
+ return gen_reading_passage_1(topic, difficulty, req_exercises, number_of_exercises_q, start_id)
return {
"exercises": exercises,
"text": {
@@ -39,19 +38,19 @@ def gen_reading_passage_1(topic, req_exercises, difficulty):
}
-def gen_reading_passage_2(topic, req_exercises, difficulty):
+def gen_reading_passage_2(topic, difficulty, req_exercises, number_of_exercises_q=queue.Queue(), start_id=14):
if (len(req_exercises) == 0):
req_exercises = random.sample(READING_EXERCISE_TYPES, 2)
- number_of_exercises_q = divide_number_into_parts(TOTAL_READING_PASSAGE_2_EXERCISES, len(req_exercises))
+ if (number_of_exercises_q.empty()):
+ number_of_exercises_q = divide_number_into_parts(TOTAL_READING_PASSAGE_2_EXERCISES, len(req_exercises))
- passage = generate_reading_passage(QuestionType.READING_PASSAGE_2, topic)
+ passage = generate_reading_passage_2_text(topic)
if passage == "":
- return gen_reading_passage_2(topic, req_exercises, difficulty)
- start_id = 14
+ return gen_reading_passage_2(topic, difficulty, req_exercises, number_of_exercises_q, start_id)
exercises = generate_reading_exercises(passage["text"], req_exercises, number_of_exercises_q, start_id, difficulty)
if contains_empty_dict(exercises):
- return gen_reading_passage_2(topic, req_exercises, difficulty)
+ return gen_reading_passage_2(topic, difficulty, req_exercises, number_of_exercises_q, start_id)
return {
"exercises": exercises,
"text": {
@@ -62,19 +61,19 @@ def gen_reading_passage_2(topic, req_exercises, difficulty):
}
-def gen_reading_passage_3(topic, req_exercises, difficulty):
+def gen_reading_passage_3(topic, difficulty, req_exercises, number_of_exercises_q=queue.Queue(), start_id=27):
if (len(req_exercises) == 0):
req_exercises = random.sample(READING_EXERCISE_TYPES, 2)
- number_of_exercises_q = divide_number_into_parts(TOTAL_READING_PASSAGE_3_EXERCISES, len(req_exercises))
+ if (number_of_exercises_q.empty()):
+ number_of_exercises_q = divide_number_into_parts(TOTAL_READING_PASSAGE_3_EXERCISES, len(req_exercises))
- passage = generate_reading_passage(QuestionType.READING_PASSAGE_3, topic)
+ passage = generate_reading_passage_3_text(topic)
if passage == "":
- return gen_reading_passage_3(topic, req_exercises, difficulty)
- start_id = 27
+ return gen_reading_passage_3(topic, difficulty, req_exercises, number_of_exercises_q, start_id)
exercises = generate_reading_exercises(passage["text"], req_exercises, number_of_exercises_q, start_id, difficulty)
if contains_empty_dict(exercises):
- return gen_reading_passage_3(topic, req_exercises, difficulty)
+ return gen_reading_passage_3(topic, difficulty, req_exercises, number_of_exercises_q, start_id)
return {
"exercises": exercises,
"text": {
@@ -145,7 +144,12 @@ def add_random_words_and_shuffle(word_array, num_random_words):
random.shuffle(combined_array)
- return combined_array
+ result = []
+ for i, word in enumerate(combined_array):
+ letter = chr(65 + i) # chr(65) is 'A'
+ result.append({"letter": letter, "word": word})
+
+ return result
def fillblanks_build_solutions_array(words, start_id):
@@ -239,7 +243,30 @@ def build_write_blanks_solutions_listening(words: [], start_id):
return solutions
-def generate_reading_passage(type: QuestionType, topic: str):
+def get_perfect_answer(question: str, size: int):
+ messages = [
+ {
+ "role": "system",
+ "content": (
+ 'You are a helpful assistant designed to output JSON on this format: '
+ '{"perfect_answer": "perfect answer for the question"}')
+ },
+ {
+ "role": "user",
+ "content": ('Write a perfect answer for this writing exercise of a IELTS exam. Question: ' + question)
+
+ },
+ {
+ "role": "user",
+ "content": ('The answer must have at least ' + str(size) + ' words')
+
+ }
+ ]
+ token_count = count_total_tokens(messages)
+ return make_openai_call(GPT_4_O, messages, token_count, GEN_TEXT_FIELDS, GEN_QUESTION_TEMPERATURE)
+
+
+def generate_reading_passage_1_text(topic: str):
messages = [
{
"role": "system",
@@ -250,17 +277,91 @@ def generate_reading_passage(type: QuestionType, topic: str):
{
"role": "user",
"content": (
- 'Generate an extensive text for IELTS ' + type.value + ', of at least 1500 words, on the topic '
- 'of "' + topic + '". The passage should offer '
- 'a substantial amount of information, '
- 'analysis, or narrative relevant to the chosen '
- 'subject matter. This text passage aims to '
- 'serve as the primary reading section of an '
- 'IELTS test, providing an in-depth and '
- 'comprehensive exploration of the topic. '
- 'Make sure that the generated text does not '
- 'contain forbidden subjects in muslim countries.')
+ 'Generate an extensive text for IELTS Reading Passage 1, of at least 800 words, on the topic '
+ 'of "' + topic + '". The passage should offer '
+ 'a substantial amount of information, '
+ 'analysis, or narrative relevant to the chosen '
+ 'subject matter. This text passage aims to '
+ 'serve as the primary reading section of an '
+ 'IELTS test, providing an in-depth and '
+ 'comprehensive exploration of the topic. '
+ 'Make sure that the generated text does not '
+ 'contain forbidden subjects in muslim countries.')
+ },
+ {
+ "role": "system",
+ "content": ('The generated text should be fairly easy to understand and have multiple paragraphs.')
+ },
+ ]
+ token_count = count_total_tokens(messages)
+ return make_openai_call(GPT_4_O, messages, token_count, GEN_TEXT_FIELDS, GEN_QUESTION_TEMPERATURE)
+
+
+def generate_reading_passage_2_text(topic: str):
+ messages = [
+ {
+ "role": "system",
+ "content": (
+ 'You are a helpful assistant designed to output JSON on this format: '
+ '{"title": "title of the text", "text": "generated text"}')
+ },
+ {
+ "role": "user",
+ "content": (
+ 'Generate an extensive text for IELTS Reading Passage 2, of at least 800 words, on the topic '
+ 'of "' + topic + '". The passage should offer '
+ 'a substantial amount of information, '
+ 'analysis, or narrative relevant to the chosen '
+ 'subject matter. This text passage aims to '
+ 'serve as the primary reading section of an '
+ 'IELTS test, providing an in-depth and '
+ 'comprehensive exploration of the topic. '
+ 'Make sure that the generated text does not '
+ 'contain forbidden subjects in muslim countries.')
+
+ },
+ {
+ "role": "system",
+ "content": ('The generated text should be fairly hard to understand and have multiple paragraphs.')
+ },
+ ]
+ token_count = count_total_tokens(messages)
+ return make_openai_call(GPT_4_O, messages, token_count, GEN_TEXT_FIELDS, GEN_QUESTION_TEMPERATURE)
+
+
+def generate_reading_passage_3_text(topic: str):
+ messages = [
+ {
+ "role": "system",
+ "content": (
+ 'You are a helpful assistant designed to output JSON on this format: '
+ '{"title": "title of the text", "text": "generated text"}')
+ },
+ {
+ "role": "user",
+ "content": (
+ 'Generate an extensive text for IELTS Reading Passage 3, of at least 800 words, on the topic '
+ 'of "' + topic + '". The passage should offer '
+ 'a substantial amount of information, '
+ 'analysis, or narrative relevant to the chosen '
+ 'subject matter. This text passage aims to '
+ 'serve as the primary reading section of an '
+ 'IELTS test, providing an in-depth and '
+ 'comprehensive exploration of the topic. '
+ 'Make sure that the generated text does not '
+ 'contain forbidden subjects in muslim countries.')
+
+ },
+ {
+ "role": "system",
+ "content": ('The generated text should be very hard to understand and include different points, theories, '
+ 'subtle differences of opinions from people, correctly sourced to the person who said it, '
+ 'over the specified topic and have multiple paragraphs.')
+ },
+ {
+ "role": "user",
+ "content": "Use real text excerpts on you generated passage and cite the sources."
}
]
token_count = count_total_tokens(messages)
@@ -283,6 +384,16 @@ def generate_listening_1_conversation(topic: str):
'Make sure that the generated conversation does not contain forbidden subjects in '
'muslim countries.')
+ },
+ {
+ "role": "user",
+ "content": 'Try to have misleading discourse (refer multiple dates, multiple colors and etc).'
+
+ },
+ {
+ "role": "user",
+ "content": 'Try to have spelling of names (cities, people, etc)'
+
}
]
token_count = count_total_tokens(messages)
@@ -359,8 +470,8 @@ def generate_listening_3_conversation(topic: str):
"content": (
'Compose an authentic and elaborate conversation between up to four individuals in the everyday '
'social context of "' + topic + '". Please include random names and genders for the characters in your dialogue. '
- 'Make sure that the generated conversation does not contain forbidden subjects in '
- 'muslim countries.')
+ 'Make sure that the generated conversation does not contain forbidden subjects in '
+ 'muslim countries.')
}
]
@@ -400,9 +511,9 @@ def generate_listening_4_monologue(topic: str):
{
"role": "user",
"content": (
- 'Generate a comprehensive monologue on the academic subject '
+ 'Generate a comprehensive and complex monologue on the academic subject '
'of: "' + topic + '". Make sure that the generated monologue does not contain forbidden subjects in '
- 'muslim countries.')
+ 'muslim countries.')
}
]
@@ -442,6 +553,10 @@ def generate_reading_exercises(passage: str, req_exercises: list, number_of_exer
question = gen_paragraph_match_exercise(passage, number_of_exercises, start_id)
exercises.append(question)
print("Added paragraph match: " + str(question))
+ elif req_exercise == "ideaMatch":
+ question = gen_idea_match_exercise(passage, number_of_exercises, start_id)
+ exercises.append(question)
+ print("Added idea match: " + str(question))
start_id = start_id + number_of_exercises
@@ -467,7 +582,12 @@ def generate_listening_conversation_exercises(conversation: str, req_exercises:
if req_exercise == "multipleChoice":
question = gen_multiple_choice_exercise_listening_conversation(conversation, number_of_exercises, start_id,
- difficulty)
+ difficulty, 4)
+ exercises.append(question)
+ print("Added multiple choice: " + str(question))
+ elif req_exercise == "multipleChoice3Options":
+ question = gen_multiple_choice_exercise_listening_conversation(conversation, number_of_exercises, start_id,
+ difficulty, 3)
exercises.append(question)
print("Added multiple choice: " + str(question))
elif req_exercise == "writeBlanksQuestions":
@@ -559,34 +679,49 @@ def gen_summary_fill_blanks_exercise(text: str, quantity: int, start_id, difficu
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
- '{ "summary": "summary", "words": ["word_1", "word_2"] }')
+ '{ "summary": "summary" }')
},
{
"role": "user",
- "content": ('Summarize this text: "'+ text + '"')
-
- },
- {
- "role": "user",
- "content": ('Select ' + str(quantity) + ' ' + difficulty + ' difficulty words, it must be words and not '
- 'expressions, from the summary.')
+ "content": ('Summarize this text: "' + text + '"')
}
]
token_count = count_total_tokens(messages)
response = make_openai_call(GPT_4_O, messages, token_count,
- ["summary"],
- GEN_QUESTION_TEMPERATURE)
+ ["summary"],
+ GEN_QUESTION_TEMPERATURE)
+ messages = [
+ {
+ "role": "system",
+ "content": (
+ 'You are a helpful assistant designed to output JSON on this format: '
+ '{"words": ["word_1", "word_2"] }')
+ },
+ {
+ "role": "user",
+ "content": ('Select ' + str(quantity) + ' ' + difficulty + ' difficulty words, it must be words and not '
+ 'expressions, from this:\n' + response[
+ "summary"])
+
+ }
+ ]
+ token_count = count_total_tokens(messages)
+
+ words_response = make_openai_call(GPT_4_O, messages, token_count,
+ ["summary"],
+ GEN_QUESTION_TEMPERATURE)
+ response["words"] = words_response["words"]
replaced_summary = replace_first_occurrences_with_placeholders(response["summary"], response["words"], start_id)
- options_words = add_random_words_and_shuffle(response["words"], 5)
+ options_words = add_random_words_and_shuffle(response["words"], 1)
solutions = fillblanks_build_solutions_array(response["words"], start_id)
return {
"allowRepetition": True,
"id": str(uuid.uuid4()),
- "prompt": "Complete the summary below. Click a blank to select the corresponding word(s) for it.\\nThere are "
+ "prompt": "Complete the summary below. Write the letter of the corresponding word(s) for it.\\nThere are "
"more words than spaces so you will not use them all. You may use any of the words more than once.",
"solutions": solutions,
"text": replaced_summary,
@@ -608,18 +743,19 @@ def gen_true_false_not_given_exercise(text: str, quantity: int, start_id, diffic
{
"role": "user",
"content": (
- 'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty statements based on the provided text. '
- 'Ensure that your statements accurately represent '
- 'information or inferences from the text, and '
- 'provide a variety of responses, including, at '
- 'least one of each True, False, and Not Given, '
- 'as appropriate.\n\nReference text:\n\n ' + text)
+ 'Generate ' + str(
+ quantity) + ' ' + difficulty + ' difficulty statements based on the provided text. '
+ 'Ensure that your statements accurately represent '
+ 'information or inferences from the text, and '
+ 'provide a variety of responses, including, at '
+ 'least one of each True, False, and Not Given, '
+ 'as appropriate.\n\nReference text:\n\n ' + text)
}
]
token_count = count_total_tokens(messages)
- questions = make_openai_call(GPT_4_O, messages, token_count,["prompts"],
+ questions = make_openai_call(GPT_4_O, messages, token_count, ["prompts"],
GEN_QUESTION_TEMPERATURE)["prompts"]
if len(questions) > quantity:
questions = remove_excess_questions(questions, len(questions) - quantity)
@@ -653,7 +789,7 @@ def gen_write_blanks_exercise(text: str, quantity: int, start_id, difficulty):
}
]
token_count = count_total_tokens(messages)
- questions = make_openai_call(GPT_4_O, messages, token_count,["questions"],
+ questions = make_openai_call(GPT_4_O, messages, token_count, ["questions"],
GEN_QUESTION_TEMPERATURE)["questions"][:quantity]
return {
@@ -678,18 +814,19 @@ def gen_paragraph_match_exercise(text: str, quantity: int, start_id):
{
"role": "user",
"content": (
- 'For every paragraph of the list generate a minimum 5 word heading for it. The paragraphs are these: ' + str(paragraphs))
+ 'For every paragraph of the list generate a minimum 5 word heading for it. The paragraphs are these: ' + str(
+ paragraphs))
}
]
token_count = count_total_tokens(messages)
- headings = make_openai_call(GPT_4_O, messages, token_count,["headings"],
+ headings = make_openai_call(GPT_4_O, messages, token_count, ["headings"],
GEN_QUESTION_TEMPERATURE)["headings"]
options = []
for i, paragraph in enumerate(paragraphs, start=0):
- paragraph["heading"] = headings[i]
+ paragraph["heading"] = headings[i]["heading"]
options.append({
"id": paragraph["letter"],
"sentence": paragraph["paragraph"]
@@ -714,6 +851,65 @@ def gen_paragraph_match_exercise(text: str, quantity: int, start_id):
}
+def gen_idea_match_exercise(text: str, quantity: int, start_id):
+ messages = [
+ {
+ "role": "system",
+ "content": (
+ 'You are a helpful assistant designed to output JSON on this format: '
+ '{"ideas": [ '
+ '{"idea": "some idea or opinion", "from": "person, institution whose idea or opinion this is"}, '
+ '{"idea": "some other idea or opinion", "from": "person, institution whose idea or opinion this is"}'
+ ']}')
+ },
+ {
+ "role": "user",
+ "content": (
+ 'From the text extract ' + str(
+ quantity) + ' ideas, theories, opinions and who they are from. The text: ' + str(text))
+
+ }
+ ]
+ token_count = count_total_tokens(messages)
+
+ ideas = make_openai_call(GPT_4_O, messages, token_count, ["ideas"], GEN_QUESTION_TEMPERATURE)["ideas"]
+
+ return {
+ "id": str(uuid.uuid4()),
+ "allowRepetition": False,
+ "options": build_options(ideas),
+ "prompt": "Choose the correct author for the ideas/opinions from the list of authors below.",
+ "sentences": build_sentences(ideas, start_id),
+ "type": "matchSentences"
+ }
+
+
+def build_options(ideas):
+ options = []
+ letters = iter(string.ascii_uppercase)
+ for idea in ideas:
+ options.append({
+ "id": next(letters),
+ "sentence": idea["from"]
+ })
+ return options
+
+
+def build_sentences(ideas, start_id):
+ sentences = []
+ letters = iter(string.ascii_uppercase)
+ for idea in ideas:
+ sentences.append({
+ "solution": next(letters),
+ "sentence": idea["idea"]
+ })
+
+ random.shuffle(sentences)
+ for i, sentence in enumerate(sentences, start=start_id):
+ sentence["id"] = i
+ return sentences
+
+
def assign_letters_to_paragraphs(paragraphs):
result = []
letters = iter(string.ascii_uppercase)
@@ -723,7 +919,7 @@ def assign_letters_to_paragraphs(paragraphs):
return result
-def gen_multiple_choice_exercise_listening_conversation(text: str, quantity: int, start_id, difficulty):
+def gen_multiple_choice_exercise_listening_conversation(text: str, quantity: int, start_id, difficulty, n_options=4):
messages = [
{
"role": "system",
@@ -737,14 +933,15 @@ def gen_multiple_choice_exercise_listening_conversation(text: str, quantity: int
{
"role": "user",
"content": (
- 'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty multiple choice questions of 4 options '
- 'of for this conversation:\n"' + text + '"')
+ 'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty multiple choice questions of ' + str(
+ n_options) + ' options '
+ 'of for this conversation:\n"' + text + '"')
}
]
token_count = count_total_tokens(messages)
- question = make_openai_call(GPT_4_O, messages, token_count,["questions"], GEN_QUESTION_TEMPERATURE)
+ question = make_openai_call(GPT_4_O, messages, token_count, ["questions"], GEN_QUESTION_TEMPERATURE)
return {
"id": str(uuid.uuid4()),
"prompt": "Select the appropriate option.",
@@ -753,7 +950,7 @@ def gen_multiple_choice_exercise_listening_conversation(text: str, quantity: int
}
-def gen_multiple_choice_exercise_listening_monologue(text: str, quantity: int, start_id, difficulty):
+def gen_multiple_choice_exercise_listening_monologue(text: str, quantity: int, start_id, difficulty, n_options=4):
messages = [
{
"role": "system",
@@ -768,14 +965,15 @@ def gen_multiple_choice_exercise_listening_monologue(text: str, quantity: int, s
"role": "user",
"content": (
'Generate ' + str(
- quantity) + ' ' + difficulty + ' difficulty multiple choice questions of 4 options '
- 'of for this monologue:\n"' + text + '"')
+ quantity) + ' ' + difficulty + ' difficulty multiple choice questions of ' + str(
+ n_options) + ' options '
+ 'of for this monologue:\n"' + text + '"')
}
]
token_count = count_total_tokens(messages)
- question = make_openai_call(GPT_4_O, messages, token_count,["questions"], GEN_QUESTION_TEMPERATURE)
+ question = make_openai_call(GPT_4_O, messages, token_count, ["questions"], GEN_QUESTION_TEMPERATURE)
return {
"id": str(uuid.uuid4()),
"prompt": "Select the appropriate option.",
@@ -803,7 +1001,7 @@ def gen_write_blanks_questions_exercise_listening_conversation(text: str, quanti
]
token_count = count_total_tokens(messages)
- questions = make_openai_call(GPT_4_O, messages, token_count,["questions"],
+ questions = make_openai_call(GPT_4_O, messages, token_count, ["questions"],
GEN_QUESTION_TEMPERATURE)["questions"][:quantity]
return {
@@ -869,7 +1067,6 @@ def gen_write_blanks_notes_exercise_listening_conversation(text: str, quantity:
questions = make_openai_call(GPT_4_O, messages, token_count, ["notes"],
GEN_QUESTION_TEMPERATURE)["notes"][:quantity]
-
formatted_phrases = "\n".join([f"{i + 1}. {phrase}" for i, phrase in enumerate(questions)])
word_messages = [
@@ -884,7 +1081,7 @@ def gen_write_blanks_notes_exercise_listening_conversation(text: str, quantity:
}
]
- words = make_openai_call(GPT_4_O, word_messages, token_count,["words"],
+ words = make_openai_call(GPT_4_O, word_messages, token_count, ["words"],
GEN_QUESTION_TEMPERATURE)["words"][:quantity]
replaced_notes = replace_first_occurrences_with_placeholders_notes(questions, words, start_id)
return {
@@ -951,13 +1148,19 @@ def gen_write_blanks_form_exercise_listening_conversation(text: str, quantity: i
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
- '{"form": ["key: value", "key2: value"]}')
+ '{"form": ["key": "value", "key2": "value"]}')
},
{
"role": "user",
"content": (
'Generate a form with ' + str(
- quantity) + ' ' + difficulty + ' difficulty key-value pairs about this conversation:\n"' + text + '"')
+ quantity) + ' entries with information about this conversation:\n"' + text + '"')
+
+ },
+ {
+ "role": "user",
+ "content": 'It must be a form and not questions. '
+ 'Example: {"form": ["Color of car": "blue", "Brand of car": "toyota"]}'
}
]
@@ -1019,11 +1222,11 @@ def gen_multiple_choice_level(quantity: int, start_id=1):
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: {"questions": [{"id": "9", "options": '
- '[{"id": "A", "text": '
- '"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
- '"Happy"}, {"id": "D", "text": "Jump"}], '
- '"prompt": "Which of the following is a conjunction?", '
- '"solution": "A", "variant": "text"}]}')
+ '[{"id": "A", "text": '
+ '"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
+ '"Happy"}, {"id": "D", "text": "Jump"}], '
+ '"prompt": "Which of the following is a conjunction?", '
+ '"solution": "A", "variant": "text"}]}')
},
{
"role": "user",
@@ -1033,8 +1236,8 @@ def gen_multiple_choice_level(quantity: int, start_id=1):
token_count = count_total_tokens(messages)
question = make_openai_call(GPT_4_O, messages, token_count,
- ["questions"],
- GEN_QUESTION_TEMPERATURE)
+ ["questions"],
+ GEN_QUESTION_TEMPERATURE)
if len(question["questions"]) != quantity:
return gen_multiple_choice_level(quantity, start_id)
@@ -1064,15 +1267,20 @@ def replace_exercise_if_exists(all_exams, current_exercise, current_exam, seen_k
for exam in all_exams:
exam_dict = exam.to_dict()
- if any(
- exercise["prompt"] == current_exercise["prompt"] and
- any(exercise["options"][0]["text"] == current_option["text"] for current_option in
- current_exercise["options"])
- for exercise in exam_dict.get("exercises", [])[0]["questions"]
- ):
- return replace_exercise_if_exists(all_exams, generate_single_mc_level_question(), current_exam, seen_keys)
+ if len(exam_dict.get("parts", [])) > 0:
+ exercise_dict = exam_dict.get("parts", [])[0]
+ if len(exercise_dict.get("exercises", [])) > 0:
+ if any(
+ exercise["prompt"] == current_exercise["prompt"] and
+ any(exercise["options"][0]["text"] == current_option["text"] for current_option in
+ current_exercise["options"])
+ for exercise in exercise_dict.get("exercises", [])[0]["questions"]
+ ):
+ return replace_exercise_if_exists(all_exams, generate_single_mc_level_question(), current_exam,
+ seen_keys)
return current_exercise, seen_keys
+
def replace_exercise_if_exists_utas(all_exams, current_exercise, current_exam, seen_keys):
# Extracting relevant fields for comparison
key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options'])))
@@ -1089,7 +1297,54 @@ def replace_exercise_if_exists_utas(all_exams, current_exercise, current_exam, s
current_exercise["options"])
for exercise in exam.get("questions", [])
):
- return replace_exercise_if_exists_utas(all_exams, generate_single_mc_level_question(), current_exam, seen_keys)
+ return replace_exercise_if_exists_utas(all_exams, generate_single_mc_level_question(), current_exam,
+ seen_keys)
+ return current_exercise, seen_keys
+
+
+def replace_blank_space_exercise_if_exists_utas(all_exams, current_exercise, current_exam, seen_keys):
+ # Extracting relevant fields for comparison
+ key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options'])))
+ # Check if the key is in the set
+ if key in seen_keys:
+ return replace_exercise_if_exists_utas(all_exams, generate_single_mc_blank_space_level_question(), current_exam,
+ seen_keys)
+ else:
+ seen_keys.add(key)
+
+ for exam in all_exams:
+ if any(
+ exercise["prompt"] == current_exercise["prompt"] and
+ any(exercise["options"][0]["text"] == current_option["text"] for current_option in
+ current_exercise["options"])
+ for exercise in exam.get("questions", [])
+ ):
+ return replace_exercise_if_exists_utas(all_exams, generate_single_mc_blank_space_level_question(),
+ current_exam,
+ seen_keys)
+ return current_exercise, seen_keys
+
+
+def replace_underlined_exercise_if_exists_utas(all_exams, current_exercise, current_exam, seen_keys):
+ # Extracting relevant fields for comparison
+ key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options'])))
+ # Check if the key is in the set
+ if key in seen_keys:
+ return replace_exercise_if_exists_utas(all_exams, generate_single_mc_underlined_level_question(), current_exam,
+ seen_keys)
+ else:
+ seen_keys.add(key)
+
+ for exam in all_exams:
+ if any(
+ exercise["prompt"] == current_exercise["prompt"] and
+ any(exercise["options"][0]["text"] == current_option["text"] for current_option in
+ current_exercise["options"])
+ for exercise in exam.get("questions", [])
+ ):
+ return replace_exercise_if_exists_utas(all_exams, generate_single_mc_underlined_level_question(),
+ current_exam,
+ seen_keys)
return current_exercise, seen_keys
@@ -1112,8 +1367,66 @@ def generate_single_mc_level_question():
]
token_count = count_total_tokens(messages)
- question = make_openai_call(GPT_4_O, messages, token_count,["options"],
- GEN_QUESTION_TEMPERATURE)
+ question = make_openai_call(GPT_4_O, messages, token_count, ["options"],
+ GEN_QUESTION_TEMPERATURE)
+
+ return question
+
+
+def generate_single_mc_blank_space_level_question():
+ messages = [
+ {
+ "role": "system",
+ "content": (
+ 'You are a helpful assistant designed to output JSON on this format: '
+ '{"id": "9", "options": [{"id": "A", "text": "And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
+ '"Happy"}, {"id": "D", "text": "Jump"}], "prompt": "Which of the following is a conjunction?", '
+ '"solution": "A", "variant": "text"}')
+ },
+ {
+ "role": "user",
+ "content": ('Generate 1 multiple choice blank space question of 4 options for an english level exam, '
+ 'it can be easy, intermediate or advanced.')
+
+ }
+ ]
+ token_count = count_total_tokens(messages)
+
+ question = make_openai_call(GPT_4_O, messages, token_count, ["options"],
+ GEN_QUESTION_TEMPERATURE)
+
+ return question
+
+
+def generate_single_mc_underlined_level_question():
+ messages = [
+ {
+ "role": "system",
+ "content": (
+ 'You are a helpful assistant designed to output JSON on this format: '
+ '{"id": "9", "options": [{"id": "A", "text": "And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
+ '"Happy"}, {"id": "D", "text": "Jump"}], "prompt": "Which of the following is a conjunction?", '
+ '"solution": "A", "variant": "text"}')
+ },
+ {
+ "role": "user",
+ "content": ('Generate 1 multiple choice blank space question of 4 options for an english level exam, '
+ 'it can be easy, intermediate or advanced.')
+
+ },
+ {
+ "role": "user",
+ "content": (
+ 'The type of multiple choice is the prompt has wrong words or group of words and the options are to '
+ 'find the wrong word or group of words that are underlined in the prompt. \nExample:\n'
+ 'Prompt: "I complain about my boss all the time, but my colleagues thinks the boss is nice."\n'
+ 'Options:\na: "complain"\nb: "all the time"\nc: "thinks"\nd: "is"')
+ }
+ ]
+ token_count = count_total_tokens(messages)
+
+ question = make_openai_call(GPT_4_O, messages, token_count, ["options"],
+ GEN_QUESTION_TEMPERATURE)
return question
@@ -1130,7 +1443,7 @@ def parse_conversation(conversation_data):
return "\n".join(readable_text)
-def gen_multiple_choice_blank_space_utas(quantity: int, start_id: int, all_exams):
+def gen_multiple_choice_blank_space_utas(quantity: int, start_id: int, all_exams=None):
gen_multiple_choice_for_text = "Generate " + str(
quantity) + " multiple choice blank space questions of 4 options for an english level exam, some easy questions, some intermediate " \
"questions and some advanced questions. Ensure that the questions cover a range of topics such as " \
@@ -1142,11 +1455,11 @@ def gen_multiple_choice_blank_space_utas(quantity: int, start_id: int, all_exams
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: {"questions": [{"id": "9", "options": '
- '[{"id": "A", "text": '
- '"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
- '"Happy"}, {"id": "D", "text": "Jump"}], '
- '"prompt": "Which of the following is a conjunction?", '
- '"solution": "A", "variant": "text"}]}')
+ '[{"id": "A", "text": '
+ '"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
+ '"Happy"}, {"id": "D", "text": "Jump"}], '
+ '"prompt": "Which of the following is a conjunction?", '
+ '"solution": "A", "variant": "text"}]}')
},
{
"role": "user",
@@ -1156,21 +1469,24 @@ def gen_multiple_choice_blank_space_utas(quantity: int, start_id: int, all_exams
token_count = count_total_tokens(messages)
question = make_openai_call(GPT_4_O, messages, token_count,
- ["questions"],
- GEN_QUESTION_TEMPERATURE)
+ ["questions"],
+ GEN_QUESTION_TEMPERATURE)
if len(question["questions"]) != quantity:
- return gen_multiple_choice_level(quantity, start_id)
+ return gen_multiple_choice_blank_space_utas(quantity, start_id)
else:
- seen_keys = set()
- for i in range(len(question["questions"])):
- question["questions"][i], seen_keys = replace_exercise_if_exists_utas(all_exams, question["questions"][i],
- question,
- seen_keys)
- return fix_exercise_ids(question, start_id)
+ if all_exams is not None:
+ seen_keys = set()
+ for i in range(len(question["questions"])):
+ question["questions"][i], seen_keys = (
+ replace_blank_space_exercise_if_exists_utas(all_exams, question["questions"][i], question,
+ seen_keys))
+ response = fix_exercise_ids(question, start_id)
+ response["questions"] = randomize_mc_options_order(response["questions"])
+ return response
-def gen_multiple_choice_underlined_utas(quantity: int, start_id: int):
+def gen_multiple_choice_underlined_utas(quantity: int, start_id: int, all_exams=None):
json_format = {
"questions": [
{
@@ -1200,13 +1516,14 @@ def gen_multiple_choice_underlined_utas(quantity: int, start_id: int):
]
}
- gen_multiple_choice_for_text = 'Generate ' + str(quantity) + (' multiple choice questions of 4 options for an english '
- 'level exam, some easy questions, some intermediate '
- 'questions and some advanced questions.Ensure that '
- 'the questions cover a range of topics such as verb '
- 'tense, subject-verb agreement, pronoun usage, '
- 'sentence structure, and punctuation. Make sure '
- 'every question only has 1 correct answer.')
+ gen_multiple_choice_for_text = 'Generate ' + str(quantity) + (
+ ' multiple choice questions of 4 options for an english '
+ 'level exam, some easy questions, some intermediate '
+ 'questions and some advanced questions. Ensure that '
+ 'the questions cover a range of topics such as verb '
+ 'tense, subject-verb agreement, pronoun usage, '
+ 'sentence structure, and punctuation. Make sure '
+ 'every question only has 1 correct answer.')
messages = [
{
@@ -1229,13 +1546,22 @@ def gen_multiple_choice_underlined_utas(quantity: int, start_id: int):
token_count = count_total_tokens(messages)
question = make_openai_call(GPT_4_O, messages, token_count,
- ["questions"],
- GEN_QUESTION_TEMPERATURE)
+ ["questions"],
+ GEN_QUESTION_TEMPERATURE)
if len(question["questions"]) != quantity:
- return gen_multiple_choice_level(quantity, start_id)
+ return gen_multiple_choice_underlined_utas(quantity, start_id)
else:
- return fix_exercise_ids(question, start_id)["questions"]
+ if all_exams is not None:
+ seen_keys = set()
+ for i in range(len(question["questions"])):
+ question["questions"][i], seen_keys = (
+ replace_underlined_exercise_if_exists_utas(all_exams, question["questions"][i], question,
+ seen_keys))
+ response = fix_exercise_ids(question, start_id)
+ response["questions"] = randomize_mc_options_order(response["questions"])
+ return response
+
def gen_blank_space_text_utas(quantity: int, start_id: int, size: int, topic=random.choice(mti_topics)):
json_format = {
@@ -1275,10 +1601,11 @@ def gen_blank_space_text_utas(quantity: int, start_id: int, size: int, topic=ran
{
"role": "user",
"content": (
- 'From the generated text choose ' + str(quantity) + ' words (cannot be sequential words) to replace '
- 'once with {{id}} where id starts on ' + str(start_id) + ' and is '
- 'incremented for each word. The ids must be ordered throughout the text and the words must be '
- 'replaced only once. Put the removed words and respective ids on the words array of the json in the correct order.')
+ 'From the generated text choose ' + str(
+ quantity) + ' words (cannot be sequential words) to replace '
+ 'once with {{id}} where id starts on ' + str(start_id) + ' and is '
+ 'incremented for each word. The ids must be ordered throughout the text and the words must be '
+ 'replaced only once. Put the removed words and respective ids on the words array of the json in the correct order.')
}
]
@@ -1289,14 +1616,14 @@ def gen_blank_space_text_utas(quantity: int, start_id: int, size: int, topic=ran
return question["question"]
-def gen_reading_passage_utas(start_id, sa_quantity: int, mc_quantity: int, topic=random.choice(mti_topics)):
- passage = generate_reading_passage(QuestionType.READING_PASSAGE_1, topic)
+def gen_reading_passage_utas(start_id, sa_quantity: int, mc_quantity: int, topic=random.choice(mti_topics)):
+ passage = generate_reading_passage_1_text(topic)
short_answer = gen_short_answer_utas(passage["text"], start_id, sa_quantity)
- mc_exercises = gen_text_multiple_choice_utas(passage["text"], start_id+sa_quantity, mc_quantity)
+ mc_exercises = gen_text_multiple_choice_utas(passage["text"], start_id + sa_quantity, mc_quantity)
return {
"exercises": {
- "shortAnswer":short_answer,
+ "shortAnswer": short_answer,
"multipleChoice": mc_exercises,
},
"text": {
@@ -1305,6 +1632,7 @@ def gen_reading_passage_utas(start_id, sa_quantity: int, mc_quantity: int, topic
}
}
+
def gen_short_answer_utas(text: str, start_id: int, sa_quantity: int):
json_format = {"questions": [{"id": 1, "question": "question", "possible_answers": ["answer_1", "answer_2"]}]}
@@ -1327,8 +1655,10 @@ def gen_short_answer_utas(text: str, start_id: int, sa_quantity: int):
token_count = count_total_tokens(messages)
return make_openai_call(GPT_4_O, messages, token_count,
- ["questions"],
- GEN_QUESTION_TEMPERATURE)["questions"]
+ ["questions"],
+ GEN_QUESTION_TEMPERATURE)["questions"]
+
+
def gen_text_multiple_choice_utas(text: str, start_id: int, mc_quantity: int):
json_format = {
"questions": [
@@ -1366,7 +1696,8 @@ def gen_text_multiple_choice_utas(text: str, start_id: int, mc_quantity: int):
},
{
"role": "user",
- "content": 'Generate ' + str(mc_quantity) + ' multiple choice questions of 4 options for this text:\n' + text
+ "content": 'Generate ' + str(
+ mc_quantity) + ' multiple choice questions of 4 options for this text:\n' + text
},
{
"role": "user",
@@ -1382,4 +1713,379 @@ def gen_text_multiple_choice_utas(text: str, start_id: int, mc_quantity: int):
if len(question["questions"]) != mc_quantity:
return gen_multiple_choice_level(mc_quantity, start_id)
else:
- return fix_exercise_ids(question, start_id)["questions"]
\ No newline at end of file
+ response = fix_exercise_ids(question, start_id)
+ response["questions"] = randomize_mc_options_order(response["questions"])
+ return response
+
+
+def generate_level_mc(start_id: int, quantity: int, all_questions=None):
+ json_format = {
+ "questions": [
+ {
+ "id": "9",
+ "options": [
+ {
+ "id": "A",
+ "text": "a"
+ },
+ {
+ "id": "B",
+ "text": "b"
+ },
+ {
+ "id": "C",
+ "text": "c"
+ },
+ {
+ "id": "D",
+ "text": "d"
+ }
+ ],
+ "prompt": "prompt",
+ "solution": "A",
+ "variant": "text"
+ }
+ ]
+ }
+
+ messages = [
+ {
+ "role": "system",
+ "content": 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format)
+ },
+ {
+ "role": "user",
+ "content": ('Generate ' + str(quantity) + ' multiple choice questions of 4 options for an english level '
+ 'exam, it can be easy, intermediate or advanced.')
+
+ },
+ {
+ "role": "user",
+ "content": 'Make sure every question only has 1 correct answer.'
+ }
+ ]
+ token_count = count_total_tokens(messages)
+
+ question = make_openai_call(GPT_4_O, messages, token_count, ["questions"],
+ GEN_QUESTION_TEMPERATURE)
+
+ if all_questions is not None:
+ seen_keys = set()
+ for i in range(len(question["questions"])):
+ question["questions"][i], seen_keys = replace_exercise_if_exists_utas(all_questions,
+ question["questions"][i],
+ question,
+ seen_keys)
+ response = fix_exercise_ids(question, start_id)
+ response["questions"] = randomize_mc_options_order(response["questions"])
+ return response
+
+
+def randomize_mc_options_order(questions):
+ option_ids = ['A', 'B', 'C', 'D']
+
+ for question in questions:
+ # Store the original solution text
+ original_solution_text = next(
+ option['text'] for option in question['options'] if option['id'] == question['solution'])
+
+ # Shuffle the options
+ random.shuffle(question['options'])
+
+ # Update the option ids and find the new solution id
+ for idx, option in enumerate(question['options']):
+ option['id'] = option_ids[idx]
+ if option['text'] == original_solution_text:
+ question['solution'] = option['id']
+
+ return questions
+
+
+def gen_writing_task_1(topic, difficulty):
+ messages = [
+ {
+ "role": "system",
+ "content": ('You are a helpful assistant designed to output JSON on this format: '
+ '{"prompt": "prompt content"}')
+ },
+ {
+ "role": "user",
+ "content": ('Craft a prompt for an IELTS Writing Task 1 General Training exercise that instructs the '
+ 'student to compose a letter. The prompt should present a specific scenario or situation, '
+ 'based on the topic of "' + topic + '", requiring the student to provide information, '
+ 'advice, or instructions within the letter. '
+ 'Make sure that the generated prompt is '
+ 'of ' + difficulty + ' difficulty and does not contain '
+ 'forbidden subjects in muslim '
+ 'countries.')
+ },
+ {
+ "role": "user",
+ "content": 'The prompt should end with "In the letter you should" followed by 3 bullet points of what '
+ 'the answer should include.'
+ }
+ ]
+ token_count = count_total_tokens(messages)
+ response = make_openai_call(GPT_3_5_TURBO, messages, token_count, "prompt",
+ GEN_QUESTION_TEMPERATURE)
+ return {
+ "question": add_newline_before_hyphen(response["prompt"].strip()),
+ "difficulty": difficulty,
+ "topic": topic
+ }
+
+
+def add_newline_before_hyphen(s):
+ return s.replace(" -", "\n-")
+
+
+def gen_writing_task_2(topic, difficulty):
+ messages = [
+ {
+ "role": "system",
+ "content": ('You are a helpful assistant designed to output JSON on this format: '
+ '{"prompt": "prompt content"}')
+ },
+ {
+ "role": "user",
+ "content": (
+ 'Craft a comprehensive question of ' + difficulty + ' difficulty like the ones for IELTS Writing '
+ 'Task 2 General Training that directs the '
+ 'candidate'
+ 'to delve into an in-depth analysis of '
+ 'contrasting perspectives on the topic '
+ 'of "' + topic + '". The candidate should be '
+ 'asked to discuss the '
+ 'strengths and weaknesses of '
+ 'both viewpoints.')
+ },
+ {
+ "role": "user",
+ "content": 'The question should lead to an answer with either "theories", "complicated information" or '
+ 'be "very descriptive" on the topic.'
+ }
+ ]
+ token_count = count_total_tokens(messages)
+ response = make_openai_call(GPT_4_O, messages, token_count, "prompt", GEN_QUESTION_TEMPERATURE)
+ return {
+ "question": response["prompt"].strip(),
+ "difficulty": difficulty,
+ "topic": topic
+ }
+
+
+def gen_speaking_part_1(first_topic: str, second_topic: str, difficulty):
+ json_format = {
+ "first_topic": "topic 1",
+ "second_topic": "topic 2",
+ "questions": [
+ "Introductory question about the first topic, starting the topic with 'Let's talk about x' and then the "
+ "question.",
+ "Follow up question about the first topic",
+ "Follow up question about the first topic",
+ "Question about second topic",
+ "Follow up question about the second topic",
+ ]
+ }
+
+ messages = [
+ {
+ "role": "system",
+ "content": (
+ 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format))
+ },
+ {
+ "role": "user",
+ "content": (
+ 'Craft 5 simple and single questions of easy difficulty for IELTS Speaking Part 1 '
+ 'that encourages candidates to delve deeply into '
+ 'personal experiences, preferences, or insights on the topic '
+ 'of "' + first_topic + '" and the topic of "' + second_topic + '". '
+ 'Make sure that the generated '
+ 'question '
+ 'does not contain forbidden '
+ 'subjects in '
+ 'muslim countries.')
+ },
+ {
+ "role": "user",
+ "content": 'The questions should lead to the usage of 4 verb tenses (present perfect, present, '
+ 'past and future).'
+ },
+ {
+ "role": "user",
+ "content": 'They must be 1 single question each and not be double-barreled questions.'
+
+ }
+ ]
+ token_count = count_total_tokens(messages)
+ response = make_openai_call(GPT_4_O, messages, token_count, ["first_topic"],
+ GEN_QUESTION_TEMPERATURE)
+ response["type"] = 1
+ response["difficulty"] = difficulty
+ return response
+
+
+def gen_speaking_part_2(topic: str, difficulty):
+ json_format = {
+ "topic": "topic",
+ "question": "question",
+ "prompts": [
+ "prompt_1",
+ "prompt_2",
+ "prompt_3"
+ ],
+ "suffix": "And explain why..."
+ }
+
+ messages = [
+ {
+ "role": "system",
+ "content": 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format)
+ },
+ {
+ "role": "user",
+ "content": (
+ 'Create a question of medium difficulty for IELTS Speaking Part 2 '
+ 'that encourages candidates to narrate a '
+ 'personal experience or story related to the topic '
+ 'of "' + topic + '". Include 3 prompts that '
+ 'guide the candidate to describe '
+ 'specific aspects of the experience, '
+ 'such as details about the situation, '
+ 'their actions, and the reasons it left a '
+ 'lasting impression. Make sure that the '
+ 'generated question does not contain '
+ 'forbidden subjects in muslim countries.')
+ },
+ {
+ "role": "user",
+ "content": 'The prompts must not be questions. Also include a suffix like the ones in the IELTS exams '
+ 'that start with "And explain why".'
+ }
+ ]
+ token_count = count_total_tokens(messages)
+ response = make_openai_call(GPT_4_O, messages, token_count, GEN_FIELDS, GEN_QUESTION_TEMPERATURE)
+ response["type"] = 2
+ response["difficulty"] = difficulty
+ response["topic"] = topic
+ return response
+
+
+def gen_speaking_part_3(topic: str, difficulty):
+ json_format = {
+ "topic": "topic",
+ "questions": [
+ "Introductory question about the topic.",
+ "Follow up question about the topic",
+ "Follow up question about the topic",
+ "Follow up question about the topic",
+ "Follow up question about the topic"
+ ]
+ }
+
+ messages = [
+ {
+ "role": "system",
+ "content": (
+ 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format))
+ },
+ {
+ "role": "user",
+ "content": (
+ 'Formulate a set of 5 single questions of hard difficulty for IELTS Speaking Part 3 that encourage candidates to engage in a '
+ 'meaningful discussion on the topic of "' + topic + '". Provide inquiries, ensuring '
+ 'they explore various aspects, perspectives, and implications related to the topic. '
+ 'Make sure that the generated question does not contain forbidden subjects in muslim countries.')
+
+ },
+ {
+ "role": "user",
+ "content": 'They must be 1 single question each and not be double-barreled questions.'
+
+ }
+ ]
+ token_count = count_total_tokens(messages)
+ response = make_openai_call(GPT_4_O, messages, token_count, GEN_FIELDS, GEN_QUESTION_TEMPERATURE)
+ # Remove the numbers from the questions only if the string starts with a number
+ response["questions"] = [re.sub(r"^\d+\.\s*", "", question) if re.match(r"^\d+\.", question) else question for
+ question in response["questions"]]
+ response["type"] = 3
+ response["difficulty"] = difficulty
+ response["topic"] = topic
+ return response
+
+
+def gen_listening_section_1(topic, difficulty, req_exercises, number_of_exercises_q=queue.Queue(), start_id=1):
+ if (len(req_exercises) == 0):
+ req_exercises = random.sample(LISTENING_1_EXERCISE_TYPES, 1)
+
+ if (number_of_exercises_q.empty()):
+ number_of_exercises_q = divide_number_into_parts(TOTAL_LISTENING_SECTION_1_EXERCISES, len(req_exercises))
+
+ processed_conversation = generate_listening_1_conversation(topic)
+
+ exercises = generate_listening_conversation_exercises(parse_conversation(processed_conversation),
+ req_exercises,
+ number_of_exercises_q,
+ start_id, difficulty)
+ return {
+ "exercises": exercises,
+ "text": processed_conversation,
+ "difficulty": difficulty
+ }
+
+
+def gen_listening_section_2(topic, difficulty, req_exercises, number_of_exercises_q=queue.Queue(), start_id=11):
+ if (len(req_exercises) == 0):
+ req_exercises = random.sample(LISTENING_2_EXERCISE_TYPES, 2)
+
+ if (number_of_exercises_q.empty()):
+ number_of_exercises_q = divide_number_into_parts(TOTAL_LISTENING_SECTION_2_EXERCISES, len(req_exercises))
+
+ monologue = generate_listening_2_monologue(topic)
+
+ exercises = generate_listening_monologue_exercises(str(monologue), req_exercises, number_of_exercises_q,
+ start_id, difficulty)
+ return {
+ "exercises": exercises,
+ "text": monologue,
+ "difficulty": difficulty
+ }
+
+
+def gen_listening_section_3(topic, difficulty, req_exercises, number_of_exercises_q=queue.Queue(), start_id=21):
+ if (len(req_exercises) == 0):
+ req_exercises = random.sample(LISTENING_3_EXERCISE_TYPES, 1)
+
+ if (number_of_exercises_q.empty()):
+ number_of_exercises_q = divide_number_into_parts(TOTAL_LISTENING_SECTION_3_EXERCISES, len(req_exercises))
+
+ processed_conversation = generate_listening_3_conversation(topic)
+
+ exercises = generate_listening_conversation_exercises(parse_conversation(processed_conversation), req_exercises,
+ number_of_exercises_q,
+ start_id, difficulty)
+ return {
+ "exercises": exercises,
+ "text": processed_conversation,
+ "difficulty": difficulty
+ }
+
+
+def gen_listening_section_4(topic, difficulty, req_exercises, number_of_exercises_q=queue.Queue(), start_id=31):
+ if (len(req_exercises) == 0):
+ req_exercises = random.sample(LISTENING_EXERCISE_TYPES, 2)
+
+ if (number_of_exercises_q.empty()):
+ number_of_exercises_q = divide_number_into_parts(TOTAL_LISTENING_SECTION_4_EXERCISES, len(req_exercises))
+
+ monologue = generate_listening_4_monologue(topic)
+
+ exercises = generate_listening_monologue_exercises(str(monologue), req_exercises, number_of_exercises_q,
+ start_id, difficulty)
+ return {
+ "exercises": exercises,
+ "text": monologue,
+ "difficulty": difficulty
+ }
diff --git a/helper/gpt_zero.py b/helper/gpt_zero.py
new file mode 100644
index 0000000..08c4f1a
--- /dev/null
+++ b/helper/gpt_zero.py
@@ -0,0 +1,50 @@
+from logging import getLogger
+from typing import Dict, Optional
+import requests
+
+
+class GPTZero:
+ _GPT_ZERO_ENDPOINT = 'https://api.gptzero.me/v2/predict/text'
+
+ def __init__(self, gpt_zero_key: str):
+ self._logger = getLogger(__name__)
+ if gpt_zero_key is None:
+ self._logger.warning('GPTZero key was not included! Skipping AI detection when grading.')
+ self._gpt_zero_key = gpt_zero_key
+ self._header = {
+ 'x-api-key': gpt_zero_key
+ }
+
+ def run_detection(self, text: str):
+ if self._gpt_zero_key is None:
+ return None
+ data = {
+ 'document': text,
+ 'version': '',
+ 'multilingual': False
+ }
+ response = requests.post(self._GPT_ZERO_ENDPOINT, headers=self._header, json=data)
+ if response.status_code != 200:
+ self._logger.error(f'GPTZero endpoint returned status {response.status_code}: {response.json()}')
+ return None
+ return self._parse_detection(response.json())
+
+ def _parse_detection(self, response: Dict) -> Optional[Dict]:
+ try:
+ text_scan = response["documents"][0]
+ filtered_sentences = [
+ {
+ "sentence": item["sentence"],
+ "highlight_sentence_for_ai": item["highlight_sentence_for_ai"]
+ }
+ for item in text_scan["sentences"]
+ ]
+ return {
+ "class_probabilities": text_scan["class_probabilities"],
+ "confidence_category": text_scan["confidence_category"],
+ "predicted_class": text_scan["predicted_class"],
+ "sentences": filtered_sentences
+ }
+ except Exception as e:
+ self._logger.error(f'Failed to parse GPTZero response: {str(e)}')
+ return None
diff --git a/helper/heygen_api.py b/helper/heygen_api.py
index 149ed70..864794b 100644
--- a/helper/heygen_api.py
+++ b/helper/heygen_api.py
@@ -1,17 +1,19 @@
import os
import random
import time
+from logging import getLogger
import requests
from dotenv import load_dotenv
-import app
from helper.constants import *
from helper.firebase_helper import upload_file_firebase_get_url, save_to_db_with_id
from heygen.AvatarEnum import AvatarEnum
load_dotenv()
+logger = getLogger(__name__)
+
# Get HeyGen token
TOKEN = os.getenv("HEY_GEN_TOKEN")
FIREBASE_BUCKET = os.getenv('FIREBASE_BUCKET')
@@ -29,26 +31,32 @@ GET_HEADER = {
def create_videos_and_save_to_db(exercises, template, id):
+ avatar = random.choice(list(AvatarEnum))
# Speaking 1
# Using list comprehension to find the element with the desired value in the 'type' field
found_exercises_1 = [element for element in exercises if element.get('type') == 1]
# Check if any elements were found
if found_exercises_1:
exercise_1 = found_exercises_1[0]
- app.app.logger.info('Creating video for speaking part 1')
- sp1_result = create_video(exercise_1["question"], random.choice(list(AvatarEnum)))
- if sp1_result is not None:
- sound_file_path = VIDEO_FILES_PATH + sp1_result
- firebase_file_path = FIREBASE_SPEAKING_VIDEO_FILES_PATH + sp1_result
- url = upload_file_firebase_get_url(FIREBASE_BUCKET, firebase_file_path, sound_file_path)
- sp1_video_path = firebase_file_path
- sp1_video_url = url
- template["exercises"][0]["text"] = exercise_1["question"]
- template["exercises"][0]["title"] = exercise_1["topic"]
- template["exercises"][0]["video_url"] = sp1_video_url
- template["exercises"][0]["video_path"] = sp1_video_path
- else:
- app.app.logger.error("Failed to create video for part 1 question: " + exercise_1["question"])
+ sp1_questions = []
+ logger.info('Creating video for speaking part 1')
+ for question in exercise_1["questions"]:
+ sp1_result = create_video(question, avatar)
+ if sp1_result is not None:
+ sound_file_path = VIDEO_FILES_PATH + sp1_result
+ firebase_file_path = FIREBASE_SPEAKING_VIDEO_FILES_PATH + sp1_result
+ url = upload_file_firebase_get_url(FIREBASE_BUCKET, firebase_file_path, sound_file_path)
+ video = {
+ "text": question,
+ "video_path": firebase_file_path,
+ "video_url": url
+ }
+ sp1_questions.append(video)
+ else:
+ logger.error("Failed to create video for part 1 question: " + exercise_1["question"])
+ template["exercises"][0]["prompts"] = sp1_questions
+ template["exercises"][0]["first_title"] = exercise_1["first_topic"]
+ template["exercises"][0]["second_title"] = exercise_1["second_topic"]
# Speaking 2
# Using list comprehension to find the element with the desired value in the 'type' field
@@ -56,8 +64,8 @@ def create_videos_and_save_to_db(exercises, template, id):
# Check if any elements were found
if found_exercises_2:
exercise_2 = found_exercises_2[0]
- app.app.logger.info('Creating video for speaking part 2')
- sp2_result = create_video(exercise_2["question"], random.choice(list(AvatarEnum)))
+ logger.info('Creating video for speaking part 2')
+ sp2_result = create_video(exercise_2["question"], avatar)
if sp2_result is not None:
sound_file_path = VIDEO_FILES_PATH + sp2_result
firebase_file_path = FIREBASE_SPEAKING_VIDEO_FILES_PATH + sp2_result
@@ -70,7 +78,7 @@ def create_videos_and_save_to_db(exercises, template, id):
template["exercises"][1]["video_url"] = sp2_video_url
template["exercises"][1]["video_path"] = sp2_video_path
else:
- app.app.logger.error("Failed to create video for part 2 question: " + exercise_2["question"])
+ logger.error("Failed to create video for part 2 question: " + exercise_2["question"])
# Speaking 3
# Using list comprehension to find the element with the desired value in the 'type' field
@@ -79,8 +87,7 @@ def create_videos_and_save_to_db(exercises, template, id):
if found_exercises_3:
exercise_3 = found_exercises_3[0]
sp3_questions = []
- avatar = random.choice(list(AvatarEnum))
- app.app.logger.info('Creating videos for speaking part 3')
+ logger.info('Creating videos for speaking part 3')
for question in exercise_3["questions"]:
result = create_video(question, avatar)
if result is not None:
@@ -94,7 +101,7 @@ def create_videos_and_save_to_db(exercises, template, id):
}
sp3_questions.append(video)
else:
- app.app.logger.error("Failed to create video for part 3 question: " + question)
+ logger.error("Failed to create video for part 3 question: " + question)
template["exercises"][2]["prompts"] = sp3_questions
template["exercises"][2]["title"] = exercise_3["topic"]
@@ -106,7 +113,7 @@ def create_videos_and_save_to_db(exercises, template, id):
template["exercises"].pop(0)
save_to_db_with_id("speaking", template, id)
- app.app.logger.info('Saved speaking to DB with id ' + id + " : " + str(template))
+ logger.info('Saved speaking to DB with id ' + id + " : " + str(template))
def create_video(text, avatar):
@@ -127,8 +134,8 @@ def create_video(text, avatar):
}
}
response = requests.post(create_video_url, headers=POST_HEADER, json=data)
- app.app.logger.info(response.status_code)
- app.app.logger.info(response.json())
+ logger.info(response.status_code)
+ logger.info(response.json())
# GET TO CHECK STATUS AND GET VIDEO WHEN READY
video_id = response.json()["data"]["video_id"]
@@ -147,11 +154,11 @@ def create_video(text, avatar):
error = response_data["data"]["error"]
if status != "completed" and error is None:
- app.app.logger.info(f"Status: {status}")
+ logger.info(f"Status: {status}")
time.sleep(10) # Wait for 10 second before the next request
- app.app.logger.info(response.status_code)
- app.app.logger.info(response.json())
+ logger.info(response.status_code)
+ logger.info(response.json())
# DOWNLOAD VIDEO
download_url = response.json()['data']['video_url']
@@ -165,8 +172,8 @@ def create_video(text, avatar):
output_path = os.path.join(output_directory, output_filename)
with open(output_path, 'wb') as f:
f.write(response.content)
- app.app.logger.info(f"File '{output_filename}' downloaded successfully.")
+ logger.info(f"File '{output_filename}' downloaded successfully.")
return output_filename
else:
- app.app.logger.error(f"Failed to download file. Status code: {response.status_code}")
+ logger.error(f"Failed to download file. Status code: {response.status_code}")
return None
diff --git a/helper/question_templates.py b/helper/question_templates.py
index b065626..a6edfa8 100644
--- a/helper/question_templates.py
+++ b/helper/question_templates.py
@@ -1136,12 +1136,11 @@ def getSpeakingTemplate():
"exercises": [
{
"id": str(uuid.uuid4()),
- "prompts": [],
- "text": "text",
- "title": "topic",
- "video_url": "sp1_video_url",
- "video_path": "sp1_video_path",
- "type": "speaking"
+ "prompts": ["questions"],
+ "text": "Listen carefully and respond.",
+ "first_title": "first_topic",
+ "second_title": "second_topic",
+ "type": "interactiveSpeaking"
},
{
"id": str(uuid.uuid4()),
diff --git a/modules/__init__.py b/modules/__init__.py
new file mode 100644
index 0000000..2aec732
--- /dev/null
+++ b/modules/__init__.py
@@ -0,0 +1,5 @@
+from .gpt import GPT
+
+__all__ = [
+ "GPT"
+]
diff --git a/modules/gpt.py b/modules/gpt.py
new file mode 100644
index 0000000..58a1a93
--- /dev/null
+++ b/modules/gpt.py
@@ -0,0 +1,66 @@
+import json
+from logging import getLogger
+
+from typing import List, Optional, Callable, TypeVar
+
+from openai.types.chat import ChatCompletionMessageParam
+from pydantic import BaseModel
+
+T = TypeVar('T', bound=BaseModel)
+
+
+class GPT:
+
+ def __init__(self, openai_client):
+ self._client = openai_client
+ self._default_model = "gpt-4o-2024-08-06"
+ self._logger = getLogger(__name__)
+
+ def prediction(
+ self,
+ messages: List[ChatCompletionMessageParam],
+ map_to_model: Callable,
+ json_scheme: str,
+ *,
+ model: Optional[str] = None,
+ temperature: Optional[float] = None,
+ max_retries: int = 3
+ ) -> List[T] | T | None:
+ params = {
+ "messages": messages,
+ "response_format": {"type": "json_object"},
+ "model": model if model else self._default_model
+ }
+
+ if temperature:
+ params["temperature"] = temperature
+
+ attempt = 0
+ while attempt < max_retries:
+ result = self._client.chat.completions.create(**params)
+ result_content = result.choices[0].message.content
+ try:
+ result_json = json.loads(result_content)
+ return map_to_model(result_json)
+ except Exception as e:
+ attempt += 1
+ self._logger.info(f"GPT returned malformed response: {result_content}\n {str(e)}")
+ params["messages"] = [
+ {
+ "role": "user",
+ "content": (
+ "Your previous response wasn't in the json format I've explicitly told you to output. "
+ f"In your next response, you will fix it and return me just the json I've asked."
+ )
+ },
+ {
+ "role": "user",
+ "content": (
+ f"Previous response: {result_content}\n"
+ f"JSON format: {json_scheme}"
+ )
+ }
+ ]
+ if attempt >= max_retries:
+ self._logger.error(f"Max retries exceeded!")
+ return None
diff --git a/modules/helper/__init__.py b/modules/helper/__init__.py
new file mode 100644
index 0000000..447b288
--- /dev/null
+++ b/modules/helper/__init__.py
@@ -0,0 +1,5 @@
+from .logger import LoggerHelper
+
+__all__ = [
+ "LoggerHelper"
+]
diff --git a/modules/helper/file_helper.py b/modules/helper/file_helper.py
new file mode 100644
index 0000000..9008127
--- /dev/null
+++ b/modules/helper/file_helper.py
@@ -0,0 +1,77 @@
+import base64
+import io
+import os
+import shutil
+import subprocess
+from typing import Optional
+
+import numpy as np
+import pypandoc
+from PIL import Image
+
+
+class FileHelper:
+
+ # Pandoc reportedly handles a wide range of input formats; this has only been tested with docx.
+ @staticmethod
+ def convert_file_to_pdf(input_path: str, output_path: str):
+ pypandoc.convert_file(input_path, 'pdf', outputfile=output_path, extra_args=[
+ '-V', 'geometry:paperwidth=5.5in',
+ '-V', 'geometry:paperheight=8.5in',
+ '-V', 'geometry:margin=0.5in',
+ '-V', 'pagestyle=empty'
+ ])
+
+ @staticmethod
+ def convert_file_to_html(input_path: str, output_path: str):
+ pypandoc.convert_file(input_path, 'html', outputfile=output_path)
+
+ @staticmethod
+ def pdf_to_png(path_id: str):
+ to_png = f"pdftoppm -png exercises.pdf page"
+ result = subprocess.run(to_png, shell=True, cwd=f'./tmp/{path_id}', capture_output=True, text=True)
+ if result.returncode != 0:
+ raise Exception(
+ f"Couldn't convert pdf to png. Failed to run command '{to_png}' -> ```cmd {result.stderr}```")
+
+ @staticmethod
+ def is_page_blank(image_bytes: bytes, image_threshold=10) -> bool:
+ with Image.open(io.BytesIO(image_bytes)) as img:
+ img_gray = img.convert('L')
+ img_array = np.array(img_gray)
+ non_white_pixels = np.sum(img_array < 255)
+
+ return non_white_pixels <= image_threshold
+
+ @classmethod
+ def _encode_image(cls, image_path: str, image_threshold=10) -> Optional[str]:
+ with open(image_path, "rb") as image_file:
+ image_bytes = image_file.read()
+
+ if cls.is_page_blank(image_bytes, image_threshold):
+ return None
+
+ return base64.b64encode(image_bytes).decode('utf-8')
+
+ @classmethod
+ def b64_pngs(cls, path_id: str, files: list[str]):
+ png_messages = []
+ for filename in files:
+ b64_string = cls._encode_image(os.path.join(f'./tmp/{path_id}', filename))
+ if b64_string:
+ png_messages.append({
+ "type": "image_url",
+ "image_url": {
+ "url": f"data:image/png;base64,{b64_string}"
+ }
+ })
+ return png_messages
+
+ @staticmethod
+ def remove_directory(path):
+ try:
+ if os.path.exists(path):
+ if os.path.isdir(path):
+ shutil.rmtree(path)
+ except Exception as e:
+ print(f"An error occurred while trying to remove {path}: {str(e)}")
diff --git a/modules/helper/logger.py b/modules/helper/logger.py
new file mode 100644
index 0000000..762766a
--- /dev/null
+++ b/modules/helper/logger.py
@@ -0,0 +1,23 @@
+import logging
+from functools import wraps
+
+
+class LoggerHelper:
+
+ @staticmethod
+ def suppress_loggers():
+ def decorator(f):
+ @wraps(f)
+ def wrapped(*args, **kwargs):
+ root_logger = logging.getLogger()
+ original_level = root_logger.level
+
+ root_logger.setLevel(logging.ERROR)
+
+ try:
+ return f(*args, **kwargs)
+ finally:
+ root_logger.setLevel(original_level)
+
+ return wrapped
+ return decorator
diff --git a/modules/training_content/__init__.py b/modules/training_content/__init__.py
new file mode 100644
index 0000000..772b4b7
--- /dev/null
+++ b/modules/training_content/__init__.py
@@ -0,0 +1,7 @@
+from .kb import TrainingContentKnowledgeBase
+from .service import TrainingContentService
+
+__all__ = [
+ "TrainingContentService",
+ "TrainingContentKnowledgeBase"
+]
diff --git a/modules/training_content/dtos.py b/modules/training_content/dtos.py
new file mode 100644
index 0000000..2133f49
--- /dev/null
+++ b/modules/training_content/dtos.py
@@ -0,0 +1,29 @@
+from pydantic import BaseModel
+from typing import List
+
+
class QueryDTO(BaseModel):
    """One knowledge-base query the LLM proposed: a category plus query text."""
    category: str
    text: str


class DetailsDTO(BaseModel):
    """Per-exam-attempt summary produced by the LLM."""
    exam_id: str
    # Epoch timestamp of the attempt (unit not visible here -- mirrors stat["date"]).
    date: int
    performance_comment: str
    detailed_summary: str


class WeakAreaDTO(BaseModel):
    """A weak area identified across all exams, with an explanatory comment."""
    area: str
    comment: str


class TrainingContentDTO(BaseModel):
    """Top-level LLM response: exam details, weak areas and KB queries."""
    details: List[DetailsDTO]
    weak_areas: List[WeakAreaDTO]
    queries: List[QueryDTO]


class TipsDTO(BaseModel):
    """Ids of the tips the LLM judged useful for the student."""
    tip_ids: List[str]
diff --git a/modules/training_content/kb.py b/modules/training_content/kb.py
new file mode 100644
index 0000000..dbca899
--- /dev/null
+++ b/modules/training_content/kb.py
@@ -0,0 +1,85 @@
+import json
+import os
+from logging import getLogger
+from typing import Dict, List
+
+import faiss
+import pickle
+
+
class TrainingContentKnowledgeBase:
    """FAISS-backed knowledge base of study tips, one L2 index per category."""

    def __init__(self, embeddings, path: str = 'pathways_2_rw_with_ids.json'):
        # embeddings: sentence encoder exposing .encode(list[str]) -> ndarray.
        self._embedding_model = embeddings
        # The raw tips JSON is only needed when rebuilding the indices, so it is
        # deliberately left unloaded for normal serving.
        self._tips = None  # self._read_json(path)
        self._category_metadata = None  # {category: [{"id", "text"}, ...]}
        self._indices = None  # {category: faiss index}
        self._logger = getLogger(__name__)

    @staticmethod
    def _read_json(path: str) -> Dict[str, any]:
        """Load and parse the tips JSON file at *path*."""
        with open(path, 'r', encoding="utf-8") as json_file:
            return json.loads(json_file.read())

    def print_category_count(self):
        """Print how many tips each (normalised) category holds.

        Fix: the previous version initialised a newly-seen category to 0 and
        only incremented on subsequent hits, under-counting every category by one.
        """
        category_tips = {}
        for unit in self._tips['units']:
            for page in unit['pages']:
                for tip in page['tips']:
                    category = tip['category'].lower().replace(" ", "_")
                    category_tips[category] = category_tips.get(category, 0) + 1
        print(category_tips)

    def create_embeddings_and_save_them(self) -> None:
        """Build one FAISS IndexFlatL2 per category and persist indices + metadata.

        NOTE(review): tip['embedding'] is passed through the sentence encoder,
        so the field presumably holds the tip's embeddable *text* rather than a
        precomputed vector -- confirm the field name against the source JSON.
        """
        category_embeddings = {}
        category_metadata = {}

        for unit in self._tips['units']:
            for page in unit['pages']:
                for tip in page['tips']:
                    category = tip['category'].lower().replace(" ", "_")
                    if category not in category_embeddings:
                        category_embeddings[category] = []
                        category_metadata[category] = []

                    category_embeddings[category].append(tip['embedding'])
                    category_metadata[category].append({"id": tip['id'], "text": tip['text']})

        category_indices = {}
        for category, embeddings in category_embeddings.items():
            embeddings_array = self._embedding_model.encode(embeddings)
            index = faiss.IndexFlatL2(embeddings_array.shape[1])
            index.add(embeddings_array)
            category_indices[category] = index

            # Persist each per-category index next to the shared metadata file.
            faiss.write_index(index, f"./faiss/{category}_tips_index.faiss")

        with open("./faiss/tips_metadata.pkl", "wb") as f:
            pickle.dump(category_metadata, f)

    def load_indices_and_metadata(
        self,
        directory: str = './faiss',
        suffix: str = '_tips_index.faiss',
        metadata_path: str = './faiss/tips_metadata.pkl'
    ):
        """Load every persisted category index plus the tips metadata pickle."""
        files = os.listdir(directory)
        self._indices = {}
        for file in files:
            if file.endswith(suffix):
                # The category name is the filename minus the shared suffix.
                self._indices[file[:-len(suffix)]] = faiss.read_index(f'{directory}/{file}')
                self._logger.info(f'Loaded embeddings for {file[:-len(suffix)]} category.')

        with open(metadata_path, 'rb') as f:
            self._category_metadata = pickle.load(f)
            self._logger.info("Loaded tips metadata")

    def query_knowledge_base(self, query: str, category: str, top_k: int = 5) -> List[Dict[str, str]]:
        """Return the metadata of the *top_k* nearest tips to *query* in *category*.

        Requires load_indices_and_metadata() to have been called first.
        """
        query_embedding = self._embedding_model.encode([query])
        index = self._indices[category]
        D, I = index.search(query_embedding, top_k)
        return [self._category_metadata[category][i] for i in I[0]]
diff --git a/modules/training_content/service.py b/modules/training_content/service.py
new file mode 100644
index 0000000..f583571
--- /dev/null
+++ b/modules/training_content/service.py
@@ -0,0 +1,409 @@
+import json
+from datetime import datetime
+from logging import getLogger
+
+from typing import Dict, List
+
+from modules.training_content.dtos import TrainingContentDTO, WeakAreaDTO, QueryDTO, DetailsDTO, TipsDTO
+
+
class TrainingContentService:
    """Builds personalised training content from a student's exam statistics.

    Pipeline: group raw stats per exam sitting, ask the LLM to summarise the
    attempts and propose knowledge-base queries, retrieve matching tips from
    the FAISS knowledge base, have the LLM filter them, and persist the
    resulting training document to Firestore.
    """

    # Tip categories the LLM is allowed to query (see _get_exam_details_and_tips).
    TOOLS = [
        'critical_thinking',
        'language_for_writing',
        'reading_skills',
        'strategy',
        'words',
        'writing_skills'
    ]
    # strategy word_link ct_focus reading_skill word_partners writing_skill language_for_writing

    def __init__(self, kb, openai, firestore):
        # kb: TrainingContentKnowledgeBase; openai: GPT wrapper; firestore: Firestore client.
        self._training_content_module = kb
        self._db = firestore
        self._logger = getLogger(__name__)
        self._llm = openai
+
    def get_tips(self, training_content):
        """Generate and persist a training document for one user.

        *training_content* carries "userID" and "stats" (raw exam stat dicts).
        Returns {"id": <new Firestore training document id>}.
        """
        user, stats = training_content["userID"], training_content["stats"]
        exam_data, exam_map = self._sort_out_solutions(stats)
        # NOTE: the parameter name is rebound here -- from this point on
        # `training_content` is the LLM's TrainingContentDTO, not the payload.
        training_content = self._get_exam_details_and_tips(exam_data)
        tips = self._query_kb(training_content.queries)
        usefull_tips = self._get_usefull_tips(exam_data, tips)
        exam_map = self._merge_exam_map_with_details(exam_map, training_content.details)

        weak_areas = {"weak_areas": []}
        for area in training_content.weak_areas:
            weak_areas["weak_areas"].append(area.dict())

        training_doc = {
            'created_at': int(datetime.now().timestamp() * 1000),  # milliseconds epoch
            **exam_map,
            **usefull_tips.dict(),
            **weak_areas,
            "user": user
        }
        # Firestore .add() returns (update_time, DocumentReference).
        doc_ref = self._db.collection('training').add(training_doc)
        return {
            "id": doc_ref[1].id
        }
+
    @staticmethod
    def _merge_exam_map_with_details(exam_map: Dict[str, any], details: List[DetailsDTO]):
        """Join the LLM's per-exam details with the locally built exam map.

        NOTE(review): exam_map produced by _sort_out_solutions is keyed by the
        session key ("<date>-<user>"), so detail.exam_id must echo that same
        key back from the LLM -- confirm, otherwise this raises KeyError.
        """
        new_exam_map = {"exams": []}
        for detail in details:
            new_exam_map["exams"].append({
                "id": detail.exam_id,
                "date": detail.date,
                "performance_comment": detail.performance_comment,
                "detailed_summary": detail.detailed_summary,
                **exam_map[detail.exam_id]
            })
        return new_exam_map
+
    def _query_kb(self, queries: List[QueryDTO]):
        """Run the LLM-proposed queries against the FAISS knowledge base.

        Maps the public category names (TOOLS) onto the index names on disk;
        "words" fans out to both word-related indices. Unknown categories are
        logged and skipped. Returns {"tips": [...]}.
        """
        map_categories = {
            "critical_thinking": "ct_focus",
            "language_for_writing": "language_for_writing",
            "reading_skills": "reading_skill",
            "strategy": "strategy",
            "writing_skills": "writing_skill"
        }

        tips = {"tips": []}
        for query in queries:
            if query.category == "words":
                # "words" has no single index -- search both word collections.
                tips["tips"].extend(
                    self._training_content_module.query_knowledge_base(query.text, "word_link")
                )
                tips["tips"].extend(
                    self._training_content_module.query_knowledge_base(query.text, "word_partners")
                )
            else:
                if query.category in map_categories:
                    tips["tips"].extend(
                        self._training_content_module.query_knowledge_base(query.text, map_categories[query.category])
                    )
                else:
                    self._logger.info(f"GTP tried to query knowledge base for {query.category} and it doesn't exist.")
        return tips

    def _get_exam_details_and_tips(self, exam_data: Dict[str, any]) -> TrainingContentDTO:
        """Ask the LLM to summarise the exam data and propose KB queries.

        Returns a TrainingContentDTO parsed via _map_gpt_response.
        """
        json_schema = (
            '{ "details": [{"exam_id": "", "date": 0, "performance_comment": "", "detailed_summary": ""}],'
            ' "weak_areas": [{"area": "", "comment": ""}], "queries": [{"text": "", "category": ""}] }'
        )
        messages = [
            {
                "role": "user",
                "content": (
                    f"I'm going to provide you with exam data, you will take the exam data and fill this json "
                    f'schema : {json_schema}. "performance_comment" is a short sentence that describes the '
                    'students\'s performance and main mistakes in a single exam, "detailed_summary" is a detailed '
                    'summary of the student\'s performance, "weak_areas" are identified areas'
                    ' across all exams which need to be improved upon, for example, area "Grammar and Syntax" comment "Issues'
                    ' with sentence structure and punctuation.", the "queries" field is where you will write queries '
                    'for tips that will be displayed to the student, the category attribute is a collection of '
                    'embeddings and the text will be the text used to query the knowledge base. The categories are '
                    f'the following [{", ".join(self.TOOLS)}]. The exam data will be a json where the key of the field '
                    '"exams" is the exam id, an exam can be composed of multiple modules or single modules. The student'
                    ' will see your response so refrain from using phrasing like "The student" did x, y and z. If the '
                    'field "answer" in a question is an empty array "[]", then the student didn\'t answer any question '
                    'and you must address that in your response. Also questions aren\'t modules, the only modules are: '
                    'level, speaking, writing, reading and listening. The details array needs to be tailored to the '
                    'exam attempt, even if you receive the same exam you must treat as different exams by their id.'
                    'Don\'t make references to an exam by it\'s id, the GUI will handle that so the student knows '
                    'which is the exam your comments and summary are referencing too. Even if the student hasn\'t '
                    'submitted no answers for an exam, you must still fill the details structure addressing that fact.'
                )
            },
            {
                "role": "user",
                "content": f'Exam Data: {str(exam_data)}'
            }
        ]
        return self._llm.prediction(messages, self._map_gpt_response, json_schema)

    def _get_usefull_tips(self, exam_data: Dict[str, any], tips: Dict[str, any]) -> TipsDTO:
        """Ask the LLM which of the retrieved tips are useful for this student.

        Returns a TipsDTO holding only the selected tip ids.
        """
        json_schema = (
            '{ "tip_ids": [] }'
        )
        messages = [
            {
                "role": "user",
                "content": (
                    f"I'm going to provide you with tips and I want you to return to me the tips that "
                    f"can be usefull for the student that made the exam that I'm going to send you, return "
                    f"me the tip ids in this json format {json_schema}."
                )
            },
            {
                "role": "user",
                "content": f'Exam Data: {str(exam_data)}'
            },
            {
                "role": "user",
                "content": f'Tips: {str(tips)}'
            }
        ]
        return self._llm.prediction(messages, lambda response: TipsDTO(**response), json_schema)
+
+ @staticmethod
+ def _map_gpt_response(response: Dict[str, any]) -> TrainingContentDTO:
+ parsed_response = {
+ "details": [DetailsDTO(**detail) for detail in response["details"]],
+ "weak_areas": [WeakAreaDTO(**area) for area in response["weak_areas"]],
+ "queries": [QueryDTO(**query) for query in response["queries"]]
+ }
+ return TrainingContentDTO(**parsed_response)
+
+ def _sort_out_solutions(self, stats):
+ grouped_stats = {}
+ for stat in stats:
+ session_key = f'{str(stat["date"])}-{stat["user"]}'
+ module = stat["module"]
+ exam_id = stat["exam"]
+
+ if session_key not in grouped_stats:
+ grouped_stats[session_key] = {}
+ if module not in grouped_stats[session_key]:
+ grouped_stats[session_key][module] = {
+ "stats": [],
+ "exam_id": exam_id
+ }
+ grouped_stats[session_key][module]["stats"].append(stat)
+
+ exercises = {}
+ exam_map = {}
+ for session_key, modules in grouped_stats.items():
+ exercises[session_key] = {}
+ for module, module_stats in modules.items():
+ exercises[session_key][module] = {}
+
+ exam_id = module_stats["exam_id"]
+ if exam_id not in exercises[session_key][module]:
+ exercises[session_key][module][exam_id] = {"date": None, "exercises": []}
+
+ exam_total_questions = 0
+ exam_total_correct = 0
+
+ for stat in module_stats["stats"]:
+ exam_total_questions += stat["score"]["total"]
+ exam_total_correct += stat["score"]["correct"]
+ exercises[session_key][module][exam_id]["date"] = stat["date"]
+
+ if session_key not in exam_map:
+ exam_map[session_key] = {"stat_ids": [], "score": 0}
+ exam_map[session_key]["stat_ids"].append(stat["id"])
+
+ exam = self._get_doc_by_id(module, exam_id)
+ if module == "listening":
+ exercises[session_key][module][exam_id]["exercises"].extend(
+ self._get_listening_solutions(stat, exam))
+ elif module == "reading":
+ exercises[session_key][module][exam_id]["exercises"].extend(
+ self._get_reading_solutions(stat, exam))
+ elif module == "writing":
+ exercises[session_key][module][exam_id]["exercises"].extend(
+ self._get_writing_prompts_and_answers(stat, exam)
+ )
+ elif module == "speaking":
+ exercises[session_key][module][exam_id]["exercises"].extend(
+ self._get_speaking_solutions(stat, exam)
+ )
+ elif module == "level":
+ exercises[session_key][module][exam_id]["exercises"].extend(
+ self._get_level_solutions(stat, exam)
+ )
+
+ exam_map[session_key]["score"] = round((exam_total_correct / exam_total_questions) * 100)
+ exam_map[session_key]["module"] = module
+ with open('exam_result.json', 'w') as file:
+ json.dump({"exams": exercises}, file, indent=4)
+
+ return {"exams": exercises}, exam_map
+
+ def _get_writing_prompts_and_answers(self, stat, exam):
+ result = []
+ try:
+ exercises = []
+ for solution in stat['solutions']:
+ answer = solution['solution']
+ exercise_id = solution['id']
+ exercises.append({
+ "exercise_id": exercise_id,
+ "answer": answer
+ })
+ for exercise in exercises:
+ for exam_exercise in exam["exercises"]:
+ if exam_exercise["id"] == exercise["exercise_id"]:
+ result.append({
+ "exercise": exam_exercise["prompt"],
+ "answer": exercise["answer"]
+ })
+
+ except KeyError as e:
+ self._logger.warning(f"Malformed stat object: {str(e)}")
+
+ return result
+
+ @staticmethod
+ def _get_mc_question(exercise, stat):
+ shuffle_maps = stat.get("shuffleMaps", [])
+ answer = stat["solutions"] if len(shuffle_maps) == 0 else []
+ if len(shuffle_maps) != 0:
+ for solution in stat["solutions"]:
+ shuffle_map = [
+ item["map"] for item in shuffle_maps
+ if item["questionID"] == solution["question"]
+ ]
+ answer.append({
+ "question": solution["question"],
+ "option": shuffle_map[solution["option"]]
+ })
+ return {
+ "question": exercise["prompt"],
+ "exercise": exercise["questions"],
+ "answer": stat["solutions"]
+ }
+
+ @staticmethod
+ def _swap_key_name(d, original_key, new_key):
+ d[new_key] = d.pop(original_key)
+ return d
+
    def _get_level_solutions(self, stat, exam):
        """Resolve a level-module stat against its exam doc.

        Supports fillBlanks and multipleChoice exercises; malformed stats are
        logged and skipped rather than raised.
        """
        result = []
        try:
            for part in exam["parts"]:
                for exercise in part["exercises"]:
                    if exercise["id"] == stat["exercise"]:
                        if stat["type"] == "fillBlanks":
                            result.append({
                                "prompt": exercise["prompt"],
                                "template": exercise["text"],
                                "words": exercise["words"],
                                "solutions": exercise["solutions"],
                                # Normalise the answer key name to match other modules.
                                "answer": [
                                    self._swap_key_name(item, 'solution', 'option')
                                    for item in stat["solutions"]
                                ]
                            })
                        elif stat["type"] == "multipleChoice":
                            result.append(self._get_mc_question(exercise, stat))
        except KeyError as e:
            self._logger.warning(f"Malformed stat object: {str(e)}")
        return result

    def _get_listening_solutions(self, stat, exam):
        """Resolve a listening-module stat against its exam doc.

        Supports writeBlanks, fillBlanks and multipleChoice exercises.
        """
        result = []
        try:
            for part in exam["parts"]:
                for exercise in part["exercises"]:
                    if exercise["id"] == stat["exercise"]:
                        if stat["type"] == "writeBlanks":
                            result.append({
                                "question": exercise["prompt"],
                                "template": exercise["text"],
                                "solution": exercise["solutions"],
                                "answer": stat["solutions"]
                            })
                        elif stat["type"] == "fillBlanks":
                            result.append({
                                "question": exercise["prompt"],
                                "template": exercise["text"],
                                "words": exercise["words"],
                                "solutions": exercise["solutions"],
                                "answer": stat["solutions"]
                            })
                        elif stat["type"] == "multipleChoice":
                            result.append(self._get_mc_question(exercise, stat))

        except KeyError as e:
            self._logger.warning(f"Malformed stat object: {str(e)}")
        return result
+
    @staticmethod
    def _find_shuffle_map(shuffle_maps, question_id):
        """Return the shuffle map for *question_id*, or None when absent."""
        return next((item["map"] for item in shuffle_maps if item["questionID"] == question_id), None)
+
    def _get_speaking_solutions(self, stat, exam):
        """Resolve a speaking-module stat into prompts, transcripts and comments.

        Returns a single-element list so callers can .extend() uniformly.
        NOTE(review): stat['solutions'][0] raises IndexError (not KeyError) on
        an empty solutions list, which the except below does not catch -- confirm
        solutions is never empty for speaking stats.
        """
        result = {}
        try:
            result = {
                "comments": {
                    key: value['comment'] for key, value in stat['solutions'][0]['evaluation']['task_response'].items()}
                ,
                "exercises": {}
            }

            for exercise in exam["exercises"]:
                if exercise["id"] == stat["exercise"]:
                    if stat["type"] == "interactiveSpeaking":
                        # First pass: the questions; second pass: their transcripts.
                        for i in range(len(exercise["prompts"])):
                            result["exercises"][f"exercise_{i+1}"] = {
                                "question": exercise["prompts"][i]["text"]
                            }
                        for i in range(len(exercise["prompts"])):
                            answer = stat['solutions'][0]["evaluation"].get(f'transcript_{i+1}', '')
                            result["exercises"][f"exercise_{i+1}"]["answer"] = answer
                    elif stat["type"] == "speaking":
                        # f'transcript' has no placeholder -- it is just the literal key.
                        result["exercises"]["exercise_1"] = {
                            "question": exercise["text"],
                            "answer": stat['solutions'][0]["evaluation"].get(f'transcript', '')
                        }
        except KeyError as e:
            self._logger.warning(f"Malformed stat object: {str(e)}")
        return [result]
+
    def _get_reading_solutions(self, stat, exam):
        """Resolve a reading-module stat against its exam doc.

        Supports fillBlanks, writeBlanks, trueFalse and matchSentences; each
        entry carries the part's passage text alongside the answers.
        """
        result = []
        try:
            for part in exam["parts"]:
                text = part["text"]
                for exercise in part["exercises"]:
                    if exercise["id"] == stat["exercise"]:
                        if stat["type"] == "fillBlanks":
                            result.append({
                                "text": text,
                                "question": exercise["prompt"],
                                "template": exercise["text"],
                                "words": exercise["words"],
                                "solutions": exercise["solutions"],
                                "answer": stat["solutions"]
                            })
                        elif stat["type"] == "writeBlanks":
                            result.append({
                                "text": text,
                                "question": exercise["prompt"],
                                "template": exercise["text"],
                                "solutions": exercise["solutions"],
                                "answer": stat["solutions"]
                            })
                        elif stat["type"] == "trueFalse":
                            result.append({
                                "text": text,
                                "questions": exercise["questions"],
                                "answer": stat["solutions"]
                            })
                        elif stat["type"] == "matchSentences":
                            result.append({
                                "text": text,
                                "question": exercise["prompt"],
                                "sentences": exercise["sentences"],
                                "options": exercise["options"],
                                "answer": stat["solutions"]
                            })
        except KeyError as e:
            self._logger.warning(f"Malformed stat object: {str(e)}")
        return result
+
+ def _get_doc_by_id(self, collection: str, doc_id: str):
+ collection_ref = self._db.collection(collection)
+ doc_ref = collection_ref.document(doc_id)
+ doc = doc_ref.get()
+
+ if doc.exists:
+ return doc.to_dict()
+ return None
diff --git a/modules/upload_level/__init__.py b/modules/upload_level/__init__.py
new file mode 100644
index 0000000..781a962
--- /dev/null
+++ b/modules/upload_level/__init__.py
@@ -0,0 +1,5 @@
+from .service import UploadLevelService
+
+__all__ = [
+ "UploadLevelService"
+]
diff --git a/modules/upload_level/exam_dtos.py b/modules/upload_level/exam_dtos.py
new file mode 100644
index 0000000..656caa2
--- /dev/null
+++ b/modules/upload_level/exam_dtos.py
@@ -0,0 +1,57 @@
+from pydantic import BaseModel, Field
+from typing import List, Dict, Union, Optional, Any
+from uuid import uuid4, UUID
+
+
class Option(BaseModel):
    """A single answer option (e.g. id "A" plus its text)."""
    id: str
    text: str


class MultipleChoiceQuestion(BaseModel):
    """One multiple-choice question, including its solved answer id."""
    id: str
    prompt: str
    variant: str = "text"
    solution: str
    options: List[Option]


class MultipleChoiceExercise(BaseModel):
    """A multipleChoice exercise: a shared prompt plus its questions."""
    id: UUID = Field(default_factory=uuid4)
    type: str = "multipleChoice"
    prompt: str = "Select the appropriate option."
    questions: List[MultipleChoiceQuestion]
    userSolutions: List = Field(default_factory=list)


class FillBlanksWord(BaseModel):
    """Option set for one blank; keys are option ids ("A".."D")."""
    id: str
    options: Dict[str, str]


class FillBlanksSolution(BaseModel):
    """The correct option id for one blank."""
    id: str
    solution: str


class FillBlanksExercise(BaseModel):
    """A fillBlanks exercise built from a passage containing {{id}} blanks."""
    id: UUID = Field(default_factory=uuid4)
    type: str = "fillBlanks"
    variant: str = "mc"
    prompt: str = "Click a blank to select the appropriate word for it."
    text: str
    solutions: List[FillBlanksSolution]
    words: List[FillBlanksWord]
    userSolutions: List = Field(default_factory=list)


# NOTE(review): an untagged Union may coerce dicts to the first matching model
# under pydantic v1 smart-union rules; the mapper instantiates concrete classes,
# so this only matters if Exam(**raw_dict) is ever called directly.
Exercise = Union[MultipleChoiceExercise, FillBlanksExercise]


class Part(BaseModel):
    """A sheet part: its exercises plus an optional reading-passage context."""
    exercises: List[Exercise]
    context: Optional[str] = Field(default=None)


class Exam(BaseModel):
    """Top-level level-exam document: an ordered list of parts."""
    parts: List[Part]
diff --git a/modules/upload_level/mapper.py b/modules/upload_level/mapper.py
new file mode 100644
index 0000000..6c39b0e
--- /dev/null
+++ b/modules/upload_level/mapper.py
@@ -0,0 +1,66 @@
+from typing import Dict, Any
+
+from pydantic import ValidationError
+
+from modules.upload_level.exam_dtos import (
+ MultipleChoiceExercise,
+ FillBlanksExercise,
+ Part, Exam
+)
+from modules.upload_level.sheet_dtos import Sheet, Option, MultipleChoiceQuestion, FillBlanksWord
+
+
class ExamMapper:
    """Maps raw LLM JSON responses onto the typed exam / sheet models."""

    @staticmethod
    def map_to_exam_model(response: Dict[str, Any]) -> Exam:
        """Build an Exam from the LLM's {"parts": [...]} response.

        Raises ValueError for an exercise type that is neither multipleChoice
        nor fillBlanks. (Previously raised pydantic.ValidationError, which
        cannot be constructed from a plain message and itself crashed with a
        TypeError before the intended error was ever raised.)
        """
        parts = []
        for part in response['parts']:
            part_exercises = part['exercises']
            context = part.get('context', None)

            exercises = []
            for exercise in part_exercises:
                exercise_type = exercise['type']
                if exercise_type == 'multipleChoice':
                    exercise_model = MultipleChoiceExercise(**exercise)
                elif exercise_type == 'fillBlanks':
                    exercise_model = FillBlanksExercise(**exercise)
                else:
                    raise ValueError(f"Unknown exercise type: {exercise_type}")

                exercises.append(exercise_model)

            # Only pass context when present so Part keeps its default otherwise.
            part_kwargs = {"exercises": exercises}
            if context is not None:
                part_kwargs["context"] = context

            part_model = Part(**part_kwargs)
            parts.append(part_model)

        return Exam(parts=parts)

    @staticmethod
    def map_to_sheet(response: Dict[str, Any]) -> Sheet:
        """Build a Sheet from the OCR LLM's {"components": [...]} response.

        Unknown component types (e.g. "part", "passage") are passed through as
        raw dicts -- the Sheet model's Component union allows plain dicts.
        """
        components = []

        for item in response["components"]:
            component_type = item["type"]

            if component_type == "multipleChoice":
                options = [Option(id=opt["id"], text=opt["text"]) for opt in item["options"]]
                components.append(MultipleChoiceQuestion(
                    id=item["id"],
                    prompt=item["prompt"],
                    variant=item.get("variant", "text"),
                    options=options
                ))
            elif component_type == "fillBlanks":
                components.append(FillBlanksWord(
                    id=item["id"],
                    options=item["options"]
                ))
            else:
                components.append(item)

        return Sheet(components=components)
diff --git a/modules/upload_level/service.py b/modules/upload_level/service.py
new file mode 100644
index 0000000..85c46d1
--- /dev/null
+++ b/modules/upload_level/service.py
@@ -0,0 +1,395 @@
+import json
+import os
+import uuid
+from logging import getLogger
+
+from typing import Dict, Any, Tuple, Callable
+
+import pdfplumber
+
+from modules import GPT
+from modules.helper.file_helper import FileHelper
+from modules.helper import LoggerHelper
+from modules.upload_level.exam_dtos import Exam
+from modules.upload_level.mapper import ExamMapper
+from modules.upload_level.sheet_dtos import Sheet
+
+
class UploadLevelService:
    """Converts uploaded exercise-sheet files into level-exam JSON via an LLM."""

    def __init__(self, openai: GPT):
        # openai: GPT wrapper exposing .prediction(messages, mapper, schema).
        self._logger = getLogger(__name__)
        self._llm = openai
+
+ def generate_level_from_file(self, file) -> Dict[str, Any] | None:
+ ext, path_id = self._save_upload(file)
+ FileHelper.convert_file_to_pdf(
+ f'./tmp/{path_id}/uploaded.{ext}', f'./tmp/{path_id}/exercises.pdf'
+ )
+ file_has_images = self._check_pdf_for_images(f'./tmp/{path_id}/exercises.pdf')
+
+ if not file_has_images:
+ FileHelper.convert_file_to_html(f'./tmp/{path_id}/uploaded.{ext}', f'./tmp/{path_id}/exercises.html')
+
+ completion: Callable[[str], Exam] = self._png_completion if file_has_images else self._html_completion
+ response = completion(path_id)
+
+ FileHelper.remove_directory(f'./tmp/{path_id}')
+
+ if response:
+ return self.fix_ids(response.dict(exclude_none=True))
+ return None
+
+ @staticmethod
+ @LoggerHelper.suppress_loggers()
+ def _check_pdf_for_images(pdf_path: str) -> bool:
+ with pdfplumber.open(pdf_path) as pdf:
+ for page in pdf.pages:
+ if page.images:
+ return True
+ return False
+
+ @staticmethod
+ def _save_upload(file) -> Tuple[str, str]:
+ ext = file.filename.split('.')[-1]
+ path_id = str(uuid.uuid4())
+ os.makedirs(f'./tmp/{path_id}', exist_ok=True)
+
+ tmp_filename = f'./tmp/{path_id}/uploaded.{ext}'
+ file.save(tmp_filename)
+ return ext, path_id
+
    def _level_json_schema(self):
        """Example JSON structure sent to the LLM for the final exam shape."""
        return {
            "parts": [
                {
                    "context": "",
                    "exercises": [
                        self._multiple_choice_html(),
                        self._passage_blank_space_html()
                    ]
                }
            ]
        }

    def _html_completion(self, path_id: str) -> Exam:
        """Scrape the converted HTML sheet into an Exam via the LLM."""
        with open(f'./tmp/{path_id}/exercises.html', 'r', encoding='utf-8') as f:
            html = f.read()

        return self._llm.prediction(
            [self._gpt_instructions_html(),
             {
                 "role": "user",
                 "content": html
             }
             ],
            ExamMapper.map_to_exam_model,
            str(self._level_json_schema())
        )

    def _gpt_instructions_html(self):
        """System prompt for the HTML-scraping pipeline.

        NOTE(review): several sentences read as truncated (e.g. "include in
        the prompt the to indicate the underline") -- inline markup examples
        appear to have been stripped from these literals; restore them from the
        original prompt if available.
        """
        return {
            "role": "system",
            "content": (
                'You are GPT Scraper and your job is to clean dirty html into clean usable JSON formatted data.'
                'Your current task is to scrape html english questions sheets.\n\n'

                'In the question sheet you will only see 4 types of question:\n'
                '- blank space multiple choice\n'
                '- underline multiple choice\n'
                '- reading passage blank space multiple choice\n'
                '- reading passage multiple choice\n\n'

                'For the first two types of questions the template is the same but the question prompts differ, '
                'whilst in the blank space multiple choice you must include in the prompt the blank spaces with '
                'multiple "_", in the underline you must include in the prompt the to '
                'indicate the underline and the options a, b, c, d must be the ordered underlines in the prompt.\n\n'

                'For the reading passage exercise you must handle the formatting of the passages. If it is a '
                'reading passage with blank spaces you will see blanks represented with (question id) followed by a '
                'line and your job is to replace the brackets with the question id and line with "{{question id}}" '
                'with 2 newlines between paragraphs. For the reading passages without blanks you must remove '
                'any numbers that may be there to specify paragraph numbers or line numbers, and place 2 newlines '
                'between paragraphs.\n\n'

                'IMPORTANT: Note that for the reading passages, the html might not reflect the actual paragraph '
                'structure, don\'t format the reading passages paragraphs only by the tags, try to figure '
                'out the best paragraph separation possible.'

                'You will place all the information in a single JSON: {"parts": [{"exercises": [{...}], "context": ""}]}\n '
                'Where {...} are the exercises templates for each part of a question sheet and the optional field '
                'context.'

                'IMPORTANT: The question sheet may be divided by sections but you need to only consider the parts, '
                'so that you can group the exercises by the parts that are in the html, this is crucial since only '
                'reading passage multiple choice require context and if the context is included in parts where it '
                'is not required the UI will be messed up. Some make sure to correctly group the exercises by parts.\n'

                'The templates for the exercises are the following:\n'
                '- blank space multiple choice, underline multiple choice and reading passage multiple choice: '
                f'{self._multiple_choice_html()}\n'
                f'- reading passage blank space multiple choice: {self._passage_blank_space_html()}\n'

                'IMPORTANT: For the reading passage multiple choice the context field must be set with the reading '
                'passages without paragraphs or line numbers, with 2 newlines between paragraphs, for the other '
                'exercises exclude the context field.'
            )
        }

    @staticmethod
    def _multiple_choice_html():
        """Template for a solved multiple-choice exercise (HTML pipeline)."""
        return {
            "type": "multipleChoice",
            "prompt": "Select the appropriate option.",
            "questions": [
                {
                    "id": "",
                    "prompt": "",
                    "solution": "",
                    "options": [
                        {
                            "id": "A",
                            "text": ""
                        },
                        {
                            "id": "B",
                            "text": ""
                        },
                        {
                            "id": "C",
                            "text": ""
                        },
                        {
                            "id": "D",
                            "text": ""
                        }
                    ]
                }
            ]
        }

    @staticmethod
    def _passage_blank_space_html():
        """Template for a solved fillBlanks (blanks passage) exercise.

        NOTE(review): the "text" value looks like the tail of a stripped
        placeholder description -- confirm against the original prompt.
        """
        return {
            "type": "fillBlanks",
            "variant": "mc",
            "prompt": "Click a blank to select the appropriate word for it.",
            "text": (
                "}} with 2 newlines between paragraphs>"
            ),
            "solutions": [
                {
                    "id": "",
                    "solution": ""
                }
            ],
            "words": [
                {
                    "id": "",
                    "options": {
                        "A": "",
                        "B": "",
                        "C": "",
                        "D": ""
                    }
                }
            ]
        }
+
    def _png_completion(self, path_id: str) -> Exam:
        """Vision pipeline: render PDF pages to PNGs, OCR them in overlapping
        two-page batches, then merge the batches into a single Exam."""
        FileHelper.pdf_to_png(path_id)

        tmp_files = os.listdir(f'./tmp/{path_id}')
        pages = [f for f in tmp_files if f.startswith('page-') and f.endswith('.png')]
        # Numeric sort on "page-<n>.png"; lexicographic order breaks past page 9.
        pages.sort(key=lambda f: int(f.split('-')[1].split('.')[0]))

        json_schema = {
            "components": [
                {"type": "part", "part": ""},
                self._multiple_choice_png(),
                {"type": "blanksPassage", "text": (
                    "}} with 2 newlines between paragraphs>"
                )},
                {"type": "passage", "context": (
                    ""
                )},
                self._passage_blank_space_png()
            ]
        }

        components = []

        # Overlapping batches (page i together with page i+1) so content cut off
        # at a page break is fully visible in at least one batch.
        for i in range(len(pages)):
            current_page = pages[i]
            next_page = pages[i + 1] if i + 1 < len(pages) else None
            batch = [current_page, next_page] if next_page else [current_page]

            sheet = self._png_batch(path_id, batch, json_schema)
            sheet.batch = i + 1
            components.append(sheet.dict())

        batches = {"batches": components}
        # NOTE(review): debug artefact written to the CWD -- consider removing.
        with open('output.json', 'w') as json_file:
            json.dump(batches, json_file, indent=4)

        return self._batches_to_exam_completion(batches)

    def _png_batch(self, path_id: str, files: list[str], json_schema) -> Sheet:
        """OCR one batch of page images into a Sheet of components."""
        return self._llm.prediction(
            [self._gpt_instructions_png(),
             {
                 "role": "user",
                 "content": [
                     *FileHelper.b64_pngs(path_id, files)
                 ]
             }
             ],
            ExamMapper.map_to_sheet,
            str(json_schema)
        )

    def _gpt_instructions_png(self):
        """System prompt for the OCR (vision) pipeline.

        NOTE(review): as with the HTML prompt, some inline markup examples look
        stripped from these literals (e.g. the underline-tag descriptions).
        """
        return {
            "role": "system",
            "content": (
                'You are GPT OCR and your job is to scan image text data and format it to JSON format.'
                'Your current task is to scan english questions sheets.\n\n'

                'You will place all the information in a single JSON: {"components": [{...}]} where {...} is a set of '
                'sheet components you will retrieve from the images, the components and their corresponding JSON '
                'templates are as follows:\n'

                '- Part, a standalone part or part of a section of the question sheet: '
                '{"type": "part", "part": ""}\n'

                '- Multiple Choice Question, there are three types of multiple choice questions that differ on '
                'the prompt field of the template: blanks, underlines and normal. '

                'In the blanks prompt you must leave 5 underscores to represent the blank space. '
                'In the underlines questions the objective is to pick the words that are incorrect in the given '
                'sentence, for these questions you must wrap the answer to the question with the html tag , '
                'choose 3 other words to wrap in , place them in the prompt field and use the underlined words '
                'in the order they appear in the question for the options A to D, disreguard options that might be '
                'included underneath the underlines question and use the ones you wrapped in .'
                'In normal you just leave the question as is. '

                f'The template for multiple choice questions is the following: {self._multiple_choice_png()}.\n'

                '- Reading Passages, there are two types of reading passages. Reading passages where you will see '
                'blanks represented by a (question id) followed by a line, you must format these types of reading '
                'passages to be only the text with the brackets that have the question id and line replaced with '
                '"{{question id}}", also place 2 newlines between paragraphs. For the reading passages without blanks '
                'you must remove any numbers that may be there to specify paragraph numbers or line numbers, '
                'and place 2 newlines between paragraphs. '

                'For the reading passages with blanks the template is: {"type": "blanksPassage", '
                '"text": "}} also place 2 newlines between paragraphs>"}. '

                'For the reading passage without blanks is: {"type": "passage", "context": ""}\n'

                '- Blanks Options, options for a blanks reading passage exercise, this type of component is a group of '
                'options with the question id and the options from a to d. The template is: '
                f'{self._passage_blank_space_png()}\n'

                'IMPORTANT: You must place the components in the order that they were given to you. If an exercise or '
                'reading passages are cut off don\'t include them in the JSON.'
            )
        }

    def _multiple_choice_png(self):
        """Multiple-choice component template for OCR output: the HTML question
        template with a type tag added and the solution removed (unsolved)."""
        multiple_choice = self._multiple_choice_html()["questions"][0]
        multiple_choice["type"] = "multipleChoice"
        multiple_choice.pop("solution")
        return multiple_choice

    def _passage_blank_space_png(self):
        """Blank-options component template for OCR output: one word entry of
        the fillBlanks template with a type tag added."""
        passage_blank_space = self._passage_blank_space_html()["words"][0]
        passage_blank_space["type"] = "fillBlanks"
        return passage_blank_space
+
+ def _batches_to_exam_completion(self, batches: Dict[str, Any]) -> Exam:
+ return self._llm.prediction(
+ [self._gpt_instructions_html(),
+ {
+ "role": "user",
+ "content": str(batches)
+ }
+ ],
+ ExamMapper.map_to_exam_model,
+ str(self._level_json_schema())
+ )
+
    def _gpt_instructions_batches(self):
        """System prompt for merging/solving the per-page OCR component batches."""
        return {
            "role": "system",
            "content": (
                'You are helpfull assistant. Your task is to merge multiple batches of english question sheet '
                'components and solve the questions. Each batch may contain overlapping content with the previous '
                'batch, or close enough content which needs to be excluded. The components are as follows:'

                '- Part, a standalone part or part of a section of the question sheet: '
                '{"type": "part", "part": ""}\n'

                '- Multiple Choice Question, there are three types of multiple choice questions that differ on '
                'the prompt field of the template: blanks, underlines and normal. '

                'In a blanks question, the prompt has underscores to represent the blank space, you must select the '
                'appropriate option to solve it.'

                'In a underlines question, the prompt has 4 underlines represented by the html tags , you must '
                'select the option that makes the prompt incorrect to solve it. If the options order doesn\'t reflect '
                'the order in which the underlines appear in the prompt you will need to fix it.'

                'In a normal question there isn\'t either blanks or underlines in the prompt, you should just '
                'select the appropriate solution.'

                f'The template for these questions is the same: {self._multiple_choice_png()}\n'

                '- Reading Passages, there are two types of reading passages with different templates. The one with '
                'type "blanksPassage" where the text field holds the passage and a blank is represented by '
                '{{}} and the other one with type "passage" that has the context field with just '
                'reading passages. For both of these components you will have to remove any additional data that might '
                'be related to a question description and also remove some "()" and "_" from blanksPassage'
                ' if there are any. These components are used in conjunction with other ones.'

                '- Blanks Options, options for a blanks reading passage exercise, this type of component is a group of '
                'options with the question id and the options from a to d. The template is: '
                f'{self._passage_blank_space_png()}\n\n'

                'Now that you know the possible components here\'s what I want you to do:\n'
                '1. Remove duplicates. A batch will have duplicates of other batches and the components of '
                'the next batch should always take precedence over the previous one batch, what I mean by this is that '
                'if batch 1 has, for example, multiple choice question with id 10 and the next one also has id 10, '
                'you pick the next one.\n'
                '2. Solve the exercises. There are 4 types of exercises, the 3 multipleChoice variants + a fill blanks '
                'exercise. For the multiple choice question follow the previous instruction to solve them and place '
                f'them in this format: {self._multiple_choice_html()}. For the fill blanks exercises you need to match '
                'the correct blanksPassage to the correct fillBlanks options and then pick the correct option. Here is '
                f'the template for this exercise: {self._passage_blank_space_html()}.\n'
                f'3. Restructure the JSON to match this template: {self._level_json_schema()}. You must group the exercises by '
                'the parts in the order they appear in the batches components. The context field of a part is the '
                'context of a passage component that has text relevant to normal multiple choice questions.\n'

                'Do your utmost to fullfill the requisites, make sure you include all non-duplicate questions'
                'in your response and correctly structure the JSON.'
            )
        }
+
+ @staticmethod
+ def fix_ids(response):
+ counter = 1
+ for part in response["parts"]:
+ for exercise in part["exercises"]:
+ if exercise["type"] == "multipleChoice":
+ for question in exercise["questions"]:
+ question["id"] = counter
+ counter += 1
+ if exercise["type"] == "fillBlanks":
+ for i in range(len(exercise["words"])):
+ exercise["words"][i]["id"] = counter
+ exercise["solutions"][i]["id"] = counter
+ counter += 1
+ return response
\ No newline at end of file
diff --git a/modules/upload_level/sheet_dtos.py b/modules/upload_level/sheet_dtos.py
new file mode 100644
index 0000000..8efac82
--- /dev/null
+++ b/modules/upload_level/sheet_dtos.py
@@ -0,0 +1,29 @@
+from pydantic import BaseModel
+from typing import List, Dict, Union, Any, Optional
+
+
class Option(BaseModel):
    """One answer option of an OCR'd multiple-choice question."""
    id: str
    text: str


class MultipleChoiceQuestion(BaseModel):
    """An OCR'd multiple-choice question (unsolved -- no solution field yet)."""
    type: str = "multipleChoice"
    id: str
    prompt: str
    variant: str = "text"
    options: List[Option]


class FillBlanksWord(BaseModel):
    """OCR'd option group for one blank of a blanks passage."""
    type: str = "fillBlanks"
    id: str
    options: Dict[str, str]


# Unknown component types (e.g. "part", "passage") are kept as raw dicts.
Component = Union[MultipleChoiceQuestion, FillBlanksWord, Dict[str, Any]]


class Sheet(BaseModel):
    """Components extracted from one page batch; *batch* is its 1-based index."""
    batch: Optional[int] = None
    components: List[Component]
diff --git a/requirements.txt b/requirements.txt
index 978ac46..8afd38d 100644
Binary files a/requirements.txt and b/requirements.txt differ
diff --git a/tmp/placeholder.txt b/tmp/placeholder.txt
new file mode 100644
index 0000000..f89d219
--- /dev/null
+++ b/tmp/placeholder.txt
@@ -0,0 +1 @@
+THIS FILE ONLY EXISTS TO KEEP THIS FOLDER IN THE REPO
\ No newline at end of file