diff --git a/.env b/.env
index 6097ff7..8c214ae 100644
--- a/.env
+++ b/.env
@@ -1,7 +1,8 @@
+ENV=local
OPENAI_API_KEY=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
JWT_SECRET_KEY=6e9c124ba92e8814719dcb0f21200c8aa4d0f119a994ac5e06eb90a366c83ab2
JWT_TEST_TOKEN=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0In0.Emrs2D3BmMP4b3zMjw0fJTPeyMwWEBDbxx2vvaWguO0
-GOOGLE_APPLICATION_CREDENTIALS=firebase-configs/storied-phalanx-349916.json
+GOOGLE_APPLICATION_CREDENTIALS=firebase-configs/encoach-staging.json
HEY_GEN_TOKEN=MjY4MDE0MjdjZmNhNDFmYTlhZGRkNmI3MGFlMzYwZDItMTY5NTExNzY3MA==
GPT_ZERO_API_KEY=0195b9bb24c5439899f71230809c74af
diff --git a/.gitignore b/.gitignore
index 05ce478..aecdd8d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,5 +2,5 @@ __pycache__
.idea
.env
.DS_Store
-firebase-configs/local.json
.venv
+scripts
diff --git a/.idea/ielts-be.iml b/.idea/ielts-be.iml
index 7af039d..a9631c9 100644
--- a/.idea/ielts-be.iml
+++ b/.idea/ielts-be.iml
@@ -5,9 +5,10 @@
+
-
+
diff --git a/.idea/misc.xml b/.idea/misc.xml
index f6104af..6601cfb 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -1,6 +1,9 @@
-
+
+
+
+
diff --git a/Dockerfile b/Dockerfile
index 6ecadc0..64e8726 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -18,12 +18,16 @@ COPY . ./
COPY --from=requirements-stage /tmp/requirements.txt /app/requirements.txt
-RUN apt update && apt install -y ffmpeg
-
-RUN pip install openai-whisper
-
-# openai-whisper model in not compatible with the newer 2.0.0 numpy release
-RUN pip install --upgrade numpy<2
+RUN apt update && apt install -y \
+ ffmpeg \
+ poppler-utils \
+ texlive-latex-base \
+ texlive-fonts-recommended \
+ texlive-latex-extra \
+ texlive-xetex \
+ pandoc \
+ librsvg2-bin \
+ && rm -rf /var/lib/apt/lists/*
RUN pip install --no-cache-dir -r /app/requirements.txt
diff --git a/README.md b/README.md
index 910c41e..e22b313 100644
--- a/README.md
+++ b/README.md
@@ -1,27 +1,5 @@
-# Disclaimer
+Latest refactor from develop's branch commit 5d5cd21 2024-08-28
-I didn't fully test all the endpoints, the main purpose of this release was for ielts-be to be async but I've also
-separated logic through different layers, removed some duplication and implemented dependency injection, so there
-could be errors and extensive testing is needed before even considering deploying (if you're even considering it).
-
-The version this was refactored from was master's branch commit a4caecd 2024-06-13
-
-# Changes
-
-Since one of my use cases is load testing with 5000 concurrent users and ielts-be is sync, I've refactored ielts-be
-into this fastapi app.
-
-The ielts-be Dockerfile runs the container with:
-
-```CMD exec gunicorn --bind 0.0.0.0:5000 --workers 1 --threads 8 --timeout 0 app:app```
-
-And since gunicorn uses WSGI and ielts-be has mostly sync I/O blocking operations, everytime a request encounters
-an I/O blocking operation a thread is blocked. Since this config is 1 worker with 8 threads, the container
-will only be able to handle 8 concurrent requests at a time before gcloud run cold starts another instance.
-
-Flask was built with WSGI in mind, having Quart as it's async alternative, even though you can serve Flask
-with uvicorn using the [asgiref](https://pypi.org/project/asgiref/) adapter, FastAPI has better performance
-than both alternatives and the sync calls would need to be modified either way.
# Endpoints
@@ -29,34 +7,38 @@ In ielts-ui I've added a wrapper to every backend request in '/src/utils/transla
new endpoints if the "BACKEND_TYPE" environment variable is set to "async", if the env variable is not present or
with another value, the wrapper will return the old endpoint.
-| Method | ielts-be | This one |
-|--------|--------------------------------------|------------------------------------------|
-| GET | /healthcheck | /api/healthcheck |
-| GET | /listening_section_1 | /api/listening/section/1 |
-| GET | /listening_section_2 | /api/listening/section/2 |
-| GET | /listening_section_3 | /api/listening/section/3 |
-| GET | /listening_section_4 | /api/listening/section/4 |
-| POST | /listening | /api/listening |
-| POST | /writing_task1 | /api/grade/writing/1 |
-| POST | /writing_task2 | /api/grade/writing/2 |
-| GET | /writing_task1_general | /api/writing/1 |
-| GET | /writing_task2_general | /api/writing/2 |
-| POST | /speaking_task_1 | /api/grade/speaking/1 |
-| POST | /speaking_task_2 | /api/grade/speaking/2 |
-| POST | /speaking_task_3 | /api/grade/speaking/3 |
-| GET | /speaking_task_1 | /api/speaking/1 |
-| GET | /speaking_task_2 | /api/speaking/2 |
-| GET | /speaking_task_3 | /api/speaking/3 |
-| POST | /speaking | /api/speaking |
-| POST | /speaking/generate_speaking_video | /api/speaking/generate_speaking_video |
-| POST | /speaking/generate_interactive_video | /api/speaking/generate_interactive_video |
-| GET | /reading_passage_1 | /api/reading/passage/1 |
-| GET | /reading_passage_2 | /api/reading/passage/2 |
-| GET | /reading_passage_3 | /api/reading/passage/3 |
-| GET | /level | /api/level |
-| GET | /level_utas | /api/level/utas |
-| POST | /fetch_tips | /api/training/tips |
-| POST | /grading_summary | /api/grade/summary |
+| Method | ielts-be | This one |
+|--------|--------------------------------------|---------------------------------------------|
+| GET | /healthcheck | /api/healthcheck |
+| GET | /listening_section_1 | /api/listening/section/1 |
+| GET | /listening_section_2 | /api/listening/section/2 |
+| GET | /listening_section_3 | /api/listening/section/3 |
+| GET | /listening_section_4 | /api/listening/section/4 |
+| POST | /listening | /api/listening |
+| POST | /writing_task1 | /api/grade/writing/1 |
+| POST | /writing_task2 | /api/grade/writing/2 |
+| GET | /writing_task1_general | /api/writing/1 |
+| GET | /writing_task2_general | /api/writing/2 |
+| POST | /speaking_task_1 | /api/grade/speaking/1 |
+| POST | /speaking_task_2 | /api/grade/speaking/2 |
+| POST | /speaking_task_3 | /api/grade/speaking/3 |
+| GET | /speaking_task_1 | /api/speaking/1 |
+| GET | /speaking_task_2 | /api/speaking/2 |
+| GET | /speaking_task_3 | /api/speaking/3 |
+| POST | /speaking | /api/speaking |
+| POST | /speaking/generate_speaking_video | /api/speaking/generate_speaking_video |
+| POST | /speaking/generate_interactive_video | /api/speaking/generate_interactive_video |
+| GET | /reading_passage_1 | /api/reading/passage/1 |
+| GET | /reading_passage_2 | /api/reading/passage/2 |
+| GET | /reading_passage_3 | /api/reading/passage/3 |
+| GET | /level | /api/level |
+| GET | /level_utas | /api/level/utas |
+| POST | /fetch_tips | /api/training/tips |
+| POST | /grading_summary | /api/grade/summary |
+| POST | /grade_short_answers | /api/grade/short_answers |
+| POST | /upload_level | /api/level/upload |
+| POST | /training_content | /api/training/ |
+| POST | /custom_level | /api/level/custom |
# Run the app
@@ -64,9 +46,7 @@ This is for Windows, creating venv and activating it may differ based on your OS
1. python -m venv env
2. env\Scripts\activate
-3. pip install openai-whisper
-4. pip install --upgrade numpy<2
-5. pip install poetry
-6. poetry install
-7. python main.py
+3. pip install poetry
+4. poetry install
+5. python app.py
diff --git a/app.py b/app.py
index 684a422..80feec4 100644
--- a/app.py
+++ b/app.py
@@ -1,1154 +1,30 @@
-import threading
-from functools import reduce
-
-import firebase_admin
-from firebase_admin import credentials
-from flask import Flask, request
-from flask_jwt_extended import JWTManager, jwt_required
-
-from helper.api_messages import *
-from helper.exam_variant import ExamVariant
-from helper.exercises import *
-from helper.file_helper import delete_files_older_than_one_day
-from helper.firebase_helper import *
-from helper.heygen_api import create_video, create_videos_and_save_to_db
-from helper.openai_interface import *
-from helper.question_templates import *
-from helper.speech_to_text_helper import *
-from heygen.AvatarEnum import AvatarEnum
-
-load_dotenv()
-
-app = Flask(__name__)
-
-app.config['JWT_SECRET_KEY'] = os.getenv("JWT_SECRET_KEY")
-jwt = JWTManager(app)
-
-# Initialize Firebase Admin SDK
-cred = credentials.Certificate(os.getenv("GOOGLE_APPLICATION_CREDENTIALS"))
-FIREBASE_BUCKET = os.getenv('FIREBASE_BUCKET')
-
-firebase_admin.initialize_app(cred)
-
-thread_event = threading.Event()
-
-# Configure logging
-logging.basicConfig(level=logging.DEBUG, # Set the logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
- format='%(asctime)s - %(levelname)s - %(message)s')
-
-
-@app.route('/healthcheck', methods=['GET'])
-def healthcheck():
- return {"healthy": True}
-
-
-@app.route('/listening_section_1', methods=['GET'])
-@jwt_required()
-def get_listening_section_1_question():
- try:
- delete_files_older_than_one_day(AUDIO_FILES_PATH)
- # Extract parameters from the URL query string
- topic = request.args.get('topic', default=random.choice(two_people_scenarios))
- req_exercises = request.args.getlist('exercises')
- difficulty = request.args.get("difficulty", default=random.choice(difficulties))
-
- if (len(req_exercises) == 0):
- req_exercises = random.sample(LISTENING_EXERCISE_TYPES, 1)
-
- number_of_exercises_q = divide_number_into_parts(TOTAL_LISTENING_SECTION_1_EXERCISES, len(req_exercises))
-
- processed_conversation = generate_listening_1_conversation(topic)
-
- app.logger.info("Generated conversation: " + str(processed_conversation))
-
- start_id = 1
- exercises = generate_listening_conversation_exercises(parse_conversation(processed_conversation), req_exercises,
- number_of_exercises_q,
- start_id, difficulty)
- return {
- "exercises": exercises,
- "text": processed_conversation,
- "difficulty": difficulty
- }
- except Exception as e:
- return str(e)
-
-
-@app.route('/listening_section_2', methods=['GET'])
-@jwt_required()
-def get_listening_section_2_question():
- try:
- delete_files_older_than_one_day(AUDIO_FILES_PATH)
- # Extract parameters from the URL query string
- topic = request.args.get('topic', default=random.choice(social_monologue_contexts))
- req_exercises = request.args.getlist('exercises')
- difficulty = request.args.get("difficulty", default=random.choice(difficulties))
-
- if (len(req_exercises) == 0):
- req_exercises = random.sample(LISTENING_EXERCISE_TYPES, 2)
-
- number_of_exercises_q = divide_number_into_parts(TOTAL_LISTENING_SECTION_2_EXERCISES, len(req_exercises))
-
- monologue = generate_listening_2_monologue(topic)
-
- app.logger.info("Generated monologue: " + str(monologue))
- start_id = 11
- exercises = generate_listening_monologue_exercises(str(monologue), req_exercises, number_of_exercises_q,
- start_id, difficulty)
- return {
- "exercises": exercises,
- "text": monologue,
- "difficulty": difficulty
- }
- except Exception as e:
- return str(e)
-
-
-@app.route('/listening_section_3', methods=['GET'])
-@jwt_required()
-def get_listening_section_3_question():
- try:
- delete_files_older_than_one_day(AUDIO_FILES_PATH)
- # Extract parameters from the URL query string
- topic = request.args.get('topic', default=random.choice(four_people_scenarios))
- req_exercises = request.args.getlist('exercises')
- difficulty = request.args.get("difficulty", default=random.choice(difficulties))
-
- if (len(req_exercises) == 0):
- req_exercises = random.sample(LISTENING_EXERCISE_TYPES, 1)
-
- number_of_exercises_q = divide_number_into_parts(TOTAL_LISTENING_SECTION_3_EXERCISES, len(req_exercises))
-
- processed_conversation = generate_listening_3_conversation(topic)
-
- app.logger.info("Generated conversation: " + str(processed_conversation))
-
- start_id = 21
- exercises = generate_listening_conversation_exercises(parse_conversation(processed_conversation), req_exercises,
- number_of_exercises_q,
- start_id, difficulty)
- return {
- "exercises": exercises,
- "text": processed_conversation,
- "difficulty": difficulty
- }
- except Exception as e:
- return str(e)
-
-
-@app.route('/listening_section_4', methods=['GET'])
-@jwt_required()
-def get_listening_section_4_question():
- try:
- delete_files_older_than_one_day(AUDIO_FILES_PATH)
- # Extract parameters from the URL query string
- topic = request.args.get('topic', default=random.choice(academic_subjects))
- req_exercises = request.args.getlist('exercises')
- difficulty = request.args.get("difficulty", default=random.choice(difficulties))
-
- if (len(req_exercises) == 0):
- req_exercises = random.sample(LISTENING_EXERCISE_TYPES, 2)
-
- number_of_exercises_q = divide_number_into_parts(TOTAL_LISTENING_SECTION_4_EXERCISES, len(req_exercises))
-
- monologue = generate_listening_4_monologue(topic)
-
- app.logger.info("Generated monologue: " + str(monologue))
- start_id = 31
- exercises = generate_listening_monologue_exercises(str(monologue), req_exercises, number_of_exercises_q,
- start_id, difficulty)
- return {
- "exercises": exercises,
- "text": monologue,
- "difficulty": difficulty
- }
- except Exception as e:
- return str(e)
-
-
-@app.route('/listening', methods=['POST'])
-@jwt_required()
-def save_listening():
- try:
- data = request.get_json()
- parts = data.get('parts')
- minTimer = data.get('minTimer', LISTENING_MIN_TIMER_DEFAULT)
- difficulty = data.get('difficulty', random.choice(difficulties))
- template = getListeningTemplate()
- template['difficulty'] = difficulty
- id = str(uuid.uuid4())
- for i, part in enumerate(parts, start=0):
- part_template = getListeningPartTemplate()
-
- file_name = str(uuid.uuid4()) + ".mp3"
- sound_file_path = AUDIO_FILES_PATH + file_name
- firebase_file_path = FIREBASE_LISTENING_AUDIO_FILES_PATH + file_name
- if "conversation" in part["text"]:
- conversation_text_to_speech(part["text"]["conversation"], sound_file_path)
- else:
- text_to_speech(part["text"], sound_file_path)
- file_url = upload_file_firebase_get_url(FIREBASE_BUCKET, firebase_file_path, sound_file_path)
-
- part_template["audio"]["source"] = file_url
- part_template["exercises"] = part["exercises"]
-
- template['parts'].append(part_template)
-
- if minTimer != LISTENING_MIN_TIMER_DEFAULT:
- template["minTimer"] = minTimer
- template["variant"] = ExamVariant.PARTIAL.value
- else:
- template["variant"] = ExamVariant.FULL.value
-
- (result, id) = save_to_db_with_id("listening", template, id)
- if result:
- return {**template, "id": id}
- else:
- raise Exception("Failed to save question: " + parts)
- except Exception as e:
- return str(e)
-
-
-@app.route('/writing_task1', methods=['POST'])
-@jwt_required()
-def grade_writing_task_1():
- try:
- data = request.get_json()
- question = data.get('question')
- answer = data.get('answer')
- if not has_words(answer):
- return {
- 'comment': "The answer does not contain enough english words.",
- 'overall': 0,
- 'task_response': {
- 'Coherence and Cohesion': 0,
- 'Grammatical Range and Accuracy': 0,
- 'Lexical Resource': 0,
- 'Task Achievement': 0
- }
- }
- elif not has_x_words(answer, 100):
- return {
- 'comment': "The answer is insufficient and too small to be graded.",
- 'overall': 0,
- 'task_response': {
- 'Coherence and Cohesion': 0,
- 'Grammatical Range and Accuracy': 0,
- 'Lexical Resource': 0,
- 'Task Achievement': 0
- }
- }
- else:
- messages = [
- {
- "role": "system",
- "content": ('You are a helpful assistant designed to output JSON on this format: '
- '{"perfect_answer": "example perfect answer", "comment": '
- '"comment about answer quality", "overall": 0.0, "task_response": '
- '{"Task Achievement": 0.0, "Coherence and Cohesion": 0.0, '
- '"Lexical Resource": 0.0, "Grammatical Range and Accuracy": 0.0 }')
- },
- {
- "role": "user",
- "content": ('Evaluate the given Writing Task 1 response based on the IELTS grading system, '
- 'ensuring a strict assessment that penalizes errors. Deduct points for deviations '
- 'from the task, and assign a score of 0 if the response fails to address the question. '
- 'Additionally, provide an exemplary answer with a minimum of 150 words, along with a '
- 'detailed commentary highlighting both strengths and weaknesses in the response. '
- '\n Question: "' + question + '" \n Answer: "' + answer + '"')
- },
- {
- "role": "user",
- "content": 'The perfect answer must have at least 150 words.'
- }
- ]
- token_count = count_total_tokens(messages)
- response = make_openai_call(GPT_3_5_TURBO, messages, token_count,
- ["comment"],
- GRADING_TEMPERATURE)
- response["overall"] = fix_writing_overall(response["overall"], response["task_response"])
- response['fixed_text'] = get_fixed_text(answer)
- return response
- except Exception as e:
- return str(e)
-
-
-@app.route('/writing_task1_general', methods=['GET'])
-@jwt_required()
-def get_writing_task_1_general_question():
- difficulty = request.args.get("difficulty", default=random.choice(difficulties))
- topic = request.args.get("topic", default=random.choice(mti_topics))
- try:
- messages = [
- {
- "role": "system",
- "content": ('You are a helpful assistant designed to output JSON on this format: '
- '{"prompt": "prompt content"}')
- },
- {
- "role": "user",
- "content": ('Craft a prompt for an IELTS Writing Task 1 General Training exercise that instructs the '
- 'student to compose a letter. The prompt should present a specific scenario or situation, '
- 'based on the topic of "' + topic + '", requiring the student to provide information, '
- 'advice, or instructions within the letter. '
- 'Make sure that the generated prompt is '
- 'of ' + difficulty + 'difficulty and does not contain '
- 'forbidden subjects in muslim '
- 'countries.')
- }
- ]
- token_count = count_total_tokens(messages)
- response = make_openai_call(GPT_3_5_TURBO, messages, token_count, "prompt",
- GEN_QUESTION_TEMPERATURE)
- return {
- "question": response["prompt"].strip(),
- "difficulty": difficulty,
- "topic": topic
- }
- except Exception as e:
- return str(e)
-
-
-@app.route('/writing_task2', methods=['POST'])
-@jwt_required()
-def grade_writing_task_2():
- try:
- data = request.get_json()
- question = data.get('question')
- answer = data.get('answer')
- if not has_words(answer):
- return {
- 'comment': "The answer does not contain enough english words.",
- 'overall': 0,
- 'task_response': {
- 'Coherence and Cohesion': 0,
- 'Grammatical Range and Accuracy': 0,
- 'Lexical Resource': 0,
- 'Task Achievement': 0
- }
- }
- elif not has_x_words(answer, 180):
- return {
- 'comment': "The answer is insufficient and too small to be graded.",
- 'overall': 0,
- 'task_response': {
- 'Coherence and Cohesion': 0,
- 'Grammatical Range and Accuracy': 0,
- 'Lexical Resource': 0,
- 'Task Achievement': 0
- }
- }
- else:
- messages = [
- {
- "role": "system",
- "content": ('You are a helpful assistant designed to output JSON on this format: '
- '{"perfect_answer": "example perfect answer", "comment": '
- '"comment about answer quality", "overall": 0.0, "task_response": '
- '{"Task Achievement": 0.0, "Coherence and Cohesion": 0.0, '
- '"Lexical Resource": 0.0, "Grammatical Range and Accuracy": 0.0 }')
- },
- {
- "role": "user",
- "content": (
- 'Evaluate the given Writing Task 2 response based on the IELTS grading system, ensuring a '
- 'strict assessment that penalizes errors. Deduct points for deviations from the task, and '
- 'assign a score of 0 if the response fails to address the question. Additionally, provide an '
- 'exemplary answer with a minimum of 250 words, along with a detailed commentary highlighting '
- 'both strengths and weaknesses in the response.'
- '\n Question: "' + question + '" \n Answer: "' + answer + '"')
- },
- {
- "role": "user",
- "content": 'The perfect answer must have at least 250 words.'
- }
- ]
- token_count = count_total_tokens(messages)
- response = make_openai_call(GPT_4_O, messages, token_count, ["comment"],
- GEN_QUESTION_TEMPERATURE)
- response["overall"] = fix_writing_overall(response["overall"], response["task_response"])
- response['fixed_text'] = get_fixed_text(answer)
- return response
- except Exception as e:
- return str(e)
-
-
-def fix_writing_overall(overall: float, task_response: dict):
- if overall > max(task_response.values()) or overall < min(task_response.values()):
- total_sum = sum(task_response.values())
- average = total_sum / len(task_response.values())
- rounded_average = round(average, 0)
- return rounded_average
- return overall
-
-
-@app.route('/writing_task2_general', methods=['GET'])
-@jwt_required()
-def get_writing_task_2_general_question():
- difficulty = request.args.get("difficulty", default=random.choice(difficulties))
- topic = request.args.get("topic", default=random.choice(mti_topics))
- try:
- messages = [
- {
- "role": "system",
- "content": ('You are a helpful assistant designed to output JSON on this format: '
- '{"prompt": "prompt content"}')
- },
- {
- "role": "user",
- "content": (
- 'Craft a comprehensive question of ' + difficulty + 'difficulty like the ones for IELTS Writing Task 2 General Training that directs the candidate '
- 'to delve into an in-depth analysis of contrasting perspectives on the topic of "' + topic + '". '
- 'The candidate should be asked to discuss the strengths and weaknesses of both viewpoints, provide evidence or '
- 'examples, and present a well-rounded argument before concluding with their personal opinion on the subject.')
- }
- ]
- token_count = count_total_tokens(messages)
- response = make_openai_call(GPT_4_O, messages, token_count, "prompt", GEN_QUESTION_TEMPERATURE)
- return {
- "question": response["prompt"].strip(),
- "difficulty": difficulty,
- "topic": topic
- }
- except Exception as e:
- return str(e)
-
-
-@app.route('/speaking_task_1', methods=['POST'])
-@jwt_required()
-def grade_speaking_task_1():
- request_id = uuid.uuid4()
- delete_files_older_than_one_day(AUDIO_FILES_PATH)
- sound_file_name = AUDIO_FILES_PATH + str(uuid.uuid4())
- logging.info("POST - speaking_task_1 - Received request to grade speaking task 1. "
- "Use this id to track the logs: " + str(request_id) + " - Request data: " + str(request.get_json()))
- try:
- data = request.get_json()
- question = data.get('question')
- answer_firebase_path = data.get('answer')
-
- logging.info("POST - speaking_task_1 - " + str(request_id) + " - Downloading file " + answer_firebase_path)
- download_firebase_file(FIREBASE_BUCKET, answer_firebase_path, sound_file_name)
- logging.info("POST - speaking_task_1 - " + str(
- request_id) + " - Downloaded file " + answer_firebase_path + " to " + sound_file_name)
-
- answer = speech_to_text(sound_file_name)
- logging.info("POST - speaking_task_1 - " + str(request_id) + " - Transcripted answer: " + answer)
-
- if has_x_words(answer, 20):
- messages = [
- {
- "role": "system",
- "content": (
- 'You are a helpful assistant designed to output JSON on this format: '
- '{"comment": "comment about answer quality", "overall": 0.0, '
- '"task_response": {"Fluency and Coherence": 0.0, "Lexical Resource": 0.0, '
- '"Grammatical Range and Accuracy": 0.0, "Pronunciation": 0.0}}')
- },
- {
- "role": "user",
- "content": (
- 'Evaluate the given Speaking Part 1 response based on the IELTS grading system, ensuring a '
- 'strict assessment that penalizes errors. Deduct points for deviations from the task, and '
- 'assign a score of 0 if the response fails to address the question. Additionally, provide '
- 'detailed commentary highlighting both strengths and weaknesses in the response.'
- '\n Question: "' + question + '" \n Answer: "' + answer + '"')
- }
- ]
- token_count = count_total_tokens(messages)
-
- logging.info("POST - speaking_task_1 - " + str(request_id) + " - Requesting grading of the answer.")
- response = make_openai_call(GPT_3_5_TURBO, messages, token_count, ["comment"],
- GRADING_TEMPERATURE)
- logging.info("POST - speaking_task_1 - " + str(request_id) + " - Answer graded: " + str(response))
-
- perfect_answer_messages = [
- {
- "role": "system",
- "content": ('You are a helpful assistant designed to output JSON on this format: '
- '{"answer": "perfect answer"}')
- },
- {
- "role": "user",
- "content": (
- 'Provide a perfect answer according to ielts grading system to the following '
- 'Speaking Part 1 question: "' + question + '"')
- }
- ]
- token_count = count_total_tokens(perfect_answer_messages)
-
- logging.info("POST - speaking_task_1 - " + str(request_id) + " - Requesting perfect answer.")
- response['perfect_answer'] = make_openai_call(GPT_3_5_TURBO,
- perfect_answer_messages,
- token_count,
- ["answer"],
- GEN_QUESTION_TEMPERATURE)["answer"]
- logging.info("POST - speaking_task_1 - " + str(
- request_id) + " - Perfect answer: " + response['perfect_answer'])
-
- response['transcript'] = answer
-
- logging.info("POST - speaking_task_1 - " + str(request_id) + " - Requesting fixed text.")
- response['fixed_text'] = get_speaking_corrections(answer)
- logging.info("POST - speaking_task_1 - " + str(request_id) + " - Fixed text: " + response['fixed_text'])
-
- if response["overall"] == "0.0" or response["overall"] == 0.0:
- response["overall"] = round((response["task_response"]["Fluency and Coherence"] +
- response["task_response"]["Lexical Resource"] + response["task_response"][
- "Grammatical Range and Accuracy"] + response["task_response"][
- "Pronunciation"]) / 4, 1)
-
- logging.info("POST - speaking_task_1 - " + str(request_id) + " - Final response: " + str(response))
- return response
- else:
- logging.info("POST - speaking_task_1 - " + str(
- request_id) + " - The answer had less words than threshold 20 to be graded. Answer: " + answer)
- return {
- "comment": "The audio recorded does not contain enough english words to be graded.",
- "overall": 0,
- "task_response": {
- "Fluency and Coherence": 0,
- "Lexical Resource": 0,
- "Grammatical Range and Accuracy": 0,
- "Pronunciation": 0
- }
- }
- except Exception as e:
- os.remove(sound_file_name)
- return str(e), 400
-
-
-@app.route('/speaking_task_1', methods=['GET'])
-@jwt_required()
-def get_speaking_task_1_question():
- difficulty = request.args.get("difficulty", default=random.choice(difficulties))
- topic = request.args.get("topic", default=random.choice(mti_topics))
- try:
- messages = [
- {
- "role": "system",
- "content": (
- 'You are a helpful assistant designed to output JSON on this format: '
- '{"topic": "topic", "question": "question"}')
- },
- {
- "role": "user",
- "content": (
- 'Craft a thought-provoking question of ' + difficulty + ' difficulty for IELTS Speaking Part 1 '
- 'that encourages candidates to delve deeply into '
- 'personal experiences, preferences, or insights on the topic '
- 'of "' + topic + '". Instruct the candidate '
- 'to offer not only detailed '
- 'descriptions but also provide '
- 'nuanced explanations, examples, '
- 'or anecdotes to enrich their response. '
- 'Make sure that the generated question '
- 'does not contain forbidden subjects in '
- 'muslim countries.')
- }
- ]
- token_count = count_total_tokens(messages)
- response = make_openai_call(GPT_4_O, messages, token_count, ["topic"],
- GEN_QUESTION_TEMPERATURE)
- response["type"] = 1
- response["difficulty"] = difficulty
- response["topic"] = topic
- return response
- except Exception as e:
- return str(e)
-
-
-@app.route('/speaking_task_2', methods=['POST'])
-@jwt_required()
-def grade_speaking_task_2():
- request_id = uuid.uuid4()
- delete_files_older_than_one_day(AUDIO_FILES_PATH)
- sound_file_name = AUDIO_FILES_PATH + str(uuid.uuid4())
- logging.info("POST - speaking_task_2 - Received request to grade speaking task 2. "
- "Use this id to track the logs: " + str(request_id) + " - Request data: " + str(request.get_json()))
- try:
- data = request.get_json()
- question = data.get('question')
- answer_firebase_path = data.get('answer')
-
- logging.info("POST - speaking_task_2 - " + str(request_id) + " - Downloading file " + answer_firebase_path)
- download_firebase_file(FIREBASE_BUCKET, answer_firebase_path, sound_file_name)
- logging.info("POST - speaking_task_2 - " + str(
- request_id) + " - Downloaded file " + answer_firebase_path + " to " + sound_file_name)
-
- answer = speech_to_text(sound_file_name)
- logging.info("POST - speaking_task_2 - " + str(request_id) + " - Transcripted answer: " + answer)
-
- if has_x_words(answer, 20):
- messages = [
- {
- "role": "system",
- "content": (
- 'You are a helpful assistant designed to output JSON on this format: '
- '{"comment": "comment about answer quality", "overall": 0.0, '
- '"task_response": {"Fluency and Coherence": 0.0, "Lexical Resource": 0.0, '
- '"Grammatical Range and Accuracy": 0.0, "Pronunciation": 0.0}}')
- },
- {
- "role": "user",
- "content": (
- 'Evaluate the given Speaking Part 2 response based on the IELTS grading system, ensuring a '
- 'strict assessment that penalizes errors. Deduct points for deviations from the task, and '
- 'assign a score of 0 if the response fails to address the question. Additionally, provide '
- 'detailed commentary highlighting both strengths and weaknesses in the response.'
- '\n Question: "' + question + '" \n Answer: "' + answer + '"')
- }
- ]
- token_count = count_total_tokens(messages)
-
- logging.info("POST - speaking_task_2 - " + str(request_id) + " - Requesting grading of the answer.")
- response = make_openai_call(GPT_3_5_TURBO, messages, token_count,["comment"],
- GRADING_TEMPERATURE)
- logging.info("POST - speaking_task_2 - " + str(request_id) + " - Answer graded: " + str(response))
-
- perfect_answer_messages = [
- {
- "role": "system",
- "content": ('You are a helpful assistant designed to output JSON on this format: '
- '{"answer": "perfect answer"}')
- },
- {
- "role": "user",
- "content": (
- 'Provide a perfect answer according to ielts grading system to the following '
- 'Speaking Part 2 question: "' + question + '"')
- }
- ]
- token_count = count_total_tokens(perfect_answer_messages)
-
- logging.info("POST - speaking_task_2 - " + str(request_id) + " - Requesting perfect answer.")
- response['perfect_answer'] = make_openai_call(GPT_3_5_TURBO,
- perfect_answer_messages,
- token_count,
- ["answer"],
- GEN_QUESTION_TEMPERATURE)["answer"]
- logging.info("POST - speaking_task_2 - " + str(
- request_id) + " - Perfect answer: " + response['perfect_answer'])
-
- response['transcript'] = answer
-
- logging.info("POST - speaking_task_2 - " + str(request_id) + " - Requesting fixed text.")
- response['fixed_text'] = get_speaking_corrections(answer)
- logging.info("POST - speaking_task_2 - " + str(request_id) + " - Fixed text: " + response['fixed_text'])
-
- if response["overall"] == "0.0" or response["overall"] == 0.0:
- response["overall"] = round((response["task_response"]["Fluency and Coherence"] +
- response["task_response"]["Lexical Resource"] + response["task_response"][
- "Grammatical Range and Accuracy"] + response["task_response"][
- "Pronunciation"]) / 4, 1)
-
- logging.info("POST - speaking_task_2 - " + str(request_id) + " - Final response: " + str(response))
- return response
- else:
- logging.info("POST - speaking_task_2 - " + str(
- request_id) + " - The answer had less words than threshold 20 to be graded. Answer: " + answer)
- return {
- "comment": "The audio recorded does not contain enough english words to be graded.",
- "overall": 0,
- "task_response": {
- "Fluency and Coherence": 0,
- "Lexical Resource": 0,
- "Grammatical Range and Accuracy": 0,
- "Pronunciation": 0
- }
- }
- except Exception as e:
- os.remove(sound_file_name)
- return str(e), 400
-
-
-@app.route('/speaking_task_2', methods=['GET'])
-@jwt_required()
-def get_speaking_task_2_question():
- difficulty = request.args.get("difficulty", default=random.choice(difficulties))
- topic = request.args.get("topic", default=random.choice(mti_topics))
- try:
- messages = [
- {
- "role": "system",
- "content": (
- 'You are a helpful assistant designed to output JSON on this format: '
- '{"topic": "topic", "question": "question", "prompts": ["prompt_1", "prompt_2", "prompt_3"]}')
- },
- {
- "role": "user",
- "content": (
- 'Create a question of ' + difficulty + ' difficulty for IELTS Speaking Part 2 '
- 'that encourages candidates to narrate a '
- 'personal experience or story related to the topic '
- 'of "' + topic + '". Include 3 prompts that '
- 'guide the candidate to describe '
- 'specific aspects of the experience, '
- 'such as details about the situation, '
- 'their actions, and the reasons it left a '
- 'lasting impression. Make sure that the '
- 'generated question does not contain '
- 'forbidden subjects in muslim countries.')
- }
- ]
- token_count = count_total_tokens(messages)
- response = make_openai_call(GPT_4_O, messages, token_count, GEN_FIELDS, GEN_QUESTION_TEMPERATURE)
- response["type"] = 2
- response["difficulty"] = difficulty
- response["topic"] = topic
- return response
- except Exception as e:
- return str(e)
-
-
-@app.route('/speaking_task_3', methods=['GET'])
-@jwt_required()
-def get_speaking_task_3_question():
- difficulty = request.args.get("difficulty", default=random.choice(difficulties))
- topic = request.args.get("topic", default=random.choice(mti_topics))
- try:
- messages = [
- {
- "role": "system",
- "content": (
- 'You are a helpful assistant designed to output JSON on this format: '
- '{"topic": "topic", "questions": ["question", "question", "question"]}')
- },
- {
- "role": "user",
- "content": (
- 'Formulate a set of 3 questions of ' + difficulty + ' difficulty for IELTS Speaking Part 3 that encourage candidates to engage in a '
- 'meaningful discussion on the topic of "' + topic + '". Provide inquiries, ensuring '
- 'they explore various aspects, perspectives, and implications related to the topic.'
- 'Make sure that the generated question does not contain forbidden subjects in muslim countries.')
-
- }
- ]
- token_count = count_total_tokens(messages)
- response = make_openai_call(GPT_4_O, messages, token_count, GEN_FIELDS, GEN_QUESTION_TEMPERATURE)
- # Remove the numbers from the questions only if the string starts with a number
- response["questions"] = [re.sub(r"^\d+\.\s*", "", question) if re.match(r"^\d+\.", question) else question for
- question in response["questions"]]
- response["type"] = 3
- response["difficulty"] = difficulty
- response["topic"] = topic
- return response
- except Exception as e:
- return str(e)
-
-
-@app.route('/speaking_task_3', methods=['POST'])
-@jwt_required()
-def grade_speaking_task_3():
- request_id = uuid.uuid4()
- delete_files_older_than_one_day(AUDIO_FILES_PATH)
- logging.info("POST - speaking_task_3 - Received request to grade speaking task 3. "
- "Use this id to track the logs: " + str(request_id) + " - Request data: " + str(request.get_json()))
- try:
- data = request.get_json()
- answers = data.get('answers')
- text_answers = []
- perfect_answers = []
- logging.info("POST - speaking_task_3 - " + str(
- request_id) + " - Received " + str(len(answers)) + " total answers.")
- for item in answers:
- sound_file_name = AUDIO_FILES_PATH + str(uuid.uuid4())
-
- logging.info("POST - speaking_task_3 - " + str(request_id) + " - Downloading file " + item["answer"])
- download_firebase_file(FIREBASE_BUCKET, item["answer"], sound_file_name)
- logging.info("POST - speaking_task_3 - " + str(
- request_id) + " - Downloaded file " + item["answer"] + " to " + sound_file_name)
-
- answer_text = speech_to_text(sound_file_name)
- logging.info("POST - speaking_task_3 - " + str(request_id) + " - Transcripted answer: " + answer_text)
-
- text_answers.append(answer_text)
- item["answer"] = answer_text
- os.remove(sound_file_name)
- if not has_x_words(answer_text, 20):
- logging.info("POST - speaking_task_3 - " + str(
- request_id) + " - The answer had less words than threshold 20 to be graded. Answer: " + answer_text)
- return {
- "comment": "The audio recorded does not contain enough english words to be graded.",
- "overall": 0,
- "task_response": {
- "Fluency and Coherence": 0,
- "Lexical Resource": 0,
- "Grammatical Range and Accuracy": 0,
- "Pronunciation": 0
- }
- }
-
- perfect_answer_messages = [
- {
- "role": "system",
- "content": ('You are a helpful assistant designed to output JSON on this format: '
- '{"answer": "perfect answer"}')
- },
- {
- "role": "user",
- "content": (
- 'Provide a perfect answer according to ielts grading system to the following '
- 'Speaking Part 3 question: "' + item["question"] + '"')
- }
- ]
- token_count = count_total_tokens(perfect_answer_messages)
- logging.info("POST - speaking_task_3 - " + str(
- request_id) + " - Requesting perfect answer for question: " + item["question"])
- perfect_answers.append(make_openai_call(GPT_3_5_TURBO,
- perfect_answer_messages,
- token_count,
- ["answer"],
- GEN_QUESTION_TEMPERATURE))
-
- messages = [
- {
- "role": "system",
- "content": (
- 'You are a helpful assistant designed to output JSON on this format: '
- '{"comment": "comment about answer quality", "overall": 0.0, '
- '"task_response": {"Fluency and Coherence": 0.0, "Lexical Resource": 0.0, '
- '"Grammatical Range and Accuracy": 0.0, "Pronunciation": 0.0}}')
- }
- ]
- message = (
- "Evaluate the given Speaking Part 3 response based on the IELTS grading system, ensuring a "
- "strict assessment that penalizes errors. Deduct points for deviations from the task, and "
- "assign a score of 0 if the response fails to address the question. Additionally, provide detailed "
- "commentary highlighting both strengths and weaknesses in the response."
- "\n\n The questions and answers are: \n\n'")
-
- logging.info("POST - speaking_task_3 - " + str(request_id) + " - Formatting answers and questions for prompt.")
- formatted_text = ""
- for i, entry in enumerate(answers, start=1):
- formatted_text += f"**Question {i}:**\n{entry['question']}\n\n"
- formatted_text += f"**Answer {i}:**\n{entry['answer']}\n\n"
- logging.info("POST - speaking_task_3 - " + str(
- request_id) + " - Formatted answers and questions for prompt: " + formatted_text)
-
- message += formatted_text
-
- messages.append({
- "role": "user",
- "content": message
- })
-
- token_count = count_total_tokens(messages)
-
- logging.info("POST - speaking_task_3 - " + str(request_id) + " - Requesting grading of the answers.")
- response = make_openai_call(GPT_3_5_TURBO, messages, token_count, ["comment"], GRADING_TEMPERATURE)
- logging.info("POST - speaking_task_3 - " + str(request_id) + " - Answers graded: " + str(response))
-
- logging.info("POST - speaking_task_3 - " + str(request_id) + " - Adding perfect answers to response.")
- for i, answer in enumerate(perfect_answers, start=1):
- response['perfect_answer_' + str(i)] = answer
-
- logging.info("POST - speaking_task_3 - " + str(
- request_id) + " - Adding transcript and fixed texts to response.")
- for i, answer in enumerate(text_answers, start=1):
- response['transcript_' + str(i)] = answer
- response['fixed_text_' + str(i)] = get_speaking_corrections(answer)
- if response["overall"] == "0.0" or response["overall"] == 0.0:
- response["overall"] = round((response["task_response"]["Fluency and Coherence"] + response["task_response"][
- "Lexical Resource"] + response["task_response"]["Grammatical Range and Accuracy"] +
- response["task_response"]["Pronunciation"]) / 4, 1)
- logging.info("POST - speaking_task_3 - " + str(request_id) + " - Final response: " + str(response))
- return response
- except Exception as e:
- return str(e), 400
-
-
-@app.route('/speaking', methods=['POST'])
-@jwt_required()
-def save_speaking():
- try:
- data = request.get_json()
- exercises = data.get('exercises')
- minTimer = data.get('minTimer', SPEAKING_MIN_TIMER_DEFAULT)
- template = getSpeakingTemplate()
- template["minTimer"] = minTimer
-
- if minTimer < SPEAKING_MIN_TIMER_DEFAULT:
- template["variant"] = ExamVariant.PARTIAL.value
- else:
- template["variant"] = ExamVariant.FULL.value
-
- id = str(uuid.uuid4())
- app.logger.info('Received request to save speaking with id: ' + id)
- thread_event.set()
- thread = threading.Thread(
- target=create_videos_and_save_to_db,
- args=(exercises, template, id),
- name=("thread-save-speaking-" + id)
- )
- thread.start()
- app.logger.info('Started thread to save speaking. Thread: ' + thread.getName())
-
- # Return response without waiting for create_videos_and_save_to_db to finish
- return {**template, "id": id}
- except Exception as e:
- return str(e)
-
-
-@app.route("/speaking/generate_speaking_video", methods=['POST'])
-@jwt_required()
-def generate_speaking_video():
- try:
- data = request.get_json()
- avatar = data.get("avatar", random.choice(list(AvatarEnum)).value)
- prompts = data.get("prompts", [])
- question = data.get("question")
- if len(prompts) > 0:
- question = question + " In your answer you should consider: " + " ".join(prompts)
- sp1_result = create_video(question, avatar)
- if sp1_result is not None:
- sound_file_path = VIDEO_FILES_PATH + sp1_result
- firebase_file_path = FIREBASE_SPEAKING_VIDEO_FILES_PATH + sp1_result
- url = upload_file_firebase_get_url(FIREBASE_BUCKET, firebase_file_path, sound_file_path)
- sp1_video_path = firebase_file_path
- sp1_video_url = url
-
- return {
- "text": data["question"],
- "prompts": prompts,
- "title": data["topic"],
- "video_url": sp1_video_url,
- "video_path": sp1_video_path,
- "type": "speaking",
- "id": uuid.uuid4()
- }
- else:
- app.logger.error("Failed to create video for part 1 question: " + data["question"])
- return str("Failed to create video for part 1 question: " + data["question"])
-
- except Exception as e:
- return str(e)
-
-
-@app.route("/speaking/generate_interactive_video", methods=['POST'])
-@jwt_required()
-def generate_interactive_video():
- try:
- data = request.get_json()
- sp3_questions = []
- avatar = data.get("avatar", random.choice(list(AvatarEnum)).value)
-
- app.logger.info('Creating videos for speaking part 3')
- for question in data["questions"]:
- result = create_video(question, avatar)
- if result is not None:
- sound_file_path = VIDEO_FILES_PATH + result
- firebase_file_path = FIREBASE_SPEAKING_VIDEO_FILES_PATH + result
- url = upload_file_firebase_get_url(FIREBASE_BUCKET, firebase_file_path, sound_file_path)
- video = {
- "text": question,
- "video_path": firebase_file_path,
- "video_url": url
- }
- sp3_questions.append(video)
- else:
- app.app.logger.error("Failed to create video for part 3 question: " + question)
-
- return {
- "prompts": sp3_questions,
- "title": data["topic"],
- "type": "interactiveSpeaking",
- "id": uuid.uuid4()
- }
- except Exception as e:
- return str(e)
-
-
-@app.route('/reading_passage_1', methods=['GET'])
-@jwt_required()
-def get_reading_passage_1_question():
- try:
- # Extract parameters from the URL query string
- topic = request.args.get('topic', default=random.choice(topics))
- req_exercises = request.args.getlist('exercises')
- difficulty = request.args.get("difficulty", default=random.choice(difficulties))
- return gen_reading_passage_1(topic, req_exercises, difficulty)
- except Exception as e:
- return str(e)
-
-
-@app.route('/reading_passage_2', methods=['GET'])
-@jwt_required()
-def get_reading_passage_2_question():
- try:
- # Extract parameters from the URL query string
- topic = request.args.get('topic', default=random.choice(topics))
- req_exercises = request.args.getlist('exercises')
- difficulty = request.args.get("difficulty", default=random.choice(difficulties))
- return gen_reading_passage_2(topic, req_exercises, difficulty)
- except Exception as e:
- return str(e)
-
-
-@app.route('/reading_passage_3', methods=['GET'])
-@jwt_required()
-def get_reading_passage_3_question():
- try:
- # Extract parameters from the URL query string
- topic = request.args.get('topic', default=random.choice(topics))
- req_exercises = request.args.getlist('exercises')
- difficulty = request.args.get("difficulty", default=random.choice(difficulties))
- return gen_reading_passage_3(topic, req_exercises, difficulty)
- except Exception as e:
- return str(e)
-
-
-@app.route('/level', methods=['GET'])
-@jwt_required()
-def get_level_exam():
- try:
- number_of_exercises = 25
- exercises = gen_multiple_choice_level(number_of_exercises)
- return {
- "exercises": [exercises],
- "isDiagnostic": False,
- "minTimer": 25,
- "module": "level"
- }
- except Exception as e:
- return str(e)
-
-@app.route('/level_utas', methods=['GET'])
-@jwt_required()
-def get_level_utas():
- try:
- # Formats
- mc = {
- "id": str(uuid.uuid4()),
- "prompt": "Choose the correct word or group of words that completes the sentences.",
- "questions": None,
- "type": "multipleChoice",
- "part": 1
- }
-
- umc = {
- "id": str(uuid.uuid4()),
- "prompt": "Choose the underlined word or group of words that is not correct.",
- "questions": None,
- "type": "multipleChoice",
- "part": 2
- }
-
- bs_1 = {
- "id": str(uuid.uuid4()),
- "prompt": "Read the text and write the correct word for each space.",
- "questions": None,
- "type": "blankSpaceText",
- "part": 3
- }
-
- bs_2 = {
- "id": str(uuid.uuid4()),
- "prompt": "Read the text and write the correct word for each space.",
- "questions": None,
- "type": "blankSpaceText",
- "part": 4
- }
-
- reading = {
- "id": str(uuid.uuid4()),
- "prompt": "Read the text and answer the questions below.",
- "questions": None,
- "type": "readingExercises",
- "part": 5
- }
-
- all_mc_questions = []
-
- # PART 1
- mc_exercises1 = gen_multiple_choice_blank_space_utas(15, 1, all_mc_questions)
- print(json.dumps(mc_exercises1, indent=4))
- all_mc_questions.append(mc_exercises1)
-
- # PART 2
- mc_exercises2 = gen_multiple_choice_blank_space_utas(15, 16, all_mc_questions)
- print(json.dumps(mc_exercises2, indent=4))
- all_mc_questions.append(mc_exercises2)
-
- # PART 3
- mc_exercises3 = gen_multiple_choice_blank_space_utas(15, 31, all_mc_questions)
- print(json.dumps(mc_exercises3, indent=4))
- all_mc_questions.append(mc_exercises3)
-
- mc_exercises = mc_exercises1['questions'] + mc_exercises2['questions'] + mc_exercises3['questions']
- print(json.dumps(mc_exercises, indent=4))
- mc["questions"] = mc_exercises
-
- # Underlined mc
- underlined_mc = gen_multiple_choice_underlined_utas(15, 46)
- print(json.dumps(underlined_mc, indent=4))
- umc["questions"] = underlined_mc
-
- # Blank Space text 1
- blank_space_text_1 = gen_blank_space_text_utas(12, 61, 250)
- print(json.dumps(blank_space_text_1, indent=4))
- bs_1["questions"] = blank_space_text_1
-
- # Blank Space text 2
- blank_space_text_2 = gen_blank_space_text_utas(14, 73, 350)
- print(json.dumps(blank_space_text_2, indent=4))
- bs_2["questions"] = blank_space_text_2
-
- # Reading text
- reading_text = gen_reading_passage_utas(87, 10, 4)
- print(json.dumps(reading_text, indent=4))
- reading["questions"] = reading_text
-
- return {
- "exercises": {
- "blankSpaceMultipleChoice": mc,
- "underlinedMultipleChoice": umc,
- "blankSpaceText1": bs_1,
- "blankSpaceText2": bs_2,
- "readingExercises": reading,
- },
- "isDiagnostic": False,
- "minTimer": 25,
- "module": "level"
- }
- except Exception as e:
- return str(e)
-
-
-@app.route('/fetch_tips', methods=['POST'])
-@jwt_required()
-def fetch_answer_tips():
- try:
- data = request.get_json()
- context = data.get('context')
- question = data.get('question')
- answer = data.get('answer')
- correct_answer = data.get('correct_answer')
- messages = get_question_tips(question, answer, correct_answer, context)
- token_count = reduce(lambda count, item: count + count_tokens(item)['n_tokens'],
- map(lambda x: x["content"], filter(lambda x: "content" in x, messages)), 0)
- response = make_openai_call(GPT_3_5_TURBO, messages, token_count, None, TIPS_TEMPERATURE)
-
- if isinstance(response, str):
- response = re.sub(r"^[a-zA-Z0-9_]+\:\s*", "", response)
-
- return response
- except Exception as e:
- return str(e)
-
-
-@app.route('/grading_summary', methods=['POST'])
-@jwt_required()
-def grading_summary():
- # Body Format
- # {'sections': Array of {'code': key, 'name': name, 'grade': grade}}
- # Output Format
- # {'sections': Array of {'code': key, 'name': name, 'grade': grade, 'evaluation': evaluation, 'suggestions': suggestions}}
- try:
- return calculate_grading_summary(request.get_json())
- except Exception as e:
- return str(e)
-
-
-if __name__ == '__main__':
- app.run()
+import os
+
+import click
+import uvicorn
+from dotenv import load_dotenv
+
+
+@click.command()
+@click.option(
+ "--env",
+ type=click.Choice(["local", "dev", "prod"], case_sensitive=False),
+ default="local",
+)
+def main(env: str):
+ load_dotenv()
+ os.environ["ENV"] = env
+ if env == "prod":
+ raise Exception("Production environment not supported yet!")
+
+ uvicorn.run(
+ app="app.server:app",
+ host="localhost",
+ port=8000,
+ reload=True if env != "prod" else False,
+ workers=1,
+ )
+
+
+if __name__ == "__main__":
+ main()
diff --git a/app/api/grade.py b/app/api/grade.py
index 645b4b4..7f054e7 100644
--- a/app/api/grade.py
+++ b/app/api/grade.py
@@ -2,7 +2,8 @@ from dependency_injector.wiring import inject, Provide
from fastapi import APIRouter, Depends, Path, Request
from app.controllers.abc import IGradeController
-from app.dtos import WritingGradeTaskDTO
+from app.dtos.writing import WritingGradeTaskDTO
+from app.dtos.speaking import GradeSpeakingAnswersDTO, GradeSpeakingDTO
from app.middlewares import Authorized, IsAuthenticatedViaBearerToken
controller = "grade_controller"
@@ -22,18 +23,29 @@ async def grade_writing_task(
return await grade_controller.grade_writing_task(task, data)
+@grade_router.post(
+ '/speaking/2',
+ dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
+)
+@inject
+async def grade_speaking_task_2(
+ data: GradeSpeakingDTO,
+ grade_controller: IGradeController = Depends(Provide[controller])
+):
+ return await grade_controller.grade_speaking_task(2, [data.dict()])
+
+
@grade_router.post(
'/speaking/{task}',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
-async def grade_speaking_task(
- request: Request,
+async def grade_speaking_task_1_and_3(
+ data: GradeSpeakingAnswersDTO,
task: int = Path(..., ge=1, le=3),
grade_controller: IGradeController = Depends(Provide[controller])
):
- data = await request.json()
- return await grade_controller.grade_speaking_task(task, data)
+ return await grade_controller.grade_speaking_task(task, data.answers)
@grade_router.post(
@@ -47,3 +59,16 @@ async def grading_summary(
):
data = await request.json()
return await grade_controller.grading_summary(data)
+
+
+@grade_router.post(
+ '/short_answers',
+ dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
+)
+@inject
+async def grade_short_answers(
+ request: Request,
+ grade_controller: IGradeController = Depends(Provide[controller])
+):
+ data = await request.json()
+ return await grade_controller.grade_short_answers(data)
diff --git a/app/api/level.py b/app/api/level.py
index a91bfdd..0c9a791 100644
--- a/app/api/level.py
+++ b/app/api/level.py
@@ -1,5 +1,5 @@
from dependency_injector.wiring import Provide, inject
-from fastapi import APIRouter, Depends
+from fastapi import APIRouter, Depends, UploadFile, Request
from app.middlewares import Authorized, IsAuthenticatedViaBearerToken
from app.controllers.abc import ILevelController
@@ -27,4 +27,29 @@ async def get_level_exam(
async def get_level_utas(
level_controller: ILevelController = Depends(Provide[controller])
):
- return await level_controller.get_level_exam()
+ return await level_controller.get_level_utas()
+
+
+@level_router.post(
+ '/upload',
+ dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
+)
+@inject
+async def upload(
+ file: UploadFile,
+ level_controller: ILevelController = Depends(Provide[controller])
+):
+ return await level_controller.upload_level(file)
+
+
+@level_router.post(
+ '/custom',
+ dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
+)
+@inject
+async def custom_level(
+ request: Request,
+ level_controller: ILevelController = Depends(Provide[controller])
+):
+ data = await request.json()
+ return await level_controller.get_custom_level(data)
diff --git a/app/api/listening.py b/app/api/listening.py
index 8b46bab..3fd15d4 100644
--- a/app/api/listening.py
+++ b/app/api/listening.py
@@ -6,7 +6,7 @@ from fastapi import APIRouter, Depends, Path
from app.middlewares import Authorized, IsAuthenticatedViaBearerToken
from app.controllers.abc import IListeningController
from app.configs.constants import EducationalContent
-from app.dtos import SaveListeningDTO
+from app.dtos.listening import SaveListeningDTO
controller = "listening_controller"
diff --git a/app/api/speaking.py b/app/api/speaking.py
index 130e6c2..ee32422 100644
--- a/app/api/speaking.py
+++ b/app/api/speaking.py
@@ -6,24 +6,40 @@ from fastapi import APIRouter, Path, Query, Depends, BackgroundTasks
from app.middlewares import Authorized, IsAuthenticatedViaBearerToken
from app.configs.constants import EducationalContent
from app.controllers.abc import ISpeakingController
-from app.dtos import SaveSpeakingDTO, SpeakingGenerateVideoDTO, SpeakingGenerateInteractiveVideoDTO
+from app.dtos.speaking import (
+ SaveSpeakingDTO, GenerateVideo1DTO, GenerateVideo2DTO, GenerateVideo3DTO
+)
controller = "speaking_controller"
speaking_router = APIRouter()
+@speaking_router.get(
+ '/1',
+ dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
+)
+@inject
+async def get_speaking_task(
+ first_topic: str = Query(default=random.choice(EducationalContent.MTI_TOPICS)),
+ second_topic: str = Query(default=random.choice(EducationalContent.MTI_TOPICS)),
+ difficulty: str = Query(default=random.choice(EducationalContent.DIFFICULTIES)),
+ speaking_controller: ISpeakingController = Depends(Provide[controller])
+):
+ return await speaking_controller.get_speaking_part(1, first_topic, difficulty, second_topic)
+
+
@speaking_router.get(
'/{task}',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def get_speaking_task(
- task: int = Path(..., ge=1, le=3),
+ task: int = Path(..., ge=2, le=3),
topic: str = Query(default=random.choice(EducationalContent.MTI_TOPICS)),
difficulty: str = Query(default=random.choice(EducationalContent.DIFFICULTIES)),
speaking_controller: ISpeakingController = Depends(Provide[controller])
):
- return await speaking_controller.get_speaking_task(task, topic, difficulty)
+ return await speaking_controller.get_speaking_part(task, topic, difficulty)
@speaking_router.post(
@@ -40,24 +56,42 @@ async def save_speaking(
@speaking_router.post(
- '/generate_speaking_video',
+ '/generate_video/1',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
-async def generate_speaking_video(
- data: SpeakingGenerateVideoDTO,
+async def generate_video_1(
+ data: GenerateVideo1DTO,
speaking_controller: ISpeakingController = Depends(Provide[controller])
):
- return await speaking_controller.generate_speaking_video(data)
+ return await speaking_controller.generate_video(
+ 1, data.avatar, data.first_topic, data.questions, second_topic=data.second_topic
+ )
@speaking_router.post(
- '/generate_interactive_video',
+ '/generate_video/2',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
-async def generate_interactive_video(
- data: SpeakingGenerateInteractiveVideoDTO,
+async def generate_video_2(
+ data: GenerateVideo2DTO,
speaking_controller: ISpeakingController = Depends(Provide[controller])
):
- return await speaking_controller.generate_interactive_video(data)
+ return await speaking_controller.generate_video(
+ 2, data.avatar, data.topic, [data.question], prompts=data.prompts, suffix=data.suffix
+ )
+
+
+@speaking_router.post(
+ '/generate_video/3',
+ dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
+)
+@inject
+async def generate_video_3(
+ data: GenerateVideo3DTO,
+ speaking_controller: ISpeakingController = Depends(Provide[controller])
+):
+ return await speaking_controller.generate_video(
+ 3, data.avatar, data.topic, data.questions
+ )
diff --git a/app/api/training.py b/app/api/training.py
index a9ad5ae..8c4e44e 100644
--- a/app/api/training.py
+++ b/app/api/training.py
@@ -1,7 +1,7 @@
from dependency_injector.wiring import Provide, inject
-from fastapi import APIRouter, Depends
+from fastapi import APIRouter, Depends, Request
-from app.dtos import TipsDTO
+from app.dtos.training import FetchTipsDTO
from app.middlewares import Authorized, IsAuthenticatedViaBearerToken
from app.controllers.abc import ITrainingController
@@ -15,7 +15,20 @@ training_router = APIRouter()
)
@inject
async def get_reading_passage(
- data: TipsDTO,
+ data: FetchTipsDTO,
training_controller: ITrainingController = Depends(Provide[controller])
):
return await training_controller.fetch_tips(data)
+
+
+@training_router.post(
+ '/',
+ dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
+)
+@inject
+async def training_content(
+ request: Request,
+ training_controller: ITrainingController = Depends(Provide[controller])
+):
+ data = await request.json()
+ return await training_controller.get_training_content(data)
diff --git a/app/configs/constants.py b/app/configs/constants.py
index 9534d70..bb58dba 100644
--- a/app/configs/constants.py
+++ b/app/configs/constants.py
@@ -2,7 +2,7 @@ from enum import Enum
BLACKLISTED_WORDS = ["jesus", "sex", "gay", "lesbian", "homosexual", "god", "angel", "pornography", "beer", "wine",
"cocaine", "alcohol", "nudity", "lgbt", "casino", "gambling", "catholicism",
- "discrimination", "politics", "politic", "christianity", "islam", "christian", "christians",
+ "discrimination", "politic", "christianity", "islam", "christian", "christians",
"jews", "jew", "discrimination", "discriminatory"]
@@ -11,6 +11,26 @@ class ExamVariant(Enum):
PARTIAL = "partial"
+class CustomLevelExerciseTypes(Enum):
+ MULTIPLE_CHOICE_4 = "multiple_choice_4"
+ MULTIPLE_CHOICE_BLANK_SPACE = "multiple_choice_blank_space"
+ MULTIPLE_CHOICE_UNDERLINED = "multiple_choice_underlined"
+ BLANK_SPACE_TEXT = "blank_space_text"
+ READING_PASSAGE_UTAS = "reading_passage_utas"
+ WRITING_LETTER = "writing_letter"
+ WRITING_2 = "writing_2"
+ SPEAKING_1 = "speaking_1"
+ SPEAKING_2 = "speaking_2"
+ SPEAKING_3 = "speaking_3"
+ READING_1 = "reading_1"
+ READING_2 = "reading_2"
+ READING_3 = "reading_3"
+ LISTENING_1 = "listening_1"
+ LISTENING_2 = "listening_2"
+ LISTENING_3 = "listening_3"
+ LISTENING_4 = "listening_4"
+
+
class QuestionType(Enum):
LISTENING_SECTION_1 = "Listening Section 1"
LISTENING_SECTION_2 = "Listening Section 2"
@@ -63,7 +83,14 @@ class FieldsAndExercises:
GEN_TEXT_FIELDS = ['title']
LISTENING_GEN_FIELDS = ['transcript', 'exercise']
READING_EXERCISE_TYPES = ['fillBlanks', 'writeBlanks', 'trueFalse', 'paragraphMatch']
+ READING_3_EXERCISE_TYPES = ['fillBlanks', 'writeBlanks', 'trueFalse', 'paragraphMatch', 'ideaMatch']
+
LISTENING_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions', 'writeBlanksFill', 'writeBlanksForm']
+ LISTENING_1_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions', 'writeBlanksFill', 'writeBlanksFill',
+ 'writeBlanksForm', 'writeBlanksForm', 'writeBlanksForm', 'writeBlanksForm']
+ LISTENING_2_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions']
+ LISTENING_3_EXERCISE_TYPES = ['multipleChoice3Options', 'writeBlanksQuestions']
+ LISTENING_4_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions', 'writeBlanksFill', 'writeBlanksForm']
TOTAL_READING_PASSAGE_1_EXERCISES = 13
TOTAL_READING_PASSAGE_2_EXERCISES = 13
@@ -218,7 +245,6 @@ class EducationalContent:
"Space Exploration",
"Artificial Intelligence",
"Climate Change",
- "World Religions",
"The Human Brain",
"Renewable Energy",
"Cultural Diversity",
diff --git a/app/configs/dependency_injection.py b/app/configs/dependency_injection.py
index c269f6f..c1d13fd 100644
--- a/app/configs/dependency_injection.py
+++ b/app/configs/dependency_injection.py
@@ -1,3 +1,4 @@
+import json
import os
from dependency_injector import providers, containers
@@ -6,6 +7,7 @@ from openai import AsyncOpenAI
from httpx import AsyncClient as HTTPClient
from google.cloud.firestore_v1 import AsyncClient as FirestoreClient
from dotenv import load_dotenv
+from sentence_transformers import SentenceTransformer
from app.repositories.impl import *
from app.services.impl import *
@@ -60,16 +62,26 @@ def config_di(
writing_service = providers.Factory(WritingService, llm=llm, ai_detector=ai_detector)
+ with open('app/services/impl/level/mc_variants.json', 'r') as file:
+ mc_variants = json.load(file)
+
level_service = providers.Factory(
- LevelService, llm=llm, document_store=firestore, reading_service=reading_service
+ LevelService, llm=llm, document_store=firestore, mc_variants=mc_variants, reading_service=reading_service,
+ writing_service=writing_service, speaking_service=speaking_service, listening_service=listening_service
)
grade_service = providers.Factory(
GradeService, llm=llm
)
+ embeddings = SentenceTransformer('all-MiniLM-L6-v2')
+
+ training_kb = providers.Factory(
+ TrainingContentKnowledgeBase, embeddings=embeddings
+ )
+
training_service = providers.Factory(
- TrainingService, llm=llm
+ TrainingService, llm=llm, firestore=firestore, training_kb=training_kb
)
# Controllers
diff --git a/app/controllers/abc/grade.py b/app/controllers/abc/grade.py
index fc851b2..162e246 100644
--- a/app/controllers/abc/grade.py
+++ b/app/controllers/abc/grade.py
@@ -1,5 +1,5 @@
from abc import ABC, abstractmethod
-from typing import Dict
+from typing import Dict, List
class IGradeController(ABC):
@@ -9,18 +9,14 @@ class IGradeController(ABC):
pass
@abstractmethod
- async def grade_speaking_task(self, task: int, data: Dict):
+ async def grade_speaking_task(self, task: int, answers: List[Dict]) -> Dict:
+ pass
+
+ @abstractmethod
+ async def grade_short_answers(self, data: Dict):
pass
@abstractmethod
async def grading_summary(self, data: Dict):
pass
- @abstractmethod
- async def _grade_speaking_task_1_2(self, task: int, question: str, answer_firebase_path: str):
- pass
-
- @abstractmethod
- async def _grade_speaking_task3(self, answers: Dict):
- pass
-
diff --git a/app/controllers/abc/level.py b/app/controllers/abc/level.py
index 0cef88a..43fe296 100644
--- a/app/controllers/abc/level.py
+++ b/app/controllers/abc/level.py
@@ -1,5 +1,8 @@
from abc import ABC, abstractmethod
+from fastapi import UploadFile
+from typing import Dict
+
class ILevelController(ABC):
@@ -10,3 +13,11 @@ class ILevelController(ABC):
@abstractmethod
async def get_level_utas(self):
pass
+
+ @abstractmethod
+ async def upload_level(self, file: UploadFile):
+ pass
+
+ @abstractmethod
+ async def get_custom_level(self, data: Dict):
+ pass
diff --git a/app/controllers/abc/speaking.py b/app/controllers/abc/speaking.py
index d7d48c0..6b96a23 100644
--- a/app/controllers/abc/speaking.py
+++ b/app/controllers/abc/speaking.py
@@ -1,11 +1,13 @@
from abc import ABC, abstractmethod
+from typing import Optional
+
from fastapi import BackgroundTasks
class ISpeakingController(ABC):
@abstractmethod
- async def get_speaking_task(self, task: int, topic: str, difficulty: str):
+ async def get_speaking_part(self, task: int, topic: str, difficulty: str, second_topic: Optional[str] = None):
pass
@abstractmethod
@@ -13,9 +15,11 @@ class ISpeakingController(ABC):
pass
@abstractmethod
- async def generate_speaking_video(self, data):
- pass
-
- @abstractmethod
- async def generate_interactive_video(self, data):
+ async def generate_video(
+ self, part: int, avatar: str, topic: str, questions: list[str],
+ *,
+ second_topic: Optional[str] = None,
+ prompts: Optional[list[str]] = None,
+ suffix: Optional[str] = None,
+ ):
pass
diff --git a/app/controllers/abc/training.py b/app/controllers/abc/training.py
index 2ba831f..1ce25c0 100644
--- a/app/controllers/abc/training.py
+++ b/app/controllers/abc/training.py
@@ -6,3 +6,7 @@ class ITrainingController(ABC):
@abstractmethod
async def fetch_tips(self, data):
pass
+
+ @abstractmethod
+ async def get_training_content(self, data):
+ pass
diff --git a/app/controllers/impl/grade.py b/app/controllers/impl/grade.py
index 791b57b..3474664 100644
--- a/app/controllers/impl/grade.py
+++ b/app/controllers/impl/grade.py
@@ -1,17 +1,12 @@
import logging
-import os
-import uuid
-from typing import Dict
-
-from fastapi import HTTPException
-from pydantic import ValidationError
+from typing import Dict, List
from app.configs.constants import FilePaths
from app.controllers.abc import IGradeController
-from app.dtos.speaking import SpeakingGradeTask1And2DTO, SpeakingGradeTask3DTO
from app.dtos.writing import WritingGradeTaskDTO
-from app.helpers import IOHelper
+from app.helpers import FileHelper
from app.services.abc import ISpeakingService, IWritingService, IGradeService
+from app.utils import handle_exception
class GradeController(IGradeController):
@@ -28,47 +23,20 @@ class GradeController(IGradeController):
self._logger = logging.getLogger(__name__)
async def grade_writing_task(self, task: int, data: WritingGradeTaskDTO):
- try:
- return await self._writing_service.grade_writing_task(task, data.question, data.answer)
- except Exception as e:
- return str(e)
+ return await self._writing_service.grade_writing_task(task, data.question, data.answer)
- async def grade_speaking_task(self, task: int, data: Dict):
- try:
- if task in {1, 2}:
- body = SpeakingGradeTask1And2DTO(**data)
- return await self._grade_speaking_task_1_2(task, body.question, body.answer)
- else:
- body = SpeakingGradeTask3DTO(**data)
- return await self._grade_speaking_task3(body.answers)
- except ValidationError as e:
- raise HTTPException(status_code=422, detail=e.errors())
+ @handle_exception(400)
+ async def grade_speaking_task(self, task: int, answers: List[Dict]) -> Dict:
+ FileHelper.delete_files_older_than_one_day(FilePaths.AUDIO_FILES_PATH)
+ return await self._speaking_service.grade_speaking_task(task, answers)
+
+ async def grade_short_answers(self, data: Dict):
+ return await self._service.grade_short_answers(data)
async def grading_summary(self, data: Dict):
- try:
- section_keys = ['reading', 'listening', 'writing', 'speaking', 'level']
- extracted_sections = self._extract_existing_sections_from_body(data, section_keys)
- return await self._service.calculate_grading_summary(extracted_sections)
- except Exception as e:
- return str(e)
-
- async def _grade_speaking_task_1_2(self, task: int, question: str, answer_firebase_path: str):
- sound_file_name = FilePaths.AUDIO_FILES_PATH + str(uuid.uuid4())
- try:
- IOHelper.delete_files_older_than_one_day(FilePaths.AUDIO_FILES_PATH)
- return await self._speaking_service.grade_speaking_task_1_and_2(
- task, question, answer_firebase_path, sound_file_name
- )
- except Exception as e:
- os.remove(sound_file_name)
- return str(e), 400
-
- async def _grade_speaking_task3(self, answers: Dict):
- try:
- IOHelper.delete_files_older_than_one_day(FilePaths.AUDIO_FILES_PATH)
- return await self._speaking_service.grade_speaking_task_3(answers)
- except Exception as e:
- return str(e), 400
+ section_keys = ['reading', 'listening', 'writing', 'speaking', 'level']
+ extracted_sections = self._extract_existing_sections_from_body(data, section_keys)
+ return await self._service.calculate_grading_summary(extracted_sections)
@staticmethod
def _extract_existing_sections_from_body(my_dict, keys_to_extract):
diff --git a/app/controllers/impl/level.py b/app/controllers/impl/level.py
index 622163c..eacb202 100644
--- a/app/controllers/impl/level.py
+++ b/app/controllers/impl/level.py
@@ -1,3 +1,6 @@
+from fastapi import UploadFile
+from typing import Dict
+
from app.controllers.abc import ILevelController
from app.services.abc import ILevelService
@@ -8,13 +11,13 @@ class LevelController(ILevelController):
self._service = level_service
async def get_level_exam(self):
- try:
- return await self._service.get_level_exam()
- except Exception as e:
- return str(e)
+ return await self._service.get_level_exam()
async def get_level_utas(self):
- try:
- return await self._service.get_level_utas()
- except Exception as e:
- return str(e)
+ return await self._service.get_level_utas()
+
+ async def upload_level(self, file: UploadFile):
+ return await self._service.upload_level(file)
+
+ async def get_custom_level(self, data: Dict):
+ return await self._service.get_custom_level(data)
diff --git a/app/controllers/impl/listening.py b/app/controllers/impl/listening.py
index 9ed57d1..3095388 100644
--- a/app/controllers/impl/listening.py
+++ b/app/controllers/impl/listening.py
@@ -1,97 +1,19 @@
-import random
-import logging
from typing import List
from app.controllers.abc import IListeningController
-from app.dtos import SaveListeningDTO
+from app.dtos.listening import SaveListeningDTO
from app.services.abc import IListeningService
-from app.helpers import IOHelper, ExercisesHelper
-from app.configs.constants import (
- FilePaths, EducationalContent, FieldsAndExercises
-)
class ListeningController(IListeningController):
def __init__(self, listening_service: IListeningService):
self._service = listening_service
- self._logger = logging.getLogger(__name__)
- self._sections = {
- "section_1": {
- "topic": EducationalContent.TWO_PEOPLE_SCENARIOS,
- "exercise_sample_size": 1,
- "total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_1_EXERCISES,
- "type": "conversation",
- "start_id": 1
- },
- "section_2": {
- "topic": EducationalContent.SOCIAL_MONOLOGUE_CONTEXTS,
- "exercise_sample_size": 2,
- "total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_2_EXERCISES,
- "type": "monologue",
- "start_id": 11
- },
- "section_3": {
- "topic": EducationalContent.FOUR_PEOPLE_SCENARIOS,
- "exercise_sample_size": 1,
- "total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_3_EXERCISES,
- "type": "conversation",
- "start_id": 21
- },
- "section_4": {
- "topic": EducationalContent.ACADEMIC_SUBJECTS,
- "exercise_sample_size": 2,
- "total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_4_EXERCISES,
- "type": "monologue",
- "start_id": 31
- }
- }
- async def get_listening_question(self, section_id: int, topic: str, req_exercises: List[str], difficulty: str):
- try:
- IOHelper.delete_files_older_than_one_day(FilePaths.AUDIO_FILES_PATH)
- section = self._sections[f"section_{str(section_id)}"]
- if not topic:
- topic = random.choice(section["topic"])
-
- if len(req_exercises) == 0:
- req_exercises = random.sample(FieldsAndExercises.LISTENING_EXERCISE_TYPES, section["exercise_sample_size"])
-
- number_of_exercises_q = ExercisesHelper.divide_number_into_parts(section["total_exercises"], len(req_exercises))
-
- dialog = await self._service.generate_listening_question(section_id, topic)
-
- if section_id in {1, 3}:
- dialog = self.parse_conversation(dialog)
-
- self._logger.info(f'Generated {section["type"]}: {str(dialog)}')
-
- exercises = await self._service.generate_listening_exercises(
- section_id, str(dialog), req_exercises, number_of_exercises_q, section["start_id"], difficulty
- )
-
- return {
- "exercises": exercises,
- "text": dialog,
- "difficulty": difficulty
- }
- except Exception as e:
- return str(e)
+ async def get_listening_question(
+ self, section_id: int, topic: str, req_exercises: List[str], difficulty: str
+ ):
+ return await self._service.get_listening_question(section_id, topic, req_exercises, difficulty)
async def save_listening(self, data: SaveListeningDTO):
- try:
- return await self._service.save_listening(data.parts, data.minTimer, data.difficulty)
- except Exception as e:
- return str(e)
-
- @staticmethod
- def parse_conversation(conversation_data):
- conversation_list = conversation_data.get('conversation', [])
- readable_text = []
-
- for message in conversation_list:
- name = message.get('name', 'Unknown')
- text = message.get('text', '')
- readable_text.append(f"{name}: {text}")
-
- return "\n".join(readable_text)
+ return await self._service.save_listening(data.parts, data.minTimer, data.difficulty, data.id)
diff --git a/app/controllers/impl/reading.py b/app/controllers/impl/reading.py
index da18388..d496c02 100644
--- a/app/controllers/impl/reading.py
+++ b/app/controllers/impl/reading.py
@@ -15,29 +15,29 @@ class ReadingController(IReadingController):
self._logger = logging.getLogger(__name__)
self._passages = {
"passage_1": {
+ "start_id": 1,
"total_exercises": FieldsAndExercises.TOTAL_READING_PASSAGE_1_EXERCISES
},
"passage_2": {
+ "start_id": 14,
"total_exercises": FieldsAndExercises.TOTAL_READING_PASSAGE_2_EXERCISES
},
"passage_3": {
+ "start_id": 27,
"total_exercises": FieldsAndExercises.TOTAL_READING_PASSAGE_3_EXERCISES
}
}
async def get_reading_passage(self, passage_id: int, topic: str, req_exercises: List[str], difficulty: str):
- try:
- passage = self._passages[f'passage_{str(passage_id)}']
+ passage = self._passages[f'passage_{str(passage_id)}']
- if len(req_exercises) == 0:
- req_exercises = random.sample(FieldsAndExercises.READING_EXERCISE_TYPES, 2)
+ if len(req_exercises) == 0:
+ req_exercises = random.sample(FieldsAndExercises.READING_EXERCISE_TYPES, 2)
- number_of_exercises_q = ExercisesHelper.divide_number_into_parts(
- passage["total_exercises"], len(req_exercises)
- )
+ number_of_exercises_q = ExercisesHelper.divide_number_into_parts(
+ passage["total_exercises"], len(req_exercises)
+ )
- return await self._service.gen_reading_passage(
- passage_id, topic, req_exercises, number_of_exercises_q, difficulty
- )
- except Exception as e:
- return str(e)
+ return await self._service.gen_reading_passage(
+ passage_id, topic, req_exercises, number_of_exercises_q, difficulty, passage["start_id"]
+ )
diff --git a/app/controllers/impl/speaking.py b/app/controllers/impl/speaking.py
index c0fbde7..7c2a383 100644
--- a/app/controllers/impl/speaking.py
+++ b/app/controllers/impl/speaking.py
@@ -1,13 +1,12 @@
import logging
import uuid
+from typing import Optional
from fastapi import BackgroundTasks
from app.controllers.abc import ISpeakingController
-from app.dtos import (
- SaveSpeakingDTO, SpeakingGenerateVideoDTO,
- SpeakingGenerateInteractiveVideoDTO
-)
+from app.dtos.speaking import SaveSpeakingDTO
+
from app.services.abc import ISpeakingService
from app.configs.constants import ExamVariant, MinTimers
from app.configs.question_templates import getSpeakingTemplate
@@ -19,45 +18,30 @@ class SpeakingController(ISpeakingController):
self._service = speaking_service
self._logger = logging.getLogger(__name__)
- async def get_speaking_task(self, task: int, topic: str, difficulty: str):
- try:
- return await self._service.get_speaking_task(task, topic, difficulty)
- except Exception as e:
- return str(e)
+ async def get_speaking_part(self, task: int, topic: str, difficulty: str, second_topic: Optional[str] = None):
+ return await self._service.get_speaking_part(task, topic, difficulty, second_topic)
async def save_speaking(self, data: SaveSpeakingDTO, background_tasks: BackgroundTasks):
- try:
- exercises = data.exercises
- min_timer = data.minTimer
+ exercises = data.exercises
+ min_timer = data.minTimer
- template = getSpeakingTemplate()
- template["minTimer"] = min_timer
+ template = getSpeakingTemplate()
+ template["minTimer"] = min_timer
- if min_timer < MinTimers.SPEAKING_MIN_TIMER_DEFAULT:
- template["variant"] = ExamVariant.PARTIAL.value
- else:
- template["variant"] = ExamVariant.FULL.value
+ if min_timer < MinTimers.SPEAKING_MIN_TIMER_DEFAULT:
+ template["variant"] = ExamVariant.PARTIAL.value
+ else:
+ template["variant"] = ExamVariant.FULL.value
- req_id = str(uuid.uuid4())
- self._logger.info(f'Received request to save speaking with id: {req_id}')
+ req_id = str(uuid.uuid4())
+ self._logger.info(f'Received request to save speaking with id: {req_id}')
- background_tasks.add_task(self._service.create_videos_and_save_to_db, exercises, template, req_id)
+ background_tasks.add_task(self._service.create_videos_and_save_to_db, exercises, template, req_id)
- self._logger.info('Started background task to save speaking.')
+ self._logger.info('Started background task to save speaking.')
- # Return response without waiting for create_videos_and_save_to_db to finish
- return {**template, "id": req_id}
- except Exception as e:
- return str(e)
+ # Return response without waiting for create_videos_and_save_to_db to finish
+ return {**template, "id": req_id}
- async def generate_speaking_video(self, data: SpeakingGenerateVideoDTO):
- try:
- return await self._service.generate_speaking_video(data.question, data.topic, data.avatar, data.prompts)
- except Exception as e:
- return str(e)
-
- async def generate_interactive_video(self, data: SpeakingGenerateInteractiveVideoDTO):
- try:
- return await self._service.generate_interactive_video(data.questions, data.topic, data.avatar)
- except Exception as e:
- return str(e)
+ async def generate_video(self, *args, **kwargs):
+ return await self._service.generate_video(*args, **kwargs)
diff --git a/app/controllers/impl/training.py b/app/controllers/impl/training.py
index d3e3fd9..dc39017 100644
--- a/app/controllers/impl/training.py
+++ b/app/controllers/impl/training.py
@@ -1,5 +1,7 @@
+from typing import Dict
+
from app.controllers.abc import ITrainingController
-from app.dtos import TipsDTO
+from app.dtos.training import FetchTipsDTO
from app.services.abc import ITrainingService
@@ -8,8 +10,8 @@ class TrainingController(ITrainingController):
def __init__(self, training_service: ITrainingService):
self._service = training_service
- async def fetch_tips(self, data: TipsDTO):
- try:
- return await self._service.fetch_tips(data.context, data.question, data.answer, data.correct_answer)
- except Exception as e:
- return str(e)
+ async def fetch_tips(self, data: FetchTipsDTO):
+ return await self._service.fetch_tips(data.context, data.question, data.answer, data.correct_answer)
+
+ async def get_training_content(self, data: Dict):
+ return await self._service.get_training_content(data)
diff --git a/app/controllers/impl/writing.py b/app/controllers/impl/writing.py
index 45018ce..b01726d 100644
--- a/app/controllers/impl/writing.py
+++ b/app/controllers/impl/writing.py
@@ -8,7 +8,4 @@ class WritingController(IWritingController):
self._service = writing_service
async def get_writing_task_general_question(self, task: int, topic: str, difficulty: str):
- try:
- return await self._service.get_writing_task_general_question(task, topic, difficulty)
- except Exception as e:
- return str(e)
+ return await self._service.get_writing_task_general_question(task, topic, difficulty)
diff --git a/app/dtos/__init__.py b/app/dtos/__init__.py
index b51dd73..e69de29 100644
--- a/app/dtos/__init__.py
+++ b/app/dtos/__init__.py
@@ -1,19 +0,0 @@
-from .listening import SaveListeningDTO
-from .speaking import (
- SaveSpeakingDTO, SpeakingGradeTask1And2DTO,
- SpeakingGradeTask3DTO, SpeakingGenerateVideoDTO,
- SpeakingGenerateInteractiveVideoDTO
-)
-from .training import TipsDTO
-from .writing import WritingGradeTaskDTO
-
-__all__ = [
- "SaveListeningDTO",
- "SaveSpeakingDTO",
- "SpeakingGradeTask1And2DTO",
- "SpeakingGradeTask3DTO",
- "SpeakingGenerateVideoDTO",
- "SpeakingGenerateInteractiveVideoDTO",
- "TipsDTO",
- "WritingGradeTaskDTO"
-]
diff --git a/app/dtos/exam.py b/app/dtos/exam.py
new file mode 100644
index 0000000..779daea
--- /dev/null
+++ b/app/dtos/exam.py
@@ -0,0 +1,57 @@
+from pydantic import BaseModel, Field
+from typing import List, Dict, Union, Optional
+from uuid import uuid4, UUID
+
+
+class Option(BaseModel):
+ id: str
+ text: str
+
+
+class MultipleChoiceQuestion(BaseModel):
+ id: str
+ prompt: str
+ variant: str = "text"
+ solution: str
+ options: List[Option]
+
+
+class MultipleChoiceExercise(BaseModel):
+ id: UUID = Field(default_factory=uuid4)
+ type: str = "multipleChoice"
+ prompt: str = "Select the appropriate option."
+ questions: List[MultipleChoiceQuestion]
+ userSolutions: List = Field(default_factory=list)
+
+
+class FillBlanksWord(BaseModel):
+ id: str
+ options: Dict[str, str]
+
+
+class FillBlanksSolution(BaseModel):
+ id: str
+ solution: str
+
+
+class FillBlanksExercise(BaseModel):
+ id: UUID = Field(default_factory=uuid4)
+ type: str = "fillBlanks"
+ variant: str = "mc"
+ prompt: str = "Click a blank to select the appropriate word for it."
+ text: str
+ solutions: List[FillBlanksSolution]
+ words: List[FillBlanksWord]
+ userSolutions: List = Field(default_factory=list)
+
+
+Exercise = Union[MultipleChoiceExercise, FillBlanksExercise]
+
+
+class Part(BaseModel):
+ exercises: List[Exercise]
+ context: Optional[str] = Field(default=None)
+
+
+class Exam(BaseModel):
+ parts: List[Part]
diff --git a/app/dtos/listening.py b/app/dtos/listening.py
index 83096bd..d7e44db 100644
--- a/app/dtos/listening.py
+++ b/app/dtos/listening.py
@@ -1,4 +1,5 @@
import random
+import uuid
from typing import List, Dict
from pydantic import BaseModel
@@ -10,3 +11,4 @@ class SaveListeningDTO(BaseModel):
parts: List[Dict]
minTimer: int = MinTimers.LISTENING_MIN_TIMER_DEFAULT
difficulty: str = random.choice(EducationalContent.DIFFICULTIES)
+ id: str = str(uuid.uuid4())
diff --git a/app/dtos/sheet.py b/app/dtos/sheet.py
new file mode 100644
index 0000000..8efac82
--- /dev/null
+++ b/app/dtos/sheet.py
@@ -0,0 +1,29 @@
+from pydantic import BaseModel
+from typing import List, Dict, Union, Any, Optional
+
+
+class Option(BaseModel):
+ id: str
+ text: str
+
+
+class MultipleChoiceQuestion(BaseModel):
+ type: str = "multipleChoice"
+ id: str
+ prompt: str
+ variant: str = "text"
+ options: List[Option]
+
+
+class FillBlanksWord(BaseModel):
+ type: str = "fillBlanks"
+ id: str
+ options: Dict[str, str]
+
+
+Component = Union[MultipleChoiceQuestion, FillBlanksWord, Dict[str, Any]]
+
+
+class Sheet(BaseModel):
+ batch: Optional[int] = None
+ components: List[Component]
diff --git a/app/dtos/speaking.py b/app/dtos/speaking.py
index 14808de..7c8b124 100644
--- a/app/dtos/speaking.py
+++ b/app/dtos/speaking.py
@@ -11,23 +11,31 @@ class SaveSpeakingDTO(BaseModel):
minTimer: int = MinTimers.SPEAKING_MIN_TIMER_DEFAULT
-class SpeakingGradeTask1And2DTO(BaseModel):
+class GradeSpeakingDTO(BaseModel):
question: str
answer: str
-class SpeakingGradeTask3DTO(BaseModel):
- answers: Dict
+class GradeSpeakingAnswersDTO(BaseModel):
+ answers: List[Dict]
-class SpeakingGenerateVideoDTO(BaseModel):
+class GenerateVideo1DTO(BaseModel):
+ avatar: str = (random.choice(list(AvatarEnum))).value
+ questions: List[str]
+ first_topic: str
+ second_topic: str
+
+
+class GenerateVideo2DTO(BaseModel):
avatar: str = (random.choice(list(AvatarEnum))).value
prompts: List[str] = []
+ suffix: str = ""
question: str
topic: str
-class SpeakingGenerateInteractiveVideoDTO(BaseModel):
+class GenerateVideo3DTO(BaseModel):
avatar: str = (random.choice(list(AvatarEnum))).value
questions: List[str]
topic: str
diff --git a/app/dtos/training.py b/app/dtos/training.py
index cb82490..d5de433 100644
--- a/app/dtos/training.py
+++ b/app/dtos/training.py
@@ -1,8 +1,37 @@
from pydantic import BaseModel
+from typing import List
-class TipsDTO(BaseModel):
+class FetchTipsDTO(BaseModel):
context: str
question: str
answer: str
correct_answer: str
+
+
+class QueryDTO(BaseModel):
+ category: str
+ text: str
+
+
+class DetailsDTO(BaseModel):
+ exam_id: str
+ date: int
+ performance_comment: str
+ detailed_summary: str
+
+
+class WeakAreaDTO(BaseModel):
+ area: str
+ comment: str
+
+
+class TrainingContentDTO(BaseModel):
+ details: List[DetailsDTO]
+ weak_areas: List[WeakAreaDTO]
+ queries: List[QueryDTO]
+
+
+class TipsDTO(BaseModel):
+ tip_ids: List[str]
+
diff --git a/app/helpers/__init__.py b/app/helpers/__init__.py
index fb1275d..eddd6da 100644
--- a/app/helpers/__init__.py
+++ b/app/helpers/__init__.py
@@ -1,11 +1,13 @@
-from .io import IOHelper
-from .text_helper import TextHelper
+from .file import FileHelper
+from .text import TextHelper
from .token_counter import count_tokens
-from .exercises_helper import ExercisesHelper
+from .exercises import ExercisesHelper
+from .logger import LoggerHelper
__all__ = [
- "IOHelper",
+ "FileHelper",
"TextHelper",
"count_tokens",
- "ExercisesHelper"
+ "ExercisesHelper",
+ "LoggerHelper"
]
diff --git a/app/helpers/exercises_helper.py b/app/helpers/exercises.py
similarity index 76%
rename from app/helpers/exercises_helper.py
rename to app/helpers/exercises.py
index 618c1da..be40bc8 100644
--- a/app/helpers/exercises_helper.py
+++ b/app/helpers/exercises.py
@@ -4,7 +4,7 @@ import re
import string
from wonderwords import RandomWord
-from .text_helper import TextHelper
+from .text import TextHelper
class ExercisesHelper:
@@ -70,7 +70,12 @@ class ExercisesHelper:
random.shuffle(combined_array)
- return combined_array
+ result = []
+ for i, word in enumerate(combined_array):
+ letter = chr(65 + i) # chr(65) is 'A'
+ result.append({"letter": letter, "word": word})
+
+ return result
@staticmethod
def fillblanks_build_solutions_array(words, start_id):
@@ -187,9 +192,58 @@ class ExercisesHelper:
@staticmethod
def fix_writing_overall(overall: float, task_response: dict):
- if overall > max(task_response.values()) or overall < min(task_response.values()):
- total_sum = sum(task_response.values())
- average = total_sum / len(task_response.values())
+ grades = [category["grade"] for category in task_response.values()]
+
+ if overall > max(grades) or overall < min(grades):
+ total_sum = sum(grades)
+ average = total_sum / len(grades)
rounded_average = round(average, 0)
return rounded_average
+
return overall
+
+ @staticmethod
+ def build_options(ideas):
+ options = []
+ letters = iter(string.ascii_uppercase)
+ for idea in ideas:
+ options.append({
+ "id": next(letters),
+ "sentence": idea["from"]
+ })
+ return options
+
+ @staticmethod
+ def build_sentences(ideas, start_id):
+ sentences = []
+ letters = iter(string.ascii_uppercase)
+ for idea in ideas:
+ sentences.append({
+ "solution": next(letters),
+ "sentence": idea["idea"]
+ })
+
+ random.shuffle(sentences)
+ for i, sentence in enumerate(sentences, start=start_id):
+ sentence["id"] = i
+ return sentences
+
+ @staticmethod
+ def randomize_mc_options_order(questions):
+ option_ids = ['A', 'B', 'C', 'D']
+
+ for question in questions:
+ # Store the original solution text
+ original_solution_text = next(
+ option['text'] for option in question['options'] if option['id'] == question['solution'])
+
+ # Shuffle the options
+ random.shuffle(question['options'])
+
+ # Update the option ids and find the new solution id
+ for idx, option in enumerate(question['options']):
+ option['id'] = option_ids[idx]
+ if option['text'] == original_solution_text:
+ question['solution'] = option['id']
+
+ return questions
diff --git a/app/helpers/file.py b/app/helpers/file.py
new file mode 100644
index 0000000..aa3230b
--- /dev/null
+++ b/app/helpers/file.py
@@ -0,0 +1,95 @@
+import datetime
+from pathlib import Path
+import base64
+import io
+import os
+import shutil
+import subprocess
+from typing import Optional
+
+import numpy as np
+import pypandoc
+from PIL import Image
+
+import aiofiles
+
+
+class FileHelper:
+
+ @staticmethod
+ def delete_files_older_than_one_day(directory: str):
+ current_time = datetime.datetime.now()
+
+ for entry in os.scandir(directory):
+ if entry.is_file():
+ file_path = Path(entry)
+ file_name = file_path.name
+ file_modified_time = datetime.datetime.fromtimestamp(file_path.stat().st_mtime)
+ time_difference = current_time - file_modified_time
+ if time_difference.days > 1 and "placeholder" not in file_name:
+ file_path.unlink()
+ print(f"Deleted file: {file_path}")
+
+    # Pandoc reportedly supports a wide range of input formats; only tested with docx so far.
+ @staticmethod
+ def convert_file_to_pdf(input_path: str, output_path: str):
+ pypandoc.convert_file(input_path, 'pdf', outputfile=output_path, extra_args=[
+ '-V', 'geometry:paperwidth=5.5in',
+ '-V', 'geometry:paperheight=8.5in',
+ '-V', 'geometry:margin=0.5in',
+ '-V', 'pagestyle=empty'
+ ])
+
+ @staticmethod
+ def convert_file_to_html(input_path: str, output_path: str):
+ pypandoc.convert_file(input_path, 'html', outputfile=output_path)
+
+ @staticmethod
+ def pdf_to_png(path_id: str):
+ to_png = f"pdftoppm -png exercises.pdf page"
+ result = subprocess.run(to_png, shell=True, cwd=f'./tmp/{path_id}', capture_output=True, text=True)
+ if result.returncode != 0:
+ raise Exception(
+ f"Couldn't convert pdf to png. Failed to run command '{to_png}' -> ```cmd {result.stderr}```")
+
+ @staticmethod
+ def is_page_blank(image_bytes: bytes, image_threshold=10) -> bool:
+ with Image.open(io.BytesIO(image_bytes)) as img:
+ img_gray = img.convert('L')
+ img_array = np.array(img_gray)
+ non_white_pixels = np.sum(img_array < 255)
+
+ return non_white_pixels <= image_threshold
+
+ @classmethod
+ async def _encode_image(cls, image_path: str, image_threshold=10) -> Optional[str]:
+ async with aiofiles.open(image_path, "rb") as image_file:
+ image_bytes = await image_file.read()
+
+ if cls.is_page_blank(image_bytes, image_threshold):
+ return None
+
+ return base64.b64encode(image_bytes).decode('utf-8')
+
+ @classmethod
+ def b64_pngs(cls, path_id: str, files: list[str]):
+ png_messages = []
+ for filename in files:
+ b64_string = cls._encode_image(os.path.join(f'./tmp/{path_id}', filename))
+ if b64_string:
+ png_messages.append({
+ "type": "image_url",
+ "image_url": {
+ "url": f"data:image/png;base64,{b64_string}"
+ }
+ })
+ return png_messages
+
+ @staticmethod
+ def remove_directory(path):
+ try:
+ if os.path.exists(path):
+ if os.path.isdir(path):
+ shutil.rmtree(path)
+ except Exception as e:
+ print(f"An error occurred while trying to remove {path}: {str(e)}")
diff --git a/app/helpers/io.py b/app/helpers/io.py
deleted file mode 100644
index c170395..0000000
--- a/app/helpers/io.py
+++ /dev/null
@@ -1,20 +0,0 @@
-import datetime
-import os
-from pathlib import Path
-
-
-class IOHelper:
-
- @staticmethod
- def delete_files_older_than_one_day(directory: str):
- current_time = datetime.datetime.now()
-
- for entry in os.scandir(directory):
- if entry.is_file():
- file_path = Path(entry)
- file_name = file_path.name
- file_modified_time = datetime.datetime.fromtimestamp(file_path.stat().st_mtime)
- time_difference = current_time - file_modified_time
- if time_difference.days > 1 and "placeholder" not in file_name:
- file_path.unlink()
- print(f"Deleted file: {file_path}")
diff --git a/app/helpers/logger.py b/app/helpers/logger.py
new file mode 100644
index 0000000..762766a
--- /dev/null
+++ b/app/helpers/logger.py
@@ -0,0 +1,23 @@
+import logging
+from functools import wraps
+
+
+class LoggerHelper:
+
+ @staticmethod
+ def suppress_loggers():
+ def decorator(f):
+ @wraps(f)
+ def wrapped(*args, **kwargs):
+ root_logger = logging.getLogger()
+ original_level = root_logger.level
+
+ root_logger.setLevel(logging.ERROR)
+
+ try:
+ return f(*args, **kwargs)
+ finally:
+ root_logger.setLevel(original_level)
+
+ return wrapped
+ return decorator
diff --git a/app/helpers/text_helper.py b/app/helpers/text.py
similarity index 100%
rename from app/helpers/text_helper.py
rename to app/helpers/text.py
diff --git a/app/mappers/__init__.py b/app/mappers/__init__.py
new file mode 100644
index 0000000..bc00787
--- /dev/null
+++ b/app/mappers/__init__.py
@@ -0,0 +1,5 @@
+from .exam import ExamMapper
+
+__all__ = [
+ "ExamMapper"
+]
diff --git a/app/mappers/exam.py b/app/mappers/exam.py
new file mode 100644
index 0000000..df26eea
--- /dev/null
+++ b/app/mappers/exam.py
@@ -0,0 +1,66 @@
+from typing import Dict, Any
+
+from pydantic import ValidationError
+
+from app.dtos.exam import (
+ MultipleChoiceExercise,
+ FillBlanksExercise,
+ Part, Exam
+)
+from app.dtos.sheet import Sheet, Option, MultipleChoiceQuestion, FillBlanksWord
+
+
+class ExamMapper:
+
+ @staticmethod
+ def map_to_exam_model(response: Dict[str, Any]) -> Exam:
+ parts = []
+ for part in response['parts']:
+ part_exercises = part['exercises']
+ context = part.get('context', None)
+
+ exercises = []
+ for exercise in part_exercises:
+ exercise_type = exercise['type']
+ if exercise_type == 'multipleChoice':
+ exercise_model = MultipleChoiceExercise(**exercise)
+ elif exercise_type == 'fillBlanks':
+ exercise_model = FillBlanksExercise(**exercise)
+ else:
+ raise ValidationError(f"Unknown exercise type: {exercise_type}")
+
+ exercises.append(exercise_model)
+
+ part_kwargs = {"exercises": exercises}
+ if context is not None:
+ part_kwargs["context"] = context
+
+ part_model = Part(**part_kwargs)
+ parts.append(part_model)
+
+ return Exam(parts=parts)
+
+ @staticmethod
+ def map_to_sheet(response: Dict[str, Any]) -> Sheet:
+ components = []
+
+ for item in response["components"]:
+ component_type = item["type"]
+
+ if component_type == "multipleChoice":
+ options = [Option(id=opt["id"], text=opt["text"]) for opt in item["options"]]
+ components.append(MultipleChoiceQuestion(
+ id=item["id"],
+ prompt=item["prompt"],
+ variant=item.get("variant", "text"),
+ options=options
+ ))
+ elif component_type == "fillBlanks":
+ components.append(FillBlanksWord(
+ id=item["id"],
+ options=item["options"]
+ ))
+ else:
+ components.append(item)
+
+ return Sheet(components=components)
diff --git a/app/repositories/abc/document_store.py b/app/repositories/abc/document_store.py
index 4afe66d..78b0a12 100644
--- a/app/repositories/abc/document_store.py
+++ b/app/repositories/abc/document_store.py
@@ -11,3 +11,6 @@ class IDocumentStore(ABC):
async def get_all(self, collection: str):
pass
+
+ async def get_doc_by_id(self, collection: str, doc_id: str):
+ pass
diff --git a/app/repositories/impl/document_stores/firestore.py b/app/repositories/impl/document_stores/firestore.py
index f95badf..30f4e3e 100644
--- a/app/repositories/impl/document_stores/firestore.py
+++ b/app/repositories/impl/document_stores/firestore.py
@@ -15,9 +15,9 @@ class Firestore(IDocumentStore):
update_time, document_ref = await collection_ref.add(item)
if document_ref:
self._logger.info(f"Document added with ID: {document_ref.id}")
- return True, document_ref.id
+ return document_ref.id
else:
- return False, None
+ return None
async def save_to_db_with_id(self, collection: str, item, id: str):
collection_ref: AsyncCollectionReference = self._client.collection(collection)
@@ -26,9 +26,9 @@ class Firestore(IDocumentStore):
doc_snapshot = await document_ref.get()
if doc_snapshot.exists:
self._logger.info(f"Document added with ID: {document_ref.id}")
- return True, document_ref.id
+ return document_ref.id
else:
- return False, None
+ return None
async def get_all(self, collection: str):
collection_ref: AsyncCollectionReference = self._client.collection(collection)
@@ -36,3 +36,12 @@ class Firestore(IDocumentStore):
async for doc in collection_ref.stream():
docs.append(doc.to_dict())
return docs
+
+ async def get_doc_by_id(self, collection: str, doc_id: str):
+ collection_ref: AsyncCollectionReference = self._client.collection(collection)
+ doc_ref: AsyncDocumentReference = collection_ref.document(doc_id)
+ doc = await doc_ref.get()
+
+ if doc.exists:
+ return doc.to_dict()
+ return None
diff --git a/app/server.py b/app/server.py
index 71cc8fa..5ff7d5d 100644
--- a/app/server.py
+++ b/app/server.py
@@ -116,6 +116,16 @@ def setup_listeners(_app: FastAPI) -> None:
content={"error_code": exc.error_code, "message": exc.message},
)
+ @_app.exception_handler(Exception)
+ async def default_exception_handler(request: Request, exc: Exception):
+ """
+ Don't delete request param
+ """
+ return JSONResponse(
+ status_code=500,
+ content=str(exc),
+ )
+
def setup_middleware() -> List[Middleware]:
middleware = [
@@ -135,9 +145,10 @@ def setup_middleware() -> List[Middleware]:
def create_app() -> FastAPI:
+ env = os.getenv("ENV")
_app = FastAPI(
- docs_url=None,
- redoc_url=None,
+ docs_url="/docs" if env != "prod" else None,
+ redoc_url="/redoc" if env != "prod" else None,
middleware=setup_middleware(),
lifespan=lifespan
)
diff --git a/app/services/abc/__init__.py b/app/services/abc/__init__.py
index 82b6cba..b8130df 100644
--- a/app/services/abc/__init__.py
+++ b/app/services/abc/__init__.py
@@ -5,6 +5,7 @@ from .speaking import ISpeakingService
from .reading import IReadingService
from .grade import IGradeService
from .training import ITrainingService
+from .kb import IKnowledgeBase
from .third_parties import *
__all__ = [
diff --git a/app/services/abc/grade.py b/app/services/abc/grade.py
index 41ce1c3..9ee89eb 100644
--- a/app/services/abc/grade.py
+++ b/app/services/abc/grade.py
@@ -4,20 +4,10 @@ from typing import Dict, List
class IGradeService(ABC):
+ @abstractmethod
+ async def grade_short_answers(self, data: Dict):
+ pass
+
@abstractmethod
async def calculate_grading_summary(self, extracted_sections: List):
pass
-
- @abstractmethod
- async def _calculate_section_grade_summary(self, section):
- pass
-
- @staticmethod
- @abstractmethod
- def _parse_openai_response(response):
- pass
-
- @staticmethod
- @abstractmethod
- def _parse_bullet_points(bullet_points_str, grade):
- pass
diff --git a/app/services/abc/kb.py b/app/services/abc/kb.py
new file mode 100644
index 0000000..4568c0c
--- /dev/null
+++ b/app/services/abc/kb.py
@@ -0,0 +1,10 @@
+from abc import ABC, abstractmethod
+
+from typing import List, Dict
+
+
+class IKnowledgeBase(ABC):
+
+ @abstractmethod
+ def query_knowledge_base(self, query: str, category: str, top_k: int = 5) -> List[Dict[str, str]]:
+ pass
diff --git a/app/services/abc/level.py b/app/services/abc/level.py
index 127235f..7f7d954 100644
--- a/app/services/abc/level.py
+++ b/app/services/abc/level.py
@@ -1,10 +1,19 @@
from abc import ABC, abstractmethod
+import random
+
+from typing import Dict
+
+from fastapi import UploadFile
+
+from app.configs.constants import EducationalContent
class ILevelService(ABC):
@abstractmethod
- async def get_level_exam(self):
+ async def get_level_exam(
+ self, number_of_exercises: int = 25, min_timer: int = 25, diagnostic: bool = False
+ ) -> Dict:
pass
@abstractmethod
@@ -12,13 +21,27 @@ class ILevelService(ABC):
pass
@abstractmethod
- async def _gen_multiple_choice_level(self, quantity: int, start_id=1):
+ async def get_custom_level(self, data: Dict):
pass
@abstractmethod
- async def _replace_exercise_if_exists(self, all_exams, current_exercise, current_exam, seen_keys):
+ async def upload_level(self, upload: UploadFile) -> Dict:
pass
@abstractmethod
- async def _generate_single_mc_level_question(self):
+ async def gen_multiple_choice(
+ self, mc_variant: str, quantity: int, start_id: int = 1, *, utas: bool = False, all_exams=None
+ ):
+ pass
+
+ @abstractmethod
+ async def gen_blank_space_text_utas(
+ self, quantity: int, start_id: int, size: int, topic=random.choice(EducationalContent.MTI_TOPICS)
+ ):
+ pass
+
+ @abstractmethod
+ async def gen_reading_passage_utas(
+ self, start_id, sa_quantity: int, mc_quantity: int, topic=random.choice(EducationalContent.MTI_TOPICS)
+ ):
pass
diff --git a/app/services/abc/listening.py b/app/services/abc/listening.py
index 3547122..4654fde 100644
--- a/app/services/abc/listening.py
+++ b/app/services/abc/listening.py
@@ -1,68 +1,18 @@
+import queue
from abc import ABC, abstractmethod
from queue import Queue
-from typing import Dict
+from typing import Dict, List
class IListeningService(ABC):
@abstractmethod
- async def generate_listening_question(self, section: int, topic: str) -> Dict:
- pass
-
- @abstractmethod
- async def generate_listening_exercises(
- self, section: int, dialog: str,
- req_exercises: list[str], exercises_queue: Queue,
- start_id: int, difficulty: str
+ async def get_listening_question(
+ self, section_id: int, topic: str, req_exercises: List[str], difficulty: str,
+ number_of_exercises_q=queue.Queue(), start_id=-1
):
pass
@abstractmethod
- async def save_listening(self, parts, min_timer, difficulty):
+ async def save_listening(self, parts: list[dict], min_timer: int, difficulty: str, listening_id: str) -> Dict:
pass
-
- # ==================================================================================================================
- # Helpers
- # ==================================================================================================================
-
- @abstractmethod
- async def _generate_listening_conversation(self, section: int, topic: str) -> Dict:
- pass
-
- @abstractmethod
- async def _generate_listening_monologue(self, section: int, topic: str) -> Dict:
- pass
-
- @abstractmethod
- def _get_conversation_voices(self, response: Dict, unique_voices_across_segments: bool):
- pass
-
- @staticmethod
- @abstractmethod
- def _get_random_voice(gender: str):
- pass
-
- @abstractmethod
- async def _gen_multiple_choice_exercise_listening(
- self, dialog_type: str, text: str, quantity: int, start_id, difficulty
- ):
- pass
-
- @abstractmethod
- async def _gen_write_blanks_questions_exercise_listening(
- self, dialog_type: str, text: str, quantity: int, start_id, difficulty
- ):
- pass
-
- @abstractmethod
- async def _gen_write_blanks_notes_exercise_listening(
- self, dialog_type: str, text: str, quantity: int, start_id, difficulty
- ):
- pass
-
- @abstractmethod
- async def _gen_write_blanks_form_exercise_listening(
- self, dialog_type: str, text: str, quantity: int, start_id, difficulty
- ):
- pass
-
diff --git a/app/services/abc/reading.py b/app/services/abc/reading.py
index 2621b62..70c672a 100644
--- a/app/services/abc/reading.py
+++ b/app/services/abc/reading.py
@@ -2,8 +2,6 @@ from abc import ABC, abstractmethod
from queue import Queue
from typing import List
-from app.configs.constants import QuestionType
-
class IReadingService(ABC):
@@ -14,36 +12,11 @@ class IReadingService(ABC):
topic: str,
req_exercises: List[str],
number_of_exercises_q: Queue,
- difficulty: str
- ):
- pass
-
- # ==================================================================================================================
- # Helpers
- # ==================================================================================================================
-
- @abstractmethod
- async def generate_reading_passage(self, q_type: QuestionType, topic: str):
- pass
-
- @abstractmethod
- async def _generate_reading_exercises(
- self, passage: str, req_exercises: list, number_of_exercises_q, start_id, difficulty
+ difficulty: str,
+ start_id: int
):
pass
@abstractmethod
- async def _gen_summary_fill_blanks_exercise(self, text: str, quantity: int, start_id, difficulty):
- pass
-
- @abstractmethod
- async def _gen_true_false_not_given_exercise(self, text: str, quantity: int, start_id, difficulty):
- pass
-
- @abstractmethod
- async def _gen_write_blanks_exercise(self, text: str, quantity: int, start_id, difficulty):
- pass
-
- @abstractmethod
- async def _gen_paragraph_match_exercise(self, text: str, quantity: int, start_id):
+ async def generate_reading_passage(self, part: int, topic: str, word_count: int = 800):
pass
diff --git a/app/services/abc/speaking.py b/app/services/abc/speaking.py
index 5e19e05..48d0fa8 100644
--- a/app/services/abc/speaking.py
+++ b/app/services/abc/speaking.py
@@ -1,21 +1,17 @@
from abc import ABC, abstractmethod
-from typing import List, Dict
+from typing import List, Dict, Optional
class ISpeakingService(ABC):
@abstractmethod
- async def get_speaking_task(self, task_id: int, topic: str, difficulty: str):
+ async def get_speaking_part(
+ self, part: int, topic: str, difficulty: str, second_topic: Optional[str] = None
+ ) -> Dict:
pass
@abstractmethod
- async def grade_speaking_task_1_and_2(
- self, task: int, question: str, answer_firebase_path: str, sound_file_name: str
- ):
- pass
-
- @abstractmethod
- async def grade_speaking_task_3(self, answers: Dict, task: int = 3):
+ async def grade_speaking_task(self, task: int, answers: List[Dict]) -> Dict:
pass
@abstractmethod
@@ -23,35 +19,11 @@ class ISpeakingService(ABC):
pass
@abstractmethod
- async def generate_speaking_video(self, original_question: str, topic: str, avatar: str, prompts: List[str]):
- pass
-
- @abstractmethod
- async def generate_interactive_video(self, questions: List[str], avatar: str, topic: str):
- pass
-
- # ==================================================================================================================
- # Helpers
- # ==================================================================================================================
-
- @staticmethod
- @abstractmethod
- def _zero_rating(comment: str):
- pass
-
- @staticmethod
- @abstractmethod
- def _calculate_overall(response: Dict):
- pass
-
- @abstractmethod
- async def _get_speaking_corrections(self, text):
- pass
-
- @abstractmethod
- async def _create_video_per_part(self, exercises: List[Dict], template: Dict, part: int):
- pass
-
- @abstractmethod
- async def _create_video(self, question: str, avatar: str, error_message: str):
+ async def generate_video(
+ self, part: int, avatar: str, topic: str, questions: list[str],
+ *,
+ second_topic: Optional[str] = None,
+ prompts: Optional[list[str]] = None,
+ suffix: Optional[str] = None,
+ ):
pass
diff --git a/app/services/abc/third_parties/llm.py b/app/services/abc/third_parties/llm.py
index 03d5550..38ba83c 100644
--- a/app/services/abc/third_parties/llm.py
+++ b/app/services/abc/third_parties/llm.py
@@ -1,6 +1,10 @@
from abc import ABC, abstractmethod
-from typing import List, Optional
+from typing import List, Optional, TypeVar, Callable
+from openai.types.chat import ChatCompletionMessageParam
+from pydantic import BaseModel
+
+T = TypeVar('T', bound=BaseModel)
class ILLMService(ABC):
@@ -19,3 +23,16 @@ class ILLMService(ABC):
@abstractmethod
async def prediction_override(self, **kwargs):
pass
+
+ @abstractmethod
+ async def pydantic_prediction(
+ self,
+ messages: List[ChatCompletionMessageParam],
+ map_to_model: Callable,
+ json_scheme: str,
+ *,
+ model: Optional[str] = None,
+ temperature: Optional[float] = None,
+ max_retries: int = 3
+ ) -> List[T] | T | None:
+ pass
diff --git a/app/services/abc/training.py b/app/services/abc/training.py
index f4719f0..bb62f01 100644
--- a/app/services/abc/training.py
+++ b/app/services/abc/training.py
@@ -1,5 +1,7 @@
from abc import ABC, abstractmethod
+from typing import Dict
+
class ITrainingService(ABC):
@@ -7,7 +9,6 @@ class ITrainingService(ABC):
async def fetch_tips(self, context: str, question: str, answer: str, correct_answer: str):
pass
- @staticmethod
@abstractmethod
- def _get_question_tips(question: str, answer: str, correct_answer: str, context: str = None):
+ async def get_training_content(self, training_content: Dict) -> Dict:
pass
diff --git a/app/services/abc/writing.py b/app/services/abc/writing.py
index c2dcce1..a59d442 100644
--- a/app/services/abc/writing.py
+++ b/app/services/abc/writing.py
@@ -1,6 +1,4 @@
from abc import ABC, abstractmethod
-from typing import Dict
-
class IWritingService(ABC):
@@ -11,22 +9,3 @@ class IWritingService(ABC):
@abstractmethod
async def grade_writing_task(self, task: int, question: str, answer: str):
pass
-
- # ==================================================================================================================
- # Helpers
- # ==================================================================================================================
-
- @staticmethod
- @abstractmethod
- def _get_writing_prompt(task: int, topic: str, difficulty: str):
- pass
-
- @staticmethod
- @abstractmethod
- async def _get_fixed_text(self, text):
- pass
-
- @staticmethod
- @abstractmethod
- def _zero_rating(comment: str):
- pass
diff --git a/app/services/impl/__init__.py b/app/services/impl/__init__.py
index bf36cee..f0c65cb 100644
--- a/app/services/impl/__init__.py
+++ b/app/services/impl/__init__.py
@@ -4,7 +4,7 @@ from .reading import ReadingService
from .speaking import SpeakingService
from .writing import WritingService
from .grade import GradeService
-from .training import TrainingService
+from .training import *
from .third_parties import *
__all__ = [
@@ -14,6 +14,6 @@ __all__ = [
"SpeakingService",
"WritingService",
"GradeService",
- "TrainingService"
]
__all__.extend(third_parties.__all__)
+__all__.extend(training.__all__)
diff --git a/app/services/impl/grade.py b/app/services/impl/grade.py
index 723987b..f3792a2 100644
--- a/app/services/impl/grade.py
+++ b/app/services/impl/grade.py
@@ -1,42 +1,47 @@
import json
-from typing import List
-import copy
+from typing import List, Dict
+from app.configs.constants import GPTModels, TemperatureSettings
from app.services.abc import ILLMService, IGradeService
class GradeService(IGradeService):
- chat_config = {'max_tokens': 1000, 'temperature': 0.2}
- tools = [{
- "type": "function",
- "function": {
- "name": "save_evaluation_and_suggestions",
- "description": "Saves the evaluation and suggestions requested by input.",
- "parameters": {
- "type": "object",
- "properties": {
- "evaluation": {
- "type": "string",
- "description": "A comment on the IELTS section grade obtained in the specific section and what it could mean without suggestions.",
- },
- "suggestions": {
- "type": "string",
- "description": "A small paragraph text with suggestions on how to possibly get a better grade than the one obtained.",
- },
- "bullet_points": {
- "type": "string",
- "description": "Text with four bullet points to improve the english speaking ability. Only include text for the bullet points separated by a paragraph. ",
- },
- },
- "required": ["evaluation", "suggestions"],
- },
- }
- }]
-
def __init__(self, llm: ILLMService):
self._llm = llm
+ async def grade_short_answers(self, data: Dict):
+ json_format = {
+ "exercises": [
+ {
+ "id": 1,
+ "correct": True,
+ "correct_answer": " correct answer if wrong"
+ }
+ ]
+ }
+
+ messages = [
+ {
+ "role": "system",
+ "content": f'You are a helpful assistant designed to output JSON on this format: {json_format}'
+ },
+ {
+ "role": "user",
+ "content": (
+ 'Grade these answers according to the text content and write a correct answer if they are '
+ f'wrong. Text, questions and answers:\n {data}'
+ )
+ }
+ ]
+
+ return await self._llm.prediction(
+ GPTModels.GPT_4_O,
+ messages,
+ ["exercises"],
+ TemperatureSettings.GEN_QUESTION_TEMPERATURE
+ )
+
async def calculate_grading_summary(self, extracted_sections: List):
ret = []
@@ -116,8 +121,8 @@ class GradeService(IGradeService):
)
}]
- chat_config = copy.deepcopy(self.chat_config)
- tools = copy.deepcopy(self.tools)
+ chat_config = {'max_tokens': 1000, 'temperature': 0.2}
+ tools = self.get_tools()
res = await self._llm.prediction_override(
model="gpt-3.5-turbo",
@@ -154,3 +159,42 @@ class GradeService(IGradeService):
return [line + '.' if line and not line.endswith('.') else line for line in cleaned_lines]
else:
return []
+
+ @staticmethod
+ def get_tools():
+ return [
+ {
+ "type": "function",
+ "function": {
+ "name": "save_evaluation_and_suggestions",
+ "description": "Saves the evaluation and suggestions requested by input.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "evaluation": {
+ "type": "string",
+ "description": (
+ "A comment on the IELTS section grade obtained in the specific section and what "
+ "it could mean without suggestions."
+ ),
+ },
+ "suggestions": {
+ "type": "string",
+ "description": (
+ "A small paragraph text with suggestions on how to possibly get a better grade "
+ "than the one obtained."
+ ),
+ },
+ "bullet_points": {
+ "type": "string",
+ "description": (
+ "Text with four bullet points to improve the english speaking ability. Only "
+ "include text for the bullet points separated by a paragraph."
+ ),
+ },
+ },
+ "required": ["evaluation", "suggestions"],
+ },
+ }
+ }
+ ]
diff --git a/app/services/impl/level.py b/app/services/impl/level.py
deleted file mode 100644
index 40f651a..0000000
--- a/app/services/impl/level.py
+++ /dev/null
@@ -1,506 +0,0 @@
-import json
-import random
-import uuid
-
-from app.configs.constants import GPTModels, TemperatureSettings, EducationalContent, QuestionType
-from app.helpers import ExercisesHelper
-from app.repositories.abc import IDocumentStore
-from app.services.abc import ILevelService, ILLMService, IReadingService
-
-
-class LevelService(ILevelService):
-
- def __init__(
- self, llm: ILLMService, document_store: IDocumentStore, reading_service: IReadingService
- ):
- self._llm = llm
- self._document_store = document_store
- self._reading_service = reading_service
-
- async def get_level_exam(self):
- number_of_exercises = 25
- exercises = await self._gen_multiple_choice_level(number_of_exercises)
- return {
- "exercises": [exercises],
- "isDiagnostic": False,
- "minTimer": 25,
- "module": "level"
- }
-
- async def _gen_multiple_choice_level(self, quantity: int, start_id=1):
- gen_multiple_choice_for_text = (
- f'Generate {str(quantity)} multiple choice questions of 4 options for an english level exam, some easy '
- 'questions, some intermediate questions and some advanced questions. Ensure that the questions cover '
- 'a range of topics such as verb tense, subject-verb agreement, pronoun usage, sentence structure, and '
- 'punctuation. Make sure every question only has 1 correct answer.'
- )
-
- messages = [
- {
- "role": "system",
- "content": (
- 'You are a helpful assistant designed to output JSON on this format: '
- '{"questions": [{"id": "9", "options": '
- '[{"id": "A", "text": "And"}, {"id": "B", "text": "Cat"}, '
- '{"id": "C", "text": "Happy"}, {"id": "D", "text": "Jump"}], '
- '"prompt": "Which of the following is a conjunction?", '
- '"solution": "A", "variant": "text"}]}'
- )
- },
- {
- "role": "user",
- "content": gen_multiple_choice_for_text
- }
- ]
-
- question = await self._llm.prediction(
- GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
- )
-
- if len(question["questions"]) != quantity:
- return await self._gen_multiple_choice_level(quantity, start_id)
- else:
- all_exams = await self._document_store.get_all("level")
- seen_keys = set()
- for i in range(len(question["questions"])):
- question["questions"][i], seen_keys = await self._replace_exercise_if_exists(
- all_exams, question["questions"][i], question, seen_keys
- )
- return {
- "id": str(uuid.uuid4()),
- "prompt": "Select the appropriate option.",
- "questions": ExercisesHelper.fix_exercise_ids(question, start_id)["questions"],
- "type": "multipleChoice",
- }
-
- async def _replace_exercise_if_exists(self, all_exams, current_exercise, current_exam, seen_keys):
- # Extracting relevant fields for comparison
- key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options'])))
- # Check if the key is in the set
- if key in seen_keys:
- return await self._replace_exercise_if_exists(
- all_exams, await self._generate_single_mc_level_question(), current_exam, seen_keys
- )
- else:
- seen_keys.add(key)
-
- for exam in all_exams:
- exam_dict = exam.to_dict()
- if any(
- exercise["prompt"] == current_exercise["prompt"] and
- any(exercise["options"][0]["text"] == current_option["text"] for current_option in
- current_exercise["options"])
- for exercise in exam_dict.get("exercises", [])[0]["questions"]
- ):
- return await self._replace_exercise_if_exists(
- all_exams, await self._generate_single_mc_level_question(), current_exam, seen_keys
- )
- return current_exercise, seen_keys
-
- async def _generate_single_mc_level_question(self):
- messages = [
- {
- "role": "system",
- "content": (
- 'You are a helpful assistant designed to output JSON on this format: '
- '{"id": "9", "options": [{"id": "A", "text": "And"}, {"id": "B", "text": "Cat"}, '
- '{"id": "C", "text": "Happy"}, {"id": "D", "text": "Jump"}], '
- '"prompt": "Which of the following is a conjunction?", '
- '"solution": "A", "variant": "text"}'
- )
- },
- {
- "role": "user",
- "content": (
- 'Generate 1 multiple choice question of 4 options for an english level exam, it can be easy, '
- 'intermediate or advanced.'
- )
-
- }
- ]
-
- question = await self._llm.prediction(
- GPTModels.GPT_4_O, messages, ["options"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
- )
-
- return question
-
- async def get_level_utas(self):
- # Formats
- mc = {
- "id": str(uuid.uuid4()),
- "prompt": "Choose the correct word or group of words that completes the sentences.",
- "questions": None,
- "type": "multipleChoice",
- "part": 1
- }
-
- umc = {
- "id": str(uuid.uuid4()),
- "prompt": "Choose the underlined word or group of words that is not correct.",
- "questions": None,
- "type": "multipleChoice",
- "part": 2
- }
-
- bs_1 = {
- "id": str(uuid.uuid4()),
- "prompt": "Read the text and write the correct word for each space.",
- "questions": None,
- "type": "blankSpaceText",
- "part": 3
- }
-
- bs_2 = {
- "id": str(uuid.uuid4()),
- "prompt": "Read the text and write the correct word for each space.",
- "questions": None,
- "type": "blankSpaceText",
- "part": 4
- }
-
- reading = {
- "id": str(uuid.uuid4()),
- "prompt": "Read the text and answer the questions below.",
- "questions": None,
- "type": "readingExercises",
- "part": 5
- }
-
- all_mc_questions = []
-
- # PART 1
- mc_exercises1 = await self._gen_multiple_choice_blank_space_utas(15, 1, all_mc_questions)
- print(json.dumps(mc_exercises1, indent=4))
- all_mc_questions.append(mc_exercises1)
-
- # PART 2
- mc_exercises2 = await self._gen_multiple_choice_blank_space_utas(15, 16, all_mc_questions)
- print(json.dumps(mc_exercises2, indent=4))
- all_mc_questions.append(mc_exercises2)
-
- # PART 3
- mc_exercises3 = await self._gen_multiple_choice_blank_space_utas(15, 31, all_mc_questions)
- print(json.dumps(mc_exercises3, indent=4))
- all_mc_questions.append(mc_exercises3)
-
- mc_exercises = mc_exercises1['questions'] + mc_exercises2['questions'] + mc_exercises3['questions']
- print(json.dumps(mc_exercises, indent=4))
- mc["questions"] = mc_exercises
-
- # Underlined mc
- underlined_mc = await self._gen_multiple_choice_underlined_utas(15, 46)
- print(json.dumps(underlined_mc, indent=4))
- umc["questions"] = underlined_mc
-
- # Blank Space text 1
- blank_space_text_1 = await self._gen_blank_space_text_utas(12, 61, 250)
- print(json.dumps(blank_space_text_1, indent=4))
- bs_1["questions"] = blank_space_text_1
-
- # Blank Space text 2
- blank_space_text_2 = await self._gen_blank_space_text_utas(14, 73, 350)
- print(json.dumps(blank_space_text_2, indent=4))
- bs_2["questions"] = blank_space_text_2
-
- # Reading text
- reading_text = await self._gen_reading_passage_utas(87, 10, 4)
- print(json.dumps(reading_text, indent=4))
- reading["questions"] = reading_text
-
- return {
- "exercises": {
- "blankSpaceMultipleChoice": mc,
- "underlinedMultipleChoice": umc,
- "blankSpaceText1": bs_1,
- "blankSpaceText2": bs_2,
- "readingExercises": reading,
- },
- "isDiagnostic": False,
- "minTimer": 25,
- "module": "level"
- }
-
- async def _gen_multiple_choice_blank_space_utas(self, quantity: int, start_id: int, all_exams):
- gen_multiple_choice_for_text = (
- f'Generate {str(quantity)} multiple choice blank space questions of 4 options for an english '
- 'level exam, some easy questions, some intermediate questions and some advanced questions. Ensure '
- 'that the questions cover a range of topics such as verb tense, subject-verb agreement, pronoun usage, '
- 'sentence structure, and punctuation. Make sure every question only has 1 correct answer.'
- )
-
- messages = [
- {
- "role": "system",
- "content": (
- 'You are a helpful assistant designed to output JSON on this format: '
- '{"questions": [{"id": "9", "options": [{"id": "A", "text": '
- '"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
- '"Happy"}, {"id": "D", "text": "Jump"}], '
- '"prompt": "Which of the following is a conjunction?", '
- '"solution": "A", "variant": "text"}]}')
- },
- {
- "role": "user",
- "content": gen_multiple_choice_for_text
- }
- ]
-
- question = await self._llm.prediction(
- GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
- )
-
- if len(question["questions"]) != quantity:
- return await self._gen_multiple_choice_level(quantity, start_id)
- else:
- seen_keys = set()
- for i in range(len(question["questions"])):
- question["questions"][i], seen_keys = await self._replace_exercise_if_exists_utas(
- all_exams,
- question["questions"][i],
- question,
- seen_keys
- )
- return ExercisesHelper.fix_exercise_ids(question, start_id)
-
- async def _replace_exercise_if_exists_utas(self, all_exams, current_exercise, current_exam, seen_keys):
- # Extracting relevant fields for comparison
- key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options'])))
- # Check if the key is in the set
- if key in seen_keys:
- return self._replace_exercise_if_exists_utas(
- all_exams, await self._generate_single_mc_level_question(), current_exam, seen_keys
- )
- else:
- seen_keys.add(key)
-
- for exam in all_exams:
- if any(
- exercise["prompt"] == current_exercise["prompt"] and
- any(exercise["options"][0]["text"] == current_option["text"] for current_option in
- current_exercise["options"])
- for exercise in exam.get("questions", [])
- ):
- return self._replace_exercise_if_exists_utas(
- all_exams, await self._generate_single_mc_level_question(), current_exam, seen_keys
- )
- return current_exercise, seen_keys
-
-
- async def _gen_multiple_choice_underlined_utas(self, quantity: int, start_id: int):
- json_format = {
- "questions": [
- {
- "id": "9",
- "options": [
- {
- "id": "A",
- "text": "a"
- },
- {
- "id": "B",
- "text": "b"
- },
- {
- "id": "C",
- "text": "c"
- },
- {
- "id": "D",
- "text": "d"
- }
- ],
- "prompt": "prompt",
- "solution": "A",
- "variant": "text"
- }
- ]
- }
-
- gen_multiple_choice_for_text = (
- f'Generate {str(quantity)} multiple choice questions of 4 options for an english '
- 'level exam, some easy questions, some intermediate questions and some advanced questions. Ensure that '
- 'the questions cover a range of topics such as verb tense, subject-verb agreement, pronoun usage, '
- 'sentence structure, and punctuation. Make sure every question only has 1 correct answer.'
- )
-
- messages = [
- {
- "role": "system",
- "content": 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format)
- },
- {
- "role": "user",
- "content": gen_multiple_choice_for_text
- },
- {
- "role": "user",
- "content": (
- 'The type of multiple choice is the prompt has wrong words or group of words and the options '
- 'are to find the wrong word or group of words that are underlined in the prompt. \nExample:\n'
- 'Prompt: "I complain about my boss all the time, but my colleagues thinks '
- 'the boss is nice."\nOptions:\na: "complain"\nb: "all the time"\nc: "thinks"\nd: "is"'
- )
- }
- ]
-
- question = await self._llm.prediction(
- GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
- )
-
- if len(question["questions"]) != quantity:
- return await self._gen_multiple_choice_level(quantity, start_id)
- else:
- return ExercisesHelper.fix_exercise_ids(question, start_id)["questions"]
-
- async def _gen_blank_space_text_utas(
- self, quantity: int, start_id: int, size: int, topic=random.choice(EducationalContent.MTI_TOPICS)
- ):
- json_format = {
- "question": {
- "words": [
- {
- "id": "1",
- "text": "a"
- },
- {
- "id": "2",
- "text": "b"
- },
- {
- "id": "3",
- "text": "c"
- },
- {
- "id": "4",
- "text": "d"
- }
- ],
- "text": "text"
- }
- }
-
- messages = [
- {
- "role": "system",
- "content": 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format)
- },
- {
- "role": "user",
- "content": f'Generate a text of at least {str(size)} words about the topic {topic}.'
- },
- {
- "role": "user",
- "content": (
- f'From the generated text choose {str(quantity)} words (cannot be sequential words) to replace '
- 'once with {{id}} where id starts on ' + str(start_id) + ' and is incremented for each word. '
- 'The ids must be ordered throughout the text and the words must be replaced only once. Put '
- 'the removed words and respective ids on the words array of the json in the correct order.'
- )
- }
- ]
-
- question = await self._llm.prediction(
- GPTModels.GPT_4_O, messages, ["question"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
- )
-
- return question["question"]
-
- async def _gen_reading_passage_utas(
- self, start_id, sa_quantity: int, mc_quantity: int, topic=random.choice(EducationalContent.MTI_TOPICS)
- ):
-
- passage = await self._reading_service.generate_reading_passage(QuestionType.READING_PASSAGE_1, topic)
- short_answer = await self._gen_short_answer_utas(passage["text"], start_id, sa_quantity)
- mc_exercises = await self._gen_text_multiple_choice_utas(passage["text"], start_id + sa_quantity, mc_quantity)
- return {
- "exercises": {
- "shortAnswer": short_answer,
- "multipleChoice": mc_exercises,
- },
- "text": {
- "content": passage["text"],
- "title": passage["title"]
- }
- }
-
- async def _gen_short_answer_utas(self, text: str, start_id: int, sa_quantity: int):
- json_format = {"questions": [{"id": 1, "question": "question", "possible_answers": ["answer_1", "answer_2"]}]}
-
- messages = [
- {
- "role": "system",
- "content": 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format)
- },
- {
- "role": "user",
- "content": (
- 'Generate ' + str(sa_quantity) + ' short answer questions, and the possible answers, must have '
- 'maximum 3 words per answer, about this text:\n"' + text + '"')
- },
- {
- "role": "user",
- "content": 'The id starts at ' + str(start_id) + '.'
- }
- ]
-
- return (
- await self._llm.prediction(
- GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
- )
- )["questions"]
-
- async def _gen_text_multiple_choice_utas(self, text: str, start_id: int, mc_quantity: int):
- json_format = {
- "questions": [
- {
- "id": "9",
- "options": [
- {
- "id": "A",
- "text": "a"
- },
- {
- "id": "B",
- "text": "b"
- },
- {
- "id": "C",
- "text": "c"
- },
- {
- "id": "D",
- "text": "d"
- }
- ],
- "prompt": "prompt",
- "solution": "A",
- "variant": "text"
- }
- ]
- }
-
- messages = [
- {
- "role": "system",
- "content": 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format)
- },
- {
- "role": "user",
- "content": 'Generate ' + str(
- mc_quantity) + ' multiple choice questions of 4 options for this text:\n' + text
- },
- {
- "role": "user",
- "content": 'Make sure every question only has 1 correct answer.'
- }
- ]
-
- question = await self._llm.prediction(
- GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
- )
-
- if len(question["questions"]) != mc_quantity:
- return await self._gen_multiple_choice_level(mc_quantity, start_id)
- else:
- return ExercisesHelper.fix_exercise_ids(question, start_id)["questions"]
diff --git a/app/services/impl/level/__init__.py b/app/services/impl/level/__init__.py
new file mode 100644
index 0000000..584a03d
--- /dev/null
+++ b/app/services/impl/level/__init__.py
@@ -0,0 +1,5 @@
+from .level import LevelService
+
+__all__ = [
+ "LevelService"
+]
\ No newline at end of file
diff --git a/app/services/impl/level/custom.py b/app/services/impl/level/custom.py
new file mode 100644
index 0000000..09ba991
--- /dev/null
+++ b/app/services/impl/level/custom.py
@@ -0,0 +1,335 @@
+import queue
+import random
+
+from typing import Dict
+
+from app.configs.constants import CustomLevelExerciseTypes, EducationalContent
+from app.services.abc import (
+ ILLMService, ILevelService, IReadingService,
+ IWritingService, IListeningService, ISpeakingService
+)
+
+
+class CustomLevelModule:
+
+ def __init__(
+ self,
+ llm: ILLMService,
+ level: ILevelService,
+ reading: IReadingService,
+ listening: IListeningService,
+ writing: IWritingService,
+ speaking: ISpeakingService
+ ):
+ self._llm = llm
+ self._level = level
+ self._reading = reading
+ self._listening = listening
+ self._writing = writing
+ self._speaking = speaking
+
+ # TODO: I've changed this to retrieve the args from the body request and not request query args
+ async def get_custom_level(self, data: Dict):
+ nr_exercises = int(data.get('nr_exercises'))
+
+ exercise_id = 1
+ response = {
+ "exercises": {},
+ "module": "level"
+ }
+ for i in range(1, nr_exercises + 1, 1):
+ exercise_type = data.get(f'exercise_{i}_type')
+ exercise_difficulty = data.get(f'exercise_{i}_difficulty', random.choice(['easy', 'medium', 'hard']))
+ exercise_qty = int(data.get(f'exercise_{i}_qty', -1))
+ exercise_topic = data.get(f'exercise_{i}_topic', random.choice(EducationalContent.TOPICS))
+ exercise_topic_2 = data.get(f'exercise_{i}_topic_2', random.choice(EducationalContent.TOPICS))
+ exercise_text_size = int(data.get(f'exercise_{i}_text_size', 700))
+ exercise_sa_qty = int(data.get(f'exercise_{i}_sa_qty', -1))
+ exercise_mc_qty = int(data.get(f'exercise_{i}_mc_qty', -1))
+ exercise_mc3_qty = int(data.get(f'exercise_{i}_mc3_qty', -1))
+ exercise_fillblanks_qty = int(data.get(f'exercise_{i}_fillblanks_qty', -1))
+ exercise_writeblanks_qty = int(data.get(f'exercise_{i}_writeblanks_qty', -1))
+ exercise_writeblanksquestions_qty = int(data.get(f'exercise_{i}_writeblanksquestions_qty', -1))
+ exercise_writeblanksfill_qty = int(data.get(f'exercise_{i}_writeblanksfill_qty', -1))
+ exercise_writeblanksform_qty = int(data.get(f'exercise_{i}_writeblanksform_qty', -1))
+ exercise_truefalse_qty = int(data.get(f'exercise_{i}_truefalse_qty', -1))
+ exercise_paragraphmatch_qty = int(data.get(f'exercise_{i}_paragraphmatch_qty', -1))
+ exercise_ideamatch_qty = int(data.get(f'exercise_{i}_ideamatch_qty', -1))
+
+ if exercise_type == CustomLevelExerciseTypes.MULTIPLE_CHOICE_4.value:
+ response["exercises"][f"exercise_{i}"] = {}
+ response["exercises"][f"exercise_{i}"]["questions"] = []
+ response["exercises"][f"exercise_{i}"]["type"] = "multipleChoice"
+ while exercise_qty > 0:
+ if exercise_qty - 15 > 0:
+ qty = 15
+ else:
+ qty = exercise_qty
+
+ mc_response = await self._level.gen_multiple_choice(
+ "normal", qty, exercise_id, utas=True,
+ all_exams=response["exercises"][f"exercise_{i}"]["questions"]
+ )
+ response["exercises"][f"exercise_{i}"]["questions"].extend(mc_response["questions"])
+ exercise_id = exercise_id + qty
+ exercise_qty = exercise_qty - qty
+
+ elif exercise_type == CustomLevelExerciseTypes.MULTIPLE_CHOICE_BLANK_SPACE.value:
+ response["exercises"][f"exercise_{i}"] = {}
+ response["exercises"][f"exercise_{i}"]["questions"] = []
+ response["exercises"][f"exercise_{i}"]["type"] = "multipleChoice"
+ while exercise_qty > 0:
+ if exercise_qty - 15 > 0:
+ qty = 15
+ else:
+ qty = exercise_qty
+
+ mc_response = await self._level.gen_multiple_choice(
+ "blank_space", qty, exercise_id, utas=True,
+ all_exams=response["exercises"][f"exercise_{i}"]["questions"]
+ )
+ response["exercises"][f"exercise_{i}"]["questions"].extend(mc_response["questions"])
+
+ exercise_id = exercise_id + qty
+ exercise_qty = exercise_qty - qty
+
+ elif exercise_type == CustomLevelExerciseTypes.MULTIPLE_CHOICE_UNDERLINED.value:
+ response["exercises"][f"exercise_{i}"] = {}
+ response["exercises"][f"exercise_{i}"]["questions"] = []
+ response["exercises"][f"exercise_{i}"]["type"] = "multipleChoice"
+ while exercise_qty > 0:
+ if exercise_qty - 15 > 0:
+ qty = 15
+ else:
+ qty = exercise_qty
+
+ mc_response = await self._level.gen_multiple_choice(
+ "underline", qty, exercise_id, utas=True,
+ all_exams=response["exercises"][f"exercise_{i}"]["questions"]
+ )
+ response["exercises"][f"exercise_{i}"]["questions"].extend(mc_response["questions"])
+
+ exercise_id = exercise_id + qty
+ exercise_qty = exercise_qty - qty
+
+ elif exercise_type == CustomLevelExerciseTypes.BLANK_SPACE_TEXT.value:
+ response["exercises"][f"exercise_{i}"] = await self._level.gen_blank_space_text_utas(
+ exercise_qty, exercise_id, exercise_text_size
+ )
+ response["exercises"][f"exercise_{i}"]["type"] = "blankSpaceText"
+ exercise_id = exercise_id + exercise_qty
+ elif exercise_type == CustomLevelExerciseTypes.READING_PASSAGE_UTAS.value:
+ response["exercises"][f"exercise_{i}"] = await self._level.gen_reading_passage_utas(
+ exercise_id, exercise_sa_qty, exercise_mc_qty, exercise_topic
+ )
+ response["exercises"][f"exercise_{i}"]["type"] = "readingExercises"
+                exercise_id = exercise_id + exercise_sa_qty + exercise_mc_qty
+ elif exercise_type == CustomLevelExerciseTypes.WRITING_LETTER.value:
+ response["exercises"][f"exercise_{i}"] = await self._writing.get_writing_task_general_question(
+ 1, exercise_topic, exercise_difficulty
+ )
+ response["exercises"][f"exercise_{i}"]["type"] = "writing"
+ exercise_id = exercise_id + 1
+ elif exercise_type == CustomLevelExerciseTypes.WRITING_2.value:
+ response["exercises"][f"exercise_{i}"] = await self._writing.get_writing_task_general_question(
+ 2, exercise_topic, exercise_difficulty
+ )
+ response["exercises"][f"exercise_{i}"]["type"] = "writing"
+ exercise_id = exercise_id + 1
+ elif exercise_type == CustomLevelExerciseTypes.SPEAKING_1.value:
+ response["exercises"][f"exercise_{i}"] = await self._speaking.get_speaking_part(
+ 1, exercise_topic, exercise_difficulty, exercise_topic_2
+ )
+ response["exercises"][f"exercise_{i}"]["type"] = "interactiveSpeaking"
+ exercise_id = exercise_id + 1
+ elif exercise_type == CustomLevelExerciseTypes.SPEAKING_2.value:
+ response["exercises"][f"exercise_{i}"] = await self._speaking.get_speaking_part(
+ 2, exercise_topic, exercise_difficulty
+ )
+ response["exercises"][f"exercise_{i}"]["type"] = "speaking"
+ exercise_id = exercise_id + 1
+ elif exercise_type == CustomLevelExerciseTypes.SPEAKING_3.value:
+ response["exercises"][f"exercise_{i}"] = await self._speaking.get_speaking_part(
+ 3, exercise_topic, exercise_difficulty
+ )
+ response["exercises"][f"exercise_{i}"]["type"] = "interactiveSpeaking"
+ exercise_id = exercise_id + 1
+ elif exercise_type == CustomLevelExerciseTypes.READING_1.value:
+ exercises = []
+ exercise_qty_q = queue.Queue()
+ total_qty = 0
+ if exercise_fillblanks_qty != -1:
+ exercises.append('fillBlanks')
+ exercise_qty_q.put(exercise_fillblanks_qty)
+ total_qty = total_qty + exercise_fillblanks_qty
+ if exercise_writeblanks_qty != -1:
+ exercises.append('writeBlanks')
+ exercise_qty_q.put(exercise_writeblanks_qty)
+ total_qty = total_qty + exercise_writeblanks_qty
+ if exercise_truefalse_qty != -1:
+ exercises.append('trueFalse')
+ exercise_qty_q.put(exercise_truefalse_qty)
+ total_qty = total_qty + exercise_truefalse_qty
+ if exercise_paragraphmatch_qty != -1:
+ exercises.append('paragraphMatch')
+ exercise_qty_q.put(exercise_paragraphmatch_qty)
+ total_qty = total_qty + exercise_paragraphmatch_qty
+
+ response["exercises"][f"exercise_{i}"] = await self._reading.gen_reading_passage(
+ 1, exercise_topic, exercises, exercise_qty_q, exercise_difficulty, exercise_id
+ )
+ response["exercises"][f"exercise_{i}"]["type"] = "reading"
+
+ exercise_id = exercise_id + total_qty
+ elif exercise_type == CustomLevelExerciseTypes.READING_2.value:
+ exercises = []
+ exercise_qty_q = queue.Queue()
+ total_qty = 0
+ if exercise_fillblanks_qty != -1:
+ exercises.append('fillBlanks')
+ exercise_qty_q.put(exercise_fillblanks_qty)
+ total_qty = total_qty + exercise_fillblanks_qty
+ if exercise_writeblanks_qty != -1:
+ exercises.append('writeBlanks')
+ exercise_qty_q.put(exercise_writeblanks_qty)
+ total_qty = total_qty + exercise_writeblanks_qty
+ if exercise_truefalse_qty != -1:
+ exercises.append('trueFalse')
+ exercise_qty_q.put(exercise_truefalse_qty)
+ total_qty = total_qty + exercise_truefalse_qty
+ if exercise_paragraphmatch_qty != -1:
+ exercises.append('paragraphMatch')
+ exercise_qty_q.put(exercise_paragraphmatch_qty)
+ total_qty = total_qty + exercise_paragraphmatch_qty
+
+ response["exercises"][f"exercise_{i}"] = await self._reading.gen_reading_passage(
+ 2, exercise_topic, exercises, exercise_qty_q, exercise_difficulty, exercise_id
+ )
+ response["exercises"][f"exercise_{i}"]["type"] = "reading"
+
+ exercise_id = exercise_id + total_qty
+ elif exercise_type == CustomLevelExerciseTypes.READING_3.value:
+ exercises = []
+ exercise_qty_q = queue.Queue()
+ total_qty = 0
+ if exercise_fillblanks_qty != -1:
+ exercises.append('fillBlanks')
+ exercise_qty_q.put(exercise_fillblanks_qty)
+ total_qty = total_qty + exercise_fillblanks_qty
+ if exercise_writeblanks_qty != -1:
+ exercises.append('writeBlanks')
+ exercise_qty_q.put(exercise_writeblanks_qty)
+ total_qty = total_qty + exercise_writeblanks_qty
+ if exercise_truefalse_qty != -1:
+ exercises.append('trueFalse')
+ exercise_qty_q.put(exercise_truefalse_qty)
+ total_qty = total_qty + exercise_truefalse_qty
+ if exercise_paragraphmatch_qty != -1:
+ exercises.append('paragraphMatch')
+ exercise_qty_q.put(exercise_paragraphmatch_qty)
+ total_qty = total_qty + exercise_paragraphmatch_qty
+ if exercise_ideamatch_qty != -1:
+ exercises.append('ideaMatch')
+ exercise_qty_q.put(exercise_ideamatch_qty)
+ total_qty = total_qty + exercise_ideamatch_qty
+
+ response["exercises"][f"exercise_{i}"] = await self._reading.gen_reading_passage(
+                    3, exercise_topic, exercises, exercise_qty_q, exercise_difficulty, exercise_id
+ )
+ response["exercises"][f"exercise_{i}"]["type"] = "reading"
+
+ exercise_id = exercise_id + total_qty
+ elif exercise_type == CustomLevelExerciseTypes.LISTENING_1.value:
+ exercises = []
+ exercise_qty_q = queue.Queue()
+ total_qty = 0
+ if exercise_mc_qty != -1:
+ exercises.append('multipleChoice')
+ exercise_qty_q.put(exercise_mc_qty)
+ total_qty = total_qty + exercise_mc_qty
+ if exercise_writeblanksquestions_qty != -1:
+ exercises.append('writeBlanksQuestions')
+ exercise_qty_q.put(exercise_writeblanksquestions_qty)
+ total_qty = total_qty + exercise_writeblanksquestions_qty
+ if exercise_writeblanksfill_qty != -1:
+ exercises.append('writeBlanksFill')
+ exercise_qty_q.put(exercise_writeblanksfill_qty)
+ total_qty = total_qty + exercise_writeblanksfill_qty
+ if exercise_writeblanksform_qty != -1:
+ exercises.append('writeBlanksForm')
+ exercise_qty_q.put(exercise_writeblanksform_qty)
+ total_qty = total_qty + exercise_writeblanksform_qty
+
+ response["exercises"][f"exercise_{i}"] = await self._listening.get_listening_question(
+ 1, exercise_topic, exercises, exercise_difficulty, exercise_qty_q, exercise_id
+ )
+ response["exercises"][f"exercise_{i}"]["type"] = "listening"
+
+ exercise_id = exercise_id + total_qty
+ elif exercise_type == CustomLevelExerciseTypes.LISTENING_2.value:
+ exercises = []
+ exercise_qty_q = queue.Queue()
+ total_qty = 0
+ if exercise_mc_qty != -1:
+ exercises.append('multipleChoice')
+ exercise_qty_q.put(exercise_mc_qty)
+ total_qty = total_qty + exercise_mc_qty
+ if exercise_writeblanksquestions_qty != -1:
+ exercises.append('writeBlanksQuestions')
+ exercise_qty_q.put(exercise_writeblanksquestions_qty)
+ total_qty = total_qty + exercise_writeblanksquestions_qty
+
+ response["exercises"][f"exercise_{i}"] = await self._listening.get_listening_question(
+ 2, exercise_topic, exercises, exercise_difficulty, exercise_qty_q, exercise_id
+ )
+ response["exercises"][f"exercise_{i}"]["type"] = "listening"
+
+ exercise_id = exercise_id + total_qty
+ elif exercise_type == CustomLevelExerciseTypes.LISTENING_3.value:
+ exercises = []
+ exercise_qty_q = queue.Queue()
+ total_qty = 0
+ if exercise_mc3_qty != -1:
+ exercises.append('multipleChoice3Options')
+ exercise_qty_q.put(exercise_mc3_qty)
+ total_qty = total_qty + exercise_mc3_qty
+ if exercise_writeblanksquestions_qty != -1:
+ exercises.append('writeBlanksQuestions')
+ exercise_qty_q.put(exercise_writeblanksquestions_qty)
+ total_qty = total_qty + exercise_writeblanksquestions_qty
+
+ response["exercises"][f"exercise_{i}"] = await self._listening.get_listening_question(
+ 3, exercise_topic, exercises, exercise_difficulty, exercise_qty_q, exercise_id
+ )
+ response["exercises"][f"exercise_{i}"]["type"] = "listening"
+
+ exercise_id = exercise_id + total_qty
+ elif exercise_type == CustomLevelExerciseTypes.LISTENING_4.value:
+ exercises = []
+ exercise_qty_q = queue.Queue()
+ total_qty = 0
+ if exercise_mc_qty != -1:
+ exercises.append('multipleChoice')
+ exercise_qty_q.put(exercise_mc_qty)
+ total_qty = total_qty + exercise_mc_qty
+ if exercise_writeblanksquestions_qty != -1:
+ exercises.append('writeBlanksQuestions')
+ exercise_qty_q.put(exercise_writeblanksquestions_qty)
+ total_qty = total_qty + exercise_writeblanksquestions_qty
+ if exercise_writeblanksfill_qty != -1:
+ exercises.append('writeBlanksFill')
+ exercise_qty_q.put(exercise_writeblanksfill_qty)
+ total_qty = total_qty + exercise_writeblanksfill_qty
+ if exercise_writeblanksform_qty != -1:
+ exercises.append('writeBlanksForm')
+ exercise_qty_q.put(exercise_writeblanksform_qty)
+ total_qty = total_qty + exercise_writeblanksform_qty
+
+ response["exercises"][f"exercise_{i}"] = await self._listening.get_listening_question(
+ 4, exercise_topic, exercises, exercise_difficulty, exercise_qty_q, exercise_id
+ )
+ response["exercises"][f"exercise_{i}"]["type"] = "listening"
+
+ exercise_id = exercise_id + total_qty
+
+ return response
diff --git a/app/services/impl/level/level.py b/app/services/impl/level/level.py
new file mode 100644
index 0000000..285a11e
--- /dev/null
+++ b/app/services/impl/level/level.py
@@ -0,0 +1,417 @@
+import json
+import random
+import uuid
+
+from typing import Dict
+
+from fastapi import UploadFile
+
+from app.configs.constants import GPTModels, TemperatureSettings, EducationalContent
+from app.helpers import ExercisesHelper
+from app.repositories.abc import IDocumentStore
+from app.services.abc import ILevelService, ILLMService, IReadingService, IWritingService, ISpeakingService, \
+ IListeningService
+from .custom import CustomLevelModule
+from .upload import UploadLevelModule
+
+
+class LevelService(ILevelService):
+
+ def __init__(
+ self,
+ llm: ILLMService,
+ document_store: IDocumentStore,
+ mc_variants: Dict,
+ reading_service: IReadingService,
+ writing_service: IWritingService,
+ speaking_service: ISpeakingService,
+ listening_service: IListeningService
+ ):
+ self._llm = llm
+ self._document_store = document_store
+ self._reading_service = reading_service
+ self._custom_module = CustomLevelModule(
+ llm, self, reading_service, listening_service, writing_service, speaking_service
+ )
+ self._upload_module = UploadLevelModule(llm)
+
+ # TODO: normal and blank spaces only differ on "multiple choice blank space questions" in the prompt
+ # mc_variants are stored in ./mc_variants.json
+ self._mc_variants = mc_variants
+
+ async def upload_level(self, upload: UploadFile) -> Dict:
+ return await self._upload_module.generate_level_from_file(upload)
+
+ async def get_custom_level(self, data: Dict):
+ return await self._custom_module.get_custom_level(data)
+
+ async def get_level_exam(
+ self, number_of_exercises: int = 25, min_timer: int = 25, diagnostic: bool = False
+ ) -> Dict:
+ exercises = await self.gen_multiple_choice("normal", number_of_exercises, utas=False)
+ return {
+ "exercises": [exercises],
+ "isDiagnostic": diagnostic,
+ "minTimer": min_timer,
+ "module": "level"
+ }
+
+ async def get_level_utas(self, diagnostic: bool = False, min_timer: int = 25):
+ # Formats
+ mc = {
+ "id": str(uuid.uuid4()),
+ "prompt": "Choose the correct word or group of words that completes the sentences.",
+ "questions": None,
+ "type": "multipleChoice",
+ "part": 1
+ }
+
+ umc = {
+ "id": str(uuid.uuid4()),
+ "prompt": "Choose the underlined word or group of words that is not correct.",
+ "questions": None,
+ "type": "multipleChoice",
+ "part": 2
+ }
+
+ bs_1 = {
+ "id": str(uuid.uuid4()),
+ "prompt": "Read the text and write the correct word for each space.",
+ "questions": None,
+ "type": "blankSpaceText",
+ "part": 3
+ }
+
+ bs_2 = {
+ "id": str(uuid.uuid4()),
+ "prompt": "Read the text and write the correct word for each space.",
+ "questions": None,
+ "type": "blankSpaceText",
+ "part": 4
+ }
+
+ reading = {
+ "id": str(uuid.uuid4()),
+ "prompt": "Read the text and answer the questions below.",
+ "questions": None,
+ "type": "readingExercises",
+ "part": 5
+ }
+
+ all_mc_questions = []
+
+ # PART 1
+ # await self._gen_multiple_choice("normal", number_of_exercises, utas=False)
+ mc_exercises1 = await self.gen_multiple_choice(
+ "blank_space", 15, 1, utas=True, all_exams=all_mc_questions
+ )
+ print(json.dumps(mc_exercises1, indent=4))
+ all_mc_questions.append(mc_exercises1)
+
+ # PART 2
+ mc_exercises2 = await self.gen_multiple_choice(
+ "blank_space", 15, 16, utas=True, all_exams=all_mc_questions
+ )
+ print(json.dumps(mc_exercises2, indent=4))
+ all_mc_questions.append(mc_exercises2)
+
+ # PART 3
+ mc_exercises3 = await self.gen_multiple_choice(
+ "blank_space", 15, 31, utas=True, all_exams=all_mc_questions
+ )
+ print(json.dumps(mc_exercises3, indent=4))
+ all_mc_questions.append(mc_exercises3)
+
+ mc_exercises = mc_exercises1['questions'] + mc_exercises2['questions'] + mc_exercises3['questions']
+ print(json.dumps(mc_exercises, indent=4))
+ mc["questions"] = mc_exercises
+
+ # Underlined mc
+ underlined_mc = await self.gen_multiple_choice(
+ "underline", 15, 46, utas=True, all_exams=all_mc_questions
+ )
+ print(json.dumps(underlined_mc, indent=4))
+        umc["questions"] = underlined_mc["questions"]
+
+ # Blank Space text 1
+ blank_space_text_1 = await self.gen_blank_space_text_utas(12, 61, 250)
+ print(json.dumps(blank_space_text_1, indent=4))
+ bs_1["questions"] = blank_space_text_1
+
+ # Blank Space text 2
+ blank_space_text_2 = await self.gen_blank_space_text_utas(14, 73, 350)
+ print(json.dumps(blank_space_text_2, indent=4))
+ bs_2["questions"] = blank_space_text_2
+
+ # Reading text
+ reading_text = await self.gen_reading_passage_utas(87, 10, 4)
+ print(json.dumps(reading_text, indent=4))
+ reading["questions"] = reading_text
+
+ return {
+ "exercises": {
+ "blankSpaceMultipleChoice": mc,
+ "underlinedMultipleChoice": umc,
+ "blankSpaceText1": bs_1,
+ "blankSpaceText2": bs_2,
+ "readingExercises": reading,
+ },
+ "isDiagnostic": diagnostic,
+ "minTimer": min_timer,
+ "module": "level"
+ }
+
+ async def gen_multiple_choice(
+ self, mc_variant: str, quantity: int, start_id: int = 1, *, utas: bool = False, all_exams=None
+ ):
+ mc_template = self._mc_variants[mc_variant]
+ blank_mod = " blank space " if mc_variant == "blank_space" else " "
+
+ gen_multiple_choice_for_text: str = (
+ 'Generate {quantity} multiple choice{blank}questions of 4 options for an english level exam, some easy '
+ 'questions, some intermediate questions and some advanced questions. Ensure that the questions cover '
+ 'a range of topics such as verb tense, subject-verb agreement, pronoun usage, sentence structure, and '
+ 'punctuation. Make sure every question only has 1 correct answer.'
+ )
+
+ messages = [
+ {
+ "role": "system",
+ "content": (
+ f'You are a helpful assistant designed to output JSON on this format: {mc_template}'
+ )
+ },
+ {
+ "role": "user",
+ "content": gen_multiple_choice_for_text.format(quantity=str(quantity), blank=blank_mod)
+ }
+ ]
+
+ if mc_variant == "underline":
+ messages.append({
+ "role": "user",
+ "content": (
+ 'The type of multiple choice in the prompt has wrong words or group of words and the options '
+ 'are to find the wrong word or group of words that are underlined in the prompt. \nExample:\n'
+ 'Prompt: "I complain about my boss all the time, but my colleagues thinks '
+ 'the boss is nice."\n'
+ 'Options:\na: "complain"\nb: "all the time"\nc: "thinks"\nd: "is"'
+ )
+ })
+
+ question = await self._llm.prediction(
+ GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+ )
+
+ if len(question["questions"]) != quantity:
+ return await self.gen_multiple_choice(mc_variant, quantity, start_id, utas=utas, all_exams=all_exams)
+ else:
+ if not utas:
+ all_exams = await self._document_store.get_all("level")
+ seen_keys = set()
+ for i in range(len(question["questions"])):
+ question["questions"][i], seen_keys = await self._replace_exercise_if_exists(
+ all_exams, question["questions"][i], question, seen_keys, mc_variant, utas
+ )
+ return {
+ "id": str(uuid.uuid4()),
+ "prompt": "Select the appropriate option.",
+ "questions": ExercisesHelper.fix_exercise_ids(question, start_id)["questions"],
+ "type": "multipleChoice",
+ }
+ else:
+ if all_exams is not None:
+ seen_keys = set()
+ for i in range(len(question["questions"])):
+ question["questions"][i], seen_keys = await self._replace_exercise_if_exists(
+ all_exams, question["questions"][i], question, seen_keys, mc_variant, utas
+ )
+ response = ExercisesHelper.fix_exercise_ids(question, start_id)
+ response["questions"] = ExercisesHelper.randomize_mc_options_order(response["questions"])
+ return response
+
+ async def _generate_single_multiple_choice(self, mc_variant: str = "normal"):
+ mc_template = self._mc_variants[mc_variant]["questions"][0]
+ blank_mod = " blank space " if mc_variant == "blank_space" else " "
+
+ messages = [
+ {
+ "role": "system",
+ "content": (
+ f'You are a helpful assistant designed to output JSON on this format: {mc_template}'
+ )
+ },
+ {
+ "role": "user",
+ "content": (
+ f'Generate 1 multiple choice {blank_mod} question of 4 options for an english level exam, '
+ f'it can be easy, intermediate or advanced.'
+ )
+
+ }
+ ]
+
+ if mc_variant == "underline":
+ messages.append({
+ "role": "user",
+ "content": (
+ 'The type of multiple choice in the prompt has wrong words or group of words and the options '
+ 'are to find the wrong word or group of words that are underlined in the prompt. \nExample:\n'
+ 'Prompt: "I complain about my boss all the time, but my colleagues thinks '
+ 'the boss is nice."\n'
+ 'Options:\na: "complain"\nb: "all the time"\nc: "thinks"\nd: "is"'
+ )
+ })
+
+ question = await self._llm.prediction(
+ GPTModels.GPT_4_O, messages, ["options"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+ )
+
+ return question
+
+ async def _replace_exercise_if_exists(
+ self, all_exams, current_exercise, current_exam, seen_keys, mc_variant: str, utas: bool = False
+ ):
+ # Extracting relevant fields for comparison
+ key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options'])))
+ # Check if the key is in the set
+ if key in seen_keys:
+ return await self._replace_exercise_if_exists(
+ all_exams, await self._generate_single_multiple_choice(mc_variant), current_exam, seen_keys,
+ mc_variant, utas
+ )
+ else:
+ seen_keys.add(key)
+
+ if not utas:
+ for exam in all_exams:
+ exam_dict = exam.to_dict()
+ if len(exam_dict.get("parts", [])) > 0:
+ exercise_dict = exam_dict.get("parts", [])[0]
+ if len(exercise_dict.get("exercises", [])) > 0:
+ if any(
+ exercise["prompt"] == current_exercise["prompt"] and
+ any(exercise["options"][0]["text"] == current_option["text"] for current_option in
+ current_exercise["options"])
+ for exercise in exercise_dict.get("exercises", [])[0]["questions"]
+ ):
+ return await self._replace_exercise_if_exists(
+ all_exams, await self._generate_single_multiple_choice(mc_variant), current_exam,
+ seen_keys, mc_variant, utas
+ )
+ else:
+ for exam in all_exams:
+ if any(
+ exercise["prompt"] == current_exercise["prompt"] and
+ any(exercise["options"][0]["text"] == current_option["text"] for current_option in
+ current_exercise["options"])
+ for exercise in exam.get("questions", [])
+ ):
+ return await self._replace_exercise_if_exists(
+ all_exams, await self._generate_single_multiple_choice(mc_variant), current_exam,
+ seen_keys, mc_variant, utas
+ )
+ return current_exercise, seen_keys
+
+    async def gen_blank_space_text_utas(self, quantity: int, start_id: int, size: int, topic=None):
+        if topic is None:
+            topic = random.choice(EducationalContent.MTI_TOPICS)
+        json_template = self._mc_variants["blank_space_text"]
+ messages = [
+ {
+ "role": "system",
+ "content": f'You are a helpful assistant designed to output JSON on this format: {json_template}'
+ },
+ {
+ "role": "user",
+ "content": f'Generate a text of at least {size} words about the topic {topic}.'
+ },
+ {
+ "role": "user",
+ "content": (
+ f'From the generated text choose {quantity} words (cannot be sequential words) to replace '
+ 'once with {{id}} where id starts on ' + str(start_id) + ' and is incremented for each word. '
+ 'The ids must be ordered throughout the text and the words must be replaced only once. '
+ 'Put the removed words and respective ids on the words array of the json in the correct order.'
+ )
+ }
+ ]
+
+ question = await self._llm.prediction(
+ GPTModels.GPT_4_O, messages, ["question"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+ )
+
+ return question["question"]
+
+    async def gen_reading_passage_utas(self, start_id, sa_quantity: int, mc_quantity: int, topic=None):
+        if topic is None:
+            topic = random.choice(EducationalContent.MTI_TOPICS)
+        passage = await self._reading_service.generate_reading_passage(1, topic)
+ short_answer = await self._gen_short_answer_utas(passage["text"], start_id, sa_quantity)
+ mc_exercises = await self._gen_text_multiple_choice_utas(passage["text"], start_id + sa_quantity, mc_quantity)
+ return {
+ "exercises": {
+ "shortAnswer": short_answer,
+ "multipleChoice": mc_exercises,
+ },
+ "text": {
+ "content": passage["text"],
+ "title": passage["title"]
+ }
+ }
+
+ async def _gen_short_answer_utas(self, text: str, start_id: int, sa_quantity: int):
+ json_format = {"questions": [{"id": 1, "question": "question", "possible_answers": ["answer_1", "answer_2"]}]}
+
+ messages = [
+ {
+ "role": "system",
+ "content": f'You are a helpful assistant designed to output JSON on this format: {json_format}'
+ },
+ {
+ "role": "user",
+ "content": (
+ f'Generate {sa_quantity} short answer questions, and the possible answers, must have '
+ f'maximum 3 words per answer, about this text:\n"{text}"'
+ )
+ },
+ {
+ "role": "user",
+ "content": f'The id starts at {start_id}.'
+ }
+ ]
+
+ question = await self._llm.prediction(
+ GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+ )
+
+ return question["questions"]
+
+ async def _gen_text_multiple_choice_utas(self, text: str, start_id: int, mc_quantity: int):
+ json_template = self._mc_variants["text_mc_utas"]
+
+ messages = [
+ {
+ "role": "system",
+ "content": f'You are a helpful assistant designed to output JSON on this format: {json_template}'
+ },
+ {
+ "role": "user",
+ "content": f'Generate {mc_quantity} multiple choice questions of 4 options for this text:\n{text}'
+ },
+ {
+ "role": "user",
+ "content": 'Make sure every question only has 1 correct answer.'
+ }
+ ]
+
+ question = await self._llm.prediction(
+ GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+ )
+
+ if len(question["questions"]) != mc_quantity:
+            return await self._gen_text_multiple_choice_utas(text, start_id, mc_quantity)
+ else:
+ response = ExercisesHelper.fix_exercise_ids(question, start_id)
+ response["questions"] = ExercisesHelper.randomize_mc_options_order(response["questions"])
+ return response
diff --git a/app/services/impl/level/mc_variants.json b/app/services/impl/level/mc_variants.json
new file mode 100644
index 0000000..3b9c55b
--- /dev/null
+++ b/app/services/impl/level/mc_variants.json
@@ -0,0 +1,137 @@
+{
+ "normal": {
+ "questions": [
+ {
+ "id": "9",
+ "options": [
+ {
+ "id": "A",
+ "text": "And"
+ },
+ {
+ "id": "B",
+ "text": "Cat"
+ },
+ {
+ "id": "C",
+ "text": "Happy"
+ },
+ {
+ "id": "D",
+ "text": "Jump"
+ }
+ ],
+ "prompt": "Which of the following is a conjunction?",
+ "solution": "A",
+ "variant": "text"
+ }
+ ]
+ },
+ "blank_space": {
+ "questions": [
+ {
+ "id": "9",
+ "options": [
+ {
+ "id": "A",
+ "text": "And"
+ },
+ {
+ "id": "B",
+ "text": "Cat"
+ },
+ {
+ "id": "C",
+ "text": "Happy"
+ },
+ {
+ "id": "D",
+ "text": "Jump"
+ }
+ ],
+ "prompt": "Which of the following is a conjunction?",
+ "solution": "A",
+ "variant": "text"
+ }
+ ]
+ },
+ "underline": {
+ "questions": [
+ {
+ "id": "9",
+ "options": [
+ {
+ "id": "A",
+ "text": "a"
+ },
+ {
+ "id": "B",
+ "text": "b"
+ },
+ {
+ "id": "C",
+ "text": "c"
+ },
+ {
+ "id": "D",
+ "text": "d"
+ }
+ ],
+ "prompt": "prompt",
+ "solution": "A",
+ "variant": "text"
+ }
+ ]
+ },
+ "blank_space_text": {
+ "question": {
+ "words": [
+ {
+ "id": "1",
+ "text": "a"
+ },
+ {
+ "id": "2",
+ "text": "b"
+ },
+ {
+ "id": "3",
+ "text": "c"
+ },
+ {
+ "id": "4",
+ "text": "d"
+ }
+ ],
+ "text": "text"
+ }
+ },
+ "text_mc_utas": {
+ "questions": [
+ {
+ "id": "9",
+ "options": [
+ {
+ "id": "A",
+ "text": "a"
+ },
+ {
+ "id": "B",
+ "text": "b"
+ },
+ {
+ "id": "C",
+ "text": "c"
+ },
+ {
+ "id": "D",
+ "text": "d"
+ }
+ ],
+ "prompt": "prompt",
+ "solution": "A",
+ "variant": "text"
+ }
+ ]
+ }
+}
\ No newline at end of file
diff --git a/app/services/impl/level/upload.py b/app/services/impl/level/upload.py
new file mode 100644
index 0000000..ee2d326
--- /dev/null
+++ b/app/services/impl/level/upload.py
@@ -0,0 +1,404 @@
+import aiofiles
+import os
+import uuid
+from logging import getLogger
+
+from typing import Dict, Any, Tuple, Coroutine
+
+import pdfplumber
+from fastapi import UploadFile
+
+from app.services.abc import ILLMService
+from app.helpers import LoggerHelper, FileHelper
+from app.mappers import ExamMapper
+
+from app.dtos.exam import Exam
+from app.dtos.sheet import Sheet
+
+
+class UploadLevelModule:
+ def __init__(self, openai: ILLMService):
+ self._logger = getLogger(__name__)
+ self._llm = openai
+
+ # TODO: create a doc in firestore with a status and get its id, run this in a thread and modify the doc in
+ # firestore, return the id right away, in generation view poll for the id
+ async def generate_level_from_file(self, file: UploadFile) -> Dict[str, Any] | None:
+ ext, path_id = await self._save_upload(file)
+ FileHelper.convert_file_to_pdf(
+ f'./tmp/{path_id}/uploaded.{ext}', f'./tmp/{path_id}/exercises.pdf'
+ )
+ file_has_images = self._check_pdf_for_images(f'./tmp/{path_id}/exercises.pdf')
+
+ if not file_has_images:
+ FileHelper.convert_file_to_html(f'./tmp/{path_id}/uploaded.{ext}', f'./tmp/{path_id}/exercises.html')
+
+ completion: Coroutine[Any, Any, Exam] = (
+ self._png_completion(path_id) if file_has_images else self._html_completion(path_id)
+ )
+ response = await completion
+
+ FileHelper.remove_directory(f'./tmp/{path_id}')
+
+ if response:
+ return self.fix_ids(response.dict(exclude_none=True))
+ return None
+
+ @staticmethod
+ @LoggerHelper.suppress_loggers()
+ def _check_pdf_for_images(pdf_path: str) -> bool:
+ with pdfplumber.open(pdf_path) as pdf:
+ for page in pdf.pages:
+ if page.images:
+ return True
+ return False
+
+ @staticmethod
+ async def _save_upload(file: UploadFile) -> Tuple[str, str]:
+ ext = file.filename.split('.')[-1]
+ path_id = str(uuid.uuid4())
+ os.makedirs(f'./tmp/{path_id}', exist_ok=True)
+
+ tmp_filename = f'./tmp/{path_id}/uploaded.{ext}'
+ file_bytes: bytes = await file.read()
+
+ async with aiofiles.open(tmp_filename, 'wb') as file:
+ await file.write(file_bytes)
+
+ return ext, path_id
+
+ def _level_json_schema(self):
+ return {
+ "parts": [
+ {
+ "context": "",
+ "exercises": [
+ self._multiple_choice_html(),
+ self._passage_blank_space_html()
+ ]
+ }
+ ]
+ }
+
+ async def _html_completion(self, path_id: str) -> Exam:
+ async with aiofiles.open(f'./tmp/{path_id}/exercises.html', 'r', encoding='utf-8') as f:
+ html = await f.read()
+
+ return await self._llm.pydantic_prediction(
+ [self._gpt_instructions_html(),
+ {
+ "role": "user",
+ "content": html
+ }
+ ],
+ ExamMapper.map_to_exam_model,
+ str(self._level_json_schema())
+ )
+
+ def _gpt_instructions_html(self):
+ return {
+ "role": "system",
+ "content": (
+ 'You are GPT Scraper and your job is to clean dirty html into clean usable JSON formatted data.'
+ 'Your current task is to scrape html english questions sheets.\n\n'
+
+ 'In the question sheet you will only see 4 types of question:\n'
+ '- blank space multiple choice\n'
+ '- underline multiple choice\n'
+ '- reading passage blank space multiple choice\n'
+ '- reading passage multiple choice\n\n'
+
+ 'For the first two types of questions the template is the same but the question prompts differ, '
+ 'whilst in the blank space multiple choice you must include in the prompt the blank spaces with '
+ 'multiple "_", in the underline you must include in the prompt the <u></u> tags to '
+ 'indicate the underline and the options a, b, c, d must be the ordered underlines in the prompt.\n\n'
+
+ 'For the reading passage exercise you must handle the formatting of the passages. If it is a '
+ 'reading passage with blank spaces you will see blanks represented with (question id) followed by a '
+ 'line and your job is to replace the brackets with the question id and line with "{{question id}}" '
+ 'with 2 newlines between paragraphs. For the reading passages without blanks you must remove '
+ 'any numbers that may be there to specify paragraph numbers or line numbers, and place 2 newlines '
+ 'between paragraphs.\n\n'
+
+ 'IMPORTANT: Note that for the reading passages, the html might not reflect the actual paragraph '
+ 'structure, don\'t format the reading passages paragraphs only by the <p> tags, try to figure '
+ 'out the best paragraph separation possible.'
+
+ 'You will place all the information in a single JSON: '
+ '{"parts": [{"exercises": [{...}], "context": ""}]}\n '
+ 'Where {...} are the exercises templates for each part of a question sheet and the optional field '
+ 'context.'
+
+ 'IMPORTANT: The question sheet may be divided by sections but you need to only consider the parts, '
+ 'so that you can group the exercises by the parts that are in the html, this is crucial since only '
+ 'reading passage multiple choice require context and if the context is included in parts where it '
+ 'is not required the UI will be messed up. So make sure to correctly group the exercises by parts.\n'
+
+ 'The templates for the exercises are the following:\n'
+ '- blank space multiple choice, underline multiple choice and reading passage multiple choice: '
+ f'{self._multiple_choice_html()}\n'
+ f'- reading passage blank space multiple choice: {self._passage_blank_space_html()}\n'
+
+ 'IMPORTANT: For the reading passage multiple choice the context field must be set with the reading '
+ 'passages without paragraphs or line numbers, with 2 newlines between paragraphs, for the other '
+ 'exercises exclude the context field.'
+ )
+ }
+
+ @staticmethod
+ def _multiple_choice_html():
+ return {
+ "type": "multipleChoice",
+ "prompt": "Select the appropriate option.",
+ "questions": [
+ {
+ "id": "",
+ "prompt": "",
+ "solution": "",
+ "options": [
+ {
+ "id": "A",
+ "text": ""
+ },
+ {
+ "id": "B",
+ "text": ""
+ },
+ {
+ "id": "C",
+ "text": ""
+ },
+ {
+ "id": "D",
+ "text": ""
+ }
+ ]
+ }
+ ]
+ }
+
+ @staticmethod
+ def _passage_blank_space_html():
+ return {
+ "type": "fillBlanks",
+ "variant": "mc",
+ "prompt": "Click a blank to select the appropriate word for it.",
+ "text": (
+ "<reading passage text with blanks replaced by {{question id}} with 2 newlines between paragraphs>"
+ ),
+ "solutions": [
+ {
+ "id": "",
+ "solution": ""
+ }
+ ],
+ "words": [
+ {
+ "id": "",
+ "options": {
+ "A": "",
+ "B": "",
+ "C": "",
+ "D": ""
+ }
+ }
+ ]
+ }
+
+ async def _png_completion(self, path_id: str) -> Exam:
+ FileHelper.pdf_to_png(path_id)
+
+ tmp_files = os.listdir(f'./tmp/{path_id}')
+ pages = [f for f in tmp_files if f.startswith('page-') and f.endswith('.png')]
+ pages.sort(key=lambda f: int(f.split('-')[1].split('.')[0]))
+
+ json_schema = {
+ "components": [
+ {"type": "part", "part": ""},
+ self._multiple_choice_png(),
+ {"type": "blanksPassage", "text": (
+ "<reading passage text with blanks replaced by {{question id}} with 2 newlines between paragraphs>"
+ )},
+ {"type": "passage", "context": (
+ "<reading passage text with 2 newlines between paragraphs>"
+ )},
+ self._passage_blank_space_png()
+ ]
+ }
+
+ components = []
+
+ for i in range(len(pages)):
+ current_page = pages[i]
+ next_page = pages[i + 1] if i + 1 < len(pages) else None
+ batch = [current_page, next_page] if next_page else [current_page]
+
+ sheet = await self._png_batch(path_id, batch, json_schema)
+ sheet.batch = i + 1
+ components.append(sheet.dict())
+
+ batches = {"batches": components}
+
+ return await self._batches_to_exam_completion(batches)
+
+ async def _png_batch(self, path_id: str, files: list[str], json_schema) -> Sheet:
+ return await self._llm.pydantic_prediction(
+ [self._gpt_instructions_png(),
+ {
+ "role": "user",
+ "content": [
+ *FileHelper.b64_pngs(path_id, files)
+ ]
+ }
+ ],
+ ExamMapper.map_to_sheet,
+ str(json_schema)
+ )
+
+ def _gpt_instructions_png(self):
+ return {
+ "role": "system",
+ "content": (
+ 'You are GPT OCR and your job is to scan image text data and format it to JSON format.'
+ 'Your current task is to scan english questions sheets.\n\n'
+
+ 'You will place all the information in a single JSON: {"components": [{...}]} where {...} is a set of '
+ 'sheet components you will retrieve from the images, the components and their corresponding JSON '
+ 'templates are as follows:\n'
+
+ '- Part, a standalone part or part of a section of the question sheet: '
+ '{"type": "part", "part": ""}\n'
+
+ '- Multiple Choice Question, there are three types of multiple choice questions that differ on '
+ 'the prompt field of the template: blanks, underlines and normal. '
+
+ 'In the blanks prompt you must leave 5 underscores to represent the blank space. '
+ 'In the underlines questions the objective is to pick the words that are incorrect in the given '
+ 'sentence, for these questions you must wrap the answer to the question with the html tag <u>, '
+ 'choose 3 other words to wrap in <u>, place them in the prompt field and use the underlined words '
+ 'in the order they appear in the question for the options A to D, disregard options that might be '
+ 'included underneath the underlines question and use the ones you wrapped in <u>.'
+ 'In normal you just leave the question as is. '
+
+ f'The template for multiple choice questions is the following: {self._multiple_choice_png()}.\n'
+
+ '- Reading Passages, there are two types of reading passages. Reading passages where you will see '
+ 'blanks represented by a (question id) followed by a line, you must format these types of reading '
+ 'passages to be only the text with the brackets that have the question id and line replaced with '
+ '"{{question id}}", also place 2 newlines between paragraphs. For the reading passages without blanks '
+ 'you must remove any numbers that may be there to specify paragraph numbers or line numbers, '
+ 'and place 2 newlines between paragraphs. '
+
+ 'For the reading passages with blanks the template is: {"type": "blanksPassage", '
+ '"text": "<passage text with blanks replaced by {{question id}} also place 2 newlines between paragraphs>"}. '
+
+ 'For the reading passage without blanks is: {"type": "passage", "context": ""}\n'
+
+ '- Blanks Options, options for a blanks reading passage exercise, this type of component is a group of '
+ 'options with the question id and the options from a to d. The template is: '
+ f'{self._passage_blank_space_png()}\n'
+
+ 'IMPORTANT: You must place the components in the order that they were given to you. If an exercise or '
+ 'reading passages are cut off don\'t include them in the JSON.'
+ )
+ }
+
+ def _multiple_choice_png(self):
+ multiple_choice = self._multiple_choice_html()["questions"][0]
+ multiple_choice["type"] = "multipleChoice"
+ multiple_choice.pop("solution")
+ return multiple_choice
+
+ def _passage_blank_space_png(self):
+ passage_blank_space = self._passage_blank_space_html()["words"][0]
+ passage_blank_space["type"] = "fillBlanks"
+ return passage_blank_space
+
+ async def _batches_to_exam_completion(self, batches: Dict[str, Any]) -> Exam:
+ return await self._llm.pydantic_prediction(
+ [self._gpt_instructions_html(),
+ {
+ "role": "user",
+ "content": str(batches)
+ }
+ ],
+ ExamMapper.map_to_exam_model,
+ str(self._level_json_schema())
+ )
+
+ def _gpt_instructions_batches(self):
+ return {
+ "role": "system",
+ "content": (
+ 'You are a helpful assistant. Your task is to merge multiple batches of english question sheet '
+ 'components and solve the questions. Each batch may contain overlapping content with the previous '
+ 'batch, or close enough content which needs to be excluded. The components are as follows:'
+
+ '- Part, a standalone part or part of a section of the question sheet: '
+ '{"type": "part", "part": ""}\n'
+
+ '- Multiple Choice Question, there are three types of multiple choice questions that differ on '
+ 'the prompt field of the template: blanks, underlines and normal. '
+
+ 'In a blanks question, the prompt has underscores to represent the blank space, you must select the '
+ 'appropriate option to solve it.'
+
+ 'In an underlines question, the prompt has 4 underlines represented by the html tags <u></u>, you must '
+ 'select the option that makes the prompt incorrect to solve it. If the options order doesn\'t reflect '
+ 'the order in which the underlines appear in the prompt you will need to fix it.'
+
+ 'In a normal question there isn\'t either blanks or underlines in the prompt, you should just '
+ 'select the appropriate solution.'
+
+ f'The template for these questions is the same: {self._multiple_choice_png()}\n'
+
+ '- Reading Passages, there are two types of reading passages with different templates. The one with '
+ 'type "blanksPassage" where the text field holds the passage and a blank is represented by '
+ '{{question id}} and the other one with type "passage" that has the context field with just '
+ 'reading passages. For both of these components you will have to remove any additional data that might '
+ 'be related to a question description and also remove some "()" and "_" from blanksPassage'
+ ' if there are any. These components are used in conjunction with other ones.'
+
+ '- Blanks Options, options for a blanks reading passage exercise, this type of component is a group of '
+ 'options with the question id and the options from a to d. The template is: '
+ f'{self._passage_blank_space_png()}\n\n'
+
+ 'Now that you know the possible components here\'s what I want you to do:\n'
+ '1. Remove duplicates. A batch will have duplicates of other batches and the components of '
+ 'the next batch should always take precedence over the previous batch, what I mean by this is that '
+ 'if batch 1 has, for example, multiple choice question with id 10 and the next one also has id 10, '
+ 'you pick the next one.\n'
+ '2. Solve the exercises. There are 4 types of exercises, the 3 multipleChoice variants + a fill blanks '
+ 'exercise. For the multiple choice question follow the previous instruction to solve them and place '
+ f'them in this format: {self._multiple_choice_html()}. For the fill blanks exercises you need to match '
+ 'the correct blanksPassage to the correct fillBlanks options and then pick the correct option. Here is '
+ f'the template for this exercise: {self._passage_blank_space_html()}.\n'
+ f'3. Restructure the JSON to match this template: {self._level_json_schema()}. '
+ f'You must group the exercises by the parts in the order they appear in the batches components. '
+ f'The context field of a part is the context of a passage component that has text relevant to normal '
+ f'multiple choice questions.\n'
+
+ 'Do your utmost to fulfill the requisites, make sure you include all non-duplicate questions '
+ 'in your response and correctly structure the JSON.'
+ )
+ }
+
+ @staticmethod
+ def fix_ids(response):
+ counter = 1
+ for part in response["parts"]:
+ for exercise in part["exercises"]:
+ if exercise["type"] == "multipleChoice":
+ for question in exercise["questions"]:
+ question["id"] = counter
+ counter += 1
+ if exercise["type"] == "fillBlanks":
+ for i in range(len(exercise["words"])):
+ exercise["words"][i]["id"] = counter
+ exercise["solutions"][i]["id"] = counter
+ counter += 1
+ return response
diff --git a/app/services/impl/listening.py b/app/services/impl/listening.py
index af8f789..14c7a07 100644
--- a/app/services/impl/listening.py
+++ b/app/services/impl/listening.py
@@ -1,15 +1,18 @@
+import queue
import uuid
+from logging import getLogger
from queue import Queue
import random
-from typing import Dict
+from typing import Dict, List
from app.repositories.abc import IFileStorage, IDocumentStore
from app.services.abc import IListeningService, ILLMService, ITextToSpeechService
from app.configs.question_templates import getListeningTemplate, getListeningPartTemplate
from app.configs.constants import (
- NeuralVoices, GPTModels, TemperatureSettings, FilePaths, MinTimers, ExamVariant
+ NeuralVoices, GPTModels, TemperatureSettings, FilePaths, MinTimers, ExamVariant, EducationalContent,
+ FieldsAndExercises
)
-from app.helpers import ExercisesHelper
+from app.helpers import ExercisesHelper, FileHelper
class ListeningService(IListeningService):
@@ -33,25 +36,83 @@ class ListeningService(IListeningService):
self._tts = tts
self._file_storage = file_storage
self._document_store = document_store
+ self._logger = getLogger(__name__)
self._sections = {
"section_1": {
+ "topic": EducationalContent.TWO_PEOPLE_SCENARIOS,
+ "exercise_types": FieldsAndExercises.LISTENING_1_EXERCISE_TYPES,
+ "exercise_sample_size": 1,
+ "total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_1_EXERCISES,
+ "start_id": 1,
"generate_dialogue": self._generate_listening_conversation,
- "type": "conversation"
+ "type": "conversation",
},
"section_2": {
+ "topic": EducationalContent.SOCIAL_MONOLOGUE_CONTEXTS,
+ "exercise_types": FieldsAndExercises.LISTENING_2_EXERCISE_TYPES,
+ "exercise_sample_size": 2,
+ "total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_2_EXERCISES,
+ "start_id": 11,
"generate_dialogue": self._generate_listening_monologue,
- "type": "monologue"
+ "type": "monologue",
},
"section_3": {
+ "topic": EducationalContent.FOUR_PEOPLE_SCENARIOS,
+ "exercise_types": FieldsAndExercises.LISTENING_3_EXERCISE_TYPES,
+ "exercise_sample_size": 1,
+ "total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_3_EXERCISES,
+ "start_id": 21,
"generate_dialogue": self._generate_listening_conversation,
- "type": "conversation"
+ "type": "conversation",
},
"section_4": {
+ "topic": EducationalContent.ACADEMIC_SUBJECTS,
+ "exercise_types": FieldsAndExercises.LISTENING_EXERCISE_TYPES,
+ "exercise_sample_size": 2,
+ "total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_4_EXERCISES,
+ "start_id": 31,
"generate_dialogue": self._generate_listening_monologue,
"type": "monologue"
}
}
+ async def get_listening_question(
+ self, section_id: int, topic: str, req_exercises: List[str], difficulty: str,
+ number_of_exercises_q=queue.Queue(), start_id=-1
+ ):
+ FileHelper.delete_files_older_than_one_day(FilePaths.AUDIO_FILES_PATH)
+ section = self._sections[f"section_{section_id}"]
+ if not topic:
+ topic = random.choice(section["topic"])
+
+ if len(req_exercises) == 0:
+ req_exercises = random.sample(section["exercise_types"], section["exercise_sample_size"])
+
+ if number_of_exercises_q.empty():
+ number_of_exercises_q = ExercisesHelper.divide_number_into_parts(
+ section["total_exercises"], len(req_exercises)
+ )
+
+ if start_id == -1:
+ start_id = section["start_id"]
+
+ dialog = await self.generate_listening_question(section_id, topic)
+
+ if section_id in {1, 3}:
+ dialog = self.parse_conversation(dialog)
+
+ self._logger.info(f'Generated {section["type"]}: {dialog}')
+
+ exercises = await self.generate_listening_exercises(
+ section_id, str(dialog), req_exercises, number_of_exercises_q, start_id, difficulty
+ )
+
+ return {
+ "exercises": exercises,
+ "text": dialog,
+ "difficulty": difficulty
+ }
+
async def generate_listening_question(self, section: int, topic: str):
return await self._sections[f'section_{section}']["generate_dialogue"](section, topic)
@@ -67,9 +128,10 @@ class ListeningService(IListeningService):
for req_exercise in req_exercises:
number_of_exercises = number_of_exercises_q.get()
- if req_exercise == "multipleChoice":
+ if req_exercise == "multipleChoice" or req_exercise == "multipleChoice3Options":
+ n_options = 4 if req_exercise == "multipleChoice" else 3
question = await self._gen_multiple_choice_exercise_listening(
- dialog_type, dialog, number_of_exercises, start_id, difficulty
+ dialog_type, dialog, number_of_exercises, start_id, difficulty, n_options
)
exercises.append(question)
@@ -100,10 +162,9 @@ class ListeningService(IListeningService):
return exercises
- async def save_listening(self, parts: list[dict], min_timer: int, difficulty: str):
+ async def save_listening(self, parts: list[dict], min_timer: int, difficulty: str, listening_id: str):
template = getListeningTemplate()
template['difficulty'] = difficulty
- listening_id = str(uuid.uuid4())
for i, part in enumerate(parts, start=0):
part_template = getListeningPartTemplate()
@@ -127,8 +188,8 @@ class ListeningService(IListeningService):
else:
template["variant"] = ExamVariant.FULL.value
- (result, listening_id) = await self._document_store.save_to_db_with_id("listening", template, listening_id)
- if result:
+ listening_id = await self._document_store.save_to_db_with_id("listening", template, listening_id)
+ if listening_id:
return {**template, "id": listening_id}
else:
raise Exception("Failed to save question: " + str(parts))
@@ -160,6 +221,20 @@ class ListeningService(IListeningService):
}
]
+ if section == 1:
+ messages.extend([
+ {
+ "role": "user",
+ "content": 'Try to have misleading discourse (refer multiple dates, multiple colors and etc).'
+
+ },
+ {
+ "role": "user",
+ "content": 'Try to have spelling of names (cities, people, etc)'
+
+ }
+ ])
+
response = await self._llm.prediction(
GPTModels.GPT_4_O,
messages,
@@ -170,7 +245,11 @@ class ListeningService(IListeningService):
return self._get_conversation_voices(response, True)
async def _generate_listening_monologue(self, section: int, topic: str) -> Dict:
- context = 'social context' if section == 2 else 'academic subject'
+ head = (
+ 'Generate a comprehensive monologue set in the social context of'
+ if section == 2 else
+ 'Generate a comprehensive and complex monologue on the academic subject of'
+ )
messages = [
{
@@ -182,7 +261,7 @@ class ListeningService(IListeningService):
{
"role": "user",
"content": (
- f'Generate a comprehensive monologue set in the {context} of "{topic}". {self.MONOLOGUE_TAIL}'
+ f'{head}: "{topic}". {self.MONOLOGUE_TAIL}'
)
}
]
@@ -233,7 +312,7 @@ class ListeningService(IListeningService):
# ==================================================================================================================
async def _gen_multiple_choice_exercise_listening(
- self, dialog_type: str, text: str, quantity: int, start_id, difficulty
+ self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str, n_options: int = 4
):
messages = [
{
@@ -248,8 +327,8 @@ class ListeningService(IListeningService):
{
"role": "user",
"content": (
- f'Generate {str(quantity)} {difficulty} difficulty multiple choice questions of 4 options '
- f'for this {dialog_type}:\n"' + text + '"')
+ f'Generate {quantity} {difficulty} difficulty multiple choice questions of {n_options} '
+ f'options for this {dialog_type}:\n"' + text + '"')
}
]
@@ -268,7 +347,7 @@ class ListeningService(IListeningService):
}
async def _gen_write_blanks_questions_exercise_listening(
- self, dialog_type: str, text: str, quantity: int, start_id, difficulty
+ self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str
):
messages = [
{
@@ -280,7 +359,7 @@ class ListeningService(IListeningService):
{
"role": "user",
"content": (
- f'Generate {str(quantity)} {difficulty} difficulty short answer questions, and the '
+ f'Generate {quantity} {difficulty} difficulty short answer questions, and the '
f'possible answers (max 3 words per answer), about this {dialog_type}:\n"{text}"')
}
]
@@ -300,7 +379,7 @@ class ListeningService(IListeningService):
}
async def _gen_write_blanks_notes_exercise_listening(
- self, dialog_type: str, text: str, quantity: int, start_id, difficulty
+ self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str
):
messages = [
{
@@ -312,7 +391,7 @@ class ListeningService(IListeningService):
{
"role": "user",
"content": (
- f'Generate {str(quantity)} {difficulty} difficulty notes taken from this '
+ f'Generate {quantity} {difficulty} difficulty notes taken from this '
f'{dialog_type}:\n"{text}"'
)
@@ -357,7 +436,7 @@ class ListeningService(IListeningService):
}
async def _gen_write_blanks_form_exercise_listening(
- self, dialog_type: str, text: str, quantity: int, start_id, difficulty
+ self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str
):
messages = [
{
@@ -369,12 +448,21 @@ class ListeningService(IListeningService):
{
"role": "user",
"content": (
- f'Generate a form with {str(quantity)} {difficulty} difficulty key-value pairs '
+ f'Generate a form with {quantity} {difficulty} difficulty key-value pairs '
f'about this {dialog_type}:\n"{text}"'
)
}
]
+ if dialog_type == "conversation":
+ messages.append({
+ "role": "user",
+ "content": (
+ 'It must be a form and not questions. '
+ 'Example: {"form": ["Color of car": "blue", "Brand of car": "toyota"]}'
+ )
+ })
+
parsed_form = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["form"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
@@ -391,3 +479,14 @@ class ListeningService(IListeningService):
"type": "writeBlanks"
}
+ @staticmethod
+ def parse_conversation(conversation_data):
+ conversation_list = conversation_data.get('conversation', [])
+ readable_text = []
+
+ for message in conversation_list:
+ name = message.get('name', 'Unknown')
+ text = message.get('text', '')
+ readable_text.append(f"{name}: {text}")
+
+ return "\n".join(readable_text)
\ No newline at end of file
diff --git a/app/services/impl/reading.py b/app/services/impl/reading.py
index 243d382..50b136d 100644
--- a/app/services/impl/reading.py
+++ b/app/services/impl/reading.py
@@ -12,42 +12,25 @@ class ReadingService(IReadingService):
def __init__(self, llm: ILLMService):
self._llm = llm
- self._passages = {
- "passage_1": {
- "question_type": QuestionType.READING_PASSAGE_1,
- "start_id": 1
- },
- "passage_2": {
- "question_type": QuestionType.READING_PASSAGE_2,
- "start_id": 14
- },
- "passage_3": {
- "question_type": QuestionType.READING_PASSAGE_3,
- "start_id": 27
- }
- }
async def gen_reading_passage(
self,
- passage_id: int,
+ part: int,
topic: str,
req_exercises: List[str],
number_of_exercises_q: Queue,
- difficulty: str
+ difficulty: str,
+ start_id: int
):
- _passage = self._passages[f'passage_{str(passage_id)}']
-
- passage = await self.generate_reading_passage(_passage["question_type"], topic)
-
- if passage == "":
- return await self.gen_reading_passage(passage_id, topic, req_exercises, number_of_exercises_q, difficulty)
-
- start_id = _passage["start_id"]
+ passage = await self.generate_reading_passage(part, topic)
exercises = await self._generate_reading_exercises(
passage["text"], req_exercises, number_of_exercises_q, start_id, difficulty
)
+
if ExercisesHelper.contains_empty_dict(exercises):
- return await self.gen_reading_passage(passage_id, topic, req_exercises, number_of_exercises_q, difficulty)
+ return await self.gen_reading_passage(
+ part, topic, req_exercises, number_of_exercises_q, difficulty, start_id
+ )
return {
"exercises": exercises,
@@ -58,7 +41,17 @@ class ReadingService(IReadingService):
"difficulty": difficulty
}
- async def generate_reading_passage(self, q_type: QuestionType, topic: str):
+ async def generate_reading_passage(self, part: int, topic: str, word_count: int = 800):
+ part_system_message = {
+ "1": 'The generated text should be fairly easy to understand and have multiple paragraphs.',
+ "2": 'The generated text should be fairly hard to understand and have multiple paragraphs.',
+ "3": (
+ 'The generated text should be very hard to understand and include different points, theories, '
+ 'subtle differences of opinions from people, correctly sourced to the person who said it, '
+ 'over the specified topic and have multiple paragraphs.'
+ )
+ }
+
messages = [
{
"role": "system",
@@ -69,17 +62,26 @@ class ReadingService(IReadingService):
{
"role": "user",
"content": (
- f'Generate an extensive text for IELTS {q_type.value}, of at least 1500 words, '
- f'on the topic of "{topic}". The passage should offer a substantial amount of '
- 'information, analysis, or narrative relevant to the chosen subject matter. This text '
- 'passage aims to serve as the primary reading section of an IELTS test, providing an '
- 'in-depth and comprehensive exploration of the topic. Make sure that the generated text '
- 'does not contain forbidden subjects in muslim countries.'
+ f'Generate an extensive text for IELTS Reading Passage {part}, of at least {word_count} words, '
+ f'on the topic of "{topic}". The passage should offer a substantial amount of '
+ 'information, analysis, or narrative relevant to the chosen subject matter. This text '
+ 'passage aims to serve as the primary reading section of an IELTS test, providing an '
+ 'in-depth and comprehensive exploration of the topic. Make sure that the generated text '
+ 'does not contain forbidden subjects in muslim countries.'
)
-
+ },
+ {
+ "role": "system",
+ "content": part_system_message[str(part)]
}
]
+ if part == 3:
+ messages.append({
+ "role": "user",
+ "content": "Use real text excerpts on you generated passage and cite the sources."
+ })
+
return await self._llm.prediction(
GPTModels.GPT_4_O,
messages,
@@ -95,11 +97,15 @@ class ReadingService(IReadingService):
number_of_exercises = number_of_exercises_q.get()
if req_exercise == "fillBlanks":
- question = await self._gen_summary_fill_blanks_exercise(passage, number_of_exercises, start_id, difficulty)
+ question = await self._gen_summary_fill_blanks_exercise(
+ passage, number_of_exercises, start_id, difficulty
+ )
exercises.append(question)
print("Added fill blanks: " + str(question))
elif req_exercise == "trueFalse":
- question = await self._gen_true_false_not_given_exercise(passage, number_of_exercises, start_id, difficulty)
+ question = await self._gen_true_false_not_given_exercise(
+ passage, number_of_exercises, start_id, difficulty
+ )
exercises.append(question)
print("Added trueFalse: " + str(question))
elif req_exercise == "writeBlanks":
@@ -114,32 +120,28 @@ class ReadingService(IReadingService):
question = await self._gen_paragraph_match_exercise(passage, number_of_exercises, start_id)
exercises.append(question)
print("Added paragraph match: " + str(question))
+ elif req_exercise == "ideaMatch":
+ question = await self._gen_idea_match_exercise(passage, number_of_exercises, start_id)
+ exercises.append(question)
+ print("Added idea match: " + str(question))
start_id = start_id + number_of_exercises
return exercises
- async def _gen_summary_fill_blanks_exercise(self, text: str, quantity: int, start_id, difficulty):
+ async def _gen_summary_fill_blanks_exercise(
+ self, text: str, quantity: int, start_id, difficulty, num_random_words: int = 1
+ ):
messages = [
{
"role": "system",
"content": (
- 'You are a helpful assistant designed to output JSON on this format: '
- '{ "summary": "summary", "words": ["word_1", "word_2"] }')
+ 'You are a helpful assistant designed to output JSON on this format: { "summary": "summary" }'
+ )
},
{
"role": "user",
- "content": (
- f'Summarize this text: "{text}"'
- )
-
- },
- {
- "role": "user",
- "content": (
- f'Select {str(quantity)} {difficulty} difficulty words, it must be words and not '
- 'expressions, from the summary.'
- )
+ "content": f'Summarize this text: "{text}"'
}
]
@@ -148,22 +150,45 @@ class ReadingService(IReadingService):
GPTModels.GPT_4_O, messages, ["summary"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
- replaced_summary = ExercisesHelper.replace_first_occurrences_with_placeholders(response["summary"], response["words"], start_id)
- options_words = ExercisesHelper.add_random_words_and_shuffle(response["words"], 5)
+ messages = [
+ {
+ "role": "system",
+ "content": (
+ 'You are a helpful assistant designed to output JSON on this format: '
+ '{"words": ["word_1", "word_2"] }'
+ )
+ },
+ {
+ "role": "user",
+ "content": (
+ f'Select {quantity} {difficulty} difficulty words, it must be words and not expressions, '
+ f'from this:\n{response["summary"]}'
+ )
+ }
+ ]
+
+ words_response = await self._llm.prediction(
+ GPTModels.GPT_4_O, messages, ["words"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+ )
+
+ response["words"] = words_response["words"]
+ replaced_summary = ExercisesHelper.replace_first_occurrences_with_placeholders(
+ response["summary"], response["words"], start_id
+ )
+ options_words = ExercisesHelper.add_random_words_and_shuffle(response["words"], num_random_words)
solutions = ExercisesHelper.fillblanks_build_solutions_array(response["words"], start_id)
return {
"allowRepetition": True,
"id": str(uuid.uuid4()),
"prompt": (
- "Complete the summary below. Click a blank to select the corresponding word(s) for it.\\nThere are "
+ "Complete the summary below. Write the letter of the corresponding word(s) for it.\\nThere are "
"more words than spaces so you will not use them all. You may use any of the words more than once."
),
"solutions": solutions,
"text": replaced_summary,
"type": "fillBlanks",
"words": options_words
-
}
async def _gen_true_false_not_given_exercise(self, text: str, quantity: int, start_id, difficulty):
@@ -210,7 +235,8 @@ class ReadingService(IReadingService):
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
- '{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}')
+ '{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}'
+ )
},
{
"role": "user",
@@ -243,7 +269,8 @@ class ReadingService(IReadingService):
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
- '{"headings": [ {"heading": "first paragraph heading"}, {"heading": "second paragraph heading"}]}')
+ '{"headings": [ {"heading": "first paragraph heading"}, {"heading": "second paragraph heading"}]}'
+ )
},
{
"role": "user",
@@ -262,7 +289,7 @@ class ReadingService(IReadingService):
options = []
for i, paragraph in enumerate(paragraphs, start=0):
- paragraph["heading"] = headings[i]
+ paragraph["heading"] = headings[i]["heading"]
options.append({
"id": paragraph["letter"],
"sentence": paragraph["paragraph"]
@@ -285,3 +312,38 @@ class ReadingService(IReadingService):
"sentences": sentences[:quantity],
"type": "matchSentences"
}
+
+ async def _gen_idea_match_exercise(self, text: str, quantity: int, start_id):
+ messages = [
+ {
+ "role": "system",
+ "content": (
+ 'You are a helpful assistant designed to output JSON on this format: '
+ '{"ideas": [ '
+ '{"idea": "some idea or opinion", "from": "person, institution whose idea or opinion this is"}, '
+ '{"idea": "some other idea or opinion", "from": "person, institution whose idea or opinion this is"}'
+ ']}'
+ )
+ },
+ {
+ "role": "user",
+ "content": (
+ f'From the text extract {quantity} ideas, theories, opinions and who they are from. '
+ f'The text: {text}'
+ )
+ }
+ ]
+
+ response = await self._llm.prediction(
+ GPTModels.GPT_4_O, messages, ["ideas"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+ )
+ ideas = response["ideas"]
+
+ return {
+ "id": str(uuid.uuid4()),
+ "allowRepetition": False,
+ "options": ExercisesHelper.build_options(ideas),
+ "prompt": "Choose the correct author for the ideas/opinions from the list of authors below.",
+ "sentences": ExercisesHelper.build_sentences(ideas, start_id),
+ "type": "matchSentences"
+ }
diff --git a/app/services/impl/speaking.py b/app/services/impl/speaking.py
index 263b4b0..86cfe1c 100644
--- a/app/services/impl/speaking.py
+++ b/app/services/impl/speaking.py
@@ -3,7 +3,7 @@ import os
import re
import uuid
import random
-from typing import Dict, List
+from typing import Dict, List, Optional
from app.repositories.abc import IFileStorage, IDocumentStore
from app.services.abc import ISpeakingService, ILLMService, IVideoGeneratorService, ISpeechToTextService
@@ -27,29 +27,49 @@ class SpeakingService(ISpeakingService):
self._document_store = document_store
self._stt = stt
self._logger = logging.getLogger(__name__)
+
+ # TODO: Is the difficulty in the prompts supposed to be hardcoded? The response is set with
+ # either the difficulty in the request or a random one yet the prompt doesn't change
self._tasks = {
"task_1": {
"get": {
- "json_template": (
- '{"topic": "topic", "question": "question"}'
- ),
+ "json_template": {
+ "first_topic": "topic 1",
+ "second_topic": "topic 2",
+ "questions": [
+ (
+ "Introductory question about the first topic, starting the topic with "
+ "'Let's talk about x' and then the question."
+ ),
+ "Follow up question about the first topic",
+ "Follow up question about the first topic",
+ "Question about second topic",
+ "Follow up question about the second topic",
+ ]
+ },
"prompt": (
- 'Craft a thought-provoking question of {difficulty} difficulty for IELTS Speaking Part 1 '
+ 'Craft 5 simple and single questions of easy difficulty for IELTS Speaking Part 1 '
'that encourages candidates to delve deeply into personal experiences, preferences, or '
- 'insights on the topic of "{topic}". Instruct the candidate to offer not only detailed '
- 'descriptions but also provide nuanced explanations, examples, or anecdotes to enrich '
- 'their response. Make sure that the generated question does not contain forbidden subjects in '
+ 'insights on the topic of "{first_topic}" and the topic of "{second_topic}". '
+ 'Make sure that the generated questions do not contain forbidden subjects in '
'muslim countries.'
)
}
},
"task_2": {
"get": {
- "json_template": (
- '{"topic": "topic", "question": "question", "prompts": ["prompt_1", "prompt_2", "prompt_3"]}'
- ),
+ "json_template": {
+ "topic": "topic",
+ "question": "question",
+ "prompts": [
+ "prompt_1",
+ "prompt_2",
+ "prompt_3"
+ ],
+ "suffix": "And explain why..."
+ },
"prompt": (
- 'Create a question of {difficulty} difficulty for IELTS Speaking Part 2 '
+ 'Create a question of medium difficulty for IELTS Speaking Part 2 '
'that encourages candidates to narrate a personal experience or story related to the topic '
'of "{topic}". Include 3 prompts that guide the candidate to describe '
'specific aspects of the experience, such as details about the situation, '
@@ -60,11 +80,18 @@ class SpeakingService(ISpeakingService):
},
"task_3": {
"get": {
- "json_template": (
- '{"topic": "topic", "questions": ["question", "question", "question"]}'
- ),
+ "json_template": {
+ "topic": "topic",
+ "questions": [
+ "Introductory question about the topic.",
+ "Follow up question about the topic",
+ "Follow up question about the topic",
+ "Follow up question about the topic",
+ "Follow up question about the topic"
+ ]
+ },
"prompt": (
- 'Formulate a set of 3 questions of {difficulty} difficulty for IELTS Speaking Part 3 '
+ 'Formulate a set of 5 single questions of hard difficulty for IELTS Speaking Part 3 '
'that encourage candidates to engage in a meaningful discussion on the topic of "{topic}". '
'Provide inquiries, ensuring they explore various aspects, perspectives, and implications '
'related to the topic. Make sure that the generated question does not contain forbidden '
@@ -74,28 +101,57 @@ class SpeakingService(ISpeakingService):
},
}
- async def get_speaking_task(self, task_id: int, topic: str, difficulty: str):
- task_values = self._tasks[f'task_{task_id}']['get']
+ async def get_speaking_part(
+ self, part: int, topic: str, difficulty: str, second_topic: Optional[str] = None
+ ) -> Dict:
+ task_values = self._tasks[f'task_{part}']['get']
+
+ if part == 1:
+ task_prompt = task_values["prompt"].format(first_topic=topic, second_topic=second_topic)
+ else:
+ task_prompt = task_values["prompt"].format(topic=topic)
+
messages = [
{
"role": "system",
"content": (
- 'You are a helpful assistant designed to output JSON on this format: ' +
- task_values["json_template"]
+ 'You are a helpful assistant designed to output JSON on this format: '
+ f'{task_values["json_template"]}'
)
},
{
"role": "user",
- "content": str(task_values["prompt"]).format(topic=topic, difficulty=difficulty)
+ "content": task_prompt
}
]
+ part_specific = {
+ "1": 'The questions should lead to the usage of 4 verb tenses (present perfect, present, past and future).',
+ "2": (
+ 'The prompts must not be questions. Also include a suffix like the ones in the IELTS exams '
+ 'that start with "And explain why".'
+ )
+ }
+
+ if part in {1, 2}:
+ messages.append({
+ "role": "user",
+ "content": part_specific[str(part)]
+ })
+
+ if part in {1, 3}:
+ messages.append({
+ "role": "user",
+ "content": 'They must be 1 single question each and not be double-barreled questions.'
+ })
+
+ fields_to_check = ["first_topic"] if part == 1 else FieldsAndExercises.GEN_FIELDS
+
response = await self._llm.prediction(
- GPTModels.GPT_4_O, messages, FieldsAndExercises.GEN_FIELDS, TemperatureSettings.GEN_QUESTION_TEMPERATURE
+ GPTModels.GPT_4_O, messages, fields_to_check, TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
- # TODO: this was on GET /speaking_task_3 don't know if it is intentional only for 3
- if task_id == 3:
+ if part == 3:
# Remove the numbers from the questions only if the string starts with a number
response["questions"] = [
re.sub(r"^\d+\.\s*", "", question)
@@ -103,117 +159,15 @@ class SpeakingService(ISpeakingService):
for question in response["questions"]
]
- response["type"] = task_id
+ response["type"] = part
response["difficulty"] = difficulty
- response["topic"] = topic
+
+ if part in {2, 3}:
+ response["topic"] = topic
+
return response
- async def grade_speaking_task_1_and_2(
- self, task: int, question: str, answer_firebase_path: str, sound_file_name: str
- ):
- request_id = uuid.uuid4()
- req_data = {
- "question": question,
- "answer": answer_firebase_path
- }
- self._logger.info(
- f'POST - speaking_task_{task} - Received request to grade speaking task {task}. '
- f'Use this id to track the logs: {str(request_id)} - Request data: {str(req_data)}'
- )
-
- self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Downloading file {answer_firebase_path}')
-
- await self._file_storage.download_firebase_file(answer_firebase_path, sound_file_name)
-
- self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Downloaded file {answer_firebase_path} to {sound_file_name}')
-
- answer = await self._stt.speech_to_text(sound_file_name)
-
- self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Transcripted answer: {answer}')
-
- if TextHelper.has_x_words(answer, 20):
- messages = [
- {
- "role": "system",
- "content": (
- 'You are a helpful assistant designed to output JSON on this format: '
- '{"comment": "comment about answer quality", "overall": 0.0, '
- '"task_response": {"Fluency and Coherence": 0.0, "Lexical Resource": 0.0, '
- '"Grammatical Range and Accuracy": 0.0, "Pronunciation": 0.0}}')
- },
- {
- "role": "user",
- "content": (
- f'Evaluate the given Speaking Part {task} response based on the IELTS grading system, ensuring a '
- 'strict assessment that penalizes errors. Deduct points for deviations from the task, and '
- 'assign a score of 0 if the response fails to address the question. Additionally, provide '
- 'detailed commentary highlighting both strengths and weaknesses in the response.'
- f'\n Question: "{question}" \n Answer: "{answer}"')
- }
- ]
-
- self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Requesting grading of the answer.')
-
- response = await self._llm.prediction(
- GPTModels.GPT_3_5_TURBO,
- messages,
- ["comment"],
- TemperatureSettings.GRADING_TEMPERATURE
- )
-
- self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Answer graded: {str(response)}')
-
- perfect_answer_messages = [
- {
- "role": "system",
- "content": (
- 'You are a helpful assistant designed to output JSON on this format: '
- '{"answer": "perfect answer"}'
- )
- },
- {
- "role": "user",
- "content": (
- 'Provide a perfect answer according to ielts grading system to the following '
- f'Speaking Part {task} question: "{question}"')
- }
- ]
-
- self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Requesting perfect answer.')
-
- response = await self._llm.prediction(
- GPTModels.GPT_3_5_TURBO,
- perfect_answer_messages,
- ["answer"],
- TemperatureSettings.GEN_QUESTION_TEMPERATURE
- )
- response['perfect_answer'] = response["answer"]
-
- self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Perfect answer: ' + response['perfect_answer'])
-
- response['transcript'] = answer
-
- self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Requesting fixed text.')
-
- response['fixed_text'] = await self._get_speaking_corrections(answer)
-
- self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Fixed text: ' + response['fixed_text'])
-
- if response["overall"] == "0.0" or response["overall"] == 0.0:
- response["overall"] = self._calculate_overall(response)
-
- self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Final response: {str(response)}')
- return response
- else:
- self._logger.info(
- f'POST - speaking_task_{task} - {str(request_id)} - '
- f'The answer had less words than threshold 20 to be graded. Answer: {answer}'
- )
-
- return self._zero_rating("The audio recorded does not contain enough english words to be graded.")
-
- # TODO: When there's more time grade_speaking_task_1_2 can be merged with this, when there's more time
- async def grade_speaking_task_3(self, answers: Dict, task: int = 3):
+ async def grade_speaking_task(self, task: int, answers: List[Dict]) -> Dict:
request_id = uuid.uuid4()
self._logger.info(
f'POST - speaking_task_{task} - Received request to grade speaking task {task}. '
@@ -222,157 +176,219 @@ class SpeakingService(ISpeakingService):
text_answers = []
perfect_answers = []
- self._logger.info(
- f'POST - speaking_task_{task} - {str(request_id)} - Received {str(len(answers))} total answers.'
- )
+
+ if task != 2:
+ self._logger.info(
+ f'POST - speaking_task_{task} - {str(request_id)} - Received {str(len(answers))} total answers.'
+ )
+
for item in answers:
sound_file_name = FilePaths.AUDIO_FILES_PATH + str(uuid.uuid4())
- self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Downloading file {item["answer"]}')
+ self._logger.info(f'POST - speaking_task_{task} - {request_id} - Downloading file {item["answer"]}')
await self._file_storage.download_firebase_file(item["answer"], sound_file_name)
self._logger.info(
- f'POST - speaking_task_{task} - {str(request_id)} - '
- 'Downloaded file ' + item["answer"] + f' to {sound_file_name}'
+ f'POST - speaking_task_{task} - {request_id} - '
+ f'Downloaded file {item["answer"]} to {sound_file_name}'
)
answer_text = await self._stt.speech_to_text(sound_file_name)
- self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Transcripted answer: {answer_text}')
+ self._logger.info(f'POST - speaking_task_{task} - {request_id} - Transcripted answer: {answer_text}')
text_answers.append(answer_text)
item["answer"] = answer_text
os.remove(sound_file_name)
+ # TODO: This will end the grading of all answers if a single one does not have enough words
+ # don't know if this is intended
if not TextHelper.has_x_words(answer_text, 20):
self._logger.info(
- f'POST - speaking_task_{task} - {str(request_id)} - '
- f'The answer had less words than threshold 20 to be graded. Answer: {answer_text}')
+ f'POST - speaking_task_{task} - {request_id} - '
+ f'The answer had less words than threshold 20 to be graded. Answer: {answer_text}'
+ )
return self._zero_rating("The audio recorded does not contain enough english words to be graded.")
- perfect_answer_messages = [
- {
- "role": "system",
- "content": (
- 'You are a helpful assistant designed to output JSON on this format: '
- '{"answer": "perfect answer"}'
- )
- },
- {
- "role": "user",
- "content": (
- 'Provide a perfect answer according to ielts grading system to the following '
- f'Speaking Part {task} question: "{item["question"]}"'
- )
- }
- ]
self._logger.info(
- f'POST - speaking_task_{task} - {str(request_id)} - '
+ f'POST - speaking_task_{task} - {request_id} - '
f'Requesting perfect answer for question: {item["question"]}'
)
+ perfect_answers.append(await self._get_perfect_answer(task, item["question"]))
- perfect_answers.append(
- await self._llm.prediction(
- GPTModels.GPT_3_5_TURBO,
- perfect_answer_messages,
- ["answer"],
- TemperatureSettings.GEN_QUESTION_TEMPERATURE
- )
+ if task in {1, 3}:
+ self._logger.info(
+ f'POST - speaking_task_{task} - {request_id} - Formatting answers and questions for prompt.'
)
- messages = [
- {
- "role": "system",
- "content": (
- 'You are a helpful assistant designed to output JSON on this format: '
- '{"comment": "comment about answer quality", "overall": 0.0, '
- '"task_response": {"Fluency and Coherence": 0.0, "Lexical Resource": 0.0, '
- '"Grammatical Range and Accuracy": 0.0, "Pronunciation": 0.0}}')
- }
- ]
- message = (
- f"Evaluate the given Speaking Part {task} response based on the IELTS grading system, ensuring a "
- "strict assessment that penalizes errors. Deduct points for deviations from the task, and "
- "assign a score of 0 if the response fails to address the question. Additionally, provide detailed "
- "commentary highlighting both strengths and weaknesses in the response."
- "\n\n The questions and answers are: \n\n'")
+ formatted_text = ""
+ for i, entry in enumerate(answers, start=1):
+ formatted_text += f"**Question {i}:**\n{entry['question']}\n\n"
+ formatted_text += f"**Answer {i}:**\n{entry['answer']}\n\n"
- self._logger.info(
- f'POST - speaking_task_{task} - {str(request_id)} - Formatting answers and questions for prompt.'
- )
+ self._logger.info(
+ f'POST - speaking_task_{task} - {request_id} - '
+ f'Formatted answers and questions for prompt: {formatted_text}'
+ )
+ questions_and_answers = f'\n\n The questions and answers are: \n\n{formatted_text}'
+ else:
+ questions_and_answers = f'\n Question: "{answers[0]["question"]}" \n Answer: "{answers[0]["answer"]}"'
- formatted_text = ""
- for i, entry in enumerate(answers, start=1):
- formatted_text += f"**Question {i}:**\n{entry['question']}\n\n"
- formatted_text += f"**Answer {i}:**\n{entry['answer']}\n\n"
+ self._logger.info(f'POST - speaking_task_{task} - {request_id} - Requesting grading of the answer(s).')
+ response = await self._grade_task(task, questions_and_answers)
- self._logger.info(
- f'POST - speaking_task_{task} - {str(request_id)} - Formatted answers and questions for prompt: {formatted_text}'
- )
+ self._logger.info(f'POST - speaking_task_{task} - {request_id} - Answer(s) graded: {response}')
- message += formatted_text
+ if task in {1, 3}:
+ self._logger.info(
+ f'POST - speaking_task_{task} - {request_id} - Adding perfect answer(s) to response.')
- messages.append({
- "role": "user",
- "content": message
- })
+ # TODO: check if it is answer["answer"] instead
+ for i, answer in enumerate(perfect_answers, start=1):
+ response['perfect_answer_' + str(i)] = answer
- self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Requesting grading of the answers.')
+ self._logger.info(
+ f'POST - speaking_task_{task} - {request_id} - Adding transcript and fixed texts to response.'
+ )
- response = await self._llm.prediction(
- GPTModels.GPT_3_5_TURBO, messages, ["comment"], TemperatureSettings.GRADING_TEMPERATURE
- )
+ for i, answer in enumerate(text_answers, start=1):
+ response['transcript_' + str(i)] = answer
+ response['fixed_text_' + str(i)] = await self._get_speaking_corrections(answer)
+ else:
+ response['transcript'] = answers[0]["answer"]
- self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Answers graded: {str(response)}')
+ self._logger.info(f'POST - speaking_task_{task} - {request_id} - Requesting fixed text.')
+ response['fixed_text'] = await self._get_speaking_corrections(answers[0]["answer"])
+ self._logger.info(f'POST - speaking_task_{task} - {request_id} - Fixed text: {response["fixed_text"]}')
- self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Adding perfect answers to response.')
-
- for i, answer in enumerate(perfect_answers, start=1):
- response['perfect_answer_' + str(i)] = answer
-
- self._logger.info(
- f'POST - speaking_task_{task} - {str(request_id)} - Adding transcript and fixed texts to response.'
- )
-
- for i, answer in enumerate(text_answers, start=1):
- response['transcript_' + str(i)] = answer
- response['fixed_text_' + str(i)] = await self._get_speaking_corrections(answer)
-
- if response["overall"] == "0.0" or response["overall"] == 0.0:
- response["overall"] = self._calculate_overall(response)
-
- self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Final response: {str(response)}')
+ response['perfect_answer'] = perfect_answers[0]["answer"]
+ response["overall"] = self._fix_speaking_overall(response["overall"], response["task_response"])
+ self._logger.info(f'POST - speaking_task_{task} - {request_id} - Final response: {response}')
return response
# ==================================================================================================================
# grade_speaking_task helpers
# ==================================================================================================================
+ async def _get_perfect_answer(self, task: int, question: str):
+ messages = [
+ {
+ "role": "system",
+ "content": (
+ 'You are a helpful assistant designed to output JSON on this format: {"answer": "perfect answer"}'
+ )
+ },
+ {
+ "role": "user",
+ "content": (
+ 'Provide a perfect answer according to ielts grading system to the following '
+ f'Speaking Part {task} question: "{question}"'
+ )
+ }
+ ]
+
+ if task == 1:
+ messages.append({
+ "role": "user",
+ "content": 'The answer must be 2 or 3 sentences long.'
+ })
+
+ gpt_model = GPTModels.GPT_4_O if task == 1 else GPTModels.GPT_3_5_TURBO
+
+ return await self._llm.prediction(
+ gpt_model, messages, ["answer"], TemperatureSettings.GRADING_TEMPERATURE
+ )
+
+ async def _grade_task(self, task: int, questions_and_answers: str) -> Dict:
+ messages = [
+ {
+ "role": "system",
+ "content": (
+ f'You are a helpful assistant designed to output JSON on this format: {self._grade_template()}'
+ )
+ },
+ {
+ "role": "user",
+ "content": (
+ f'Evaluate the given Speaking Part {task} response based on the IELTS grading system, ensuring a '
+ 'strict assessment that penalizes errors. Deduct points for deviations from the task, and '
+ 'assign a score of 0 if the response fails to address the question. Additionally, provide '
+ 'detailed commentary highlighting both strengths and weaknesses in the response.'
+ ) + questions_and_answers
+ }
+ ]
+
+ task_specific = {
+ "1": (
+ 'Address the student as "you". If the answers are not 2 or 3 sentences long, warn the '
+ 'student that they should be.'
+ ),
+ "2": 'Address the student as "you"',
+ "3": 'Address the student as "you" and pay special attention to coherence between the answers.'
+ }
+
+ messages.append({
+ "role": "user",
+ "content": task_specific[str(task)]
+ })
+
+ if task in {1, 3}:
+ messages.extend([
+ {
+ "role": "user",
+ "content": (
+ 'For pronunciations act as if you heard the answers and they were transcripted '
+ 'as you heard them.'
+ )
+ },
+ {
+ "role": "user",
+ "content": 'The comments must be long, detailed, justify the grading and suggest improvements.'
+ }
+ ])
+
+ return await self._llm.prediction(
+ GPTModels.GPT_4_O, messages, ["comment"], TemperatureSettings.GRADING_TEMPERATURE
+ )
+
+ @staticmethod
+ def _fix_speaking_overall(overall: float, task_response: dict):
+ grades = [category["grade"] for category in task_response.values()]
+
+ if overall > max(grades) or overall < min(grades):
+ total_sum = sum(grades)
+ average = total_sum / len(grades)
+ rounded_average = round(average, 0)
+ return rounded_average
+
+ return overall
+
@staticmethod
def _zero_rating(comment: str):
return {
"comment": comment,
"overall": 0,
"task_response": {
- "Fluency and Coherence": 0,
- "Lexical Resource": 0,
- "Grammatical Range and Accuracy": 0,
- "Pronunciation": 0
+ "Fluency and Coherence": {
+ "grade": 0.0,
+ "comment": ""
+ },
+ "Lexical Resource": {
+ "grade": 0.0,
+ "comment": ""
+ },
+ "Grammatical Range and Accuracy": {
+ "grade": 0.0,
+ "comment": ""
+ },
+ "Pronunciation": {
+ "grade": 0.0,
+ "comment": ""
+ }
}
}
- @staticmethod
- def _calculate_overall(response: Dict):
- return round(
- (
- response["task_response"]["Fluency and Coherence"] +
- response["task_response"]["Lexical Resource"] +
- response["task_response"]["Grammatical Range and Accuracy"] +
- response["task_response"]["Pronunciation"]
- ) / 4, 1
- )
-
async def _get_speaking_corrections(self, text):
messages = [
{
@@ -409,6 +425,7 @@ class SpeakingService(ISpeakingService):
self._logger.info(f'Saved speaking to DB with id {req_id} : {str(template)}')
async def _create_video_per_part(self, exercises: List[Dict], template: Dict, part: int):
+ avatar = (random.choice(list(AvatarEnum))).value
template_index = part - 1
# Using list comprehension to find the element with the desired value in the 'type' field
@@ -418,26 +435,12 @@ class SpeakingService(ISpeakingService):
if found_exercises:
exercise = found_exercises[0]
self._logger.info(f'Creating video for speaking part {part}')
- if part in {1, 2}:
- result = await self._create_video(
- exercise["question"],
- (random.choice(list(AvatarEnum))).value,
- f'Failed to create video for part {part} question: {str(exercise["question"])}'
- )
- if result is not None:
- if part == 2:
- template["exercises"][template_index]["prompts"] = exercise["prompts"]
-
- template["exercises"][template_index]["text"] = exercise["question"]
- template["exercises"][template_index]["title"] = exercise["topic"]
- template["exercises"][template_index]["video_url"] = result["video_url"]
- template["exercises"][template_index]["video_path"] = result["video_path"]
- else:
+ if part in {1, 3}:
questions = []
for question in exercise["questions"]:
result = await self._create_video(
question,
- (random.choice(list(AvatarEnum))).value,
+ avatar,
f'Failed to create video for part {part} question: {str(exercise["question"])}'
)
if result is not None:
@@ -449,63 +452,139 @@ class SpeakingService(ISpeakingService):
questions.append(video)
template["exercises"][template_index]["prompts"] = questions
- template["exercises"][template_index]["title"] = exercise["topic"]
+ if part == 1:
+ template["exercises"][template_index]["first_title"] = exercise["first_topic"]
+ template["exercises"][template_index]["second_title"] = exercise["second_topic"]
+ else:
+ template["exercises"][template_index]["title"] = exercise["topic"]
+ else:
+ result = await self._create_video(
+ exercise["question"],
+ avatar,
+ f'Failed to create video for part {part} question: {str(exercise["question"])}'
+ )
+ if result is not None:
+ template["exercises"][template_index]["prompts"] = exercise["prompts"]
+ template["exercises"][template_index]["text"] = exercise["question"]
+ template["exercises"][template_index]["title"] = exercise["topic"]
+ template["exercises"][template_index]["video_url"] = result["video_url"]
+ template["exercises"][template_index]["video_path"] = result["video_path"]
if not found_exercises:
template["exercises"].pop(template_index)
return template
- # TODO: Check if it is intended to log the original question
- async def generate_speaking_video(self, original_question: str, topic: str, avatar: str, prompts: List[str]):
- if len(prompts) > 0:
- question = original_question + " In your answer you should consider: " + " ".join(prompts)
- else:
- question = original_question
-
- error_msg = f'Failed to create video for part 1 question: {original_question}'
-
- result = await self._create_video(
- question,
- avatar,
- error_msg
+ async def generate_video(
+ self, part: int, avatar: str, topic: str, questions: list[str],
+ *,
+ second_topic: Optional[str] = None,
+ prompts: Optional[list[str]] = None,
+ suffix: Optional[str] = None,
+ ):
+ request_id = str(uuid.uuid4())
+ # TODO: request data
+ self._logger.info(
+ f'POST - generate_video_{part} - Received request to generate video {part}. '
+ f'Use this id to track the logs: {request_id}'
)
- if result is not None:
- return {
- "text": original_question,
- "prompts": prompts,
- "title": topic,
- **result,
- "type": "speaking",
- "id": uuid.uuid4()
- }
- else:
- return str(error_msg)
+ part_questions = self._get_part_questions(part, questions, avatar)
+ videos = []
- async def generate_interactive_video(self, questions: List[str], avatar: str, topic: str):
- sp_questions = []
- self._logger.info('Creating videos for speaking part 3')
- for question in questions:
+ self._logger.info(f'POST - generate_video_{part} - {request_id} - Creating videos for speaking part {part}.')
+ for question in part_questions:
+ self._logger.info(f'POST - generate_video_{part} - {request_id} - Creating video for question: {question}')
result = await self._create_video(
question,
avatar,
- f'Failed to create video for part 3 question: {question}'
+ 'POST - generate_video_{p} - {r} - Failed to create video for part {p} question: {q}'.format(
+ p=part, r=request_id, q=question
+ )
)
-
if result is not None:
+ self._logger.info(f'POST - generate_video_{part} - {request_id} - Video created')
+ self._logger.info(
+ f'POST - generate_video_{part} - {request_id} - Uploaded video to firebase: {result["video_url"]}'
+ )
video = {
"text": question,
- **result
+ "video_path": result["video_path"],
+ "video_url": result["video_url"]
}
- sp_questions.append(video)
+ videos.append(video)
- return {
- "prompts": sp_questions,
- "title": topic,
- "type": "interactiveSpeaking",
- "id": uuid.uuid4()
- }
+ if part == 2 and len(videos) == 0:
+ raise Exception(f'Failed to create video for part 2 question: {questions[0]}')
+
+ return self._get_part_response(part, topic, videos, second_topic, prompts, suffix)
+
+ @staticmethod
+ def _get_part_questions(part: int, questions: list[str], avatar: str):
+ part_questions: list[str] = []
+
+ if part == 1:
+ id_to_name = {
+ "5912afa7c77c47d3883af3d874047aaf": "MATTHEW",
+ "9e58d96a383e4568a7f1e49df549e0e4": "VERA",
+ "d2cdd9c0379a4d06ae2afb6e5039bd0c": "EDWARD",
+ "045cb5dcd00042b3a1e4f3bc1c12176b": "TANYA",
+ "1ae1e5396cc444bfad332155fdb7a934": "KAYLA",
+ "0ee6aa7cc1084063a630ae514fccaa31": "JEROME",
+ "5772cff935844516ad7eeff21f839e43": "TYLER",
+
+ }
+ part_questions.extend(
+ [
+ "Hello my name is " + id_to_name.get(avatar) + ", what is yours?",
+ "Do you work or do you study?",
+ *questions
+ ]
+ )
+ elif part == 2:
+ # Removed as the examiner should not say what is on the card.
+ # question = question + " In your answer you should consider: " + " ".join(prompts) + suffix
+ part_questions.append(f'{questions[0]}\nYou have 1 minute to take notes.')
+ elif part == 3:
+ part_questions = questions
+
+ return part_questions
+
+ @staticmethod
+ def _get_part_response(
+ part: int,
+ topic: str,
+ videos: list[dict],
+ second_topic: Optional[str],
+ prompts: Optional[list[str]],
+ suffix: Optional[str]
+ ):
+ response = {}
+ if part == 1:
+ response = {
+ "prompts": videos,
+ "first_title": topic,
+ "second_title": second_topic,
+ "type": "interactiveSpeaking"
+ }
+ if part == 2:
+ response = {
+ "prompts": prompts,
+ "title": topic,
+ "suffix": suffix,
+ "type": "speaking",
+ # includes text, video_url and video_path
+ **videos[0]
+ }
+ if part == 3:
+ response = {
+ "prompts": videos,
+ "title": topic,
+ "type": "interactiveSpeaking",
+ }
+
+ response["id"] = str(uuid.uuid4())
+ return response
async def _create_video(self, question: str, avatar: str, error_message: str):
result = await self._vid_gen.create_video(question, avatar)
@@ -519,3 +598,36 @@ class SpeakingService(ISpeakingService):
}
self._logger.error(error_message)
return None
+
+ @staticmethod
+ def _grade_template():
+ return {
+ "comment": "extensive comment about answer quality",
+ "overall": 0.0,
+ "task_response": {
+ "Fluency and Coherence": {
+ "grade": 0.0,
+ "comment": (
+ "extensive comment about fluency and coherence, use examples to justify the grade awarded."
+ )
+ },
+ "Lexical Resource": {
+ "grade": 0.0,
+ "comment": "extensive comment about lexical resource, use examples to justify the grade awarded."
+ },
+ "Grammatical Range and Accuracy": {
+ "grade": 0.0,
+ "comment": (
+ "extensive comment about grammatical range and accuracy, use examples to justify the "
+ "grade awarded."
+ )
+ },
+ "Pronunciation": {
+ "grade": 0.0,
+ "comment": (
+ "extensive comment about pronunciation on the transcribed answer, use examples to justify the "
+ "grade awarded."
+ )
+ }
+ }
+ }
\ No newline at end of file
diff --git a/app/services/impl/third_parties/openai.py b/app/services/impl/third_parties/openai.py
index 3c7eed2..e049d93 100644
--- a/app/services/impl/third_parties/openai.py
+++ b/app/services/impl/third_parties/openai.py
@@ -1,13 +1,16 @@
import json
import re
import logging
-from typing import List, Optional
+from typing import List, Optional, Callable, TypeVar
from openai import AsyncOpenAI
from openai.types.chat import ChatCompletionMessageParam
from app.services.abc import ILLMService
from app.helpers import count_tokens
from app.configs.constants import BLACKLISTED_WORDS
+from pydantic import BaseModel
+
+T = TypeVar('T', bound=BaseModel)
class OpenAI(ILLMService):
@@ -18,6 +21,7 @@ class OpenAI(ILLMService):
def __init__(self, client: AsyncOpenAI):
self._client = client
self._logger = logging.getLogger(__name__)
+ self._default_model = "gpt-4o-2024-08-06"
async def prediction(
self,
@@ -94,4 +98,53 @@ class OpenAI(ILLMService):
@staticmethod
def _check_fields(obj, fields):
- return all(field in obj for field in fields)
\ No newline at end of file
+ return all(field in obj for field in fields)
+
+ async def pydantic_prediction(
+ self,
+ messages: List[ChatCompletionMessageParam],
+ map_to_model: Callable,
+ json_scheme: str,
+ *,
+ model: Optional[str] = None,
+ temperature: Optional[float] = None,
+ max_retries: int = 3
+ ) -> List[T] | T | None:
+ params = {
+ "messages": messages,
+ "response_format": {"type": "json_object"},
+ "model": model if model else self._default_model
+ }
+
+ if temperature:
+ params["temperature"] = temperature
+
+ attempt = 0
+ while attempt < max_retries:
+ result = await self._client.chat.completions.create(**params)
+ result_content = result.choices[0].message.content
+ try:
+ result_json = json.loads(result_content)
+ return map_to_model(result_json)
+ except Exception as e:
+ attempt += 1
+ self._logger.info(f"GPT returned malformed response: {result_content}\n {str(e)}")
+ params["messages"] = [
+ {
+ "role": "user",
+ "content": (
+ "Your previous response wasn't in the json format I've explicitly told you to output. "
+ f"In your next response, you will fix it and return me just the json I've asked."
+ )
+ },
+ {
+ "role": "user",
+ "content": (
+ f"Previous response: {result_content}\n"
+ f"JSON format: {json_scheme}"
+ )
+ }
+ ]
+ if attempt >= max_retries:
+ self._logger.error(f"Max retries exceeded!")
+ return None
diff --git a/app/services/impl/training.py b/app/services/impl/training.py
deleted file mode 100644
index d74abcf..0000000
--- a/app/services/impl/training.py
+++ /dev/null
@@ -1,68 +0,0 @@
-import re
-from functools import reduce
-
-from app.configs.constants import TemperatureSettings, GPTModels
-from app.helpers import count_tokens
-from app.services.abc import ILLMService, ITrainingService
-
-
-class TrainingService(ITrainingService):
-
- def __init__(self, llm: ILLMService):
- self._llm = llm
-
- async def fetch_tips(self, context: str, question: str, answer: str, correct_answer: str):
- messages = self._get_question_tips(question, answer, correct_answer, context)
-
- token_count = reduce(lambda count, item: count + count_tokens(item)['n_tokens'],
- map(lambda x: x["content"], filter(lambda x: "content" in x, messages)), 0)
-
- response = await self._llm.prediction(
- GPTModels.GPT_3_5_TURBO,
- messages,
- None,
- TemperatureSettings.TIPS_TEMPERATURE,
- token_count=token_count
- )
-
- if isinstance(response, str):
- response = re.sub(r"^[a-zA-Z0-9_]+\:\s*", "", response)
-
- return response
-
- @staticmethod
- def _get_question_tips(question: str, answer: str, correct_answer: str, context: str = None):
- messages = [
- {
- "role": "user",
- "content": (
- "You are a IELTS exam program that analyzes incorrect answers to questions and gives tips to "
- "help students understand why it was a wrong answer and gives helpful insight for the future. "
- "The tip should refer to the context and question."
- ),
- }
- ]
-
- if not (context is None or context == ""):
- messages.append({
- "role": "user",
- "content": f"This is the context for the question: {context}",
- })
-
- messages.extend([
- {
- "role": "user",
- "content": f"This is the question: {question}",
- },
- {
- "role": "user",
- "content": f"This is the answer: {answer}",
- },
- {
- "role": "user",
- "content": f"This is the correct answer: {correct_answer}",
- }
- ])
-
- return messages
-
diff --git a/app/services/impl/training/__init__.py b/app/services/impl/training/__init__.py
new file mode 100644
index 0000000..8ea231b
--- /dev/null
+++ b/app/services/impl/training/__init__.py
@@ -0,0 +1,7 @@
+from .training import TrainingService
+from .kb import TrainingContentKnowledgeBase
+
+__all__ = [
+ "TrainingService",
+ "TrainingContentKnowledgeBase"
+]
diff --git a/app/services/impl/training/kb.py b/app/services/impl/training/kb.py
new file mode 100644
index 0000000..dce316e
--- /dev/null
+++ b/app/services/impl/training/kb.py
@@ -0,0 +1,88 @@
+import json
+import os
+from logging import getLogger
+from typing import Dict, List
+
+import faiss
+import pickle
+
+from app.services.abc import IKnowledgeBase
+
+
class TrainingContentKnowledgeBase(IKnowledgeBase):
    """FAISS-backed knowledge base of training tips, partitioned by category.

    Each category has its own ``IndexFlatL2`` index persisted under
    ``./faiss/<category>_tips_index.faiss``, plus one shared pickle holding
    per-category tip metadata ({"id", "text"} dicts).
    """

    def __init__(self, embeddings, path: str = 'pathways_2_rw_with_ids.json'):
        # The logger must exist before load_indices_and_metadata() runs,
        # because that method logs while loading. The original assigned the
        # logger *after* the call, so construction crashed with
        # AttributeError on self._logger.
        self._logger = getLogger(__name__)
        self._embedding_model = embeddings
        # Raw tips JSON is only needed to (re)build the indices; loading is
        # disabled by default. Re-enable _read_json(path) before calling
        # print_category_count() / create_embeddings_and_save_them().
        self._tips = None  # self._read_json(path)
        self._category_metadata = None
        self._indices = None
        self.load_indices_and_metadata()

    @staticmethod
    def _read_json(path: str) -> Dict[str, any]:
        """Load and parse the tips JSON file."""
        with open(path, 'r', encoding="utf-8") as json_file:
            return json.loads(json_file.read())

    def print_category_count(self):
        """Print how many tips exist per (normalised) category name."""
        category_tips = {}
        for unit in self._tips['units']:
            for page in unit['pages']:
                for tip in page['tips']:
                    category = tip['category'].lower().replace(" ", "_")
                    # get(...) + 1 counts the first occurrence too; the
                    # original initialised new categories to 0 and therefore
                    # under-counted every category by one.
                    category_tips[category] = category_tips.get(category, 0) + 1
        print(category_tips)

    def create_embeddings_and_save_them(self) -> None:
        """Build one FAISS index per tip category and persist both the
        indices and the metadata pickle under ./faiss.

        Requires ``self._tips`` to be loaded (see __init__ note).
        """
        category_embeddings = {}
        category_metadata = {}

        for unit in self._tips['units']:
            for page in unit['pages']:
                for tip in page['tips']:
                    category = tip['category'].lower().replace(" ", "_")
                    if category not in category_embeddings:
                        category_embeddings[category] = []
                        category_metadata[category] = []

                    category_embeddings[category].append(tip['embedding'])
                    category_metadata[category].append({"id": tip['id'], "text": tip['text']})

        category_indices = {}
        for category, embeddings in category_embeddings.items():
            # NOTE(review): this passes tip['embedding'] values through the
            # embedding model's encode() — confirm whether those are raw
            # texts or already precomputed vectors.
            embeddings_array = self._embedding_model.encode(embeddings)
            index = faiss.IndexFlatL2(embeddings_array.shape[1])
            index.add(embeddings_array)
            category_indices[category] = index

            # Persist each category's index as soon as it is built.
            faiss.write_index(index, f"./faiss/{category}_tips_index.faiss")

        with open("./faiss/tips_metadata.pkl", "wb") as f:
            pickle.dump(category_metadata, f)

    def load_indices_and_metadata(
        self,
        directory: str = './faiss',
        suffix: str = '_tips_index.faiss',
        metadata_path: str = './faiss/tips_metadata.pkl'
    ):
        """Load every ``*<suffix>`` index in *directory* (keyed by category
        name, i.e. the filename minus the suffix) and the metadata pickle."""
        files = os.listdir(directory)
        self._indices = {}
        for file in files:
            if file.endswith(suffix):
                category = file[:-len(suffix)]
                self._indices[category] = faiss.read_index(f'{directory}/{file}')
                self._logger.info(f'Loaded embeddings for {category} category.')

        with open(metadata_path, 'rb') as f:
            self._category_metadata = pickle.load(f)
        self._logger.info("Loaded tips metadata")

    def query_knowledge_base(self, query: str, category: str, top_k: int = 5) -> List[Dict[str, str]]:
        """Return the *top_k* nearest tips ({"id", "text"}) for *query*
        within *category*. Raises KeyError for an unknown category."""
        query_embedding = self._embedding_model.encode([query])
        index = self._indices[category]
        distances, neighbour_ids = index.search(query_embedding, top_k)
        return [self._category_metadata[category][i] for i in neighbour_ids[0]]
diff --git a/app/services/impl/training/training.py b/app/services/impl/training/training.py
new file mode 100644
index 0000000..53f897b
--- /dev/null
+++ b/app/services/impl/training/training.py
@@ -0,0 +1,459 @@
+import re
+from datetime import datetime
+from functools import reduce
+from logging import getLogger
+
+from typing import Dict, List
+
+from app.configs.constants import TemperatureSettings, GPTModels
+from app.helpers import count_tokens
+from app.repositories.abc import IDocumentStore
+from app.services.abc import ILLMService, ITrainingService, IKnowledgeBase
+from app.dtos.training import *
+
+
+class TrainingService(ITrainingService):
+ TOOLS = [
+ 'critical_thinking',
+ 'language_for_writing',
+ 'reading_skills',
+ 'strategy',
+ 'words',
+ 'writing_skills'
+ ]
+ # strategy word_link ct_focus reading_skill word_partners writing_skill language_for_writing
+
+ def __init__(self, llm: ILLMService, firestore: IDocumentStore, training_kb: IKnowledgeBase):
+ self._llm = llm
+ self._db = firestore
+ self._kb = training_kb
+ self._logger = getLogger(__name__)
+
+ async def fetch_tips(self, context: str, question: str, answer: str, correct_answer: str):
+ messages = self._get_question_tips(question, answer, correct_answer, context)
+
+ token_count = reduce(lambda count, item: count + count_tokens(item)['n_tokens'],
+ map(lambda x: x["content"], filter(lambda x: "content" in x, messages)), 0)
+
+ response = await self._llm.prediction(
+ GPTModels.GPT_3_5_TURBO,
+ messages,
+ None,
+ TemperatureSettings.TIPS_TEMPERATURE,
+ token_count=token_count
+ )
+
+ if isinstance(response, str):
+ response = re.sub(r"^[a-zA-Z0-9_]+\:\s*", "", response)
+
+ return response
+
+ @staticmethod
+ def _get_question_tips(question: str, answer: str, correct_answer: str, context: str = None):
+ messages = [
+ {
+ "role": "user",
+ "content": (
+ "You are a IELTS exam program that analyzes incorrect answers to questions and gives tips to "
+ "help students understand why it was a wrong answer and gives helpful insight for the future. "
+ "The tip should refer to the context and question."
+ ),
+ }
+ ]
+
+ if not (context is None or context == ""):
+ messages.append({
+ "role": "user",
+ "content": f"This is the context for the question: {context}",
+ })
+
+ messages.extend([
+ {
+ "role": "user",
+ "content": f"This is the question: {question}",
+ },
+ {
+ "role": "user",
+ "content": f"This is the answer: {answer}",
+ },
+ {
+ "role": "user",
+ "content": f"This is the correct answer: {correct_answer}",
+ }
+ ])
+
+ return messages
+
+ async def get_training_content(self, training_content: Dict) -> Dict:
+ user, stats = training_content["userID"], training_content["stats"]
+ exam_data, exam_map = await self._sort_out_solutions(stats)
+ training_content = await self._get_exam_details_and_tips(exam_data)
+ tips = self._query_kb(training_content.queries)
+ usefull_tips = await self._get_usefull_tips(exam_data, tips)
+ exam_map = self._merge_exam_map_with_details(exam_map, training_content.details)
+
+ weak_areas = {"weak_areas": []}
+ for area in training_content.weak_areas:
+ weak_areas["weak_areas"].append(area.dict())
+
+ training_doc = {
+ 'created_at': int(datetime.now().timestamp() * 1000),
+ **exam_map,
+ **usefull_tips.dict(),
+ **weak_areas,
+ "user": user
+ }
+ doc_id = await self._db.save_to_db('training', training_doc)
+ return {
+ "id": doc_id
+ }
+
+ @staticmethod
+ def _merge_exam_map_with_details(exam_map: Dict[str, any], details: List[DetailsDTO]):
+ new_exam_map = {"exams": []}
+ for detail in details:
+ new_exam_map["exams"].append({
+ "id": detail.exam_id,
+ "date": detail.date,
+ "performance_comment": detail.performance_comment,
+ "detailed_summary": detail.detailed_summary,
+ **exam_map[detail.exam_id]
+ })
+ return new_exam_map
+
+ def _query_kb(self, queries: List[QueryDTO]):
+ map_categories = {
+ "critical_thinking": "ct_focus",
+ "language_for_writing": "language_for_writing",
+ "reading_skills": "reading_skill",
+ "strategy": "strategy",
+ "writing_skills": "writing_skill"
+ }
+
+ tips = {"tips": []}
+ for query in queries:
+ if query.category == "words":
+ tips["tips"].extend(
+ self._kb.query_knowledge_base(query.text, "word_link")
+ )
+ tips["tips"].extend(
+ self._kb.query_knowledge_base(query.text, "word_partners")
+ )
+ else:
+ if query.category in map_categories:
+ tips["tips"].extend(
+ self._kb.query_knowledge_base(query.text, map_categories[query.category])
+ )
+ else:
+ self._logger.info(f"GTP tried to query knowledge base for {query.category} and it doesn't exist.")
+ return tips
+
    async def _get_exam_details_and_tips(self, exam_data: Dict[str, any]) -> TrainingContentDTO:
        """Ask the LLM for per-exam details, overall weak areas and
        knowledge-base queries, parsed into a TrainingContentDTO.

        May return None if the LLM never produces valid JSON (see
        pydantic_prediction) — callers should be prepared for that.
        """
        # JSON "schema by example" the model is asked to fill in; the long
        # prompt below is behavioural text — treat any edit as a prompt change.
        json_schema = (
            '{ "details": [{"exam_id": "", "date": 0, "performance_comment": "", "detailed_summary": ""}],'
            ' "weak_areas": [{"area": "", "comment": ""}], "queries": [{"text": "", "category": ""}] }'
        )
        messages = [
            {
                "role": "user",
                "content": (
                    f"I'm going to provide you with exam data, you will take the exam data and fill this json "
                    f'schema : {json_schema}. "performance_comment" is a short sentence that describes the '
                    'students\'s performance and main mistakes in a single exam, "detailed_summary" is a detailed '
                    'summary of the student\'s performance, "weak_areas" are identified areas'
                    ' across all exams which need to be improved upon, for example, area "Grammar and Syntax" comment "Issues'
                    ' with sentence structure and punctuation.", the "queries" field is where you will write queries '
                    'for tips that will be displayed to the student, the category attribute is a collection of '
                    'embeddings and the text will be the text used to query the knowledge base. The categories are '
                    f'the following [{", ".join(self.TOOLS)}]. The exam data will be a json where the key of the field '
                    '"exams" is the exam id, an exam can be composed of multiple modules or single modules. The student'
                    ' will see your response so refrain from using phrasing like "The student" did x, y and z. If the '
                    'field "answer" in a question is an empty array "[]", then the student didn\'t answer any question '
                    'and you must address that in your response. Also questions aren\'t modules, the only modules are: '
                    'level, speaking, writing, reading and listening. The details array needs to be tailored to the '
                    'exam attempt, even if you receive the same exam you must treat as different exams by their id.'
                    'Don\'t make references to an exam by it\'s id, the GUI will handle that so the student knows '
                    'which is the exam your comments and summary are referencing too. Even if the student hasn\'t '
                    'submitted no answers for an exam, you must still fill the details structure addressing that fact.'
                )
            },
            {
                "role": "user",
                "content": f'Exam Data: {str(exam_data)}'
            }
        ]
        return await self._llm.pydantic_prediction(messages, self._map_gpt_response, json_schema)
+
    async def _get_usefull_tips(self, exam_data: Dict[str, any], tips: Dict[str, any]) -> TipsDTO:
        """Ask the LLM to filter the KB tips down to those relevant to the
        student's exam performance; returns a TipsDTO with the kept tip ids.

        May return None on repeated malformed LLM output (see
        pydantic_prediction).
        """
        # JSON "schema by example" for the expected response.
        json_schema = (
            '{ "tip_ids": [] }'
        )
        messages = [
            {
                "role": "user",
                "content": (
                    f"I'm going to provide you with tips and I want you to return to me the tips that "
                    f"can be usefull for the student that made the exam that I'm going to send you, return "
                    f"me the tip ids in this json format {json_schema}."
                )
            },
            {
                "role": "user",
                "content": f'Exam Data: {str(exam_data)}'
            },
            {
                "role": "user",
                "content": f'Tips: {str(tips)}'
            }
        ]
        return await self._llm.pydantic_prediction(messages, lambda response: TipsDTO(**response), json_schema)
+
+ @staticmethod
+ def _map_gpt_response(response: Dict[str, any]) -> TrainingContentDTO:
+ parsed_response = {
+ "details": [DetailsDTO(**detail) for detail in response["details"]],
+ "weak_areas": [WeakAreaDTO(**area) for area in response["weak_areas"]],
+ "queries": [QueryDTO(**query) for query in response["queries"]]
+ }
+ return TrainingContentDTO(**parsed_response)
+
+ async def _sort_out_solutions(self, stats):
+ grouped_stats = {}
+ for stat in stats:
+ session_key = f'{str(stat["date"])}-{stat["user"]}'
+ module = stat["module"]
+ exam_id = stat["exam"]
+
+ if session_key not in grouped_stats:
+ grouped_stats[session_key] = {}
+ if module not in grouped_stats[session_key]:
+ grouped_stats[session_key][module] = {
+ "stats": [],
+ "exam_id": exam_id
+ }
+ grouped_stats[session_key][module]["stats"].append(stat)
+
+ exercises = {}
+ exam_map = {}
+ for session_key, modules in grouped_stats.items():
+ exercises[session_key] = {}
+ for module, module_stats in modules.items():
+ exercises[session_key][module] = {}
+
+ exam_id = module_stats["exam_id"]
+ if exam_id not in exercises[session_key][module]:
+ exercises[session_key][module][exam_id] = {"date": None, "exercises": []}
+
+ exam_total_questions = 0
+ exam_total_correct = 0
+
+ for stat in module_stats["stats"]:
+ exam_total_questions += stat["score"]["total"]
+ exam_total_correct += stat["score"]["correct"]
+ exercises[session_key][module][exam_id]["date"] = stat["date"]
+
+ if session_key not in exam_map:
+ exam_map[session_key] = {"stat_ids": [], "score": 0}
+ exam_map[session_key]["stat_ids"].append(stat["id"])
+
+ exam = await self._db.get_doc_by_id(module, exam_id)
+ if module == "listening":
+ exercises[session_key][module][exam_id]["exercises"].extend(
+ self._get_listening_solutions(stat, exam))
+ elif module == "reading":
+ exercises[session_key][module][exam_id]["exercises"].extend(
+ self._get_reading_solutions(stat, exam))
+ elif module == "writing":
+ exercises[session_key][module][exam_id]["exercises"].extend(
+ self._get_writing_prompts_and_answers(stat, exam)
+ )
+ elif module == "speaking":
+ exercises[session_key][module][exam_id]["exercises"].extend(
+ self._get_speaking_solutions(stat, exam)
+ )
+ elif module == "level":
+ exercises[session_key][module][exam_id]["exercises"].extend(
+ self._get_level_solutions(stat, exam)
+ )
+
+ exam_map[session_key]["score"] = round((exam_total_correct / exam_total_questions) * 100)
+ exam_map[session_key]["module"] = module
+
+ return {"exams": exercises}, exam_map
+
+ def _get_writing_prompts_and_answers(self, stat, exam):
+ result = []
+ try:
+ exercises = []
+ for solution in stat['solutions']:
+ answer = solution['solution']
+ exercise_id = solution['id']
+ exercises.append({
+ "exercise_id": exercise_id,
+ "answer": answer
+ })
+ for exercise in exercises:
+ for exam_exercise in exam["exercises"]:
+ if exam_exercise["id"] == exercise["exercise_id"]:
+ result.append({
+ "exercise": exam_exercise["prompt"],
+ "answer": exercise["answer"]
+ })
+
+ except KeyError as e:
+ self._logger.warning(f"Malformed stat object: {str(e)}")
+
+ return result
+
+ @staticmethod
+ def _get_mc_question(exercise, stat):
+ shuffle_maps = stat.get("shuffleMaps", [])
+ answer = stat["solutions"] if len(shuffle_maps) == 0 else []
+ if len(shuffle_maps) != 0:
+ for solution in stat["solutions"]:
+ shuffle_map = [
+ item["map"] for item in shuffle_maps
+ if item["questionID"] == solution["question"]
+ ]
+ answer.append({
+ "question": solution["question"],
+ "option": shuffle_map[solution["option"]]
+ })
+ return {
+ "question": exercise["prompt"],
+ "exercise": exercise["questions"],
+ "answer": stat["solutions"]
+ }
+
    @staticmethod
    def _swap_key_name(d, original_key, new_key):
        # Renames a key in place (mutates *d*) and returns the same dict so
        # it can be used inside comprehensions. Raises KeyError if
        # original_key is absent.
        d[new_key] = d.pop(original_key)
        return d
+
    def _get_level_solutions(self, stat, exam):
        """Collect level-test exercises matched to the student's answers.

        Handles "fillBlanks" (answer keys normalised from 'solution' to
        'option') and "multipleChoice"; malformed stats are logged and the
        partial result returned.
        """
        result = []
        try:
            for part in exam["parts"]:
                for exercise in part["exercises"]:
                    if exercise["id"] == stat["exercise"]:
                        if stat["type"] == "fillBlanks":
                            result.append({
                                "prompt": exercise["prompt"],
                                "template": exercise["text"],
                                "words": exercise["words"],
                                "solutions": exercise["solutions"],
                                "answer": [
                                    self._swap_key_name(item, 'solution', 'option')
                                    for item in stat["solutions"]
                                ]
                            })
                        elif stat["type"] == "multipleChoice":
                            result.append(self._get_mc_question(exercise, stat))
        except KeyError as e:
            self._logger.warning(f"Malformed stat object: {str(e)}")
        return result
+
    def _get_listening_solutions(self, stat, exam):
        """Collect listening exercises matched to the student's answers.

        Handles "writeBlanks", "fillBlanks" and "multipleChoice" stat types;
        malformed stats are logged and the partial result returned.
        """
        result = []
        try:
            for part in exam["parts"]:
                for exercise in part["exercises"]:
                    if exercise["id"] == stat["exercise"]:
                        if stat["type"] == "writeBlanks":
                            result.append({
                                "question": exercise["prompt"],
                                "template": exercise["text"],
                                "solution": exercise["solutions"],
                                "answer": stat["solutions"]
                            })
                        elif stat["type"] == "fillBlanks":
                            result.append({
                                "question": exercise["prompt"],
                                "template": exercise["text"],
                                "words": exercise["words"],
                                "solutions": exercise["solutions"],
                                "answer": stat["solutions"]
                            })
                        elif stat["type"] == "multipleChoice":
                            result.append(self._get_mc_question(exercise, stat))

        except KeyError as e:
            self._logger.warning(f"Malformed stat object: {str(e)}")
        return result
+
    @staticmethod
    def _find_shuffle_map(shuffle_maps, question_id):
        # Return the shuffle map recorded for question_id, or None if absent.
        return next((item["map"] for item in shuffle_maps if item["questionID"] == question_id), None)
+
+ def _get_speaking_solutions(self, stat, exam):
+ result = {}
+ try:
+ result = {
+ "comments": {
+ key: value['comment'] for key, value in stat['solutions'][0]['evaluation']['task_response'].items()}
+ ,
+ "exercises": {}
+ }
+
+ for exercise in exam["exercises"]:
+ if exercise["id"] == stat["exercise"]:
+ if stat["type"] == "interactiveSpeaking":
+ for i in range(len(exercise["prompts"])):
+ result["exercises"][f"exercise_{i+1}"] = {
+ "question": exercise["prompts"][i]["text"]
+ }
+ for i in range(len(exercise["prompts"])):
+ answer = stat['solutions'][0]["evaluation"].get(f'transcript_{i+1}', '')
+ result["exercises"][f"exercise_{i+1}"]["answer"] = answer
+ elif stat["type"] == "speaking":
+ result["exercises"]["exercise_1"] = {
+ "question": exercise["text"],
+ "answer": stat['solutions'][0]["evaluation"].get(f'transcript', '')
+ }
+ except KeyError as e:
+ self._logger.warning(f"Malformed stat object: {str(e)}")
+ return [result]
+
    def _get_reading_solutions(self, stat, exam):
        """Collect reading exercises (with their passage text) matched to
        the student's answers.

        Handles "fillBlanks", "writeBlanks", "trueFalse" and
        "matchSentences" stat types; malformed stats are logged and the
        partial result returned.
        """
        result = []
        try:
            for part in exam["parts"]:
                # The passage the exercises in this part refer to.
                text = part["text"]
                for exercise in part["exercises"]:
                    if exercise["id"] == stat["exercise"]:
                        if stat["type"] == "fillBlanks":
                            result.append({
                                "text": text,
                                "question": exercise["prompt"],
                                "template": exercise["text"],
                                "words": exercise["words"],
                                "solutions": exercise["solutions"],
                                "answer": stat["solutions"]
                            })
                        elif stat["type"] == "writeBlanks":
                            result.append({
                                "text": text,
                                "question": exercise["prompt"],
                                "template": exercise["text"],
                                "solutions": exercise["solutions"],
                                "answer": stat["solutions"]
                            })
                        elif stat["type"] == "trueFalse":
                            result.append({
                                "text": text,
                                "questions": exercise["questions"],
                                "answer": stat["solutions"]
                            })
                        elif stat["type"] == "matchSentences":
                            result.append({
                                "text": text,
                                "question": exercise["prompt"],
                                "sentences": exercise["sentences"],
                                "options": exercise["options"],
                                "answer": stat["solutions"]
                            })
        except KeyError as e:
            self._logger.warning(f"Malformed stat object: {str(e)}")
        return result
+
+
diff --git a/app/services/impl/writing.py b/app/services/impl/writing.py
index 3425cd3..9bf19ff 100644
--- a/app/services/impl/writing.py
+++ b/app/services/impl/writing.py
@@ -1,5 +1,7 @@
+from typing import List, Dict
+
from app.services.abc import IWritingService, ILLMService, IAIDetectorService
-from app.configs.constants import GPTModels, TemperatureSettings
+from app.configs.constants import GPTModels, TemperatureSettings, FieldsAndExercises
from app.helpers import TextHelper, ExercisesHelper
@@ -17,10 +19,7 @@ class WritingService(IWritingService):
'You are a helpful assistant designed to output JSON on this format: {"prompt": "prompt content"}'
)
},
- {
- "role": "user",
- "content": self._get_writing_prompt(task, topic, difficulty)
- }
+ *self._get_writing_messages(task, topic, difficulty)
]
llm_model = GPTModels.GPT_3_5_TURBO if task == 1 else GPTModels.GPT_4_O
@@ -32,15 +31,18 @@ class WritingService(IWritingService):
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
+ question = response["prompt"].strip()
+
return {
- "question": response["prompt"].strip(),
+ "question": self._add_newline_before_hyphen(question) if task == 1 else question,
"difficulty": difficulty,
"topic": topic
}
@staticmethod
- def _get_writing_prompt(task: int, topic: str, difficulty: str):
- return (
+ def _get_writing_messages(task: int, topic: str, difficulty: str) -> List[Dict]:
+ # TODO: Should the muslim disclaimer be added to task 2?
+ task_prompt = (
'Craft a prompt for an IELTS Writing Task 1 General Training exercise that instructs the '
'student to compose a letter. The prompt should present a specific scenario or situation, '
f'based on the topic of "{topic}", requiring the student to provide information, '
@@ -52,32 +54,41 @@ class WritingService(IWritingService):
f'analysis of contrasting perspectives on the topic of "{topic}".'
)
+ task_instructions = (
+ 'The prompt should end with "In the letter you should" followed by 3 bullet points of what '
+ 'the answer should include.'
+ ) if task == 1 else (
+ 'The question should lead to an answer with either "theories", "complicated information" or '
+ 'be "very descriptive" on the topic.'
+ )
+
+ messages = [
+ {
+ "role": "user",
+ "content": task_prompt
+ },
+ {
+ "role": "user",
+ "content": task_instructions
+ }
+ ]
+
+ return messages
+
async def grade_writing_task(self, task: int, question: str, answer: str):
bare_minimum = 100 if task == 1 else 180
- minimum = 150 if task == 1 else 250
-
- # TODO: left as is, don't know if this is intended or not
- llm_model = GPTModels.GPT_3_5_TURBO if task == 1 else GPTModels.GPT_4_O
- temperature = (
- TemperatureSettings.GRADING_TEMPERATURE
- if task == 1 else
- TemperatureSettings.GEN_QUESTION_TEMPERATURE
- )
if not TextHelper.has_words(answer):
return self._zero_rating("The answer does not contain enough english words.")
elif not TextHelper.has_x_words(answer, bare_minimum):
return self._zero_rating("The answer is insufficient and too small to be graded.")
else:
+ template = self._get_writing_template()
messages = [
{
"role": "system",
"content": (
- 'You are a helpful assistant designed to output JSON on this format: '
- '{"perfect_answer": "example perfect answer", "comment": '
- '"comment about answer quality", "overall": 0.0, "task_response": '
- '{"Task Achievement": 0.0, "Coherence and Cohesion": 0.0, '
- '"Lexical Resource": 0.0, "Grammatical Range and Accuracy": 0.0 }'
+ f'You are a helpful assistant designed to output JSON on this format: {template}'
)
},
{
@@ -86,16 +97,28 @@ class WritingService(IWritingService):
f'Evaluate the given Writing Task {task} response based on the IELTS grading system, '
'ensuring a strict assessment that penalizes errors. Deduct points for deviations '
'from the task, and assign a score of 0 if the response fails to address the question. '
- f'Additionally, provide an exemplary answer with a minimum of {minimum} words, along with a '
- 'detailed commentary highlighting both strengths and weaknesses in the response. '
+ 'Additionally, provide a detailed commentary highlighting both strengths and '
+ 'weaknesses in the response. '
f'\n Question: "{question}" \n Answer: "{answer}"')
- },
- {
- "role": "user",
- "content": f'The perfect answer must have at least {minimum} words.'
}
]
+ if task == 1:
+ messages.append({
+ "role": "user",
+ "content": (
+ 'Refer to the parts of the letter as: "Greeting Opener", "bullet 1", "bullet 2", '
+ '"bullet 3", "closer (restate the purpose of the letter)", "closing greeting"'
+ )
+ })
+
+ llm_model = GPTModels.GPT_3_5_TURBO if task == 1 else GPTModels.GPT_4_O
+ temperature = (
+ TemperatureSettings.GRADING_TEMPERATURE
+ if task == 1 else
+ TemperatureSettings.GEN_QUESTION_TEMPERATURE
+ )
+
response = await self._llm.prediction(
llm_model,
messages,
@@ -103,6 +126,10 @@ class WritingService(IWritingService):
temperature
)
+ perfect_answer_minimum = 150 if task == 1 else 250
+ perfect_answer = await self._get_perfect_answer(question, perfect_answer_minimum)
+
+ response["perfect_answer"] = perfect_answer["perfect_answer"]
response["overall"] = ExercisesHelper.fix_writing_overall(response["overall"], response["task_response"])
response['fixed_text'] = await self._get_fixed_text(answer)
@@ -114,13 +141,20 @@ class WritingService(IWritingService):
async def _get_fixed_text(self, text):
messages = [
- {"role": "system", "content": ('You are a helpful assistant designed to output JSON on this format: '
- '{"fixed_text": "fixed test with no misspelling errors"}')
- },
- {"role": "user", "content": (
+ {
+ "role": "system",
+ "content": (
+ 'You are a helpful assistant designed to output JSON on this format: '
+ '{"fixed_text": "fixed test with no misspelling errors"}'
+ )
+ },
+ {
+ "role": "user",
+ "content": (
'Fix the errors in the given text and put it in a JSON. '
- f'Do not complete the answer, only replace what is wrong. \n The text: "{text}"')
- }
+ f'Do not complete the answer, only replace what is wrong. \n The text: "{text}"'
+ )
+ }
]
response = await self._llm.prediction(
@@ -132,16 +166,83 @@ class WritingService(IWritingService):
)
return response["fixed_text"]
    async def _get_perfect_answer(self, question: str, size: int) -> Dict:
        """Ask the LLM for a model answer of at least *size* words for the
        given writing question.

        Returns the parsed JSON dict, expected to contain "perfect_answer".
        """
        messages = [
            {
                "role": "system",
                "content": (
                    'You are a helpful assistant designed to output JSON on this format: '
                    '{"perfect_answer": "perfect answer for the question"}'
                )
            },
            {
                "role": "user",
                "content": f'Write a perfect answer for this writing exercise of a IELTS exam. Question: {question}'

            },
            {
                "role": "user",
                "content": f'The answer must have at least {size} words'
            }
        ]
        return await self._llm.prediction(
            GPTModels.GPT_4_O,
            messages,
            ["perfect_answer"],
            TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )
+
@staticmethod
def _zero_rating(comment: str):
return {
'comment': comment,
'overall': 0,
'task_response': {
- 'Coherence and Cohesion': 0,
- 'Grammatical Range and Accuracy': 0,
- 'Lexical Resource': 0,
- 'Task Achievement': 0
+ 'Task Achievement': {
+ "grade": 0.0,
+ "comment": ""
+ },
+ 'Coherence and Cohesion': {
+ "grade": 0.0,
+ "comment": ""
+ },
+ 'Lexical Resource': {
+ "grade": 0.0,
+ "comment": ""
+ },
+ 'Grammatical Range and Accuracy': {
+ "grade": 0.0,
+ "comment": ""
+ }
}
}
    @staticmethod
    def _get_writing_template():
        # JSON "schema by example" embedded in the grading system prompt:
        # the 0.0 grades and comment strings are instructions for the model,
        # not real values — the wording is prompt text.
        return {
            "comment": "comment about student's response quality",
            "overall": 0.0,
            "task_response": {
                "Task Achievement": {
                    "grade": 0.0,
                    "comment": "comment about Task Achievement of the student's response"
                },
                "Coherence and Cohesion": {
                    "grade": 0.0,
                    "comment": "comment about Coherence and Cohesion of the student's response"
                },
                "Lexical Resource": {
                    "grade": 0.0,
                    "comment": "comment about Lexical Resource of the student's response"
                },
                "Grammatical Range and Accuracy": {
                    "grade": 0.0,
                    "comment": "comment about Grammatical Range and Accuracy of the student's response"
                }
            }
        }
+
    @staticmethod
    def _add_newline_before_hyphen(s):
        # Formats task-1 prompts so each " -" bullet starts on its own line.
        # NOTE(review): this also breaks any mid-sentence " -" (dashes,
        # ranges) — confirm generated prompts never contain those.
        return s.replace(" -", "\n-")
+
diff --git a/app/utils/__init__.py b/app/utils/__init__.py
new file mode 100644
index 0000000..f366ec6
--- /dev/null
+++ b/app/utils/__init__.py
@@ -0,0 +1,5 @@
+from .handle_exception import handle_exception
+
+__all__ = [
+ "handle_exception"
+]
diff --git a/app/utils/handle_exception.py b/app/utils/handle_exception.py
new file mode 100644
index 0000000..1c340d6
--- /dev/null
+++ b/app/utils/handle_exception.py
@@ -0,0 +1,15 @@
+import functools
+from typing import Callable, Any
+from fastapi import Response
+
+
def handle_exception(status_code: int = 500):
    """Decorator factory: wrap an async endpoint so any uncaught exception
    becomes a plain-text HTTP response with *status_code*.

    NOTE(review): ``str(e)`` is sent verbatim to the client, which can leak
    internal details (paths, queries, stack hints). Consider logging the
    exception server-side and returning a generic message instead.
    """
    def decorator(func: Callable) -> Callable:
        @functools.wraps(func)
        async def wrapper(*args: Any, **kwargs: Any) -> Any:
            try:
                return await func(*args, **kwargs)
            except Exception as e:
                # Broad catch is deliberate: this sits at the endpoint
                # boundary, the outermost place to translate failures.
                return Response(content=str(e), status_code=status_code)
        return wrapper
    return decorator
diff --git a/pyproject.toml b/pyproject.toml
index 25243aa..bc5026a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,5 +1,5 @@
[tool.poetry]
-name = "encoach-be"
+name = "ielts-be"
version = "0.1.0"
description = ""
authors = ["Ecrop Devteam "]
@@ -17,6 +17,14 @@ firebase-admin = "^6.5.0"
wonderwords = "^2.2.0"
dependency-injector = "^4.41.0"
openai = "^1.37.0"
+python-multipart = "0.0.9"
+faiss-cpu = "1.8.0.post1"
+pypandoc = "1.13"
+pdfplumber = "0.11.3"
+numpy = "1.26.4"
+pillow = "10.4.0"
+sentence-transformers = "3.0.1"
+openai-whisper = "20231117"
[build-system]
diff --git a/tmp/placeholder.txt b/tmp/placeholder.txt
new file mode 100644
index 0000000..f89d219
--- /dev/null
+++ b/tmp/placeholder.txt
@@ -0,0 +1 @@
+THIS FILE ONLY EXISTS TO KEEP THIS FOLDER IN THE REPO
\ No newline at end of file