diff --git a/.env b/.env index 6097ff7..8c214ae 100644 --- a/.env +++ b/.env @@ -1,7 +1,8 @@ +ENV=local OPENAI_API_KEY=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 JWT_SECRET_KEY=6e9c124ba92e8814719dcb0f21200c8aa4d0f119a994ac5e06eb90a366c83ab2 JWT_TEST_TOKEN=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0In0.Emrs2D3BmMP4b3zMjw0fJTPeyMwWEBDbxx2vvaWguO0 -GOOGLE_APPLICATION_CREDENTIALS=firebase-configs/storied-phalanx-349916.json +GOOGLE_APPLICATION_CREDENTIALS=firebase-configs/encoach-staging.json HEY_GEN_TOKEN=MjY4MDE0MjdjZmNhNDFmYTlhZGRkNmI3MGFlMzYwZDItMTY5NTExNzY3MA== GPT_ZERO_API_KEY=0195b9bb24c5439899f71230809c74af diff --git a/.gitignore b/.gitignore index 05ce478..aecdd8d 100644 --- a/.gitignore +++ b/.gitignore @@ -2,5 +2,5 @@ __pycache__ .idea .env .DS_Store -firebase-configs/local.json .venv +scripts diff --git a/.idea/ielts-be.iml b/.idea/ielts-be.iml index 7af039d..a9631c9 100644 --- a/.idea/ielts-be.iml +++ b/.idea/ielts-be.iml @@ -5,9 +5,10 @@ + - + diff --git a/.idea/misc.xml b/.idea/misc.xml index f6104af..6601cfb 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -1,6 +1,9 @@ - + + + diff --git a/Dockerfile b/Dockerfile index 6ecadc0..64e8726 100644 --- a/Dockerfile +++ b/Dockerfile @@ -18,12 +18,16 @@ COPY . 
./ COPY --from=requirements-stage /tmp/requirements.txt /app/requirements.txt -RUN apt update && apt install -y ffmpeg - -RUN pip install openai-whisper - -# openai-whisper model in not compatible with the newer 2.0.0 numpy release -RUN pip install --upgrade numpy<2 +RUN apt update && apt install -y \ + ffmpeg \ + poppler-utils \ + texlive-latex-base \ + texlive-fonts-recommended \ + texlive-latex-extra \ + texlive-xetex \ + pandoc \ + librsvg2-bin \ + && rm -rf /var/lib/apt/lists/* RUN pip install --no-cache-dir -r /app/requirements.txt diff --git a/README.md b/README.md index 910c41e..e22b313 100644 --- a/README.md +++ b/README.md @@ -1,27 +1,5 @@ -# Disclaimer +Latest refactor from develop's branch commit 5d5cd21 2024-08-28 -I didn't fully test all the endpoints, the main purpose of this release was for ielts-be to be async but I've also -separated logic through different layers, removed some duplication and implemented dependency injection, so there -could be errors and extensive testing is needed before even considering deploying (if you're even considering it). - -The version this was refactored from was master's branch commit a4caecd 2024-06-13 - -# Changes - -Since one of my use cases is load testing with 5000 concurrent users and ielts-be is sync, I've refactored ielts-be -into this fastapi app. - -The ielts-be Dockerfile runs the container with: - -```CMD exec gunicorn --bind 0.0.0.0:5000 --workers 1 --threads 8 --timeout 0 app:app``` - -And since gunicorn uses WSGI and ielts-be has mostly sync I/O blocking operations, everytime a request encounters -an I/O blocking operation a thread is blocked. Since this config is 1 worker with 8 threads, the container -will only be able to handle 8 concurrent requests at a time before gcloud run cold starts another instance. 
- -Flask was built with WSGI in mind, having Quart as it's async alternative, even though you can serve Flask -with uvicorn using the [asgiref](https://pypi.org/project/asgiref/) adapter, FastAPI has better performance -than both alternatives and the sync calls would need to be modified either way. # Endpoints @@ -29,34 +7,38 @@ In ielts-ui I've added a wrapper to every backend request in '/src/utils/transla new endpoints if the "BACKEND_TYPE" environment variable is set to "async", if the env variable is not present or with another value, the wrapper will return the old endpoint. -| Method | ielts-be | This one | -|--------|--------------------------------------|------------------------------------------| -| GET | /healthcheck | /api/healthcheck | -| GET | /listening_section_1 | /api/listening/section/1 | -| GET | /listening_section_2 | /api/listening/section/2 | -| GET | /listening_section_3 | /api/listening/section/3 | -| GET | /listening_section_4 | /api/listening/section/4 | -| POST | /listening | /api/listening | -| POST | /writing_task1 | /api/grade/writing/1 | -| POST | /writing_task2 | /api/grade/writing/2 | -| GET | /writing_task1_general | /api/writing/1 | -| GET | /writing_task2_general | /api/writing/2 | -| POST | /speaking_task_1 | /api/grade/speaking/1 | -| POST | /speaking_task_2 | /api/grade/speaking/2 | -| POST | /speaking_task_3 | /api/grade/speaking/3 | -| GET | /speaking_task_1 | /api/speaking/1 | -| GET | /speaking_task_2 | /api/speaking/2 | -| GET | /speaking_task_3 | /api/speaking/3 | -| POST | /speaking | /api/speaking | -| POST | /speaking/generate_speaking_video | /api/speaking/generate_speaking_video | -| POST | /speaking/generate_interactive_video | /api/speaking/generate_interactive_video | -| GET | /reading_passage_1 | /api/reading/passage/1 | -| GET | /reading_passage_2 | /api/reading/passage/2 | -| GET | /reading_passage_3 | /api/reading/passage/3 | -| GET | /level | /api/level | -| GET | /level_utas | /api/level/utas | -| POST | 
/fetch_tips | /api/training/tips | -| POST | /grading_summary | /api/grade/summary | +| Method | ielts-be | This one | +|--------|--------------------------------------|---------------------------------------------| +| GET | /healthcheck | /api/healthcheck | +| GET | /listening_section_1 | /api/listening/section/1 | +| GET | /listening_section_2 | /api/listening/section/2 | +| GET | /listening_section_3 | /api/listening/section/3 | +| GET | /listening_section_4 | /api/listening/section/4 | +| POST | /listening | /api/listening | +| POST | /writing_task1 | /api/grade/writing/1 | +| POST | /writing_task2 | /api/grade/writing/2 | +| GET | /writing_task1_general | /api/writing/1 | +| GET | /writing_task2_general | /api/writing/2 | +| POST | /speaking_task_1 | /api/grade/speaking/1 | +| POST | /speaking_task_2 | /api/grade/speaking/2 | +| POST | /speaking_task_3 | /api/grade/speaking/3 | +| GET | /speaking_task_1 | /api/speaking/1 | +| GET | /speaking_task_2 | /api/speaking/2 | +| GET | /speaking_task_3 | /api/speaking/3 | +| POST | /speaking | /api/speaking | +| POST | /speaking/generate_speaking_video | /api/speaking/generate_speaking_video | +| POST | /speaking/generate_interactive_video | /api/speaking/generate_interactive_video | +| GET | /reading_passage_1 | /api/reading/passage/1 | +| GET | /reading_passage_2 | /api/reading/passage/2 | +| GET | /reading_passage_3 | /api/reading/passage/3 | +| GET | /level | /api/level | +| GET | /level_utas | /api/level/utas | +| POST | /fetch_tips | /api/training/tips | +| POST | /grading_summary | /api/grade/summary | +| POST | /grade_short_answers | /api/grade/short_answers | +| POST | /upload_level | /api/level/upload | +| POST | /training_content | /api/training/ | +| POST | /custom_level | /api/level/custom | # Run the app @@ -64,9 +46,7 @@ This is for Windows, creating venv and activating it may differ based on your OS 1. python -m venv env 2. env\Scripts\activate -3. pip install openai-whisper -4. 
pip install --upgrade numpy<2 -5. pip install poetry -6. poetry install -7. python main.py +3. pip install poetry +4. poetry install +5. python app.py diff --git a/app.py b/app.py index 684a422..80feec4 100644 --- a/app.py +++ b/app.py @@ -1,1154 +1,30 @@ -import threading -from functools import reduce - -import firebase_admin -from firebase_admin import credentials -from flask import Flask, request -from flask_jwt_extended import JWTManager, jwt_required - -from helper.api_messages import * -from helper.exam_variant import ExamVariant -from helper.exercises import * -from helper.file_helper import delete_files_older_than_one_day -from helper.firebase_helper import * -from helper.heygen_api import create_video, create_videos_and_save_to_db -from helper.openai_interface import * -from helper.question_templates import * -from helper.speech_to_text_helper import * -from heygen.AvatarEnum import AvatarEnum - -load_dotenv() - -app = Flask(__name__) - -app.config['JWT_SECRET_KEY'] = os.getenv("JWT_SECRET_KEY") -jwt = JWTManager(app) - -# Initialize Firebase Admin SDK -cred = credentials.Certificate(os.getenv("GOOGLE_APPLICATION_CREDENTIALS")) -FIREBASE_BUCKET = os.getenv('FIREBASE_BUCKET') - -firebase_admin.initialize_app(cred) - -thread_event = threading.Event() - -# Configure logging -logging.basicConfig(level=logging.DEBUG, # Set the logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL) - format='%(asctime)s - %(levelname)s - %(message)s') - - -@app.route('/healthcheck', methods=['GET']) -def healthcheck(): - return {"healthy": True} - - -@app.route('/listening_section_1', methods=['GET']) -@jwt_required() -def get_listening_section_1_question(): - try: - delete_files_older_than_one_day(AUDIO_FILES_PATH) - # Extract parameters from the URL query string - topic = request.args.get('topic', default=random.choice(two_people_scenarios)) - req_exercises = request.args.getlist('exercises') - difficulty = request.args.get("difficulty", default=random.choice(difficulties)) - - 
if (len(req_exercises) == 0): - req_exercises = random.sample(LISTENING_EXERCISE_TYPES, 1) - - number_of_exercises_q = divide_number_into_parts(TOTAL_LISTENING_SECTION_1_EXERCISES, len(req_exercises)) - - processed_conversation = generate_listening_1_conversation(topic) - - app.logger.info("Generated conversation: " + str(processed_conversation)) - - start_id = 1 - exercises = generate_listening_conversation_exercises(parse_conversation(processed_conversation), req_exercises, - number_of_exercises_q, - start_id, difficulty) - return { - "exercises": exercises, - "text": processed_conversation, - "difficulty": difficulty - } - except Exception as e: - return str(e) - - -@app.route('/listening_section_2', methods=['GET']) -@jwt_required() -def get_listening_section_2_question(): - try: - delete_files_older_than_one_day(AUDIO_FILES_PATH) - # Extract parameters from the URL query string - topic = request.args.get('topic', default=random.choice(social_monologue_contexts)) - req_exercises = request.args.getlist('exercises') - difficulty = request.args.get("difficulty", default=random.choice(difficulties)) - - if (len(req_exercises) == 0): - req_exercises = random.sample(LISTENING_EXERCISE_TYPES, 2) - - number_of_exercises_q = divide_number_into_parts(TOTAL_LISTENING_SECTION_2_EXERCISES, len(req_exercises)) - - monologue = generate_listening_2_monologue(topic) - - app.logger.info("Generated monologue: " + str(monologue)) - start_id = 11 - exercises = generate_listening_monologue_exercises(str(monologue), req_exercises, number_of_exercises_q, - start_id, difficulty) - return { - "exercises": exercises, - "text": monologue, - "difficulty": difficulty - } - except Exception as e: - return str(e) - - -@app.route('/listening_section_3', methods=['GET']) -@jwt_required() -def get_listening_section_3_question(): - try: - delete_files_older_than_one_day(AUDIO_FILES_PATH) - # Extract parameters from the URL query string - topic = request.args.get('topic', 
default=random.choice(four_people_scenarios)) - req_exercises = request.args.getlist('exercises') - difficulty = request.args.get("difficulty", default=random.choice(difficulties)) - - if (len(req_exercises) == 0): - req_exercises = random.sample(LISTENING_EXERCISE_TYPES, 1) - - number_of_exercises_q = divide_number_into_parts(TOTAL_LISTENING_SECTION_3_EXERCISES, len(req_exercises)) - - processed_conversation = generate_listening_3_conversation(topic) - - app.logger.info("Generated conversation: " + str(processed_conversation)) - - start_id = 21 - exercises = generate_listening_conversation_exercises(parse_conversation(processed_conversation), req_exercises, - number_of_exercises_q, - start_id, difficulty) - return { - "exercises": exercises, - "text": processed_conversation, - "difficulty": difficulty - } - except Exception as e: - return str(e) - - -@app.route('/listening_section_4', methods=['GET']) -@jwt_required() -def get_listening_section_4_question(): - try: - delete_files_older_than_one_day(AUDIO_FILES_PATH) - # Extract parameters from the URL query string - topic = request.args.get('topic', default=random.choice(academic_subjects)) - req_exercises = request.args.getlist('exercises') - difficulty = request.args.get("difficulty", default=random.choice(difficulties)) - - if (len(req_exercises) == 0): - req_exercises = random.sample(LISTENING_EXERCISE_TYPES, 2) - - number_of_exercises_q = divide_number_into_parts(TOTAL_LISTENING_SECTION_4_EXERCISES, len(req_exercises)) - - monologue = generate_listening_4_monologue(topic) - - app.logger.info("Generated monologue: " + str(monologue)) - start_id = 31 - exercises = generate_listening_monologue_exercises(str(monologue), req_exercises, number_of_exercises_q, - start_id, difficulty) - return { - "exercises": exercises, - "text": monologue, - "difficulty": difficulty - } - except Exception as e: - return str(e) - - -@app.route('/listening', methods=['POST']) -@jwt_required() -def save_listening(): - try: - data = 
request.get_json() - parts = data.get('parts') - minTimer = data.get('minTimer', LISTENING_MIN_TIMER_DEFAULT) - difficulty = data.get('difficulty', random.choice(difficulties)) - template = getListeningTemplate() - template['difficulty'] = difficulty - id = str(uuid.uuid4()) - for i, part in enumerate(parts, start=0): - part_template = getListeningPartTemplate() - - file_name = str(uuid.uuid4()) + ".mp3" - sound_file_path = AUDIO_FILES_PATH + file_name - firebase_file_path = FIREBASE_LISTENING_AUDIO_FILES_PATH + file_name - if "conversation" in part["text"]: - conversation_text_to_speech(part["text"]["conversation"], sound_file_path) - else: - text_to_speech(part["text"], sound_file_path) - file_url = upload_file_firebase_get_url(FIREBASE_BUCKET, firebase_file_path, sound_file_path) - - part_template["audio"]["source"] = file_url - part_template["exercises"] = part["exercises"] - - template['parts'].append(part_template) - - if minTimer != LISTENING_MIN_TIMER_DEFAULT: - template["minTimer"] = minTimer - template["variant"] = ExamVariant.PARTIAL.value - else: - template["variant"] = ExamVariant.FULL.value - - (result, id) = save_to_db_with_id("listening", template, id) - if result: - return {**template, "id": id} - else: - raise Exception("Failed to save question: " + parts) - except Exception as e: - return str(e) - - -@app.route('/writing_task1', methods=['POST']) -@jwt_required() -def grade_writing_task_1(): - try: - data = request.get_json() - question = data.get('question') - answer = data.get('answer') - if not has_words(answer): - return { - 'comment': "The answer does not contain enough english words.", - 'overall': 0, - 'task_response': { - 'Coherence and Cohesion': 0, - 'Grammatical Range and Accuracy': 0, - 'Lexical Resource': 0, - 'Task Achievement': 0 - } - } - elif not has_x_words(answer, 100): - return { - 'comment': "The answer is insufficient and too small to be graded.", - 'overall': 0, - 'task_response': { - 'Coherence and Cohesion': 0, - 
'Grammatical Range and Accuracy': 0, - 'Lexical Resource': 0, - 'Task Achievement': 0 - } - } - else: - messages = [ - { - "role": "system", - "content": ('You are a helpful assistant designed to output JSON on this format: ' - '{"perfect_answer": "example perfect answer", "comment": ' - '"comment about answer quality", "overall": 0.0, "task_response": ' - '{"Task Achievement": 0.0, "Coherence and Cohesion": 0.0, ' - '"Lexical Resource": 0.0, "Grammatical Range and Accuracy": 0.0 }') - }, - { - "role": "user", - "content": ('Evaluate the given Writing Task 1 response based on the IELTS grading system, ' - 'ensuring a strict assessment that penalizes errors. Deduct points for deviations ' - 'from the task, and assign a score of 0 if the response fails to address the question. ' - 'Additionally, provide an exemplary answer with a minimum of 150 words, along with a ' - 'detailed commentary highlighting both strengths and weaknesses in the response. ' - '\n Question: "' + question + '" \n Answer: "' + answer + '"') - }, - { - "role": "user", - "content": 'The perfect answer must have at least 150 words.' 
- } - ] - token_count = count_total_tokens(messages) - response = make_openai_call(GPT_3_5_TURBO, messages, token_count, - ["comment"], - GRADING_TEMPERATURE) - response["overall"] = fix_writing_overall(response["overall"], response["task_response"]) - response['fixed_text'] = get_fixed_text(answer) - return response - except Exception as e: - return str(e) - - -@app.route('/writing_task1_general', methods=['GET']) -@jwt_required() -def get_writing_task_1_general_question(): - difficulty = request.args.get("difficulty", default=random.choice(difficulties)) - topic = request.args.get("topic", default=random.choice(mti_topics)) - try: - messages = [ - { - "role": "system", - "content": ('You are a helpful assistant designed to output JSON on this format: ' - '{"prompt": "prompt content"}') - }, - { - "role": "user", - "content": ('Craft a prompt for an IELTS Writing Task 1 General Training exercise that instructs the ' - 'student to compose a letter. The prompt should present a specific scenario or situation, ' - 'based on the topic of "' + topic + '", requiring the student to provide information, ' - 'advice, or instructions within the letter. 
' - 'Make sure that the generated prompt is ' - 'of ' + difficulty + 'difficulty and does not contain ' - 'forbidden subjects in muslim ' - 'countries.') - } - ] - token_count = count_total_tokens(messages) - response = make_openai_call(GPT_3_5_TURBO, messages, token_count, "prompt", - GEN_QUESTION_TEMPERATURE) - return { - "question": response["prompt"].strip(), - "difficulty": difficulty, - "topic": topic - } - except Exception as e: - return str(e) - - -@app.route('/writing_task2', methods=['POST']) -@jwt_required() -def grade_writing_task_2(): - try: - data = request.get_json() - question = data.get('question') - answer = data.get('answer') - if not has_words(answer): - return { - 'comment': "The answer does not contain enough english words.", - 'overall': 0, - 'task_response': { - 'Coherence and Cohesion': 0, - 'Grammatical Range and Accuracy': 0, - 'Lexical Resource': 0, - 'Task Achievement': 0 - } - } - elif not has_x_words(answer, 180): - return { - 'comment': "The answer is insufficient and too small to be graded.", - 'overall': 0, - 'task_response': { - 'Coherence and Cohesion': 0, - 'Grammatical Range and Accuracy': 0, - 'Lexical Resource': 0, - 'Task Achievement': 0 - } - } - else: - messages = [ - { - "role": "system", - "content": ('You are a helpful assistant designed to output JSON on this format: ' - '{"perfect_answer": "example perfect answer", "comment": ' - '"comment about answer quality", "overall": 0.0, "task_response": ' - '{"Task Achievement": 0.0, "Coherence and Cohesion": 0.0, ' - '"Lexical Resource": 0.0, "Grammatical Range and Accuracy": 0.0 }') - }, - { - "role": "user", - "content": ( - 'Evaluate the given Writing Task 2 response based on the IELTS grading system, ensuring a ' - 'strict assessment that penalizes errors. Deduct points for deviations from the task, and ' - 'assign a score of 0 if the response fails to address the question. 
Additionally, provide an ' - 'exemplary answer with a minimum of 250 words, along with a detailed commentary highlighting ' - 'both strengths and weaknesses in the response.' - '\n Question: "' + question + '" \n Answer: "' + answer + '"') - }, - { - "role": "user", - "content": 'The perfect answer must have at least 250 words.' - } - ] - token_count = count_total_tokens(messages) - response = make_openai_call(GPT_4_O, messages, token_count, ["comment"], - GEN_QUESTION_TEMPERATURE) - response["overall"] = fix_writing_overall(response["overall"], response["task_response"]) - response['fixed_text'] = get_fixed_text(answer) - return response - except Exception as e: - return str(e) - - -def fix_writing_overall(overall: float, task_response: dict): - if overall > max(task_response.values()) or overall < min(task_response.values()): - total_sum = sum(task_response.values()) - average = total_sum / len(task_response.values()) - rounded_average = round(average, 0) - return rounded_average - return overall - - -@app.route('/writing_task2_general', methods=['GET']) -@jwt_required() -def get_writing_task_2_general_question(): - difficulty = request.args.get("difficulty", default=random.choice(difficulties)) - topic = request.args.get("topic", default=random.choice(mti_topics)) - try: - messages = [ - { - "role": "system", - "content": ('You are a helpful assistant designed to output JSON on this format: ' - '{"prompt": "prompt content"}') - }, - { - "role": "user", - "content": ( - 'Craft a comprehensive question of ' + difficulty + 'difficulty like the ones for IELTS Writing Task 2 General Training that directs the candidate ' - 'to delve into an in-depth analysis of contrasting perspectives on the topic of "' + topic + '". 
' - 'The candidate should be asked to discuss the strengths and weaknesses of both viewpoints, provide evidence or ' - 'examples, and present a well-rounded argument before concluding with their personal opinion on the subject.') - } - ] - token_count = count_total_tokens(messages) - response = make_openai_call(GPT_4_O, messages, token_count, "prompt", GEN_QUESTION_TEMPERATURE) - return { - "question": response["prompt"].strip(), - "difficulty": difficulty, - "topic": topic - } - except Exception as e: - return str(e) - - -@app.route('/speaking_task_1', methods=['POST']) -@jwt_required() -def grade_speaking_task_1(): - request_id = uuid.uuid4() - delete_files_older_than_one_day(AUDIO_FILES_PATH) - sound_file_name = AUDIO_FILES_PATH + str(uuid.uuid4()) - logging.info("POST - speaking_task_1 - Received request to grade speaking task 1. " - "Use this id to track the logs: " + str(request_id) + " - Request data: " + str(request.get_json())) - try: - data = request.get_json() - question = data.get('question') - answer_firebase_path = data.get('answer') - - logging.info("POST - speaking_task_1 - " + str(request_id) + " - Downloading file " + answer_firebase_path) - download_firebase_file(FIREBASE_BUCKET, answer_firebase_path, sound_file_name) - logging.info("POST - speaking_task_1 - " + str( - request_id) + " - Downloaded file " + answer_firebase_path + " to " + sound_file_name) - - answer = speech_to_text(sound_file_name) - logging.info("POST - speaking_task_1 - " + str(request_id) + " - Transcripted answer: " + answer) - - if has_x_words(answer, 20): - messages = [ - { - "role": "system", - "content": ( - 'You are a helpful assistant designed to output JSON on this format: ' - '{"comment": "comment about answer quality", "overall": 0.0, ' - '"task_response": {"Fluency and Coherence": 0.0, "Lexical Resource": 0.0, ' - '"Grammatical Range and Accuracy": 0.0, "Pronunciation": 0.0}}') - }, - { - "role": "user", - "content": ( - 'Evaluate the given Speaking Part 1 response 
based on the IELTS grading system, ensuring a ' - 'strict assessment that penalizes errors. Deduct points for deviations from the task, and ' - 'assign a score of 0 if the response fails to address the question. Additionally, provide ' - 'detailed commentary highlighting both strengths and weaknesses in the response.' - '\n Question: "' + question + '" \n Answer: "' + answer + '"') - } - ] - token_count = count_total_tokens(messages) - - logging.info("POST - speaking_task_1 - " + str(request_id) + " - Requesting grading of the answer.") - response = make_openai_call(GPT_3_5_TURBO, messages, token_count, ["comment"], - GRADING_TEMPERATURE) - logging.info("POST - speaking_task_1 - " + str(request_id) + " - Answer graded: " + str(response)) - - perfect_answer_messages = [ - { - "role": "system", - "content": ('You are a helpful assistant designed to output JSON on this format: ' - '{"answer": "perfect answer"}') - }, - { - "role": "user", - "content": ( - 'Provide a perfect answer according to ielts grading system to the following ' - 'Speaking Part 1 question: "' + question + '"') - } - ] - token_count = count_total_tokens(perfect_answer_messages) - - logging.info("POST - speaking_task_1 - " + str(request_id) + " - Requesting perfect answer.") - response['perfect_answer'] = make_openai_call(GPT_3_5_TURBO, - perfect_answer_messages, - token_count, - ["answer"], - GEN_QUESTION_TEMPERATURE)["answer"] - logging.info("POST - speaking_task_1 - " + str( - request_id) + " - Perfect answer: " + response['perfect_answer']) - - response['transcript'] = answer - - logging.info("POST - speaking_task_1 - " + str(request_id) + " - Requesting fixed text.") - response['fixed_text'] = get_speaking_corrections(answer) - logging.info("POST - speaking_task_1 - " + str(request_id) + " - Fixed text: " + response['fixed_text']) - - if response["overall"] == "0.0" or response["overall"] == 0.0: - response["overall"] = round((response["task_response"]["Fluency and Coherence"] + - 
response["task_response"]["Lexical Resource"] + response["task_response"][ - "Grammatical Range and Accuracy"] + response["task_response"][ - "Pronunciation"]) / 4, 1) - - logging.info("POST - speaking_task_1 - " + str(request_id) + " - Final response: " + str(response)) - return response - else: - logging.info("POST - speaking_task_1 - " + str( - request_id) + " - The answer had less words than threshold 20 to be graded. Answer: " + answer) - return { - "comment": "The audio recorded does not contain enough english words to be graded.", - "overall": 0, - "task_response": { - "Fluency and Coherence": 0, - "Lexical Resource": 0, - "Grammatical Range and Accuracy": 0, - "Pronunciation": 0 - } - } - except Exception as e: - os.remove(sound_file_name) - return str(e), 400 - - -@app.route('/speaking_task_1', methods=['GET']) -@jwt_required() -def get_speaking_task_1_question(): - difficulty = request.args.get("difficulty", default=random.choice(difficulties)) - topic = request.args.get("topic", default=random.choice(mti_topics)) - try: - messages = [ - { - "role": "system", - "content": ( - 'You are a helpful assistant designed to output JSON on this format: ' - '{"topic": "topic", "question": "question"}') - }, - { - "role": "user", - "content": ( - 'Craft a thought-provoking question of ' + difficulty + ' difficulty for IELTS Speaking Part 1 ' - 'that encourages candidates to delve deeply into ' - 'personal experiences, preferences, or insights on the topic ' - 'of "' + topic + '". Instruct the candidate ' - 'to offer not only detailed ' - 'descriptions but also provide ' - 'nuanced explanations, examples, ' - 'or anecdotes to enrich their response. 
' - 'Make sure that the generated question ' - 'does not contain forbidden subjects in ' - 'muslim countries.') - } - ] - token_count = count_total_tokens(messages) - response = make_openai_call(GPT_4_O, messages, token_count, ["topic"], - GEN_QUESTION_TEMPERATURE) - response["type"] = 1 - response["difficulty"] = difficulty - response["topic"] = topic - return response - except Exception as e: - return str(e) - - -@app.route('/speaking_task_2', methods=['POST']) -@jwt_required() -def grade_speaking_task_2(): - request_id = uuid.uuid4() - delete_files_older_than_one_day(AUDIO_FILES_PATH) - sound_file_name = AUDIO_FILES_PATH + str(uuid.uuid4()) - logging.info("POST - speaking_task_2 - Received request to grade speaking task 2. " - "Use this id to track the logs: " + str(request_id) + " - Request data: " + str(request.get_json())) - try: - data = request.get_json() - question = data.get('question') - answer_firebase_path = data.get('answer') - - logging.info("POST - speaking_task_2 - " + str(request_id) + " - Downloading file " + answer_firebase_path) - download_firebase_file(FIREBASE_BUCKET, answer_firebase_path, sound_file_name) - logging.info("POST - speaking_task_2 - " + str( - request_id) + " - Downloaded file " + answer_firebase_path + " to " + sound_file_name) - - answer = speech_to_text(sound_file_name) - logging.info("POST - speaking_task_2 - " + str(request_id) + " - Transcripted answer: " + answer) - - if has_x_words(answer, 20): - messages = [ - { - "role": "system", - "content": ( - 'You are a helpful assistant designed to output JSON on this format: ' - '{"comment": "comment about answer quality", "overall": 0.0, ' - '"task_response": {"Fluency and Coherence": 0.0, "Lexical Resource": 0.0, ' - '"Grammatical Range and Accuracy": 0.0, "Pronunciation": 0.0}}') - }, - { - "role": "user", - "content": ( - 'Evaluate the given Speaking Part 2 response based on the IELTS grading system, ensuring a ' - 'strict assessment that penalizes errors. 
Deduct points for deviations from the task, and ' - 'assign a score of 0 if the response fails to address the question. Additionally, provide ' - 'detailed commentary highlighting both strengths and weaknesses in the response.' - '\n Question: "' + question + '" \n Answer: "' + answer + '"') - } - ] - token_count = count_total_tokens(messages) - - logging.info("POST - speaking_task_2 - " + str(request_id) + " - Requesting grading of the answer.") - response = make_openai_call(GPT_3_5_TURBO, messages, token_count,["comment"], - GRADING_TEMPERATURE) - logging.info("POST - speaking_task_2 - " + str(request_id) + " - Answer graded: " + str(response)) - - perfect_answer_messages = [ - { - "role": "system", - "content": ('You are a helpful assistant designed to output JSON on this format: ' - '{"answer": "perfect answer"}') - }, - { - "role": "user", - "content": ( - 'Provide a perfect answer according to ielts grading system to the following ' - 'Speaking Part 2 question: "' + question + '"') - } - ] - token_count = count_total_tokens(perfect_answer_messages) - - logging.info("POST - speaking_task_2 - " + str(request_id) + " - Requesting perfect answer.") - response['perfect_answer'] = make_openai_call(GPT_3_5_TURBO, - perfect_answer_messages, - token_count, - ["answer"], - GEN_QUESTION_TEMPERATURE)["answer"] - logging.info("POST - speaking_task_2 - " + str( - request_id) + " - Perfect answer: " + response['perfect_answer']) - - response['transcript'] = answer - - logging.info("POST - speaking_task_2 - " + str(request_id) + " - Requesting fixed text.") - response['fixed_text'] = get_speaking_corrections(answer) - logging.info("POST - speaking_task_2 - " + str(request_id) + " - Fixed text: " + response['fixed_text']) - - if response["overall"] == "0.0" or response["overall"] == 0.0: - response["overall"] = round((response["task_response"]["Fluency and Coherence"] + - response["task_response"]["Lexical Resource"] + response["task_response"][ - "Grammatical Range and 
Accuracy"] + response["task_response"][ - "Pronunciation"]) / 4, 1) - - logging.info("POST - speaking_task_2 - " + str(request_id) + " - Final response: " + str(response)) - return response - else: - logging.info("POST - speaking_task_2 - " + str( - request_id) + " - The answer had less words than threshold 20 to be graded. Answer: " + answer) - return { - "comment": "The audio recorded does not contain enough english words to be graded.", - "overall": 0, - "task_response": { - "Fluency and Coherence": 0, - "Lexical Resource": 0, - "Grammatical Range and Accuracy": 0, - "Pronunciation": 0 - } - } - except Exception as e: - os.remove(sound_file_name) - return str(e), 400 - - -@app.route('/speaking_task_2', methods=['GET']) -@jwt_required() -def get_speaking_task_2_question(): - difficulty = request.args.get("difficulty", default=random.choice(difficulties)) - topic = request.args.get("topic", default=random.choice(mti_topics)) - try: - messages = [ - { - "role": "system", - "content": ( - 'You are a helpful assistant designed to output JSON on this format: ' - '{"topic": "topic", "question": "question", "prompts": ["prompt_1", "prompt_2", "prompt_3"]}') - }, - { - "role": "user", - "content": ( - 'Create a question of ' + difficulty + ' difficulty for IELTS Speaking Part 2 ' - 'that encourages candidates to narrate a ' - 'personal experience or story related to the topic ' - 'of "' + topic + '". Include 3 prompts that ' - 'guide the candidate to describe ' - 'specific aspects of the experience, ' - 'such as details about the situation, ' - 'their actions, and the reasons it left a ' - 'lasting impression. 
Make sure that the ' - 'generated question does not contain ' - 'forbidden subjects in muslim countries.') - } - ] - token_count = count_total_tokens(messages) - response = make_openai_call(GPT_4_O, messages, token_count, GEN_FIELDS, GEN_QUESTION_TEMPERATURE) - response["type"] = 2 - response["difficulty"] = difficulty - response["topic"] = topic - return response - except Exception as e: - return str(e) - - -@app.route('/speaking_task_3', methods=['GET']) -@jwt_required() -def get_speaking_task_3_question(): - difficulty = request.args.get("difficulty", default=random.choice(difficulties)) - topic = request.args.get("topic", default=random.choice(mti_topics)) - try: - messages = [ - { - "role": "system", - "content": ( - 'You are a helpful assistant designed to output JSON on this format: ' - '{"topic": "topic", "questions": ["question", "question", "question"]}') - }, - { - "role": "user", - "content": ( - 'Formulate a set of 3 questions of ' + difficulty + ' difficulty for IELTS Speaking Part 3 that encourage candidates to engage in a ' - 'meaningful discussion on the topic of "' + topic + '". Provide inquiries, ensuring ' - 'they explore various aspects, perspectives, and implications related to the topic.' 
- 'Make sure that the generated question does not contain forbidden subjects in muslim countries.') - - } - ] - token_count = count_total_tokens(messages) - response = make_openai_call(GPT_4_O, messages, token_count, GEN_FIELDS, GEN_QUESTION_TEMPERATURE) - # Remove the numbers from the questions only if the string starts with a number - response["questions"] = [re.sub(r"^\d+\.\s*", "", question) if re.match(r"^\d+\.", question) else question for - question in response["questions"]] - response["type"] = 3 - response["difficulty"] = difficulty - response["topic"] = topic - return response - except Exception as e: - return str(e) - - -@app.route('/speaking_task_3', methods=['POST']) -@jwt_required() -def grade_speaking_task_3(): - request_id = uuid.uuid4() - delete_files_older_than_one_day(AUDIO_FILES_PATH) - logging.info("POST - speaking_task_3 - Received request to grade speaking task 3. " - "Use this id to track the logs: " + str(request_id) + " - Request data: " + str(request.get_json())) - try: - data = request.get_json() - answers = data.get('answers') - text_answers = [] - perfect_answers = [] - logging.info("POST - speaking_task_3 - " + str( - request_id) + " - Received " + str(len(answers)) + " total answers.") - for item in answers: - sound_file_name = AUDIO_FILES_PATH + str(uuid.uuid4()) - - logging.info("POST - speaking_task_3 - " + str(request_id) + " - Downloading file " + item["answer"]) - download_firebase_file(FIREBASE_BUCKET, item["answer"], sound_file_name) - logging.info("POST - speaking_task_3 - " + str( - request_id) + " - Downloaded file " + item["answer"] + " to " + sound_file_name) - - answer_text = speech_to_text(sound_file_name) - logging.info("POST - speaking_task_3 - " + str(request_id) + " - Transcripted answer: " + answer_text) - - text_answers.append(answer_text) - item["answer"] = answer_text - os.remove(sound_file_name) - if not has_x_words(answer_text, 20): - logging.info("POST - speaking_task_3 - " + str( - request_id) + " - The 
answer had less words than threshold 20 to be graded. Answer: " + answer_text) - return { - "comment": "The audio recorded does not contain enough english words to be graded.", - "overall": 0, - "task_response": { - "Fluency and Coherence": 0, - "Lexical Resource": 0, - "Grammatical Range and Accuracy": 0, - "Pronunciation": 0 - } - } - - perfect_answer_messages = [ - { - "role": "system", - "content": ('You are a helpful assistant designed to output JSON on this format: ' - '{"answer": "perfect answer"}') - }, - { - "role": "user", - "content": ( - 'Provide a perfect answer according to ielts grading system to the following ' - 'Speaking Part 3 question: "' + item["question"] + '"') - } - ] - token_count = count_total_tokens(perfect_answer_messages) - logging.info("POST - speaking_task_3 - " + str( - request_id) + " - Requesting perfect answer for question: " + item["question"]) - perfect_answers.append(make_openai_call(GPT_3_5_TURBO, - perfect_answer_messages, - token_count, - ["answer"], - GEN_QUESTION_TEMPERATURE)) - - messages = [ - { - "role": "system", - "content": ( - 'You are a helpful assistant designed to output JSON on this format: ' - '{"comment": "comment about answer quality", "overall": 0.0, ' - '"task_response": {"Fluency and Coherence": 0.0, "Lexical Resource": 0.0, ' - '"Grammatical Range and Accuracy": 0.0, "Pronunciation": 0.0}}') - } - ] - message = ( - "Evaluate the given Speaking Part 3 response based on the IELTS grading system, ensuring a " - "strict assessment that penalizes errors. Deduct points for deviations from the task, and " - "assign a score of 0 if the response fails to address the question. Additionally, provide detailed " - "commentary highlighting both strengths and weaknesses in the response." 
- "\n\n The questions and answers are: \n\n'") - - logging.info("POST - speaking_task_3 - " + str(request_id) + " - Formatting answers and questions for prompt.") - formatted_text = "" - for i, entry in enumerate(answers, start=1): - formatted_text += f"**Question {i}:**\n{entry['question']}\n\n" - formatted_text += f"**Answer {i}:**\n{entry['answer']}\n\n" - logging.info("POST - speaking_task_3 - " + str( - request_id) + " - Formatted answers and questions for prompt: " + formatted_text) - - message += formatted_text - - messages.append({ - "role": "user", - "content": message - }) - - token_count = count_total_tokens(messages) - - logging.info("POST - speaking_task_3 - " + str(request_id) + " - Requesting grading of the answers.") - response = make_openai_call(GPT_3_5_TURBO, messages, token_count, ["comment"], GRADING_TEMPERATURE) - logging.info("POST - speaking_task_3 - " + str(request_id) + " - Answers graded: " + str(response)) - - logging.info("POST - speaking_task_3 - " + str(request_id) + " - Adding perfect answers to response.") - for i, answer in enumerate(perfect_answers, start=1): - response['perfect_answer_' + str(i)] = answer - - logging.info("POST - speaking_task_3 - " + str( - request_id) + " - Adding transcript and fixed texts to response.") - for i, answer in enumerate(text_answers, start=1): - response['transcript_' + str(i)] = answer - response['fixed_text_' + str(i)] = get_speaking_corrections(answer) - if response["overall"] == "0.0" or response["overall"] == 0.0: - response["overall"] = round((response["task_response"]["Fluency and Coherence"] + response["task_response"][ - "Lexical Resource"] + response["task_response"]["Grammatical Range and Accuracy"] + - response["task_response"]["Pronunciation"]) / 4, 1) - logging.info("POST - speaking_task_3 - " + str(request_id) + " - Final response: " + str(response)) - return response - except Exception as e: - return str(e), 400 - - -@app.route('/speaking', methods=['POST']) -@jwt_required() -def 
save_speaking(): - try: - data = request.get_json() - exercises = data.get('exercises') - minTimer = data.get('minTimer', SPEAKING_MIN_TIMER_DEFAULT) - template = getSpeakingTemplate() - template["minTimer"] = minTimer - - if minTimer < SPEAKING_MIN_TIMER_DEFAULT: - template["variant"] = ExamVariant.PARTIAL.value - else: - template["variant"] = ExamVariant.FULL.value - - id = str(uuid.uuid4()) - app.logger.info('Received request to save speaking with id: ' + id) - thread_event.set() - thread = threading.Thread( - target=create_videos_and_save_to_db, - args=(exercises, template, id), - name=("thread-save-speaking-" + id) - ) - thread.start() - app.logger.info('Started thread to save speaking. Thread: ' + thread.getName()) - - # Return response without waiting for create_videos_and_save_to_db to finish - return {**template, "id": id} - except Exception as e: - return str(e) - - -@app.route("/speaking/generate_speaking_video", methods=['POST']) -@jwt_required() -def generate_speaking_video(): - try: - data = request.get_json() - avatar = data.get("avatar", random.choice(list(AvatarEnum)).value) - prompts = data.get("prompts", []) - question = data.get("question") - if len(prompts) > 0: - question = question + " In your answer you should consider: " + " ".join(prompts) - sp1_result = create_video(question, avatar) - if sp1_result is not None: - sound_file_path = VIDEO_FILES_PATH + sp1_result - firebase_file_path = FIREBASE_SPEAKING_VIDEO_FILES_PATH + sp1_result - url = upload_file_firebase_get_url(FIREBASE_BUCKET, firebase_file_path, sound_file_path) - sp1_video_path = firebase_file_path - sp1_video_url = url - - return { - "text": data["question"], - "prompts": prompts, - "title": data["topic"], - "video_url": sp1_video_url, - "video_path": sp1_video_path, - "type": "speaking", - "id": uuid.uuid4() - } - else: - app.logger.error("Failed to create video for part 1 question: " + data["question"]) - return str("Failed to create video for part 1 question: " + 
data["question"]) - - except Exception as e: - return str(e) - - -@app.route("/speaking/generate_interactive_video", methods=['POST']) -@jwt_required() -def generate_interactive_video(): - try: - data = request.get_json() - sp3_questions = [] - avatar = data.get("avatar", random.choice(list(AvatarEnum)).value) - - app.logger.info('Creating videos for speaking part 3') - for question in data["questions"]: - result = create_video(question, avatar) - if result is not None: - sound_file_path = VIDEO_FILES_PATH + result - firebase_file_path = FIREBASE_SPEAKING_VIDEO_FILES_PATH + result - url = upload_file_firebase_get_url(FIREBASE_BUCKET, firebase_file_path, sound_file_path) - video = { - "text": question, - "video_path": firebase_file_path, - "video_url": url - } - sp3_questions.append(video) - else: - app.app.logger.error("Failed to create video for part 3 question: " + question) - - return { - "prompts": sp3_questions, - "title": data["topic"], - "type": "interactiveSpeaking", - "id": uuid.uuid4() - } - except Exception as e: - return str(e) - - -@app.route('/reading_passage_1', methods=['GET']) -@jwt_required() -def get_reading_passage_1_question(): - try: - # Extract parameters from the URL query string - topic = request.args.get('topic', default=random.choice(topics)) - req_exercises = request.args.getlist('exercises') - difficulty = request.args.get("difficulty", default=random.choice(difficulties)) - return gen_reading_passage_1(topic, req_exercises, difficulty) - except Exception as e: - return str(e) - - -@app.route('/reading_passage_2', methods=['GET']) -@jwt_required() -def get_reading_passage_2_question(): - try: - # Extract parameters from the URL query string - topic = request.args.get('topic', default=random.choice(topics)) - req_exercises = request.args.getlist('exercises') - difficulty = request.args.get("difficulty", default=random.choice(difficulties)) - return gen_reading_passage_2(topic, req_exercises, difficulty) - except Exception as e: - return 
str(e) - - -@app.route('/reading_passage_3', methods=['GET']) -@jwt_required() -def get_reading_passage_3_question(): - try: - # Extract parameters from the URL query string - topic = request.args.get('topic', default=random.choice(topics)) - req_exercises = request.args.getlist('exercises') - difficulty = request.args.get("difficulty", default=random.choice(difficulties)) - return gen_reading_passage_3(topic, req_exercises, difficulty) - except Exception as e: - return str(e) - - -@app.route('/level', methods=['GET']) -@jwt_required() -def get_level_exam(): - try: - number_of_exercises = 25 - exercises = gen_multiple_choice_level(number_of_exercises) - return { - "exercises": [exercises], - "isDiagnostic": False, - "minTimer": 25, - "module": "level" - } - except Exception as e: - return str(e) - -@app.route('/level_utas', methods=['GET']) -@jwt_required() -def get_level_utas(): - try: - # Formats - mc = { - "id": str(uuid.uuid4()), - "prompt": "Choose the correct word or group of words that completes the sentences.", - "questions": None, - "type": "multipleChoice", - "part": 1 - } - - umc = { - "id": str(uuid.uuid4()), - "prompt": "Choose the underlined word or group of words that is not correct.", - "questions": None, - "type": "multipleChoice", - "part": 2 - } - - bs_1 = { - "id": str(uuid.uuid4()), - "prompt": "Read the text and write the correct word for each space.", - "questions": None, - "type": "blankSpaceText", - "part": 3 - } - - bs_2 = { - "id": str(uuid.uuid4()), - "prompt": "Read the text and write the correct word for each space.", - "questions": None, - "type": "blankSpaceText", - "part": 4 - } - - reading = { - "id": str(uuid.uuid4()), - "prompt": "Read the text and answer the questions below.", - "questions": None, - "type": "readingExercises", - "part": 5 - } - - all_mc_questions = [] - - # PART 1 - mc_exercises1 = gen_multiple_choice_blank_space_utas(15, 1, all_mc_questions) - print(json.dumps(mc_exercises1, indent=4)) - 
all_mc_questions.append(mc_exercises1) - - # PART 2 - mc_exercises2 = gen_multiple_choice_blank_space_utas(15, 16, all_mc_questions) - print(json.dumps(mc_exercises2, indent=4)) - all_mc_questions.append(mc_exercises2) - - # PART 3 - mc_exercises3 = gen_multiple_choice_blank_space_utas(15, 31, all_mc_questions) - print(json.dumps(mc_exercises3, indent=4)) - all_mc_questions.append(mc_exercises3) - - mc_exercises = mc_exercises1['questions'] + mc_exercises2['questions'] + mc_exercises3['questions'] - print(json.dumps(mc_exercises, indent=4)) - mc["questions"] = mc_exercises - - # Underlined mc - underlined_mc = gen_multiple_choice_underlined_utas(15, 46) - print(json.dumps(underlined_mc, indent=4)) - umc["questions"] = underlined_mc - - # Blank Space text 1 - blank_space_text_1 = gen_blank_space_text_utas(12, 61, 250) - print(json.dumps(blank_space_text_1, indent=4)) - bs_1["questions"] = blank_space_text_1 - - # Blank Space text 2 - blank_space_text_2 = gen_blank_space_text_utas(14, 73, 350) - print(json.dumps(blank_space_text_2, indent=4)) - bs_2["questions"] = blank_space_text_2 - - # Reading text - reading_text = gen_reading_passage_utas(87, 10, 4) - print(json.dumps(reading_text, indent=4)) - reading["questions"] = reading_text - - return { - "exercises": { - "blankSpaceMultipleChoice": mc, - "underlinedMultipleChoice": umc, - "blankSpaceText1": bs_1, - "blankSpaceText2": bs_2, - "readingExercises": reading, - }, - "isDiagnostic": False, - "minTimer": 25, - "module": "level" - } - except Exception as e: - return str(e) - - -@app.route('/fetch_tips', methods=['POST']) -@jwt_required() -def fetch_answer_tips(): - try: - data = request.get_json() - context = data.get('context') - question = data.get('question') - answer = data.get('answer') - correct_answer = data.get('correct_answer') - messages = get_question_tips(question, answer, correct_answer, context) - token_count = reduce(lambda count, item: count + count_tokens(item)['n_tokens'], - map(lambda x: 
x["content"], filter(lambda x: "content" in x, messages)), 0) - response = make_openai_call(GPT_3_5_TURBO, messages, token_count, None, TIPS_TEMPERATURE) - - if isinstance(response, str): - response = re.sub(r"^[a-zA-Z0-9_]+\:\s*", "", response) - - return response - except Exception as e: - return str(e) - - -@app.route('/grading_summary', methods=['POST']) -@jwt_required() -def grading_summary(): - # Body Format - # {'sections': Array of {'code': key, 'name': name, 'grade': grade}} - # Output Format - # {'sections': Array of {'code': key, 'name': name, 'grade': grade, 'evaluation': evaluation, 'suggestions': suggestions}} - try: - return calculate_grading_summary(request.get_json()) - except Exception as e: - return str(e) - - -if __name__ == '__main__': - app.run() +import os + +import click +import uvicorn +from dotenv import load_dotenv + + +@click.command() +@click.option( + "--env", + type=click.Choice(["local", "dev", "prod"], case_sensitive=False), + default="local", +) +def main(env: str): + load_dotenv() + os.environ["ENV"] = env + if env == "prod": + raise Exception("Production environment not supported yet!") + + uvicorn.run( + app="app.server:app", + host="localhost", + port=8000, + reload=True if env != "prod" else False, + workers=1, + ) + + +if __name__ == "__main__": + main() diff --git a/app/api/grade.py b/app/api/grade.py index 645b4b4..7f054e7 100644 --- a/app/api/grade.py +++ b/app/api/grade.py @@ -2,7 +2,8 @@ from dependency_injector.wiring import inject, Provide from fastapi import APIRouter, Depends, Path, Request from app.controllers.abc import IGradeController -from app.dtos import WritingGradeTaskDTO +from app.dtos.writing import WritingGradeTaskDTO +from app.dtos.speaking import GradeSpeakingAnswersDTO, GradeSpeakingDTO from app.middlewares import Authorized, IsAuthenticatedViaBearerToken controller = "grade_controller" @@ -22,18 +23,29 @@ async def grade_writing_task( return await grade_controller.grade_writing_task(task, data) 
+@grade_router.post( + '/speaking/2', + dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))] +) +@inject +async def grade_speaking_task_2( + data: GradeSpeakingDTO, + grade_controller: IGradeController = Depends(Provide[controller]) +): + return await grade_controller.grade_speaking_task(2, [data.dict()]) + + @grade_router.post( '/speaking/{task}', dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))] ) @inject -async def grade_speaking_task( - request: Request, +async def grade_speaking_task_1_and_3( + data: GradeSpeakingAnswersDTO, task: int = Path(..., ge=1, le=3), grade_controller: IGradeController = Depends(Provide[controller]) ): - data = await request.json() - return await grade_controller.grade_speaking_task(task, data) + return await grade_controller.grade_speaking_task(task, data.answers) @grade_router.post( @@ -47,3 +59,16 @@ async def grading_summary( ): data = await request.json() return await grade_controller.grading_summary(data) + + +@grade_router.post( + '/short_answers', + dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))] +) +@inject +async def grade_short_answers( + request: Request, + grade_controller: IGradeController = Depends(Provide[controller]) +): + data = await request.json() + return await grade_controller.grade_short_answers(data) diff --git a/app/api/level.py b/app/api/level.py index a91bfdd..0c9a791 100644 --- a/app/api/level.py +++ b/app/api/level.py @@ -1,5 +1,5 @@ from dependency_injector.wiring import Provide, inject -from fastapi import APIRouter, Depends +from fastapi import APIRouter, Depends, UploadFile, Request from app.middlewares import Authorized, IsAuthenticatedViaBearerToken from app.controllers.abc import ILevelController @@ -27,4 +27,29 @@ async def get_level_exam( async def get_level_utas( level_controller: ILevelController = Depends(Provide[controller]) ): - return await level_controller.get_level_exam() + return await level_controller.get_level_utas() + + 
+@level_router.post( + '/upload', + dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))] +) +@inject +async def upload( + file: UploadFile, + level_controller: ILevelController = Depends(Provide[controller]) +): + return await level_controller.upload_level(file) + + +@level_router.post( + '/custom', + dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))] +) +@inject +async def custom_level( + request: Request, + level_controller: ILevelController = Depends(Provide[controller]) +): + data = await request.json() + return await level_controller.get_custom_level(data) diff --git a/app/api/listening.py b/app/api/listening.py index 8b46bab..3fd15d4 100644 --- a/app/api/listening.py +++ b/app/api/listening.py @@ -6,7 +6,7 @@ from fastapi import APIRouter, Depends, Path from app.middlewares import Authorized, IsAuthenticatedViaBearerToken from app.controllers.abc import IListeningController from app.configs.constants import EducationalContent -from app.dtos import SaveListeningDTO +from app.dtos.listening import SaveListeningDTO controller = "listening_controller" diff --git a/app/api/speaking.py b/app/api/speaking.py index 130e6c2..ee32422 100644 --- a/app/api/speaking.py +++ b/app/api/speaking.py @@ -6,24 +6,40 @@ from fastapi import APIRouter, Path, Query, Depends, BackgroundTasks from app.middlewares import Authorized, IsAuthenticatedViaBearerToken from app.configs.constants import EducationalContent from app.controllers.abc import ISpeakingController -from app.dtos import SaveSpeakingDTO, SpeakingGenerateVideoDTO, SpeakingGenerateInteractiveVideoDTO +from app.dtos.speaking import ( + SaveSpeakingDTO, GenerateVideo1DTO, GenerateVideo2DTO, GenerateVideo3DTO +) controller = "speaking_controller" speaking_router = APIRouter() +@speaking_router.get( + '/1', + dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))] +) +@inject +async def get_speaking_task( + first_topic: str = 
Query(default=random.choice(EducationalContent.MTI_TOPICS)), + second_topic: str = Query(default=random.choice(EducationalContent.MTI_TOPICS)), + difficulty: str = Query(default=random.choice(EducationalContent.DIFFICULTIES)), + speaking_controller: ISpeakingController = Depends(Provide[controller]) +): + return await speaking_controller.get_speaking_part(1, first_topic, difficulty, second_topic) + + @speaking_router.get( '/{task}', dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))] ) @inject async def get_speaking_task( - task: int = Path(..., ge=1, le=3), + task: int = Path(..., ge=2, le=3), topic: str = Query(default=random.choice(EducationalContent.MTI_TOPICS)), difficulty: str = Query(default=random.choice(EducationalContent.DIFFICULTIES)), speaking_controller: ISpeakingController = Depends(Provide[controller]) ): - return await speaking_controller.get_speaking_task(task, topic, difficulty) + return await speaking_controller.get_speaking_part(task, topic, difficulty) @speaking_router.post( @@ -40,24 +56,42 @@ async def save_speaking( @speaking_router.post( - '/generate_speaking_video', + '/generate_video/1', dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))] ) @inject -async def generate_speaking_video( - data: SpeakingGenerateVideoDTO, +async def generate_video_1( + data: GenerateVideo1DTO, speaking_controller: ISpeakingController = Depends(Provide[controller]) ): - return await speaking_controller.generate_speaking_video(data) + return await speaking_controller.generate_video( + 1, data.avatar, data.first_topic, data.questions, second_topic=data.second_topic + ) @speaking_router.post( - '/generate_interactive_video', + '/generate_video/2', dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))] ) @inject -async def generate_interactive_video( - data: SpeakingGenerateInteractiveVideoDTO, +async def generate_video_2( + data: GenerateVideo2DTO, speaking_controller: ISpeakingController = Depends(Provide[controller]) 
): - return await speaking_controller.generate_interactive_video(data) + return await speaking_controller.generate_video( + 2, data.avatar, data.topic, [data.question], prompts=data.prompts, suffix=data.suffix + ) + + +@speaking_router.post( + '/generate_video/3', + dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))] +) +@inject +async def generate_video_3( + data: GenerateVideo3DTO, + speaking_controller: ISpeakingController = Depends(Provide[controller]) +): + return await speaking_controller.generate_video( + 3, data.avatar, data.topic, data.questions + ) diff --git a/app/api/training.py b/app/api/training.py index a9ad5ae..8c4e44e 100644 --- a/app/api/training.py +++ b/app/api/training.py @@ -1,7 +1,7 @@ from dependency_injector.wiring import Provide, inject -from fastapi import APIRouter, Depends +from fastapi import APIRouter, Depends, Request -from app.dtos import TipsDTO +from app.dtos.training import FetchTipsDTO from app.middlewares import Authorized, IsAuthenticatedViaBearerToken from app.controllers.abc import ITrainingController @@ -15,7 +15,20 @@ training_router = APIRouter() ) @inject async def get_reading_passage( - data: TipsDTO, + data: FetchTipsDTO, training_controller: ITrainingController = Depends(Provide[controller]) ): return await training_controller.fetch_tips(data) + + +@training_router.post( + '/', + dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))] +) +@inject +async def training_content( + request: Request, + training_controller: ITrainingController = Depends(Provide[controller]) +): + data = await request.json() + return await training_controller.get_training_content(data) diff --git a/app/configs/constants.py b/app/configs/constants.py index 9534d70..bb58dba 100644 --- a/app/configs/constants.py +++ b/app/configs/constants.py @@ -2,7 +2,7 @@ from enum import Enum BLACKLISTED_WORDS = ["jesus", "sex", "gay", "lesbian", "homosexual", "god", "angel", "pornography", "beer", "wine", "cocaine", "alcohol", 
"nudity", "lgbt", "casino", "gambling", "catholicism", - "discrimination", "politics", "politic", "christianity", "islam", "christian", "christians", + "discrimination", "politic", "christianity", "islam", "christian", "christians", "jews", "jew", "discrimination", "discriminatory"] @@ -11,6 +11,26 @@ class ExamVariant(Enum): PARTIAL = "partial" +class CustomLevelExerciseTypes(Enum): + MULTIPLE_CHOICE_4 = "multiple_choice_4" + MULTIPLE_CHOICE_BLANK_SPACE = "multiple_choice_blank_space" + MULTIPLE_CHOICE_UNDERLINED = "multiple_choice_underlined" + BLANK_SPACE_TEXT = "blank_space_text" + READING_PASSAGE_UTAS = "reading_passage_utas" + WRITING_LETTER = "writing_letter" + WRITING_2 = "writing_2" + SPEAKING_1 = "speaking_1" + SPEAKING_2 = "speaking_2" + SPEAKING_3 = "speaking_3" + READING_1 = "reading_1" + READING_2 = "reading_2" + READING_3 = "reading_3" + LISTENING_1 = "listening_1" + LISTENING_2 = "listening_2" + LISTENING_3 = "listening_3" + LISTENING_4 = "listening_4" + + class QuestionType(Enum): LISTENING_SECTION_1 = "Listening Section 1" LISTENING_SECTION_2 = "Listening Section 2" @@ -63,7 +83,14 @@ class FieldsAndExercises: GEN_TEXT_FIELDS = ['title'] LISTENING_GEN_FIELDS = ['transcript', 'exercise'] READING_EXERCISE_TYPES = ['fillBlanks', 'writeBlanks', 'trueFalse', 'paragraphMatch'] + READING_3_EXERCISE_TYPES = ['fillBlanks', 'writeBlanks', 'trueFalse', 'paragraphMatch', 'ideaMatch'] + LISTENING_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions', 'writeBlanksFill', 'writeBlanksForm'] + LISTENING_1_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions', 'writeBlanksFill', 'writeBlanksFill', + 'writeBlanksForm', 'writeBlanksForm', 'writeBlanksForm', 'writeBlanksForm'] + LISTENING_2_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions'] + LISTENING_3_EXERCISE_TYPES = ['multipleChoice3Options', 'writeBlanksQuestions'] + LISTENING_4_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions', 'writeBlanksFill', 'writeBlanksForm'] 
TOTAL_READING_PASSAGE_1_EXERCISES = 13 TOTAL_READING_PASSAGE_2_EXERCISES = 13 @@ -218,7 +245,6 @@ class EducationalContent: "Space Exploration", "Artificial Intelligence", "Climate Change", - "World Religions", "The Human Brain", "Renewable Energy", "Cultural Diversity", diff --git a/app/configs/dependency_injection.py b/app/configs/dependency_injection.py index c269f6f..c1d13fd 100644 --- a/app/configs/dependency_injection.py +++ b/app/configs/dependency_injection.py @@ -1,3 +1,4 @@ +import json import os from dependency_injector import providers, containers @@ -6,6 +7,7 @@ from openai import AsyncOpenAI from httpx import AsyncClient as HTTPClient from google.cloud.firestore_v1 import AsyncClient as FirestoreClient from dotenv import load_dotenv +from sentence_transformers import SentenceTransformer from app.repositories.impl import * from app.services.impl import * @@ -60,16 +62,26 @@ def config_di( writing_service = providers.Factory(WritingService, llm=llm, ai_detector=ai_detector) + with open('app/services/impl/level/mc_variants.json', 'r') as file: + mc_variants = json.load(file) + level_service = providers.Factory( - LevelService, llm=llm, document_store=firestore, reading_service=reading_service + LevelService, llm=llm, document_store=firestore, mc_variants=mc_variants, reading_service=reading_service, + writing_service=writing_service, speaking_service=speaking_service, listening_service=listening_service ) grade_service = providers.Factory( GradeService, llm=llm ) + embeddings = SentenceTransformer('all-MiniLM-L6-v2') + + training_kb = providers.Factory( + TrainingContentKnowledgeBase, embeddings=embeddings + ) + training_service = providers.Factory( - TrainingService, llm=llm + TrainingService, llm=llm, firestore=firestore, training_kb=training_kb ) # Controllers diff --git a/app/controllers/abc/grade.py b/app/controllers/abc/grade.py index fc851b2..162e246 100644 --- a/app/controllers/abc/grade.py +++ b/app/controllers/abc/grade.py @@ -1,5 +1,5 @@ from 
abc import ABC, abstractmethod -from typing import Dict +from typing import Dict, List class IGradeController(ABC): @@ -9,18 +9,14 @@ class IGradeController(ABC): pass @abstractmethod - async def grade_speaking_task(self, task: int, data: Dict): + async def grade_speaking_task(self, task: int, answers: List[Dict]) -> Dict: + pass + + @abstractmethod + async def grade_short_answers(self, data: Dict): pass @abstractmethod async def grading_summary(self, data: Dict): pass - @abstractmethod - async def _grade_speaking_task_1_2(self, task: int, question: str, answer_firebase_path: str): - pass - - @abstractmethod - async def _grade_speaking_task3(self, answers: Dict): - pass - diff --git a/app/controllers/abc/level.py b/app/controllers/abc/level.py index 0cef88a..43fe296 100644 --- a/app/controllers/abc/level.py +++ b/app/controllers/abc/level.py @@ -1,5 +1,8 @@ from abc import ABC, abstractmethod +from fastapi import UploadFile +from typing import Dict + class ILevelController(ABC): @@ -10,3 +13,11 @@ class ILevelController(ABC): @abstractmethod async def get_level_utas(self): pass + + @abstractmethod + async def upload_level(self, file: UploadFile): + pass + + @abstractmethod + async def get_custom_level(self, data: Dict): + pass diff --git a/app/controllers/abc/speaking.py b/app/controllers/abc/speaking.py index d7d48c0..6b96a23 100644 --- a/app/controllers/abc/speaking.py +++ b/app/controllers/abc/speaking.py @@ -1,11 +1,13 @@ from abc import ABC, abstractmethod +from typing import Optional + from fastapi import BackgroundTasks class ISpeakingController(ABC): @abstractmethod - async def get_speaking_task(self, task: int, topic: str, difficulty: str): + async def get_speaking_part(self, task: int, topic: str, difficulty: str, second_topic: Optional[str] = None): pass @abstractmethod @@ -13,9 +15,11 @@ class ISpeakingController(ABC): pass @abstractmethod - async def generate_speaking_video(self, data): - pass - - @abstractmethod - async def 
generate_interactive_video(self, data): + async def generate_video( + self, part: int, avatar: str, topic: str, questions: list[str], + *, + second_topic: Optional[str] = None, + prompts: Optional[list[str]] = None, + suffix: Optional[str] = None, + ): pass diff --git a/app/controllers/abc/training.py b/app/controllers/abc/training.py index 2ba831f..1ce25c0 100644 --- a/app/controllers/abc/training.py +++ b/app/controllers/abc/training.py @@ -6,3 +6,7 @@ class ITrainingController(ABC): @abstractmethod async def fetch_tips(self, data): pass + + @abstractmethod + async def get_training_content(self, data): + pass diff --git a/app/controllers/impl/grade.py b/app/controllers/impl/grade.py index 791b57b..3474664 100644 --- a/app/controllers/impl/grade.py +++ b/app/controllers/impl/grade.py @@ -1,17 +1,12 @@ import logging -import os -import uuid -from typing import Dict - -from fastapi import HTTPException -from pydantic import ValidationError +from typing import Dict, List from app.configs.constants import FilePaths from app.controllers.abc import IGradeController -from app.dtos.speaking import SpeakingGradeTask1And2DTO, SpeakingGradeTask3DTO from app.dtos.writing import WritingGradeTaskDTO -from app.helpers import IOHelper +from app.helpers import FileHelper from app.services.abc import ISpeakingService, IWritingService, IGradeService +from app.utils import handle_exception class GradeController(IGradeController): @@ -28,47 +23,20 @@ class GradeController(IGradeController): self._logger = logging.getLogger(__name__) async def grade_writing_task(self, task: int, data: WritingGradeTaskDTO): - try: - return await self._writing_service.grade_writing_task(task, data.question, data.answer) - except Exception as e: - return str(e) + return await self._writing_service.grade_writing_task(task, data.question, data.answer) - async def grade_speaking_task(self, task: int, data: Dict): - try: - if task in {1, 2}: - body = SpeakingGradeTask1And2DTO(**data) - return await 
self._grade_speaking_task_1_2(task, body.question, body.answer) - else: - body = SpeakingGradeTask3DTO(**data) - return await self._grade_speaking_task3(body.answers) - except ValidationError as e: - raise HTTPException(status_code=422, detail=e.errors()) + @handle_exception(400) + async def grade_speaking_task(self, task: int, answers: List[Dict]) -> Dict: + FileHelper.delete_files_older_than_one_day(FilePaths.AUDIO_FILES_PATH) + return await self._speaking_service.grade_speaking_task(task, answers) + + async def grade_short_answers(self, data: Dict): + return await self._service.grade_short_answers(data) async def grading_summary(self, data: Dict): - try: - section_keys = ['reading', 'listening', 'writing', 'speaking', 'level'] - extracted_sections = self._extract_existing_sections_from_body(data, section_keys) - return await self._service.calculate_grading_summary(extracted_sections) - except Exception as e: - return str(e) - - async def _grade_speaking_task_1_2(self, task: int, question: str, answer_firebase_path: str): - sound_file_name = FilePaths.AUDIO_FILES_PATH + str(uuid.uuid4()) - try: - IOHelper.delete_files_older_than_one_day(FilePaths.AUDIO_FILES_PATH) - return await self._speaking_service.grade_speaking_task_1_and_2( - task, question, answer_firebase_path, sound_file_name - ) - except Exception as e: - os.remove(sound_file_name) - return str(e), 400 - - async def _grade_speaking_task3(self, answers: Dict): - try: - IOHelper.delete_files_older_than_one_day(FilePaths.AUDIO_FILES_PATH) - return await self._speaking_service.grade_speaking_task_3(answers) - except Exception as e: - return str(e), 400 + section_keys = ['reading', 'listening', 'writing', 'speaking', 'level'] + extracted_sections = self._extract_existing_sections_from_body(data, section_keys) + return await self._service.calculate_grading_summary(extracted_sections) @staticmethod def _extract_existing_sections_from_body(my_dict, keys_to_extract): diff --git a/app/controllers/impl/level.py 
b/app/controllers/impl/level.py index 622163c..eacb202 100644 --- a/app/controllers/impl/level.py +++ b/app/controllers/impl/level.py @@ -1,3 +1,6 @@ +from fastapi import UploadFile +from typing import Dict + from app.controllers.abc import ILevelController from app.services.abc import ILevelService @@ -8,13 +11,13 @@ class LevelController(ILevelController): self._service = level_service async def get_level_exam(self): - try: - return await self._service.get_level_exam() - except Exception as e: - return str(e) + return await self._service.get_level_exam() async def get_level_utas(self): - try: - return await self._service.get_level_utas() - except Exception as e: - return str(e) + return await self._service.get_level_utas() + + async def upload_level(self, file: UploadFile): + return await self._service.upload_level(file) + + async def get_custom_level(self, data: Dict): + return await self._service.get_custom_level(data) diff --git a/app/controllers/impl/listening.py b/app/controllers/impl/listening.py index 9ed57d1..3095388 100644 --- a/app/controllers/impl/listening.py +++ b/app/controllers/impl/listening.py @@ -1,97 +1,19 @@ -import random -import logging from typing import List from app.controllers.abc import IListeningController -from app.dtos import SaveListeningDTO +from app.dtos.listening import SaveListeningDTO from app.services.abc import IListeningService -from app.helpers import IOHelper, ExercisesHelper -from app.configs.constants import ( - FilePaths, EducationalContent, FieldsAndExercises -) class ListeningController(IListeningController): def __init__(self, listening_service: IListeningService): self._service = listening_service - self._logger = logging.getLogger(__name__) - self._sections = { - "section_1": { - "topic": EducationalContent.TWO_PEOPLE_SCENARIOS, - "exercise_sample_size": 1, - "total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_1_EXERCISES, - "type": "conversation", - "start_id": 1 - }, - "section_2": { - "topic": 
EducationalContent.SOCIAL_MONOLOGUE_CONTEXTS, - "exercise_sample_size": 2, - "total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_2_EXERCISES, - "type": "monologue", - "start_id": 11 - }, - "section_3": { - "topic": EducationalContent.FOUR_PEOPLE_SCENARIOS, - "exercise_sample_size": 1, - "total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_3_EXERCISES, - "type": "conversation", - "start_id": 21 - }, - "section_4": { - "topic": EducationalContent.ACADEMIC_SUBJECTS, - "exercise_sample_size": 2, - "total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_4_EXERCISES, - "type": "monologue", - "start_id": 31 - } - } - async def get_listening_question(self, section_id: int, topic: str, req_exercises: List[str], difficulty: str): - try: - IOHelper.delete_files_older_than_one_day(FilePaths.AUDIO_FILES_PATH) - section = self._sections[f"section_{str(section_id)}"] - if not topic: - topic = random.choice(section["topic"]) - - if len(req_exercises) == 0: - req_exercises = random.sample(FieldsAndExercises.LISTENING_EXERCISE_TYPES, section["exercise_sample_size"]) - - number_of_exercises_q = ExercisesHelper.divide_number_into_parts(section["total_exercises"], len(req_exercises)) - - dialog = await self._service.generate_listening_question(section_id, topic) - - if section_id in {1, 3}: - dialog = self.parse_conversation(dialog) - - self._logger.info(f'Generated {section["type"]}: {str(dialog)}') - - exercises = await self._service.generate_listening_exercises( - section_id, str(dialog), req_exercises, number_of_exercises_q, section["start_id"], difficulty - ) - - return { - "exercises": exercises, - "text": dialog, - "difficulty": difficulty - } - except Exception as e: - return str(e) + async def get_listening_question( + self, section_id: int, topic: str, req_exercises: List[str], difficulty: str + ): + return await self._service.get_listening_question(section_id, topic, req_exercises, difficulty) async def save_listening(self, data: SaveListeningDTO): - 
try: - return await self._service.save_listening(data.parts, data.minTimer, data.difficulty) - except Exception as e: - return str(e) - - @staticmethod - def parse_conversation(conversation_data): - conversation_list = conversation_data.get('conversation', []) - readable_text = [] - - for message in conversation_list: - name = message.get('name', 'Unknown') - text = message.get('text', '') - readable_text.append(f"{name}: {text}") - - return "\n".join(readable_text) + return await self._service.save_listening(data.parts, data.minTimer, data.difficulty, data.id) diff --git a/app/controllers/impl/reading.py b/app/controllers/impl/reading.py index da18388..d496c02 100644 --- a/app/controllers/impl/reading.py +++ b/app/controllers/impl/reading.py @@ -15,29 +15,29 @@ class ReadingController(IReadingController): self._logger = logging.getLogger(__name__) self._passages = { "passage_1": { + "start_id": 1, "total_exercises": FieldsAndExercises.TOTAL_READING_PASSAGE_1_EXERCISES }, "passage_2": { + "start_id": 14, "total_exercises": FieldsAndExercises.TOTAL_READING_PASSAGE_2_EXERCISES }, "passage_3": { + "start_id": 27, "total_exercises": FieldsAndExercises.TOTAL_READING_PASSAGE_3_EXERCISES } } async def get_reading_passage(self, passage_id: int, topic: str, req_exercises: List[str], difficulty: str): - try: - passage = self._passages[f'passage_{str(passage_id)}'] + passage = self._passages[f'passage_{str(passage_id)}'] - if len(req_exercises) == 0: - req_exercises = random.sample(FieldsAndExercises.READING_EXERCISE_TYPES, 2) + if len(req_exercises) == 0: + req_exercises = random.sample(FieldsAndExercises.READING_EXERCISE_TYPES, 2) - number_of_exercises_q = ExercisesHelper.divide_number_into_parts( - passage["total_exercises"], len(req_exercises) - ) + number_of_exercises_q = ExercisesHelper.divide_number_into_parts( + passage["total_exercises"], len(req_exercises) + ) - return await self._service.gen_reading_passage( - passage_id, topic, req_exercises, 
number_of_exercises_q, difficulty - ) - except Exception as e: - return str(e) + return await self._service.gen_reading_passage( + passage_id, topic, req_exercises, number_of_exercises_q, difficulty, passage["start_id"] + ) diff --git a/app/controllers/impl/speaking.py b/app/controllers/impl/speaking.py index c0fbde7..7c2a383 100644 --- a/app/controllers/impl/speaking.py +++ b/app/controllers/impl/speaking.py @@ -1,13 +1,12 @@ import logging import uuid +from typing import Optional from fastapi import BackgroundTasks from app.controllers.abc import ISpeakingController -from app.dtos import ( - SaveSpeakingDTO, SpeakingGenerateVideoDTO, - SpeakingGenerateInteractiveVideoDTO -) +from app.dtos.speaking import SaveSpeakingDTO + from app.services.abc import ISpeakingService from app.configs.constants import ExamVariant, MinTimers from app.configs.question_templates import getSpeakingTemplate @@ -19,45 +18,30 @@ class SpeakingController(ISpeakingController): self._service = speaking_service self._logger = logging.getLogger(__name__) - async def get_speaking_task(self, task: int, topic: str, difficulty: str): - try: - return await self._service.get_speaking_task(task, topic, difficulty) - except Exception as e: - return str(e) + async def get_speaking_part(self, task: int, topic: str, difficulty: str, second_topic: Optional[str] = None): + return await self._service.get_speaking_part(task, topic, difficulty, second_topic) async def save_speaking(self, data: SaveSpeakingDTO, background_tasks: BackgroundTasks): - try: - exercises = data.exercises - min_timer = data.minTimer + exercises = data.exercises + min_timer = data.minTimer - template = getSpeakingTemplate() - template["minTimer"] = min_timer + template = getSpeakingTemplate() + template["minTimer"] = min_timer - if min_timer < MinTimers.SPEAKING_MIN_TIMER_DEFAULT: - template["variant"] = ExamVariant.PARTIAL.value - else: - template["variant"] = ExamVariant.FULL.value + if min_timer < 
MinTimers.SPEAKING_MIN_TIMER_DEFAULT: + template["variant"] = ExamVariant.PARTIAL.value + else: + template["variant"] = ExamVariant.FULL.value - req_id = str(uuid.uuid4()) - self._logger.info(f'Received request to save speaking with id: {req_id}') + req_id = str(uuid.uuid4()) + self._logger.info(f'Received request to save speaking with id: {req_id}') - background_tasks.add_task(self._service.create_videos_and_save_to_db, exercises, template, req_id) + background_tasks.add_task(self._service.create_videos_and_save_to_db, exercises, template, req_id) - self._logger.info('Started background task to save speaking.') + self._logger.info('Started background task to save speaking.') - # Return response without waiting for create_videos_and_save_to_db to finish - return {**template, "id": req_id} - except Exception as e: - return str(e) + # Return response without waiting for create_videos_and_save_to_db to finish + return {**template, "id": req_id} - async def generate_speaking_video(self, data: SpeakingGenerateVideoDTO): - try: - return await self._service.generate_speaking_video(data.question, data.topic, data.avatar, data.prompts) - except Exception as e: - return str(e) - - async def generate_interactive_video(self, data: SpeakingGenerateInteractiveVideoDTO): - try: - return await self._service.generate_interactive_video(data.questions, data.topic, data.avatar) - except Exception as e: - return str(e) + async def generate_video(self, *args, **kwargs): + return await self._service.generate_video(*args, **kwargs) diff --git a/app/controllers/impl/training.py b/app/controllers/impl/training.py index d3e3fd9..dc39017 100644 --- a/app/controllers/impl/training.py +++ b/app/controllers/impl/training.py @@ -1,5 +1,7 @@ +from typing import Dict + from app.controllers.abc import ITrainingController -from app.dtos import TipsDTO +from app.dtos.training import FetchTipsDTO from app.services.abc import ITrainingService @@ -8,8 +10,8 @@ class 
TrainingController(ITrainingController): def __init__(self, training_service: ITrainingService): self._service = training_service - async def fetch_tips(self, data: TipsDTO): - try: - return await self._service.fetch_tips(data.context, data.question, data.answer, data.correct_answer) - except Exception as e: - return str(e) + async def fetch_tips(self, data: FetchTipsDTO): + return await self._service.fetch_tips(data.context, data.question, data.answer, data.correct_answer) + + async def get_training_content(self, data: Dict): + return await self._service.get_training_content(data) diff --git a/app/controllers/impl/writing.py b/app/controllers/impl/writing.py index 45018ce..b01726d 100644 --- a/app/controllers/impl/writing.py +++ b/app/controllers/impl/writing.py @@ -8,7 +8,4 @@ class WritingController(IWritingController): self._service = writing_service async def get_writing_task_general_question(self, task: int, topic: str, difficulty: str): - try: - return await self._service.get_writing_task_general_question(task, topic, difficulty) - except Exception as e: - return str(e) + return await self._service.get_writing_task_general_question(task, topic, difficulty) diff --git a/app/dtos/__init__.py b/app/dtos/__init__.py index b51dd73..e69de29 100644 --- a/app/dtos/__init__.py +++ b/app/dtos/__init__.py @@ -1,19 +0,0 @@ -from .listening import SaveListeningDTO -from .speaking import ( - SaveSpeakingDTO, SpeakingGradeTask1And2DTO, - SpeakingGradeTask3DTO, SpeakingGenerateVideoDTO, - SpeakingGenerateInteractiveVideoDTO -) -from .training import TipsDTO -from .writing import WritingGradeTaskDTO - -__all__ = [ - "SaveListeningDTO", - "SaveSpeakingDTO", - "SpeakingGradeTask1And2DTO", - "SpeakingGradeTask3DTO", - "SpeakingGenerateVideoDTO", - "SpeakingGenerateInteractiveVideoDTO", - "TipsDTO", - "WritingGradeTaskDTO" -] diff --git a/app/dtos/exam.py b/app/dtos/exam.py new file mode 100644 index 0000000..779daea --- /dev/null +++ b/app/dtos/exam.py @@ -0,0 +1,57 @@ +from 
pydantic import BaseModel, Field +from typing import List, Dict, Union, Optional +from uuid import uuid4, UUID + + +class Option(BaseModel): + id: str + text: str + + +class MultipleChoiceQuestion(BaseModel): + id: str + prompt: str + variant: str = "text" + solution: str + options: List[Option] + + +class MultipleChoiceExercise(BaseModel): + id: UUID = Field(default_factory=uuid4) + type: str = "multipleChoice" + prompt: str = "Select the appropriate option." + questions: List[MultipleChoiceQuestion] + userSolutions: List = Field(default_factory=list) + + +class FillBlanksWord(BaseModel): + id: str + options: Dict[str, str] + + +class FillBlanksSolution(BaseModel): + id: str + solution: str + + +class FillBlanksExercise(BaseModel): + id: UUID = Field(default_factory=uuid4) + type: str = "fillBlanks" + variant: str = "mc" + prompt: str = "Click a blank to select the appropriate word for it." + text: str + solutions: List[FillBlanksSolution] + words: List[FillBlanksWord] + userSolutions: List = Field(default_factory=list) + + +Exercise = Union[MultipleChoiceExercise, FillBlanksExercise] + + +class Part(BaseModel): + exercises: List[Exercise] + context: Optional[str] = Field(default=None) + + +class Exam(BaseModel): + parts: List[Part] diff --git a/app/dtos/listening.py b/app/dtos/listening.py index 83096bd..d7e44db 100644 --- a/app/dtos/listening.py +++ b/app/dtos/listening.py @@ -1,4 +1,5 @@ import random +import uuid from typing import List, Dict from pydantic import BaseModel @@ -10,3 +11,4 @@ class SaveListeningDTO(BaseModel): parts: List[Dict] minTimer: int = MinTimers.LISTENING_MIN_TIMER_DEFAULT difficulty: str = random.choice(EducationalContent.DIFFICULTIES) + id: str = str(uuid.uuid4()) diff --git a/app/dtos/sheet.py b/app/dtos/sheet.py new file mode 100644 index 0000000..8efac82 --- /dev/null +++ b/app/dtos/sheet.py @@ -0,0 +1,29 @@ +from pydantic import BaseModel +from typing import List, Dict, Union, Any, Optional + + +class Option(BaseModel): + id: str + 
text: str + + +class MultipleChoiceQuestion(BaseModel): + type: str = "multipleChoice" + id: str + prompt: str + variant: str = "text" + options: List[Option] + + +class FillBlanksWord(BaseModel): + type: str = "fillBlanks" + id: str + options: Dict[str, str] + + +Component = Union[MultipleChoiceQuestion, FillBlanksWord, Dict[str, Any]] + + +class Sheet(BaseModel): + batch: Optional[int] = None + components: List[Component] diff --git a/app/dtos/speaking.py b/app/dtos/speaking.py index 14808de..7c8b124 100644 --- a/app/dtos/speaking.py +++ b/app/dtos/speaking.py @@ -11,23 +11,31 @@ class SaveSpeakingDTO(BaseModel): minTimer: int = MinTimers.SPEAKING_MIN_TIMER_DEFAULT -class SpeakingGradeTask1And2DTO(BaseModel): +class GradeSpeakingDTO(BaseModel): question: str answer: str -class SpeakingGradeTask3DTO(BaseModel): - answers: Dict +class GradeSpeakingAnswersDTO(BaseModel): + answers: List[Dict] -class SpeakingGenerateVideoDTO(BaseModel): +class GenerateVideo1DTO(BaseModel): + avatar: str = (random.choice(list(AvatarEnum))).value + questions: List[str] + first_topic: str + second_topic: str + + +class GenerateVideo2DTO(BaseModel): avatar: str = (random.choice(list(AvatarEnum))).value prompts: List[str] = [] + suffix: str = "" question: str topic: str -class SpeakingGenerateInteractiveVideoDTO(BaseModel): +class GenerateVideo3DTO(BaseModel): avatar: str = (random.choice(list(AvatarEnum))).value questions: List[str] topic: str diff --git a/app/dtos/training.py b/app/dtos/training.py index cb82490..d5de433 100644 --- a/app/dtos/training.py +++ b/app/dtos/training.py @@ -1,8 +1,37 @@ from pydantic import BaseModel +from typing import List -class TipsDTO(BaseModel): +class FetchTipsDTO(BaseModel): context: str question: str answer: str correct_answer: str + + +class QueryDTO(BaseModel): + category: str + text: str + + +class DetailsDTO(BaseModel): + exam_id: str + date: int + performance_comment: str + detailed_summary: str + + +class WeakAreaDTO(BaseModel): + area: str + 
comment: str + + +class TrainingContentDTO(BaseModel): + details: List[DetailsDTO] + weak_areas: List[WeakAreaDTO] + queries: List[QueryDTO] + + +class TipsDTO(BaseModel): + tip_ids: List[str] + diff --git a/app/helpers/__init__.py b/app/helpers/__init__.py index fb1275d..eddd6da 100644 --- a/app/helpers/__init__.py +++ b/app/helpers/__init__.py @@ -1,11 +1,13 @@ -from .io import IOHelper -from .text_helper import TextHelper +from .file import FileHelper +from .text import TextHelper from .token_counter import count_tokens -from .exercises_helper import ExercisesHelper +from .exercises import ExercisesHelper +from .logger import LoggerHelper __all__ = [ - "IOHelper", + "FileHelper", "TextHelper", "count_tokens", - "ExercisesHelper" + "ExercisesHelper", + "LoggerHelper" ] diff --git a/app/helpers/exercises_helper.py b/app/helpers/exercises.py similarity index 76% rename from app/helpers/exercises_helper.py rename to app/helpers/exercises.py index 618c1da..be40bc8 100644 --- a/app/helpers/exercises_helper.py +++ b/app/helpers/exercises.py @@ -4,7 +4,7 @@ import re import string from wonderwords import RandomWord -from .text_helper import TextHelper +from .text import TextHelper class ExercisesHelper: @@ -70,7 +70,12 @@ class ExercisesHelper: random.shuffle(combined_array) - return combined_array + result = [] + for i, word in enumerate(combined_array): + letter = chr(65 + i) # chr(65) is 'A' + result.append({"letter": letter, "word": word}) + + return result @staticmethod def fillblanks_build_solutions_array(words, start_id): @@ -187,9 +192,58 @@ class ExercisesHelper: @staticmethod def fix_writing_overall(overall: float, task_response: dict): - if overall > max(task_response.values()) or overall < min(task_response.values()): - total_sum = sum(task_response.values()) - average = total_sum / len(task_response.values()) + grades = [category["grade"] for category in task_response.values()] + + if overall > max(grades) or overall < min(grades): + total_sum = sum(grades) 
+ average = total_sum / len(grades) rounded_average = round(average, 0) return rounded_average + return overall + + @staticmethod + def build_options(ideas): + options = [] + letters = iter(string.ascii_uppercase) + for idea in ideas: + options.append({ + "id": next(letters), + "sentence": idea["from"] + }) + return options + + @staticmethod + def build_sentences(ideas, start_id): + sentences = [] + letters = iter(string.ascii_uppercase) + for idea in ideas: + sentences.append({ + "solution": next(letters), + "sentence": idea["idea"] + }) + + random.shuffle(sentences) + for i, sentence in enumerate(sentences, start=start_id): + sentence["id"] = i + return sentences + + @staticmethod + def randomize_mc_options_order(questions): + option_ids = ['A', 'B', 'C', 'D'] + + for question in questions: + # Store the original solution text + original_solution_text = next( + option['text'] for option in question['options'] if option['id'] == question['solution']) + + # Shuffle the options + random.shuffle(question['options']) + + # Update the option ids and find the new solution id + for idx, option in enumerate(question['options']): + option['id'] = option_ids[idx] + if option['text'] == original_solution_text: + question['solution'] = option['id'] + + return questions diff --git a/app/helpers/file.py b/app/helpers/file.py new file mode 100644 index 0000000..aa3230b --- /dev/null +++ b/app/helpers/file.py @@ -0,0 +1,95 @@ +import datetime +from pathlib import Path +import base64 +import io +import os +import shutil +import subprocess +from typing import Optional + +import numpy as np +import pypandoc +from PIL import Image + +import aiofiles + + +class FileHelper: + + @staticmethod + def delete_files_older_than_one_day(directory: str): + current_time = datetime.datetime.now() + + for entry in os.scandir(directory): + if entry.is_file(): + file_path = Path(entry) + file_name = file_path.name + file_modified_time = datetime.datetime.fromtimestamp(file_path.stat().st_mtime) + 
time_difference = current_time - file_modified_time + if time_difference.days > 1 and "placeholder" not in file_name: + file_path.unlink() + print(f"Deleted file: {file_path}") + + # Supposedly pandoc covers a wide range of file extensions only tested with docx + @staticmethod + def convert_file_to_pdf(input_path: str, output_path: str): + pypandoc.convert_file(input_path, 'pdf', outputfile=output_path, extra_args=[ + '-V', 'geometry:paperwidth=5.5in', + '-V', 'geometry:paperheight=8.5in', + '-V', 'geometry:margin=0.5in', + '-V', 'pagestyle=empty' + ]) + + @staticmethod + def convert_file_to_html(input_path: str, output_path: str): + pypandoc.convert_file(input_path, 'html', outputfile=output_path) + + @staticmethod + def pdf_to_png(path_id: str): + to_png = f"pdftoppm -png exercises.pdf page" + result = subprocess.run(to_png, shell=True, cwd=f'./tmp/{path_id}', capture_output=True, text=True) + if result.returncode != 0: + raise Exception( + f"Couldn't convert pdf to png. Failed to run command '{to_png}' -> ```cmd {result.stderr}```") + + @staticmethod + def is_page_blank(image_bytes: bytes, image_threshold=10) -> bool: + with Image.open(io.BytesIO(image_bytes)) as img: + img_gray = img.convert('L') + img_array = np.array(img_gray) + non_white_pixels = np.sum(img_array < 255) + + return non_white_pixels <= image_threshold + + @classmethod + async def _encode_image(cls, image_path: str, image_threshold=10) -> Optional[str]: + async with aiofiles.open(image_path, "rb") as image_file: + image_bytes = await image_file.read() + + if cls.is_page_blank(image_bytes, image_threshold): + return None + + return base64.b64encode(image_bytes).decode('utf-8') + + @classmethod + async def b64_pngs(cls, path_id: str, files: list[str]): + png_messages = [] + for filename in files: + b64_string = await cls._encode_image(os.path.join(f'./tmp/{path_id}', filename)) + if b64_string: + png_messages.append({ + "type": "image_url", + "image_url": { + "url": f"data:image/png;base64,{b64_string}" + }
+ }) + return png_messages + + @staticmethod + def remove_directory(path): + try: + if os.path.exists(path): + if os.path.isdir(path): + shutil.rmtree(path) + except Exception as e: + print(f"An error occurred while trying to remove {path}: {str(e)}") diff --git a/app/helpers/io.py b/app/helpers/io.py deleted file mode 100644 index c170395..0000000 --- a/app/helpers/io.py +++ /dev/null @@ -1,20 +0,0 @@ -import datetime -import os -from pathlib import Path - - -class IOHelper: - - @staticmethod - def delete_files_older_than_one_day(directory: str): - current_time = datetime.datetime.now() - - for entry in os.scandir(directory): - if entry.is_file(): - file_path = Path(entry) - file_name = file_path.name - file_modified_time = datetime.datetime.fromtimestamp(file_path.stat().st_mtime) - time_difference = current_time - file_modified_time - if time_difference.days > 1 and "placeholder" not in file_name: - file_path.unlink() - print(f"Deleted file: {file_path}") diff --git a/app/helpers/logger.py b/app/helpers/logger.py new file mode 100644 index 0000000..762766a --- /dev/null +++ b/app/helpers/logger.py @@ -0,0 +1,23 @@ +import logging +from functools import wraps + + +class LoggerHelper: + + @staticmethod + def suppress_loggers(): + def decorator(f): + @wraps(f) + def wrapped(*args, **kwargs): + root_logger = logging.getLogger() + original_level = root_logger.level + + root_logger.setLevel(logging.ERROR) + + try: + return f(*args, **kwargs) + finally: + root_logger.setLevel(original_level) + + return wrapped + return decorator diff --git a/app/helpers/text_helper.py b/app/helpers/text.py similarity index 100% rename from app/helpers/text_helper.py rename to app/helpers/text.py diff --git a/app/mappers/__init__.py b/app/mappers/__init__.py new file mode 100644 index 0000000..bc00787 --- /dev/null +++ b/app/mappers/__init__.py @@ -0,0 +1,5 @@ +from .exam import ExamMapper + +__all__ = [ + "ExamMapper" +] diff --git a/app/mappers/exam.py b/app/mappers/exam.py new file 
mode 100644 index 0000000..df26eea --- /dev/null +++ b/app/mappers/exam.py @@ -0,0 +1,66 @@ +from typing import Dict, Any + +from pydantic import ValidationError + +from app.dtos.exam import ( + MultipleChoiceExercise, + FillBlanksExercise, + Part, Exam +) +from app.dtos.sheet import Sheet, Option, MultipleChoiceQuestion, FillBlanksWord + + +class ExamMapper: + + @staticmethod + def map_to_exam_model(response: Dict[str, Any]) -> Exam: + parts = [] + for part in response['parts']: + part_exercises = part['exercises'] + context = part.get('context', None) + + exercises = [] + for exercise in part_exercises: + exercise_type = exercise['type'] + if exercise_type == 'multipleChoice': + exercise_model = MultipleChoiceExercise(**exercise) + elif exercise_type == 'fillBlanks': + exercise_model = FillBlanksExercise(**exercise) + else: + raise ValueError(f"Unknown exercise type: {exercise_type}") + + exercises.append(exercise_model) + + part_kwargs = {"exercises": exercises} + if context is not None: + part_kwargs["context"] = context + + part_model = Part(**part_kwargs) + parts.append(part_model) + + return Exam(parts=parts) + + @staticmethod + def map_to_sheet(response: Dict[str, Any]) -> Sheet: + components = [] + + for item in response["components"]: + component_type = item["type"] + + if component_type == "multipleChoice": + options = [Option(id=opt["id"], text=opt["text"]) for opt in item["options"]] + components.append(MultipleChoiceQuestion( + id=item["id"], + prompt=item["prompt"], + variant=item.get("variant", "text"), + options=options + )) + elif component_type == "fillBlanks": + components.append(FillBlanksWord( + id=item["id"], + options=item["options"] + )) + else: + components.append(item) + + return Sheet(components=components) diff --git a/app/repositories/abc/document_store.py b/app/repositories/abc/document_store.py index 4afe66d..78b0a12 100644 --- a/app/repositories/abc/document_store.py +++ b/app/repositories/abc/document_store.py @@ -11,3 +11,6
@@ class IDocumentStore(ABC): async def get_all(self, collection: str): pass + + async def get_doc_by_id(self, collection: str, doc_id: str): + pass diff --git a/app/repositories/impl/document_stores/firestore.py b/app/repositories/impl/document_stores/firestore.py index f95badf..30f4e3e 100644 --- a/app/repositories/impl/document_stores/firestore.py +++ b/app/repositories/impl/document_stores/firestore.py @@ -15,9 +15,9 @@ class Firestore(IDocumentStore): update_time, document_ref = await collection_ref.add(item) if document_ref: self._logger.info(f"Document added with ID: {document_ref.id}") - return True, document_ref.id + return document_ref.id else: - return False, None + return None async def save_to_db_with_id(self, collection: str, item, id: str): collection_ref: AsyncCollectionReference = self._client.collection(collection) @@ -26,9 +26,9 @@ class Firestore(IDocumentStore): doc_snapshot = await document_ref.get() if doc_snapshot.exists: self._logger.info(f"Document added with ID: {document_ref.id}") - return True, document_ref.id + return document_ref.id else: - return False, None + return None async def get_all(self, collection: str): collection_ref: AsyncCollectionReference = self._client.collection(collection) @@ -36,3 +36,12 @@ class Firestore(IDocumentStore): async for doc in collection_ref.stream(): docs.append(doc.to_dict()) return docs + + async def get_doc_by_id(self, collection: str, doc_id: str): + collection_ref: AsyncCollectionReference = self._client.collection(collection) + doc_ref: AsyncDocumentReference = collection_ref.document(doc_id) + doc = await doc_ref.get() + + if doc.exists: + return doc.to_dict() + return None diff --git a/app/server.py b/app/server.py index 71cc8fa..5ff7d5d 100644 --- a/app/server.py +++ b/app/server.py @@ -116,6 +116,16 @@ def setup_listeners(_app: FastAPI) -> None: content={"error_code": exc.error_code, "message": exc.message}, ) + @_app.exception_handler(Exception) + async def 
default_exception_handler(request: Request, exc: Exception): + """ + Don't delete request param + """ + return JSONResponse( + status_code=500, + content=str(exc), + ) + def setup_middleware() -> List[Middleware]: middleware = [ @@ -135,9 +145,10 @@ def setup_middleware() -> List[Middleware]: def create_app() -> FastAPI: + env = os.getenv("ENV") _app = FastAPI( - docs_url=None, - redoc_url=None, + docs_url="/docs" if env != "prod" else None, + redoc_url="/redoc" if env != "prod" else None, middleware=setup_middleware(), lifespan=lifespan ) diff --git a/app/services/abc/__init__.py b/app/services/abc/__init__.py index 82b6cba..b8130df 100644 --- a/app/services/abc/__init__.py +++ b/app/services/abc/__init__.py @@ -5,6 +5,7 @@ from .speaking import ISpeakingService from .reading import IReadingService from .grade import IGradeService from .training import ITrainingService +from .kb import IKnowledgeBase from .third_parties import * __all__ = [ diff --git a/app/services/abc/grade.py b/app/services/abc/grade.py index 41ce1c3..9ee89eb 100644 --- a/app/services/abc/grade.py +++ b/app/services/abc/grade.py @@ -4,20 +4,10 @@ from typing import Dict, List class IGradeService(ABC): + @abstractmethod + async def grade_short_answers(self, data: Dict): + pass + @abstractmethod async def calculate_grading_summary(self, extracted_sections: List): pass - - @abstractmethod - async def _calculate_section_grade_summary(self, section): - pass - - @staticmethod - @abstractmethod - def _parse_openai_response(response): - pass - - @staticmethod - @abstractmethod - def _parse_bullet_points(bullet_points_str, grade): - pass diff --git a/app/services/abc/kb.py b/app/services/abc/kb.py new file mode 100644 index 0000000..4568c0c --- /dev/null +++ b/app/services/abc/kb.py @@ -0,0 +1,10 @@ +from abc import ABC, abstractmethod + +from typing import List, Dict + + +class IKnowledgeBase(ABC): + + @abstractmethod + def query_knowledge_base(self, query: str, category: str, top_k: int = 5) -> 
List[Dict[str, str]]: + pass diff --git a/app/services/abc/level.py b/app/services/abc/level.py index 127235f..7f7d954 100644 --- a/app/services/abc/level.py +++ b/app/services/abc/level.py @@ -1,10 +1,19 @@ from abc import ABC, abstractmethod +import random + +from typing import Dict + +from fastapi import UploadFile + +from app.configs.constants import EducationalContent class ILevelService(ABC): @abstractmethod - async def get_level_exam(self): + async def get_level_exam( + self, number_of_exercises: int = 25, min_timer: int = 25, diagnostic: bool = False + ) -> Dict: pass @abstractmethod @@ -12,13 +21,27 @@ class ILevelService(ABC): pass @abstractmethod - async def _gen_multiple_choice_level(self, quantity: int, start_id=1): + async def get_custom_level(self, data: Dict): pass @abstractmethod - async def _replace_exercise_if_exists(self, all_exams, current_exercise, current_exam, seen_keys): + async def upload_level(self, upload: UploadFile) -> Dict: pass @abstractmethod - async def _generate_single_mc_level_question(self): + async def gen_multiple_choice( + self, mc_variant: str, quantity: int, start_id: int = 1, *, utas: bool = False, all_exams=None + ): + pass + + @abstractmethod + async def gen_blank_space_text_utas( + self, quantity: int, start_id: int, size: int, topic=random.choice(EducationalContent.MTI_TOPICS) + ): + pass + + @abstractmethod + async def gen_reading_passage_utas( + self, start_id, sa_quantity: int, mc_quantity: int, topic=random.choice(EducationalContent.MTI_TOPICS) + ): pass diff --git a/app/services/abc/listening.py b/app/services/abc/listening.py index 3547122..4654fde 100644 --- a/app/services/abc/listening.py +++ b/app/services/abc/listening.py @@ -1,68 +1,18 @@ +import queue from abc import ABC, abstractmethod from queue import Queue -from typing import Dict +from typing import Dict, List class IListeningService(ABC): @abstractmethod - async def generate_listening_question(self, section: int, topic: str) -> Dict: - pass - - 
@abstractmethod - async def generate_listening_exercises( - self, section: int, dialog: str, - req_exercises: list[str], exercises_queue: Queue, - start_id: int, difficulty: str + async def get_listening_question( + self, section_id: int, topic: str, req_exercises: List[str], difficulty: str, + number_of_exercises_q=queue.Queue(), start_id=-1 ): pass @abstractmethod - async def save_listening(self, parts, min_timer, difficulty): + async def save_listening(self, parts: list[dict], min_timer: int, difficulty: str, listening_id: str) -> Dict: pass - - # ================================================================================================================== - # Helpers - # ================================================================================================================== - - @abstractmethod - async def _generate_listening_conversation(self, section: int, topic: str) -> Dict: - pass - - @abstractmethod - async def _generate_listening_monologue(self, section: int, topic: str) -> Dict: - pass - - @abstractmethod - def _get_conversation_voices(self, response: Dict, unique_voices_across_segments: bool): - pass - - @staticmethod - @abstractmethod - def _get_random_voice(gender: str): - pass - - @abstractmethod - async def _gen_multiple_choice_exercise_listening( - self, dialog_type: str, text: str, quantity: int, start_id, difficulty - ): - pass - - @abstractmethod - async def _gen_write_blanks_questions_exercise_listening( - self, dialog_type: str, text: str, quantity: int, start_id, difficulty - ): - pass - - @abstractmethod - async def _gen_write_blanks_notes_exercise_listening( - self, dialog_type: str, text: str, quantity: int, start_id, difficulty - ): - pass - - @abstractmethod - async def _gen_write_blanks_form_exercise_listening( - self, dialog_type: str, text: str, quantity: int, start_id, difficulty - ): - pass - diff --git a/app/services/abc/reading.py b/app/services/abc/reading.py index 2621b62..70c672a 100644 --- 
a/app/services/abc/reading.py +++ b/app/services/abc/reading.py @@ -2,8 +2,6 @@ from abc import ABC, abstractmethod from queue import Queue from typing import List -from app.configs.constants import QuestionType - class IReadingService(ABC): @@ -14,36 +12,11 @@ class IReadingService(ABC): topic: str, req_exercises: List[str], number_of_exercises_q: Queue, - difficulty: str - ): - pass - - # ================================================================================================================== - # Helpers - # ================================================================================================================== - - @abstractmethod - async def generate_reading_passage(self, q_type: QuestionType, topic: str): - pass - - @abstractmethod - async def _generate_reading_exercises( - self, passage: str, req_exercises: list, number_of_exercises_q, start_id, difficulty + difficulty: str, + start_id: int ): pass @abstractmethod - async def _gen_summary_fill_blanks_exercise(self, text: str, quantity: int, start_id, difficulty): - pass - - @abstractmethod - async def _gen_true_false_not_given_exercise(self, text: str, quantity: int, start_id, difficulty): - pass - - @abstractmethod - async def _gen_write_blanks_exercise(self, text: str, quantity: int, start_id, difficulty): - pass - - @abstractmethod - async def _gen_paragraph_match_exercise(self, text: str, quantity: int, start_id): + async def generate_reading_passage(self, part: int, topic: str, word_count: int = 800): pass diff --git a/app/services/abc/speaking.py b/app/services/abc/speaking.py index 5e19e05..48d0fa8 100644 --- a/app/services/abc/speaking.py +++ b/app/services/abc/speaking.py @@ -1,21 +1,17 @@ from abc import ABC, abstractmethod -from typing import List, Dict +from typing import List, Dict, Optional class ISpeakingService(ABC): @abstractmethod - async def get_speaking_task(self, task_id: int, topic: str, difficulty: str): + async def get_speaking_part( + self, part: int, topic: str, 
difficulty: str, second_topic: Optional[str] = None + ) -> Dict: pass @abstractmethod - async def grade_speaking_task_1_and_2( - self, task: int, question: str, answer_firebase_path: str, sound_file_name: str - ): - pass - - @abstractmethod - async def grade_speaking_task_3(self, answers: Dict, task: int = 3): + async def grade_speaking_task(self, task: int, answers: List[Dict]) -> Dict: pass @abstractmethod @@ -23,35 +19,11 @@ class ISpeakingService(ABC): pass @abstractmethod - async def generate_speaking_video(self, original_question: str, topic: str, avatar: str, prompts: List[str]): - pass - - @abstractmethod - async def generate_interactive_video(self, questions: List[str], avatar: str, topic: str): - pass - - # ================================================================================================================== - # Helpers - # ================================================================================================================== - - @staticmethod - @abstractmethod - def _zero_rating(comment: str): - pass - - @staticmethod - @abstractmethod - def _calculate_overall(response: Dict): - pass - - @abstractmethod - async def _get_speaking_corrections(self, text): - pass - - @abstractmethod - async def _create_video_per_part(self, exercises: List[Dict], template: Dict, part: int): - pass - - @abstractmethod - async def _create_video(self, question: str, avatar: str, error_message: str): + async def generate_video( + self, part: int, avatar: str, topic: str, questions: list[str], + *, + second_topic: Optional[str] = None, + prompts: Optional[list[str]] = None, + suffix: Optional[str] = None, + ): pass diff --git a/app/services/abc/third_parties/llm.py b/app/services/abc/third_parties/llm.py index 03d5550..38ba83c 100644 --- a/app/services/abc/third_parties/llm.py +++ b/app/services/abc/third_parties/llm.py @@ -1,6 +1,10 @@ from abc import ABC, abstractmethod -from typing import List, Optional +from typing import List, Optional, TypeVar, 
Callable +from openai.types.chat import ChatCompletionMessageParam +from pydantic import BaseModel + +T = TypeVar('T', bound=BaseModel) class ILLMService(ABC): @@ -19,3 +23,16 @@ class ILLMService(ABC): @abstractmethod async def prediction_override(self, **kwargs): pass + + @abstractmethod + async def pydantic_prediction( + self, + messages: List[ChatCompletionMessageParam], + map_to_model: Callable, + json_scheme: str, + *, + model: Optional[str] = None, + temperature: Optional[float] = None, + max_retries: int = 3 + ) -> List[T] | T | None: + pass diff --git a/app/services/abc/training.py b/app/services/abc/training.py index f4719f0..bb62f01 100644 --- a/app/services/abc/training.py +++ b/app/services/abc/training.py @@ -1,5 +1,7 @@ from abc import ABC, abstractmethod +from typing import Dict + class ITrainingService(ABC): @@ -7,7 +9,6 @@ class ITrainingService(ABC): async def fetch_tips(self, context: str, question: str, answer: str, correct_answer: str): pass - @staticmethod @abstractmethod - def _get_question_tips(question: str, answer: str, correct_answer: str, context: str = None): + async def get_training_content(self, training_content: Dict) -> Dict: pass diff --git a/app/services/abc/writing.py b/app/services/abc/writing.py index c2dcce1..a59d442 100644 --- a/app/services/abc/writing.py +++ b/app/services/abc/writing.py @@ -1,6 +1,4 @@ from abc import ABC, abstractmethod -from typing import Dict - class IWritingService(ABC): @@ -11,22 +9,3 @@ class IWritingService(ABC): @abstractmethod async def grade_writing_task(self, task: int, question: str, answer: str): pass - - # ================================================================================================================== - # Helpers - # ================================================================================================================== - - @staticmethod - @abstractmethod - def _get_writing_prompt(task: int, topic: str, difficulty: str): - pass - - @staticmethod - @abstractmethod 
- async def _get_fixed_text(self, text): - pass - - @staticmethod - @abstractmethod - def _zero_rating(comment: str): - pass diff --git a/app/services/impl/__init__.py b/app/services/impl/__init__.py index bf36cee..f0c65cb 100644 --- a/app/services/impl/__init__.py +++ b/app/services/impl/__init__.py @@ -4,7 +4,7 @@ from .reading import ReadingService from .speaking import SpeakingService from .writing import WritingService from .grade import GradeService -from .training import TrainingService +from .training import * from .third_parties import * __all__ = [ @@ -14,6 +14,6 @@ __all__ = [ "SpeakingService", "WritingService", "GradeService", - "TrainingService" ] __all__.extend(third_parties.__all__) +__all__.extend(training.__all__) diff --git a/app/services/impl/grade.py b/app/services/impl/grade.py index 723987b..f3792a2 100644 --- a/app/services/impl/grade.py +++ b/app/services/impl/grade.py @@ -1,42 +1,47 @@ import json -from typing import List -import copy +from typing import List, Dict +from app.configs.constants import GPTModels, TemperatureSettings from app.services.abc import ILLMService, IGradeService class GradeService(IGradeService): - chat_config = {'max_tokens': 1000, 'temperature': 0.2} - tools = [{ - "type": "function", - "function": { - "name": "save_evaluation_and_suggestions", - "description": "Saves the evaluation and suggestions requested by input.", - "parameters": { - "type": "object", - "properties": { - "evaluation": { - "type": "string", - "description": "A comment on the IELTS section grade obtained in the specific section and what it could mean without suggestions.", - }, - "suggestions": { - "type": "string", - "description": "A small paragraph text with suggestions on how to possibly get a better grade than the one obtained.", - }, - "bullet_points": { - "type": "string", - "description": "Text with four bullet points to improve the english speaking ability. Only include text for the bullet points separated by a paragraph. 
", - }, - }, - "required": ["evaluation", "suggestions"], - }, - } - }] - def __init__(self, llm: ILLMService): self._llm = llm + async def grade_short_answers(self, data: Dict): + json_format = { + "exercises": [ + { + "id": 1, + "correct": True, + "correct_answer": " correct answer if wrong" + } + ] + } + + messages = [ + { + "role": "system", + "content": f'You are a helpful assistant designed to output JSON on this format: {json_format}' + }, + { + "role": "user", + "content": ( + 'Grade these answers according to the text content and write a correct answer if they are ' + f'wrong. Text, questions and answers:\n {data}' + ) + } + ] + + return await self._llm.prediction( + GPTModels.GPT_4_O, + messages, + ["exercises"], + TemperatureSettings.GEN_QUESTION_TEMPERATURE + ) + async def calculate_grading_summary(self, extracted_sections: List): ret = [] @@ -116,8 +121,8 @@ class GradeService(IGradeService): ) }] - chat_config = copy.deepcopy(self.chat_config) - tools = copy.deepcopy(self.tools) + chat_config = {'max_tokens': 1000, 'temperature': 0.2} + tools = self.get_tools() res = await self._llm.prediction_override( model="gpt-3.5-turbo", @@ -154,3 +159,42 @@ class GradeService(IGradeService): return [line + '.' if line and not line.endswith('.') else line for line in cleaned_lines] else: return [] + + @staticmethod + def get_tools(): + return [ + { + "type": "function", + "function": { + "name": "save_evaluation_and_suggestions", + "description": "Saves the evaluation and suggestions requested by input.", + "parameters": { + "type": "object", + "properties": { + "evaluation": { + "type": "string", + "description": ( + "A comment on the IELTS section grade obtained in the specific section and what " + "it could mean without suggestions." + ), + }, + "suggestions": { + "type": "string", + "description": ( + "A small paragraph text with suggestions on how to possibly get a better grade " + "than the one obtained." 
+ ), + }, + "bullet_points": { + "type": "string", + "description": ( + "Text with four bullet points to improve the english speaking ability. Only " + "include text for the bullet points separated by a paragraph." + ), + }, + }, + "required": ["evaluation", "suggestions"], + }, + } + } + ] diff --git a/app/services/impl/level.py b/app/services/impl/level.py deleted file mode 100644 index 40f651a..0000000 --- a/app/services/impl/level.py +++ /dev/null @@ -1,506 +0,0 @@ -import json -import random -import uuid - -from app.configs.constants import GPTModels, TemperatureSettings, EducationalContent, QuestionType -from app.helpers import ExercisesHelper -from app.repositories.abc import IDocumentStore -from app.services.abc import ILevelService, ILLMService, IReadingService - - -class LevelService(ILevelService): - - def __init__( - self, llm: ILLMService, document_store: IDocumentStore, reading_service: IReadingService - ): - self._llm = llm - self._document_store = document_store - self._reading_service = reading_service - - async def get_level_exam(self): - number_of_exercises = 25 - exercises = await self._gen_multiple_choice_level(number_of_exercises) - return { - "exercises": [exercises], - "isDiagnostic": False, - "minTimer": 25, - "module": "level" - } - - async def _gen_multiple_choice_level(self, quantity: int, start_id=1): - gen_multiple_choice_for_text = ( - f'Generate {str(quantity)} multiple choice questions of 4 options for an english level exam, some easy ' - 'questions, some intermediate questions and some advanced questions. Ensure that the questions cover ' - 'a range of topics such as verb tense, subject-verb agreement, pronoun usage, sentence structure, and ' - 'punctuation. Make sure every question only has 1 correct answer.' 
- ) - - messages = [ - { - "role": "system", - "content": ( - 'You are a helpful assistant designed to output JSON on this format: ' - '{"questions": [{"id": "9", "options": ' - '[{"id": "A", "text": "And"}, {"id": "B", "text": "Cat"}, ' - '{"id": "C", "text": "Happy"}, {"id": "D", "text": "Jump"}], ' - '"prompt": "Which of the following is a conjunction?", ' - '"solution": "A", "variant": "text"}]}' - ) - }, - { - "role": "user", - "content": gen_multiple_choice_for_text - } - ] - - question = await self._llm.prediction( - GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE - ) - - if len(question["questions"]) != quantity: - return await self._gen_multiple_choice_level(quantity, start_id) - else: - all_exams = await self._document_store.get_all("level") - seen_keys = set() - for i in range(len(question["questions"])): - question["questions"][i], seen_keys = await self._replace_exercise_if_exists( - all_exams, question["questions"][i], question, seen_keys - ) - return { - "id": str(uuid.uuid4()), - "prompt": "Select the appropriate option.", - "questions": ExercisesHelper.fix_exercise_ids(question, start_id)["questions"], - "type": "multipleChoice", - } - - async def _replace_exercise_if_exists(self, all_exams, current_exercise, current_exam, seen_keys): - # Extracting relevant fields for comparison - key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options']))) - # Check if the key is in the set - if key in seen_keys: - return await self._replace_exercise_if_exists( - all_exams, await self._generate_single_mc_level_question(), current_exam, seen_keys - ) - else: - seen_keys.add(key) - - for exam in all_exams: - exam_dict = exam.to_dict() - if any( - exercise["prompt"] == current_exercise["prompt"] and - any(exercise["options"][0]["text"] == current_option["text"] for current_option in - current_exercise["options"]) - for exercise in exam_dict.get("exercises", [])[0]["questions"] - 
): - return await self._replace_exercise_if_exists( - all_exams, await self._generate_single_mc_level_question(), current_exam, seen_keys - ) - return current_exercise, seen_keys - - async def _generate_single_mc_level_question(self): - messages = [ - { - "role": "system", - "content": ( - 'You are a helpful assistant designed to output JSON on this format: ' - '{"id": "9", "options": [{"id": "A", "text": "And"}, {"id": "B", "text": "Cat"}, ' - '{"id": "C", "text": "Happy"}, {"id": "D", "text": "Jump"}], ' - '"prompt": "Which of the following is a conjunction?", ' - '"solution": "A", "variant": "text"}' - ) - }, - { - "role": "user", - "content": ( - 'Generate 1 multiple choice question of 4 options for an english level exam, it can be easy, ' - 'intermediate or advanced.' - ) - - } - ] - - question = await self._llm.prediction( - GPTModels.GPT_4_O, messages, ["options"], TemperatureSettings.GEN_QUESTION_TEMPERATURE - ) - - return question - - async def get_level_utas(self): - # Formats - mc = { - "id": str(uuid.uuid4()), - "prompt": "Choose the correct word or group of words that completes the sentences.", - "questions": None, - "type": "multipleChoice", - "part": 1 - } - - umc = { - "id": str(uuid.uuid4()), - "prompt": "Choose the underlined word or group of words that is not correct.", - "questions": None, - "type": "multipleChoice", - "part": 2 - } - - bs_1 = { - "id": str(uuid.uuid4()), - "prompt": "Read the text and write the correct word for each space.", - "questions": None, - "type": "blankSpaceText", - "part": 3 - } - - bs_2 = { - "id": str(uuid.uuid4()), - "prompt": "Read the text and write the correct word for each space.", - "questions": None, - "type": "blankSpaceText", - "part": 4 - } - - reading = { - "id": str(uuid.uuid4()), - "prompt": "Read the text and answer the questions below.", - "questions": None, - "type": "readingExercises", - "part": 5 - } - - all_mc_questions = [] - - # PART 1 - mc_exercises1 = await 
self._gen_multiple_choice_blank_space_utas(15, 1, all_mc_questions) - print(json.dumps(mc_exercises1, indent=4)) - all_mc_questions.append(mc_exercises1) - - # PART 2 - mc_exercises2 = await self._gen_multiple_choice_blank_space_utas(15, 16, all_mc_questions) - print(json.dumps(mc_exercises2, indent=4)) - all_mc_questions.append(mc_exercises2) - - # PART 3 - mc_exercises3 = await self._gen_multiple_choice_blank_space_utas(15, 31, all_mc_questions) - print(json.dumps(mc_exercises3, indent=4)) - all_mc_questions.append(mc_exercises3) - - mc_exercises = mc_exercises1['questions'] + mc_exercises2['questions'] + mc_exercises3['questions'] - print(json.dumps(mc_exercises, indent=4)) - mc["questions"] = mc_exercises - - # Underlined mc - underlined_mc = await self._gen_multiple_choice_underlined_utas(15, 46) - print(json.dumps(underlined_mc, indent=4)) - umc["questions"] = underlined_mc - - # Blank Space text 1 - blank_space_text_1 = await self._gen_blank_space_text_utas(12, 61, 250) - print(json.dumps(blank_space_text_1, indent=4)) - bs_1["questions"] = blank_space_text_1 - - # Blank Space text 2 - blank_space_text_2 = await self._gen_blank_space_text_utas(14, 73, 350) - print(json.dumps(blank_space_text_2, indent=4)) - bs_2["questions"] = blank_space_text_2 - - # Reading text - reading_text = await self._gen_reading_passage_utas(87, 10, 4) - print(json.dumps(reading_text, indent=4)) - reading["questions"] = reading_text - - return { - "exercises": { - "blankSpaceMultipleChoice": mc, - "underlinedMultipleChoice": umc, - "blankSpaceText1": bs_1, - "blankSpaceText2": bs_2, - "readingExercises": reading, - }, - "isDiagnostic": False, - "minTimer": 25, - "module": "level" - } - - async def _gen_multiple_choice_blank_space_utas(self, quantity: int, start_id: int, all_exams): - gen_multiple_choice_for_text = ( - f'Generate {str(quantity)} multiple choice blank space questions of 4 options for an english ' - 'level exam, some easy questions, some intermediate questions and some 
advanced questions. Ensure ' - 'that the questions cover a range of topics such as verb tense, subject-verb agreement, pronoun usage, ' - 'sentence structure, and punctuation. Make sure every question only has 1 correct answer.' - ) - - messages = [ - { - "role": "system", - "content": ( - 'You are a helpful assistant designed to output JSON on this format: ' - '{"questions": [{"id": "9", "options": [{"id": "A", "text": ' - '"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": ' - '"Happy"}, {"id": "D", "text": "Jump"}], ' - '"prompt": "Which of the following is a conjunction?", ' - '"solution": "A", "variant": "text"}]}') - }, - { - "role": "user", - "content": gen_multiple_choice_for_text - } - ] - - question = await self._llm.prediction( - GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE - ) - - if len(question["questions"]) != quantity: - return await self._gen_multiple_choice_level(quantity, start_id) - else: - seen_keys = set() - for i in range(len(question["questions"])): - question["questions"][i], seen_keys = await self._replace_exercise_if_exists_utas( - all_exams, - question["questions"][i], - question, - seen_keys - ) - return ExercisesHelper.fix_exercise_ids(question, start_id) - - async def _replace_exercise_if_exists_utas(self, all_exams, current_exercise, current_exam, seen_keys): - # Extracting relevant fields for comparison - key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options']))) - # Check if the key is in the set - if key in seen_keys: - return self._replace_exercise_if_exists_utas( - all_exams, await self._generate_single_mc_level_question(), current_exam, seen_keys - ) - else: - seen_keys.add(key) - - for exam in all_exams: - if any( - exercise["prompt"] == current_exercise["prompt"] and - any(exercise["options"][0]["text"] == current_option["text"] for current_option in - current_exercise["options"]) - for exercise in exam.get("questions", []) - ): 
- return self._replace_exercise_if_exists_utas( - all_exams, await self._generate_single_mc_level_question(), current_exam, seen_keys - ) - return current_exercise, seen_keys - - - async def _gen_multiple_choice_underlined_utas(self, quantity: int, start_id: int): - json_format = { - "questions": [ - { - "id": "9", - "options": [ - { - "id": "A", - "text": "a" - }, - { - "id": "B", - "text": "b" - }, - { - "id": "C", - "text": "c" - }, - { - "id": "D", - "text": "d" - } - ], - "prompt": "prompt", - "solution": "A", - "variant": "text" - } - ] - } - - gen_multiple_choice_for_text = ( - f'Generate {str(quantity)} multiple choice questions of 4 options for an english ' - 'level exam, some easy questions, some intermediate questions and some advanced questions. Ensure that ' - 'the questions cover a range of topics such as verb tense, subject-verb agreement, pronoun usage, ' - 'sentence structure, and punctuation. Make sure every question only has 1 correct answer.' - ) - - messages = [ - { - "role": "system", - "content": 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format) - }, - { - "role": "user", - "content": gen_multiple_choice_for_text - }, - { - "role": "user", - "content": ( - 'The type of multiple choice is the prompt has wrong words or group of words and the options ' - 'are to find the wrong word or group of words that are underlined in the prompt. 
\nExample:\n' - 'Prompt: "I complain about my boss all the time, but my colleagues thinks ' - 'the boss is nice."\nOptions:\na: "complain"\nb: "all the time"\nc: "thinks"\nd: "is"' - ) - } - ] - - question = await self._llm.prediction( - GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE - ) - - if len(question["questions"]) != quantity: - return await self._gen_multiple_choice_level(quantity, start_id) - else: - return ExercisesHelper.fix_exercise_ids(question, start_id)["questions"] - - async def _gen_blank_space_text_utas( - self, quantity: int, start_id: int, size: int, topic=random.choice(EducationalContent.MTI_TOPICS) - ): - json_format = { - "question": { - "words": [ - { - "id": "1", - "text": "a" - }, - { - "id": "2", - "text": "b" - }, - { - "id": "3", - "text": "c" - }, - { - "id": "4", - "text": "d" - } - ], - "text": "text" - } - } - - messages = [ - { - "role": "system", - "content": 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format) - }, - { - "role": "user", - "content": f'Generate a text of at least {str(size)} words about the topic {topic}.' - }, - { - "role": "user", - "content": ( - f'From the generated text choose {str(quantity)} words (cannot be sequential words) to replace ' - 'once with {{id}} where id starts on ' + str(start_id) + ' and is incremented for each word. ' - 'The ids must be ordered throughout the text and the words must be replaced only once. Put ' - 'the removed words and respective ids on the words array of the json in the correct order.' 
- ) - } - ] - - question = await self._llm.prediction( - GPTModels.GPT_4_O, messages, ["question"], TemperatureSettings.GEN_QUESTION_TEMPERATURE - ) - - return question["question"] - - async def _gen_reading_passage_utas( - self, start_id, sa_quantity: int, mc_quantity: int, topic=random.choice(EducationalContent.MTI_TOPICS) - ): - - passage = await self._reading_service.generate_reading_passage(QuestionType.READING_PASSAGE_1, topic) - short_answer = await self._gen_short_answer_utas(passage["text"], start_id, sa_quantity) - mc_exercises = await self._gen_text_multiple_choice_utas(passage["text"], start_id + sa_quantity, mc_quantity) - return { - "exercises": { - "shortAnswer": short_answer, - "multipleChoice": mc_exercises, - }, - "text": { - "content": passage["text"], - "title": passage["title"] - } - } - - async def _gen_short_answer_utas(self, text: str, start_id: int, sa_quantity: int): - json_format = {"questions": [{"id": 1, "question": "question", "possible_answers": ["answer_1", "answer_2"]}]} - - messages = [ - { - "role": "system", - "content": 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format) - }, - { - "role": "user", - "content": ( - 'Generate ' + str(sa_quantity) + ' short answer questions, and the possible answers, must have ' - 'maximum 3 words per answer, about this text:\n"' + text + '"') - }, - { - "role": "user", - "content": 'The id starts at ' + str(start_id) + '.' 
- } - ] - - return ( - await self._llm.prediction( - GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE - ) - )["questions"] - - async def _gen_text_multiple_choice_utas(self, text: str, start_id: int, mc_quantity: int): - json_format = { - "questions": [ - { - "id": "9", - "options": [ - { - "id": "A", - "text": "a" - }, - { - "id": "B", - "text": "b" - }, - { - "id": "C", - "text": "c" - }, - { - "id": "D", - "text": "d" - } - ], - "prompt": "prompt", - "solution": "A", - "variant": "text" - } - ] - } - - messages = [ - { - "role": "system", - "content": 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format) - }, - { - "role": "user", - "content": 'Generate ' + str( - mc_quantity) + ' multiple choice questions of 4 options for this text:\n' + text - }, - { - "role": "user", - "content": 'Make sure every question only has 1 correct answer.' - } - ] - - question = await self._llm.prediction( - GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE - ) - - if len(question["questions"]) != mc_quantity: - return await self._gen_multiple_choice_level(mc_quantity, start_id) - else: - return ExercisesHelper.fix_exercise_ids(question, start_id)["questions"] diff --git a/app/services/impl/level/__init__.py b/app/services/impl/level/__init__.py new file mode 100644 index 0000000..584a03d --- /dev/null +++ b/app/services/impl/level/__init__.py @@ -0,0 +1,5 @@ +from .level import LevelService + +__all__ = [ + "LevelService" +] \ No newline at end of file diff --git a/app/services/impl/level/custom.py b/app/services/impl/level/custom.py new file mode 100644 index 0000000..09ba991 --- /dev/null +++ b/app/services/impl/level/custom.py @@ -0,0 +1,335 @@ +import queue +import random + +from typing import Dict + +from app.configs.constants import CustomLevelExerciseTypes, EducationalContent +from app.services.abc import ( + ILLMService, ILevelService, IReadingService, + 
class CustomLevelModule:
    """Builds a fully custom "level" exam from a flat request-body dict.

    The request describes ``nr_exercises`` exercises, each configured through
    ``exercise_{i}_*`` keys (type, difficulty, per-sub-exercise quantities, ...).
    Generation is delegated to the level/reading/listening/writing/speaking
    services; this module only parses parameters, dispatches on the exercise
    type and maintains the running question-id counter.
    """

    def __init__(
        self,
        llm: "ILLMService",
        level: "ILevelService",
        reading: "IReadingService",
        listening: "IListeningService",
        writing: "IWritingService",
        speaking: "ISpeakingService"
    ):
        self._llm = llm
        self._level = level
        self._reading = reading
        self._listening = listening
        self._writing = writing
        self._speaking = speaking

    # TODO: I've changed this to retrieve the args from the body request and not request query args
    async def get_custom_level(self, data: Dict):
        """Return ``{"exercises": {...}, "module": "level"}`` built from ``data``.

        Exercise types that match no known ``CustomLevelExerciseTypes`` value are
        silently skipped (no ``exercise_{i}`` key is added), matching the
        original elif-chain behaviour.
        """
        nr_exercises = int(data.get('nr_exercises'))

        exercise_id = 1
        response = {
            "exercises": {},
            "module": "level"
        }
        for i in range(1, nr_exercises + 1):
            params = self._parse_params(data, i)
            exercise, exercise_id = await self._build_exercise(params, exercise_id)
            if exercise is not None:
                response["exercises"][f"exercise_{i}"] = exercise
        return response

    @staticmethod
    def _parse_params(data: Dict, i: int) -> Dict:
        """Extract the ``exercise_{i}_*`` request parameters with their defaults.

        Quantities default to -1 (meaning "sub-exercise not requested");
        difficulty/topics default to a fresh random choice per exercise.
        """
        prefix = f'exercise_{i}_'

        def _num(name: str, default: int = -1) -> int:
            return int(data.get(prefix + name, default))

        return {
            'type': data.get(prefix + 'type'),
            'difficulty': data.get(prefix + 'difficulty', random.choice(['easy', 'medium', 'hard'])),
            'qty': _num('qty'),
            'topic': data.get(prefix + 'topic', random.choice(EducationalContent.TOPICS)),
            'topic_2': data.get(prefix + 'topic_2', random.choice(EducationalContent.TOPICS)),
            'text_size': _num('text_size', 700),
            'sa_qty': _num('sa_qty'),
            'mc_qty': _num('mc_qty'),
            'mc3_qty': _num('mc3_qty'),
            'fillblanks_qty': _num('fillblanks_qty'),
            'writeblanks_qty': _num('writeblanks_qty'),
            'writeblanksquestions_qty': _num('writeblanksquestions_qty'),
            'writeblanksfill_qty': _num('writeblanksfill_qty'),
            'writeblanksform_qty': _num('writeblanksform_qty'),
            'truefalse_qty': _num('truefalse_qty'),
            'paragraphmatch_qty': _num('paragraphmatch_qty'),
            'ideamatch_qty': _num('ideamatch_qty'),
        }

    async def _build_exercise(self, p: Dict, exercise_id: int):
        """Generate one exercise; return ``(exercise_or_None, next_exercise_id)``."""
        t = p['type']
        types = CustomLevelExerciseTypes

        mc_variant_by_type = {
            types.MULTIPLE_CHOICE_4.value: "normal",
            types.MULTIPLE_CHOICE_BLANK_SPACE.value: "blank_space",
            types.MULTIPLE_CHOICE_UNDERLINED.value: "underline",
        }
        if t in mc_variant_by_type:
            return await self._chunked_multiple_choice(mc_variant_by_type[t], p['qty'], exercise_id)

        if t == types.BLANK_SPACE_TEXT.value:
            exercise = await self._level.gen_blank_space_text_utas(
                p['qty'], exercise_id, p['text_size']
            )
            exercise["type"] = "blankSpaceText"
            return exercise, exercise_id + p['qty']

        if t == types.READING_PASSAGE_UTAS.value:
            exercise = await self._level.gen_reading_passage_utas(
                exercise_id, p['sa_qty'], p['mc_qty'], p['topic']
            )
            exercise["type"] = "readingExercises"
            # NOTE(review): as in the original code the id counter advances by
            # exercise_qty, which defaults to -1 when absent -- confirm intent.
            return exercise, exercise_id + p['qty']

        if t in (types.WRITING_LETTER.value, types.WRITING_2.value):
            task = 1 if t == types.WRITING_LETTER.value else 2
            exercise = await self._writing.get_writing_task_general_question(
                task, p['topic'], p['difficulty']
            )
            exercise["type"] = "writing"
            return exercise, exercise_id + 1

        if t == types.SPEAKING_1.value:
            exercise = await self._speaking.get_speaking_part(
                1, p['topic'], p['difficulty'], p['topic_2']
            )
            exercise["type"] = "interactiveSpeaking"
            return exercise, exercise_id + 1

        if t in (types.SPEAKING_2.value, types.SPEAKING_3.value):
            part = 2 if t == types.SPEAKING_2.value else 3
            exercise = await self._speaking.get_speaking_part(part, p['topic'], p['difficulty'])
            # part 2 is labelled "speaking", part 3 "interactiveSpeaking" (as before)
            exercise["type"] = "speaking" if part == 2 else "interactiveSpeaking"
            return exercise, exercise_id + 1

        reading_parts = {
            types.READING_1.value: 1,
            types.READING_2.value: 2,
            types.READING_3.value: 3,
        }
        if t in reading_parts:
            return await self._reading_exercise(reading_parts[t], p, exercise_id)

        listening_parts = {
            types.LISTENING_1.value: 1,
            types.LISTENING_2.value: 2,
            types.LISTENING_3.value: 3,
            types.LISTENING_4.value: 4,
        }
        if t in listening_parts:
            return await self._listening_exercise(listening_parts[t], p, exercise_id)

        # Unknown type: add nothing, keep the id counter untouched.
        return None, exercise_id

    async def _chunked_multiple_choice(self, variant: str, quantity: int, start_id: int):
        """Generate ``quantity`` MC questions in batches of at most 15.

        The accumulated question list is handed back as ``all_exams`` on every
        batch so the level service can avoid generating duplicates.
        """
        exercise = {"questions": [], "type": "multipleChoice"}
        next_id = start_id
        remaining = quantity
        while remaining > 0:
            batch = min(remaining, 15)
            generated = await self._level.gen_multiple_choice(
                variant, batch, next_id, utas=True, all_exams=exercise["questions"]
            )
            exercise["questions"].extend(generated["questions"])
            next_id += batch
            remaining -= batch
        return exercise, next_id

    @staticmethod
    def _collect_sub_exercises(specs):
        """Turn ``(name, qty)`` specs into ``(names, qty_queue, total)``, skipping qty == -1."""
        names = []
        qty_queue = queue.Queue()
        total = 0
        for name, qty in specs:
            if qty != -1:
                names.append(name)
                qty_queue.put(qty)
                total += qty
        return names, qty_queue, total

    async def _reading_exercise(self, part: int, p: Dict, exercise_id: int):
        """Generate a reading exercise for ``part`` (1-3); part 3 also allows ideaMatch."""
        specs = [
            ('fillBlanks', p['fillblanks_qty']),
            ('writeBlanks', p['writeblanks_qty']),
            ('trueFalse', p['truefalse_qty']),
            ('paragraphMatch', p['paragraphmatch_qty']),
        ]
        if part == 3:
            specs.append(('ideaMatch', p['ideamatch_qty']))
        names, qty_queue, total = self._collect_sub_exercises(specs)

        # BUGFIX: the part-3 branch used to pass (exercise_id, difficulty) in
        # swapped order relative to parts 1/2; all parts now use the same
        # (..., difficulty, start_id) argument order.
        exercise = await self._reading.gen_reading_passage(
            part, p['topic'], names, qty_queue, p['difficulty'], exercise_id
        )
        exercise["type"] = "reading"
        return exercise, exercise_id + total

    async def _listening_exercise(self, part: int, p: Dict, exercise_id: int):
        """Generate a listening exercise for ``part`` (1-4)."""
        if part == 2:
            specs = [
                ('multipleChoice', p['mc_qty']),
                ('writeBlanksQuestions', p['writeblanksquestions_qty']),
            ]
        elif part == 3:
            specs = [
                ('multipleChoice3Options', p['mc3_qty']),
                ('writeBlanksQuestions', p['writeblanksquestions_qty']),
            ]
        else:  # parts 1 and 4 share the full sub-exercise set
            specs = [
                ('multipleChoice', p['mc_qty']),
                ('writeBlanksQuestions', p['writeblanksquestions_qty']),
                ('writeBlanksFill', p['writeblanksfill_qty']),
                ('writeBlanksForm', p['writeblanksform_qty']),
            ]
        names, qty_queue, total = self._collect_sub_exercises(specs)

        exercise = await self._listening.get_listening_question(
            part, p['topic'], names, p['difficulty'], qty_queue, exercise_id
        )
        exercise["type"] = "listening"
        return exercise, exercise_id + total
class LevelService(ILevelService):
    """Generates "level" exams (multiple choice, blank-space texts, reading
    passages) via the LLM service, delegating custom and uploaded levels to the
    dedicated modules.
    """

    def __init__(
        self,
        llm: ILLMService,
        document_store: IDocumentStore,
        mc_variants: Dict,
        reading_service: IReadingService,
        writing_service: IWritingService,
        speaking_service: ISpeakingService,
        listening_service: IListeningService
    ):
        self._llm = llm
        self._document_store = document_store
        self._reading_service = reading_service
        self._custom_module = CustomLevelModule(
            llm, self, reading_service, listening_service, writing_service, speaking_service
        )
        self._upload_module = UploadLevelModule(llm)

        # TODO: normal and blank spaces only differ on "multiple choice blank space questions" in the prompt
        # mc_variants are stored in ./mc_variants.json
        self._mc_variants = mc_variants

    async def upload_level(self, upload: UploadFile) -> Dict:
        """Build a level exam from an uploaded exercise-sheet file."""
        return await self._upload_module.generate_level_from_file(upload)

    async def get_custom_level(self, data: Dict):
        """Build a fully custom level exam from the request body ``data``."""
        return await self._custom_module.get_custom_level(data)

    async def get_level_exam(
        self, number_of_exercises: int = 25, min_timer: int = 25, diagnostic: bool = False
    ) -> Dict:
        """Return a plain multiple-choice level exam with deduplicated questions."""
        exercises = await self.gen_multiple_choice("normal", number_of_exercises, utas=False)
        return {
            "exercises": [exercises],
            "isDiagnostic": diagnostic,
            "minTimer": min_timer,
            "module": "level"
        }

    async def get_level_utas(self, diagnostic: bool = False, min_timer: int = 25):
        """Assemble the five-part UTAS level exam.

        Parts 1-2 are multiple choice (45 blank-space + 15 underlined), parts
        3-4 are blank-space texts, part 5 is a reading passage with questions.
        """
        # Part skeletons; "questions" is filled in below.
        mc = {
            "id": str(uuid.uuid4()),
            "prompt": "Choose the correct word or group of words that completes the sentences.",
            "questions": None,
            "type": "multipleChoice",
            "part": 1
        }

        umc = {
            "id": str(uuid.uuid4()),
            "prompt": "Choose the underlined word or group of words that is not correct.",
            "questions": None,
            "type": "multipleChoice",
            "part": 2
        }

        bs_1 = {
            "id": str(uuid.uuid4()),
            "prompt": "Read the text and write the correct word for each space.",
            "questions": None,
            "type": "blankSpaceText",
            "part": 3
        }

        bs_2 = {
            "id": str(uuid.uuid4()),
            "prompt": "Read the text and write the correct word for each space.",
            "questions": None,
            "type": "blankSpaceText",
            "part": 4
        }

        reading = {
            "id": str(uuid.uuid4()),
            "prompt": "Read the text and answer the questions below.",
            "questions": None,
            "type": "readingExercises",
            "part": 5
        }

        # Shared dedup context across all multiple-choice batches.
        all_mc_questions = []

        # PART 1: three batches of 15 blank-space questions (ids 1-45)
        mc_exercises1 = await self.gen_multiple_choice(
            "blank_space", 15, 1, utas=True, all_exams=all_mc_questions
        )
        print(json.dumps(mc_exercises1, indent=4))  # debug output
        all_mc_questions.append(mc_exercises1)

        mc_exercises2 = await self.gen_multiple_choice(
            "blank_space", 15, 16, utas=True, all_exams=all_mc_questions
        )
        print(json.dumps(mc_exercises2, indent=4))  # debug output
        all_mc_questions.append(mc_exercises2)

        mc_exercises3 = await self.gen_multiple_choice(
            "blank_space", 15, 31, utas=True, all_exams=all_mc_questions
        )
        print(json.dumps(mc_exercises3, indent=4))  # debug output
        all_mc_questions.append(mc_exercises3)

        mc_exercises = mc_exercises1['questions'] + mc_exercises2['questions'] + mc_exercises3['questions']
        print(json.dumps(mc_exercises, indent=4))  # debug output
        mc["questions"] = mc_exercises

        # PART 2: underlined multiple choice (ids 46-60)
        underlined_mc = await self.gen_multiple_choice(
            "underline", 15, 46, utas=True, all_exams=all_mc_questions
        )
        print(json.dumps(underlined_mc, indent=4))  # debug output
        # NOTE(review): this stores the whole response dict (which itself has a
        # "questions" key), unlike part 1 which concatenates the question lists
        # -- confirm this nesting is what the frontend expects.
        umc["questions"] = underlined_mc

        # PART 3: blank-space text, 12 blanks, ids 61-72
        blank_space_text_1 = await self.gen_blank_space_text_utas(12, 61, 250)
        print(json.dumps(blank_space_text_1, indent=4))  # debug output
        bs_1["questions"] = blank_space_text_1

        # PART 4: blank-space text, 14 blanks, ids 73-86
        blank_space_text_2 = await self.gen_blank_space_text_utas(14, 73, 350)
        print(json.dumps(blank_space_text_2, indent=4))  # debug output
        bs_2["questions"] = blank_space_text_2

        # PART 5: reading passage with 10 short-answer + 4 MC questions, ids from 87
        reading_text = await self.gen_reading_passage_utas(87, 10, 4)
        print(json.dumps(reading_text, indent=4))  # debug output
        reading["questions"] = reading_text

        return {
            "exercises": {
                "blankSpaceMultipleChoice": mc,
                "underlinedMultipleChoice": umc,
                "blankSpaceText1": bs_1,
                "blankSpaceText2": bs_2,
                "readingExercises": reading,
            },
            "isDiagnostic": diagnostic,
            "minTimer": min_timer,
            "module": "level"
        }

    async def gen_multiple_choice(
        self, mc_variant: str, quantity: int, start_id: int = 1, *, utas: bool = False, all_exams=None
    ):
        """Generate ``quantity`` multiple-choice questions of variant ``mc_variant``.

        For ``utas=False`` the questions are deduplicated against every stored
        "level" exam in the document store; for ``utas=True`` they are
        deduplicated against ``all_exams`` (if given) and the option order is
        randomized.
        """
        mc_template = self._mc_variants[mc_variant]
        blank_mod = " blank space " if mc_variant == "blank_space" else " "

        gen_multiple_choice_for_text: str = (
            'Generate {quantity} multiple choice{blank}questions of 4 options for an english level exam, some easy '
            'questions, some intermediate questions and some advanced questions. Ensure that the questions cover '
            'a range of topics such as verb tense, subject-verb agreement, pronoun usage, sentence structure, and '
            'punctuation. Make sure every question only has 1 correct answer.'
        )

        messages = [
            {
                "role": "system",
                "content": (
                    f'You are a helpful assistant designed to output JSON on this format: {mc_template}'
                )
            },
            {
                "role": "user",
                "content": gen_multiple_choice_for_text.format(quantity=str(quantity), blank=blank_mod)
            }
        ]

        if mc_variant == "underline":
            messages.append({
                "role": "user",
                "content": (
                    'The type of multiple choice in the prompt has wrong words or group of words and the options '
                    'are to find the wrong word or group of words that are underlined in the prompt. \nExample:\n'
                    'Prompt: "I complain about my boss all the time, but my colleagues thinks '
                    'the boss is nice."\n'
                    'Options:\na: "complain"\nb: "all the time"\nc: "thinks"\nd: "is"'
                )
            })

        question = await self._llm.prediction(
            GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )

        if len(question["questions"]) != quantity:
            # Model returned the wrong count -> retry from scratch.
            # NOTE(review): the retry recursion is unbounded; consider a cap.
            return await self.gen_multiple_choice(mc_variant, quantity, start_id, utas=utas, all_exams=all_exams)
        else:
            if not utas:
                all_exams = await self._document_store.get_all("level")
                seen_keys = set()
                for i in range(len(question["questions"])):
                    question["questions"][i], seen_keys = await self._replace_exercise_if_exists(
                        all_exams, question["questions"][i], question, seen_keys, mc_variant, utas
                    )
                return {
                    "id": str(uuid.uuid4()),
                    "prompt": "Select the appropriate option.",
                    "questions": ExercisesHelper.fix_exercise_ids(question, start_id)["questions"],
                    "type": "multipleChoice",
                }
            else:
                if all_exams is not None:
                    seen_keys = set()
                    for i in range(len(question["questions"])):
                        question["questions"][i], seen_keys = await self._replace_exercise_if_exists(
                            all_exams, question["questions"][i], question, seen_keys, mc_variant, utas
                        )
                response = ExercisesHelper.fix_exercise_ids(question, start_id)
                response["questions"] = ExercisesHelper.randomize_mc_options_order(response["questions"])
                return response

    async def _generate_single_multiple_choice(self, mc_variant: str = "normal"):
        """Generate one replacement multiple-choice question of ``mc_variant``."""
        mc_template = self._mc_variants[mc_variant]["questions"][0]
        blank_mod = " blank space " if mc_variant == "blank_space" else " "

        messages = [
            {
                "role": "system",
                "content": (
                    f'You are a helpful assistant designed to output JSON on this format: {mc_template}'
                )
            },
            {
                "role": "user",
                "content": (
                    f'Generate 1 multiple choice {blank_mod} question of 4 options for an english level exam, '
                    f'it can be easy, intermediate or advanced.'
                )

            }
        ]

        if mc_variant == "underline":
            messages.append({
                "role": "user",
                "content": (
                    'The type of multiple choice in the prompt has wrong words or group of words and the options '
                    'are to find the wrong word or group of words that are underlined in the prompt. \nExample:\n'
                    'Prompt: "I complain about my boss all the time, but my colleagues thinks '
                    'the boss is nice."\n'
                    'Options:\na: "complain"\nb: "all the time"\nc: "thinks"\nd: "is"'
                )
            })

        question = await self._llm.prediction(
            GPTModels.GPT_4_O, messages, ["options"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )

        return question

    async def _replace_exercise_if_exists(
        self, all_exams, current_exercise, current_exam, seen_keys, mc_variant: str, utas: bool = False
    ):
        """Regenerate ``current_exercise`` until it is unique, returning
        ``(exercise, seen_keys)``.

        Uniqueness is checked against ``seen_keys`` (this batch) and against
        ``all_exams`` -- stored exam documents when ``utas=False``, otherwise
        in-memory exam dicts.
        """
        # Extracting relevant fields for comparison
        key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options'])))
        # Check if the key is in the set
        if key in seen_keys:
            return await self._replace_exercise_if_exists(
                all_exams, await self._generate_single_multiple_choice(mc_variant), current_exam, seen_keys,
                mc_variant, utas
            )
        else:
            seen_keys.add(key)

        if not utas:
            for exam in all_exams:
                exam_dict = exam.to_dict()
                if len(exam_dict.get("parts", [])) > 0:
                    exercise_dict = exam_dict.get("parts", [])[0]
                    if len(exercise_dict.get("exercises", [])) > 0:
                        if any(
                            exercise["prompt"] == current_exercise["prompt"] and
                            any(exercise["options"][0]["text"] == current_option["text"] for current_option in
                                current_exercise["options"])
                            for exercise in exercise_dict.get("exercises", [])[0]["questions"]
                        ):
                            return await self._replace_exercise_if_exists(
                                all_exams, await self._generate_single_multiple_choice(mc_variant), current_exam,
                                seen_keys, mc_variant, utas
                            )
        else:
            # NOTE(review): the custom-level module passes bare question dicts as
            # ``all_exams``; those have no "questions" key, so this dedup loop is
            # a no-op there -- confirm whether that is intended.
            for exam in all_exams:
                if any(
                    exercise["prompt"] == current_exercise["prompt"] and
                    any(exercise["options"][0]["text"] == current_option["text"] for current_option in
                        current_exercise["options"])
                    for exercise in exam.get("questions", [])
                ):
                    return await self._replace_exercise_if_exists(
                        all_exams, await self._generate_single_multiple_choice(mc_variant), current_exam,
                        seen_keys, mc_variant, utas
                    )
        return current_exercise, seen_keys

    async def gen_blank_space_text_utas(
        self, quantity: int, start_id: int, size: int, topic=None
    ):
        """Generate a text of about ``size`` words with ``quantity`` blanks,
        ids starting at ``start_id``.
        """
        # BUGFIX: the default used to be ``topic=random.choice(...)``, which is
        # evaluated once at definition time, so every call shared the same
        # "random" topic. Pick a fresh topic per call instead.
        if topic is None:
            topic = random.choice(EducationalContent.MTI_TOPICS)

        json_template = self._mc_variants["blank_space_text"]
        messages = [
            {
                "role": "system",
                "content": f'You are a helpful assistant designed to output JSON on this format: {json_template}'
            },
            {
                "role": "user",
                "content": f'Generate a text of at least {size} words about the topic {topic}.'
            },
            {
                "role": "user",
                "content": (
                    f'From the generated text choose {quantity} words (cannot be sequential words) to replace '
                    'once with {{id}} where id starts on ' + str(start_id) + ' and is incremented for each word. '
                    'The ids must be ordered throughout the text and the words must be replaced only once. '
                    'Put the removed words and respective ids on the words array of the json in the correct order.'
                )
            }
        ]

        question = await self._llm.prediction(
            GPTModels.GPT_4_O, messages, ["question"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )

        return question["question"]

    async def gen_reading_passage_utas(
        self, start_id, sa_quantity: int, mc_quantity: int, topic=None
    ):
        """Generate a reading passage with ``sa_quantity`` short-answer and
        ``mc_quantity`` multiple-choice questions, ids starting at ``start_id``.
        """
        # BUGFIX: same definition-time-default issue as gen_blank_space_text_utas;
        # choose the topic per call.
        if topic is None:
            topic = random.choice(EducationalContent.MTI_TOPICS)

        passage = await self._reading_service.generate_reading_passage(1, topic)
        short_answer = await self._gen_short_answer_utas(passage["text"], start_id, sa_quantity)
        mc_exercises = await self._gen_text_multiple_choice_utas(passage["text"], start_id + sa_quantity, mc_quantity)
        return {
            "exercises": {
                "shortAnswer": short_answer,
                "multipleChoice": mc_exercises,
            },
            "text": {
                "content": passage["text"],
                "title": passage["title"]
            }
        }

    async def _gen_short_answer_utas(self, text: str, start_id: int, sa_quantity: int):
        """Generate ``sa_quantity`` short-answer questions about ``text``."""
        json_format = {"questions": [{"id": 1, "question": "question", "possible_answers": ["answer_1", "answer_2"]}]}

        messages = [
            {
                "role": "system",
                "content": f'You are a helpful assistant designed to output JSON on this format: {json_format}'
            },
            {
                "role": "user",
                "content": (
                    f'Generate {sa_quantity} short answer questions, and the possible answers, must have '
                    f'maximum 3 words per answer, about this text:\n"{text}"'
                )
            },
            {
                "role": "user",
                "content": f'The id starts at {start_id}.'
            }
        ]

        question = await self._llm.prediction(
            GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )

        return question["questions"]

    async def _gen_text_multiple_choice_utas(self, text: str, start_id: int, mc_quantity: int):
        """Generate ``mc_quantity`` multiple-choice questions about ``text``."""
        json_template = self._mc_variants["text_mc_utas"]

        messages = [
            {
                "role": "system",
                "content": f'You are a helpful assistant designed to output JSON on this format: {json_template}'
            },
            {
                "role": "user",
                "content": f'Generate {mc_quantity} multiple choice questions of 4 options for this text:\n{text}'
            },
            {
                "role": "user",
                "content": 'Make sure every question only has 1 correct answer.'
            }
        ]

        question = await self._llm.prediction(
            GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )

        if len(question["questions"]) != mc_quantity:
            # BUGFIX: the retry used to swap the arguments
            # (``text, mc_quantity, start_id``), corrupting ids and quantity on
            # every retry; pass them in the declared order.
            return await self._gen_text_multiple_choice_utas(text, start_id, mc_quantity)
        else:
            response = ExercisesHelper.fix_exercise_ids(question, start_id)
            response["questions"] = ExercisesHelper.randomize_mc_options_order(response["questions"])
            return response
"A", + "variant": "text" + } + ] + }, + "underline": { + "questions": [ + { + "id": "9", + "options": [ + { + "id": "A", + "text": "a" + }, + { + "id": "B", + "text": "b" + }, + { + "id": "C", + "text": "c" + }, + { + "id": "D", + "text": "d" + } + ], + "prompt": "prompt", + "solution": "A", + "variant": "text" + } + ] + }, + "blank_space_text": { + "question": { + "words": [ + { + "id": "1", + "text": "a" + }, + { + "id": "2", + "text": "b" + }, + { + "id": "3", + "text": "c" + }, + { + "id": "4", + "text": "d" + } + ], + "text": "text" + } + }, + "text_mc_utas": { + "questions": [ + { + "id": "9", + "options": [ + { + "id": "A", + "text": "a" + }, + { + "id": "B", + "text": "b" + }, + { + "id": "C", + "text": "c" + }, + { + "id": "D", + "text": "d" + } + ], + "prompt": "prompt", + "solution": "A", + "variant": "text" + } + ] + } +} \ No newline at end of file diff --git a/app/services/impl/level/upload.py b/app/services/impl/level/upload.py new file mode 100644 index 0000000..ee2d326 --- /dev/null +++ b/app/services/impl/level/upload.py @@ -0,0 +1,404 @@ +import aiofiles +import os +import uuid +from logging import getLogger + +from typing import Dict, Any, Tuple, Coroutine + +import pdfplumber +from fastapi import UploadFile + +from app.services.abc import ILLMService +from app.helpers import LoggerHelper, FileHelper +from app.mappers import ExamMapper + +from app.dtos.exam import Exam +from app.dtos.sheet import Sheet + + +class UploadLevelModule: + def __init__(self, openai: ILLMService): + self._logger = getLogger(__name__) + self._llm = openai + + # TODO: create a doc in firestore with a status and get its id, run this in a thread and modify the doc in + # firestore, return the id right away, in generation view poll for the id + async def generate_level_from_file(self, file: UploadFile) -> Dict[str, Any] | None: + ext, path_id = await self._save_upload(file) + FileHelper.convert_file_to_pdf( + f'./tmp/{path_id}/uploaded.{ext}', 
f'./tmp/{path_id}/exercises.pdf' + ) + file_has_images = self._check_pdf_for_images(f'./tmp/{path_id}/exercises.pdf') + + if not file_has_images: + FileHelper.convert_file_to_html(f'./tmp/{path_id}/uploaded.{ext}', f'./tmp/{path_id}/exercises.html') + + completion: Coroutine[Any, Any, Exam] = ( + self._png_completion(path_id) if file_has_images else self._html_completion(path_id) + ) + response = await completion + + FileHelper.remove_directory(f'./tmp/{path_id}') + + if response: + return self.fix_ids(response.dict(exclude_none=True)) + return None + + @staticmethod + @LoggerHelper.suppress_loggers() + def _check_pdf_for_images(pdf_path: str) -> bool: + with pdfplumber.open(pdf_path) as pdf: + for page in pdf.pages: + if page.images: + return True + return False + + @staticmethod + async def _save_upload(file: UploadFile) -> Tuple[str, str]: + ext = file.filename.split('.')[-1] + path_id = str(uuid.uuid4()) + os.makedirs(f'./tmp/{path_id}', exist_ok=True) + + tmp_filename = f'./tmp/{path_id}/uploaded.{ext}' + file_bytes: bytes = await file.read() + + async with aiofiles.open(tmp_filename, 'wb') as file: + await file.write(file_bytes) + + return ext, path_id + + def _level_json_schema(self): + return { + "parts": [ + { + "context": "", + "exercises": [ + self._multiple_choice_html(), + self._passage_blank_space_html() + ] + } + ] + } + + async def _html_completion(self, path_id: str) -> Exam: + async with aiofiles.open(f'./tmp/{path_id}/exercises.html', 'r', encoding='utf-8') as f: + html = await f.read() + + return await self._llm.pydantic_prediction( + [self._gpt_instructions_html(), + { + "role": "user", + "content": html + } + ], + ExamMapper.map_to_exam_model, + str(self._level_json_schema()) + ) + + def _gpt_instructions_html(self): + return { + "role": "system", + "content": ( + 'You are GPT Scraper and your job is to clean dirty html into clean usable JSON formatted data.' 
+ 'Your current task is to scrape html english questions sheets.\n\n' + + 'In the question sheet you will only see 4 types of question:\n' + '- blank space multiple choice\n' + '- underline multiple choice\n' + '- reading passage blank space multiple choice\n' + '- reading passage multiple choice\n\n' + + 'For the first two types of questions the template is the same but the question prompts differ, ' + 'whilst in the blank space multiple choice you must include in the prompt the blank spaces with ' + 'multiple "_", in the underline you must include in the prompt the to ' + 'indicate the underline and the options a, b, c, d must be the ordered underlines in the prompt.\n\n' + + 'For the reading passage exercise you must handle the formatting of the passages. If it is a ' + 'reading passage with blank spaces you will see blanks represented with (question id) followed by a ' + 'line and your job is to replace the brackets with the question id and line with "{{question id}}" ' + 'with 2 newlines between paragraphs. For the reading passages without blanks you must remove ' + 'any numbers that may be there to specify paragraph numbers or line numbers, and place 2 newlines ' + 'between paragraphs.\n\n' + + 'IMPORTANT: Note that for the reading passages, the html might not reflect the actual paragraph ' + 'structure, don\'t format the reading passages paragraphs only by the

tags, try to figure ' + 'out the best paragraph separation possible.' + + 'You will place all the information in a single JSON: ' + '{"parts": [{"exercises": [{...}], "context": ""}]}\n ' + 'Where {...} are the exercises templates for each part of a question sheet and the optional field ' + 'context.' + + 'IMPORTANT: The question sheet may be divided by sections but you need to only consider the parts, ' + 'so that you can group the exercises by the parts that are in the html, this is crucial since only ' + 'reading passage multiple choice require context and if the context is included in parts where it ' + 'is not required the UI will be messed up. Some make sure to correctly group the exercises by parts.\n' + + 'The templates for the exercises are the following:\n' + '- blank space multiple choice, underline multiple choice and reading passage multiple choice: ' + f'{self._multiple_choice_html()}\n' + f'- reading passage blank space multiple choice: {self._passage_blank_space_html()}\n' + + 'IMPORTANT: For the reading passage multiple choice the context field must be set with the reading ' + 'passages without paragraphs or line numbers, with 2 newlines between paragraphs, for the other ' + 'exercises exclude the context field.' 
+ ) + } + + @staticmethod + def _multiple_choice_html(): + return { + "type": "multipleChoice", + "prompt": "Select the appropriate option.", + "questions": [ + { + "id": "", + "prompt": "", + "solution": "", + "options": [ + { + "id": "A", + "text": "" + }, + { + "id": "B", + "text": "" + }, + { + "id": "C", + "text": "" + }, + { + "id": "D", + "text": "" + } + ] + } + ] + } + + @staticmethod + def _passage_blank_space_html(): + return { + "type": "fillBlanks", + "variant": "mc", + "prompt": "Click a blank to select the appropriate word for it.", + "text": ( + "}} with 2 newlines between paragraphs>" + ), + "solutions": [ + { + "id": "", + "solution": "" + } + ], + "words": [ + { + "id": "", + "options": { + "A": "", + "B": "", + "C": "", + "D": "" + } + } + ] + } + + async def _png_completion(self, path_id: str) -> Exam: + FileHelper.pdf_to_png(path_id) + + tmp_files = os.listdir(f'./tmp/{path_id}') + pages = [f for f in tmp_files if f.startswith('page-') and f.endswith('.png')] + pages.sort(key=lambda f: int(f.split('-')[1].split('.')[0])) + + json_schema = { + "components": [ + {"type": "part", "part": ""}, + self._multiple_choice_png(), + {"type": "blanksPassage", "text": ( + "}} with 2 newlines between paragraphs>" + )}, + {"type": "passage", "context": ( + "" + )}, + self._passage_blank_space_png() + ] + } + + components = [] + + for i in range(len(pages)): + current_page = pages[i] + next_page = pages[i + 1] if i + 1 < len(pages) else None + batch = [current_page, next_page] if next_page else [current_page] + + sheet = await self._png_batch(path_id, batch, json_schema) + sheet.batch = i + 1 + components.append(sheet.dict()) + + batches = {"batches": components} + + return await self._batches_to_exam_completion(batches) + + async def _png_batch(self, path_id: str, files: list[str], json_schema) -> Sheet: + return await self._llm.pydantic_prediction( + [self._gpt_instructions_png(), + { + "role": "user", + "content": [ + *FileHelper.b64_pngs(path_id, files) + 
] + } + ], + ExamMapper.map_to_sheet, + str(json_schema) + ) + + def _gpt_instructions_png(self): + return { + "role": "system", + "content": ( + 'You are GPT OCR and your job is to scan image text data and format it to JSON format.' + 'Your current task is to scan english questions sheets.\n\n' + + 'You will place all the information in a single JSON: {"components": [{...}]} where {...} is a set of ' + 'sheet components you will retrieve from the images, the components and their corresponding JSON ' + 'templates are as follows:\n' + + '- Part, a standalone part or part of a section of the question sheet: ' + '{"type": "part", "part": ""}\n' + + '- Multiple Choice Question, there are three types of multiple choice questions that differ on ' + 'the prompt field of the template: blanks, underlines and normal. ' + + 'In the blanks prompt you must leave 5 underscores to represent the blank space. ' + 'In the underlines questions the objective is to pick the words that are incorrect in the given ' + 'sentence, for these questions you must wrap the answer to the question with the html tag , ' + 'choose 3 other words to wrap in , place them in the prompt field and use the underlined words ' + 'in the order they appear in the question for the options A to D, disreguard options that might be ' + 'included underneath the underlines question and use the ones you wrapped in .' + 'In normal you just leave the question as is. ' + + f'The template for multiple choice questions is the following: {self._multiple_choice_png()}.\n' + + '- Reading Passages, there are two types of reading passages. Reading passages where you will see ' + 'blanks represented by a (question id) followed by a line, you must format these types of reading ' + 'passages to be only the text with the brackets that have the question id and line replaced with ' + '"{{question id}}", also place 2 newlines between paragraphs. 
For the reading passages without blanks ' + 'you must remove any numbers that may be there to specify paragraph numbers or line numbers, ' + 'and place 2 newlines between paragraphs. ' + + 'For the reading passages with blanks the template is: {"type": "blanksPassage", ' + '"text": "}} also place 2 newlines between paragraphs>"}. ' + + 'For the reading passage without blanks is: {"type": "passage", "context": ""}\n' + + '- Blanks Options, options for a blanks reading passage exercise, this type of component is a group of ' + 'options with the question id and the options from a to d. The template is: ' + f'{self._passage_blank_space_png()}\n' + + 'IMPORTANT: You must place the components in the order that they were given to you. If an exercise or ' + 'reading passages are cut off don\'t include them in the JSON.' + ) + } + + def _multiple_choice_png(self): + multiple_choice = self._multiple_choice_html()["questions"][0] + multiple_choice["type"] = "multipleChoice" + multiple_choice.pop("solution") + return multiple_choice + + def _passage_blank_space_png(self): + passage_blank_space = self._passage_blank_space_html()["words"][0] + passage_blank_space["type"] = "fillBlanks" + return passage_blank_space + + async def _batches_to_exam_completion(self, batches: Dict[str, Any]) -> Exam: + return await self._llm.pydantic_prediction( + [self._gpt_instructions_html(), + { + "role": "user", + "content": str(batches) + } + ], + ExamMapper.map_to_exam_model, + str(self._level_json_schema()) + ) + + def _gpt_instructions_batches(self): + return { + "role": "system", + "content": ( + 'You are helpfull assistant. Your task is to merge multiple batches of english question sheet ' + 'components and solve the questions. Each batch may contain overlapping content with the previous ' + 'batch, or close enough content which needs to be excluded. 
The components are as follows:' + + '- Part, a standalone part or part of a section of the question sheet: ' + '{"type": "part", "part": ""}\n' + + '- Multiple Choice Question, there are three types of multiple choice questions that differ on ' + 'the prompt field of the template: blanks, underlines and normal. ' + + 'In a blanks question, the prompt has underscores to represent the blank space, you must select the ' + 'appropriate option to solve it.' + + 'In a underlines question, the prompt has 4 underlines represented by the html tags , you must ' + 'select the option that makes the prompt incorrect to solve it. If the options order doesn\'t reflect ' + 'the order in which the underlines appear in the prompt you will need to fix it.' + + 'In a normal question there isn\'t either blanks or underlines in the prompt, you should just ' + 'select the appropriate solution.' + + f'The template for these questions is the same: {self._multiple_choice_png()}\n' + + '- Reading Passages, there are two types of reading passages with different templates. The one with ' + 'type "blanksPassage" where the text field holds the passage and a blank is represented by ' + '{{}} and the other one with type "passage" that has the context field with just ' + 'reading passages. For both of these components you will have to remove any additional data that might ' + 'be related to a question description and also remove some "()" and "_" from blanksPassage' + ' if there are any. These components are used in conjunction with other ones.' + + '- Blanks Options, options for a blanks reading passage exercise, this type of component is a group of ' + 'options with the question id and the options from a to d. The template is: ' + f'{self._passage_blank_space_png()}\n\n' + + 'Now that you know the possible components here\'s what I want you to do:\n' + '1. Remove duplicates. 
A batch will have duplicates of other batches and the components of ' + 'the next batch should always take precedence over the previous one batch, what I mean by this is that ' + 'if batch 1 has, for example, multiple choice question with id 10 and the next one also has id 10, ' + 'you pick the next one.\n' + '2. Solve the exercises. There are 4 types of exercises, the 3 multipleChoice variants + a fill blanks ' + 'exercise. For the multiple choice question follow the previous instruction to solve them and place ' + f'them in this format: {self._multiple_choice_html()}. For the fill blanks exercises you need to match ' + 'the correct blanksPassage to the correct fillBlanks options and then pick the correct option. Here is ' + f'the template for this exercise: {self._passage_blank_space_html()}.\n' + f'3. Restructure the JSON to match this template: {self._level_json_schema()}. ' + f'You must group the exercises by the parts in the order they appear in the batches components. ' + f'The context field of a part is the context of a passage component that has text relevant to normal ' + f'multiple choice questions.\n' + + 'Do your utmost to fullfill the requisites, make sure you include all non-duplicate questions' + 'in your response and correctly structure the JSON.' 
+ ) + } + + @staticmethod + def fix_ids(response): + counter = 1 + for part in response["parts"]: + for exercise in part["exercises"]: + if exercise["type"] == "multipleChoice": + for question in exercise["questions"]: + question["id"] = counter + counter += 1 + if exercise["type"] == "fillBlanks": + for i in range(len(exercise["words"])): + exercise["words"][i]["id"] = counter + exercise["solutions"][i]["id"] = counter + counter += 1 + return response diff --git a/app/services/impl/listening.py b/app/services/impl/listening.py index af8f789..14c7a07 100644 --- a/app/services/impl/listening.py +++ b/app/services/impl/listening.py @@ -1,15 +1,18 @@ +import queue import uuid +from logging import getLogger from queue import Queue import random -from typing import Dict +from typing import Dict, List from app.repositories.abc import IFileStorage, IDocumentStore from app.services.abc import IListeningService, ILLMService, ITextToSpeechService from app.configs.question_templates import getListeningTemplate, getListeningPartTemplate from app.configs.constants import ( - NeuralVoices, GPTModels, TemperatureSettings, FilePaths, MinTimers, ExamVariant + NeuralVoices, GPTModels, TemperatureSettings, FilePaths, MinTimers, ExamVariant, EducationalContent, + FieldsAndExercises ) -from app.helpers import ExercisesHelper +from app.helpers import ExercisesHelper, FileHelper class ListeningService(IListeningService): @@ -33,25 +36,83 @@ class ListeningService(IListeningService): self._tts = tts self._file_storage = file_storage self._document_store = document_store + self._logger = getLogger(__name__) self._sections = { "section_1": { + "topic": EducationalContent.TWO_PEOPLE_SCENARIOS, + "exercise_types": FieldsAndExercises.LISTENING_1_EXERCISE_TYPES, + "exercise_sample_size": 1, + "total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_1_EXERCISES, + "start_id": 1, "generate_dialogue": self._generate_listening_conversation, - "type": "conversation" + "type": "conversation", }, 
"section_2": { + "topic": EducationalContent.SOCIAL_MONOLOGUE_CONTEXTS, + "exercise_types": FieldsAndExercises.LISTENING_2_EXERCISE_TYPES, + "exercise_sample_size": 2, + "total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_2_EXERCISES, + "start_id": 11, "generate_dialogue": self._generate_listening_monologue, - "type": "monologue" + "type": "monologue", }, "section_3": { + "topic": EducationalContent.FOUR_PEOPLE_SCENARIOS, + "exercise_types": FieldsAndExercises.LISTENING_3_EXERCISE_TYPES, + "exercise_sample_size": 1, + "total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_3_EXERCISES, + "start_id": 21, "generate_dialogue": self._generate_listening_conversation, - "type": "conversation" + "type": "conversation", }, "section_4": { + "topic": EducationalContent.ACADEMIC_SUBJECTS, + "exercise_types": FieldsAndExercises.LISTENING_EXERCISE_TYPES, + "exercise_sample_size": 2, + "total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_4_EXERCISES, + "start_id": 31, "generate_dialogue": self._generate_listening_monologue, "type": "monologue" } } + async def get_listening_question( + self, section_id: int, topic: str, req_exercises: List[str], difficulty: str, + number_of_exercises_q=queue.Queue(), start_id=-1 + ): + FileHelper.delete_files_older_than_one_day(FilePaths.AUDIO_FILES_PATH) + section = self._sections[f"section_{section_id}"] + if not topic: + topic = random.choice(section["topic"]) + + if len(req_exercises) == 0: + req_exercises = random.sample(section["exercise_types"], section["exercise_sample_size"]) + + if number_of_exercises_q.empty(): + number_of_exercises_q = ExercisesHelper.divide_number_into_parts( + section["total_exercises"], len(req_exercises) + ) + + if start_id == -1: + start_id = section["start_id"] + + dialog = await self.generate_listening_question(section_id, topic) + + if section_id in {1, 3}: + dialog = self.parse_conversation(dialog) + + self._logger.info(f'Generated {section["type"]}: {dialog}') + + exercises = await 
self.generate_listening_exercises( + section_id, str(dialog), req_exercises, number_of_exercises_q, start_id, difficulty + ) + + return { + "exercises": exercises, + "text": dialog, + "difficulty": difficulty + } + async def generate_listening_question(self, section: int, topic: str): return await self._sections[f'section_{section}']["generate_dialogue"](section, topic) @@ -67,9 +128,10 @@ class ListeningService(IListeningService): for req_exercise in req_exercises: number_of_exercises = number_of_exercises_q.get() - if req_exercise == "multipleChoice": + if req_exercise == "multipleChoice" or req_exercise == "multipleChoice3Options": + n_options = 4 if "multipleChoice" else 3 question = await self._gen_multiple_choice_exercise_listening( - dialog_type, dialog, number_of_exercises, start_id, difficulty + dialog_type, dialog, number_of_exercises, start_id, difficulty, n_options ) exercises.append(question) @@ -100,10 +162,9 @@ class ListeningService(IListeningService): return exercises - async def save_listening(self, parts: list[dict], min_timer: int, difficulty: str): + async def save_listening(self, parts: list[dict], min_timer: int, difficulty: str, listening_id: str): template = getListeningTemplate() template['difficulty'] = difficulty - listening_id = str(uuid.uuid4()) for i, part in enumerate(parts, start=0): part_template = getListeningPartTemplate() @@ -127,8 +188,8 @@ class ListeningService(IListeningService): else: template["variant"] = ExamVariant.FULL.value - (result, listening_id) = await self._document_store.save_to_db_with_id("listening", template, listening_id) - if result: + listening_id = await self._document_store.save_to_db_with_id("listening", template, listening_id) + if listening_id: return {**template, "id": listening_id} else: raise Exception("Failed to save question: " + str(parts)) @@ -160,6 +221,20 @@ class ListeningService(IListeningService): } ] + if section == 1: + messages.extend([ + { + "role": "user", + "content": 'Try to have 
misleading discourse (refer multiple dates, multiple colors and etc).' + + }, + { + "role": "user", + "content": 'Try to have spelling of names (cities, people, etc)' + + } + ]) + response = await self._llm.prediction( GPTModels.GPT_4_O, messages, @@ -170,7 +245,11 @@ class ListeningService(IListeningService): return self._get_conversation_voices(response, True) async def _generate_listening_monologue(self, section: int, topic: str) -> Dict: - context = 'social context' if section == 2 else 'academic subject' + head = ( + 'Generate a comprehensive monologue set in the social context of' + if section == 2 else + 'Generate a comprehensive and complex monologue on the academic subject of' + ) messages = [ { @@ -182,7 +261,7 @@ class ListeningService(IListeningService): { "role": "user", "content": ( - f'Generate a comprehensive monologue set in the {context} of "{topic}". {self.MONOLOGUE_TAIL}' + f'{head}: "{topic}". {self.MONOLOGUE_TAIL}' ) } ] @@ -233,7 +312,7 @@ class ListeningService(IListeningService): # ================================================================================================================== async def _gen_multiple_choice_exercise_listening( - self, dialog_type: str, text: str, quantity: int, start_id, difficulty + self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str, n_options: int = 4 ): messages = [ { @@ -248,8 +327,8 @@ class ListeningService(IListeningService): { "role": "user", "content": ( - f'Generate {str(quantity)} {difficulty} difficulty multiple choice questions of 4 options ' - f'for this {dialog_type}:\n"' + text + '"') + f'Generate {quantity} {difficulty} difficulty multiple choice questions of {n_options} ' + f'options for this {dialog_type}:\n"' + text + '"') } ] @@ -268,7 +347,7 @@ class ListeningService(IListeningService): } async def _gen_write_blanks_questions_exercise_listening( - self, dialog_type: str, text: str, quantity: int, start_id, difficulty + self, dialog_type: str, text: str, 
quantity: int, start_id: int, difficulty: str ): messages = [ { @@ -280,7 +359,7 @@ class ListeningService(IListeningService): { "role": "user", "content": ( - f'Generate {str(quantity)} {difficulty} difficulty short answer questions, and the ' + f'Generate {quantity} {difficulty} difficulty short answer questions, and the ' f'possible answers (max 3 words per answer), about this {dialog_type}:\n"{text}"') } ] @@ -300,7 +379,7 @@ class ListeningService(IListeningService): } async def _gen_write_blanks_notes_exercise_listening( - self, dialog_type: str, text: str, quantity: int, start_id, difficulty + self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str ): messages = [ { @@ -312,7 +391,7 @@ class ListeningService(IListeningService): { "role": "user", "content": ( - f'Generate {str(quantity)} {difficulty} difficulty notes taken from this ' + f'Generate {quantity} {difficulty} difficulty notes taken from this ' f'{dialog_type}:\n"{text}"' ) @@ -357,7 +436,7 @@ class ListeningService(IListeningService): } async def _gen_write_blanks_form_exercise_listening( - self, dialog_type: str, text: str, quantity: int, start_id, difficulty + self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str ): messages = [ { @@ -369,12 +448,21 @@ class ListeningService(IListeningService): { "role": "user", "content": ( - f'Generate a form with {str(quantity)} {difficulty} difficulty key-value pairs ' + f'Generate a form with {quantity} {difficulty} difficulty key-value pairs ' f'about this {dialog_type}:\n"{text}"' ) } ] + if dialog_type == "conversation": + messages.append({ + "role": "user", + "content": ( + 'It must be a form and not questions. 
' + 'Example: {"form": ["Color of car": "blue", "Brand of car": "toyota"]}' + ) + }) + parsed_form = await self._llm.prediction( GPTModels.GPT_4_O, messages, ["form"], TemperatureSettings.GEN_QUESTION_TEMPERATURE ) @@ -391,3 +479,14 @@ class ListeningService(IListeningService): "type": "writeBlanks" } + @staticmethod + def parse_conversation(conversation_data): + conversation_list = conversation_data.get('conversation', []) + readable_text = [] + + for message in conversation_list: + name = message.get('name', 'Unknown') + text = message.get('text', '') + readable_text.append(f"{name}: {text}") + + return "\n".join(readable_text) \ No newline at end of file diff --git a/app/services/impl/reading.py b/app/services/impl/reading.py index 243d382..50b136d 100644 --- a/app/services/impl/reading.py +++ b/app/services/impl/reading.py @@ -12,42 +12,25 @@ class ReadingService(IReadingService): def __init__(self, llm: ILLMService): self._llm = llm - self._passages = { - "passage_1": { - "question_type": QuestionType.READING_PASSAGE_1, - "start_id": 1 - }, - "passage_2": { - "question_type": QuestionType.READING_PASSAGE_2, - "start_id": 14 - }, - "passage_3": { - "question_type": QuestionType.READING_PASSAGE_3, - "start_id": 27 - } - } async def gen_reading_passage( self, - passage_id: int, + part: int, topic: str, req_exercises: List[str], number_of_exercises_q: Queue, - difficulty: str + difficulty: str, + start_id: int ): - _passage = self._passages[f'passage_{str(passage_id)}'] - - passage = await self.generate_reading_passage(_passage["question_type"], topic) - - if passage == "": - return await self.gen_reading_passage(passage_id, topic, req_exercises, number_of_exercises_q, difficulty) - - start_id = _passage["start_id"] + passage = await self.generate_reading_passage(part, topic) exercises = await self._generate_reading_exercises( passage["text"], req_exercises, number_of_exercises_q, start_id, difficulty ) + if ExercisesHelper.contains_empty_dict(exercises): - return 
await self.gen_reading_passage(passage_id, topic, req_exercises, number_of_exercises_q, difficulty) + return await self.gen_reading_passage( + part, topic, req_exercises, number_of_exercises_q, difficulty, start_id + ) return { "exercises": exercises, @@ -58,7 +41,17 @@ class ReadingService(IReadingService): "difficulty": difficulty } - async def generate_reading_passage(self, q_type: QuestionType, topic: str): + async def generate_reading_passage(self, part: int, topic: str, word_count: int = 800): + part_system_message = { + "1": 'The generated text should be fairly easy to understand and have multiple paragraphs.', + "2": 'The generated text should be fairly hard to understand and have multiple paragraphs.', + "3": ( + 'The generated text should be very hard to understand and include different points, theories, ' + 'subtle differences of opinions from people, correctly sourced to the person who said it, ' + 'over the specified topic and have multiple paragraphs.' + ) + } + messages = [ { "role": "system", @@ -69,17 +62,26 @@ class ReadingService(IReadingService): { "role": "user", "content": ( - f'Generate an extensive text for IELTS {q_type.value}, of at least 1500 words, ' - f'on the topic of "{topic}". The passage should offer a substantial amount of ' - 'information, analysis, or narrative relevant to the chosen subject matter. This text ' - 'passage aims to serve as the primary reading section of an IELTS test, providing an ' - 'in-depth and comprehensive exploration of the topic. Make sure that the generated text ' - 'does not contain forbidden subjects in muslim countries.' + f'Generate an extensive text for IELTS Reading Passage {part}, of at least {word_count} words, ' + f'on the topic of "{topic}". The passage should offer a substantial amount of ' + 'information, analysis, or narrative relevant to the chosen subject matter. 
This text ' + 'passage aims to serve as the primary reading section of an IELTS test, providing an ' + 'in-depth and comprehensive exploration of the topic. Make sure that the generated text ' + 'does not contain forbidden subjects in muslim countries.' ) - + }, + { + "role": "system", + "content": part_system_message[str(part)] } ] + if part == 3: + messages.append({ + "role": "user", + "content": "Use real text excerpts on you generated passage and cite the sources." + }) + return await self._llm.prediction( GPTModels.GPT_4_O, messages, @@ -95,11 +97,15 @@ class ReadingService(IReadingService): number_of_exercises = number_of_exercises_q.get() if req_exercise == "fillBlanks": - question = await self._gen_summary_fill_blanks_exercise(passage, number_of_exercises, start_id, difficulty) + question = await self._gen_summary_fill_blanks_exercise( + passage, number_of_exercises, start_id, difficulty + ) exercises.append(question) print("Added fill blanks: " + str(question)) elif req_exercise == "trueFalse": - question = await self._gen_true_false_not_given_exercise(passage, number_of_exercises, start_id, difficulty) + question = await self._gen_true_false_not_given_exercise( + passage, number_of_exercises, start_id, difficulty + ) exercises.append(question) print("Added trueFalse: " + str(question)) elif req_exercise == "writeBlanks": @@ -114,32 +120,28 @@ class ReadingService(IReadingService): question = await self._gen_paragraph_match_exercise(passage, number_of_exercises, start_id) exercises.append(question) print("Added paragraph match: " + str(question)) + elif req_exercise == "ideaMatch": + question = await self._gen_idea_match_exercise(passage, number_of_exercises, start_id) + exercises.append(question) + print("Added idea match: " + str(question)) start_id = start_id + number_of_exercises return exercises - async def _gen_summary_fill_blanks_exercise(self, text: str, quantity: int, start_id, difficulty): + async def _gen_summary_fill_blanks_exercise( + self, 
text: str, quantity: int, start_id, difficulty, num_random_words: int = 1 + ): messages = [ { "role": "system", "content": ( - 'You are a helpful assistant designed to output JSON on this format: ' - '{ "summary": "summary", "words": ["word_1", "word_2"] }') + 'You are a helpful assistant designed to output JSON on this format: { "summary": "summary" }' + ) }, { "role": "user", - "content": ( - f'Summarize this text: "{text}"' - ) - - }, - { - "role": "user", - "content": ( - f'Select {str(quantity)} {difficulty} difficulty words, it must be words and not ' - 'expressions, from the summary.' - ) + "content": f'Summarize this text: "{text}"' } ] @@ -148,22 +150,45 @@ class ReadingService(IReadingService): GPTModels.GPT_4_O, messages, ["summary"], TemperatureSettings.GEN_QUESTION_TEMPERATURE ) - replaced_summary = ExercisesHelper.replace_first_occurrences_with_placeholders(response["summary"], response["words"], start_id) - options_words = ExercisesHelper.add_random_words_and_shuffle(response["words"], 5) + messages = [ + { + "role": "system", + "content": ( + 'You are a helpful assistant designed to output JSON on this format: ' + '{"words": ["word_1", "word_2"] }' + ) + }, + { + "role": "user", + "content": ( + f'Select {quantity} {difficulty} difficulty words, it must be words and not expressions, ' + f'from this:\n{response["summary"]}' + ) + } + ] + + words_response = await self._llm.prediction( + GPTModels.GPT_4_O, messages, ["words"], TemperatureSettings.GEN_QUESTION_TEMPERATURE + ) + + response["words"] = words_response["words"] + replaced_summary = ExercisesHelper.replace_first_occurrences_with_placeholders( + response["summary"], response["words"], start_id + ) + options_words = ExercisesHelper.add_random_words_and_shuffle(response["words"], num_random_words) solutions = ExercisesHelper.fillblanks_build_solutions_array(response["words"], start_id) return { "allowRepetition": True, "id": str(uuid.uuid4()), "prompt": ( - "Complete the summary below. 
Click a blank to select the corresponding word(s) for it.\\nThere are " + "Complete the summary below. Write the letter of the corresponding word(s) for it.\\nThere are " "more words than spaces so you will not use them all. You may use any of the words more than once." ), "solutions": solutions, "text": replaced_summary, "type": "fillBlanks", "words": options_words - } async def _gen_true_false_not_given_exercise(self, text: str, quantity: int, start_id, difficulty): @@ -210,7 +235,8 @@ class ReadingService(IReadingService): "role": "system", "content": ( 'You are a helpful assistant designed to output JSON on this format: ' - '{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}') + '{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}' + ) }, { "role": "user", @@ -243,7 +269,8 @@ class ReadingService(IReadingService): "role": "system", "content": ( 'You are a helpful assistant designed to output JSON on this format: ' - '{"headings": [ {"heading": "first paragraph heading"}, {"heading": "second paragraph heading"}]}') + '{"headings": [ {"heading": "first paragraph heading"}, {"heading": "second paragraph heading"}]}' + ) }, { "role": "user", @@ -262,7 +289,7 @@ class ReadingService(IReadingService): options = [] for i, paragraph in enumerate(paragraphs, start=0): - paragraph["heading"] = headings[i] + paragraph["heading"] = headings[i]["heading"] options.append({ "id": paragraph["letter"], "sentence": paragraph["paragraph"] @@ -285,3 +312,38 @@ class ReadingService(IReadingService): "sentences": sentences[:quantity], "type": "matchSentences" } + + async def _gen_idea_match_exercise(self, text: str, quantity: int, start_id): + messages = [ + { + "role": "system", + "content": ( + 'You are a helpful assistant designed to output JSON on this format: ' + '{"ideas": [ ' + '{"idea": "some idea or opinion", "from": "person, institution whose idea or opinion this is"}, ' + '{"idea": "some other idea or 
opinion", "from": "person, institution whose idea or opinion this is"}' + ']}' + ) + }, + { + "role": "user", + "content": ( + f'From the text extract {quantity} ideas, theories, opinions and who they are from. ' + f'The text: {text}' + ) + } + ] + + response = await self._llm.prediction( + GPTModels.GPT_4_O, messages, ["ideas"], TemperatureSettings.GEN_QUESTION_TEMPERATURE + ) + ideas = response["ideas"] + + return { + "id": str(uuid.uuid4()), + "allowRepetition": False, + "options": ExercisesHelper.build_options(ideas), + "prompt": "Choose the correct author for the ideas/opinions from the list of authors below.", + "sentences": ExercisesHelper.build_sentences(ideas, start_id), + "type": "matchSentences" + } diff --git a/app/services/impl/speaking.py b/app/services/impl/speaking.py index 263b4b0..86cfe1c 100644 --- a/app/services/impl/speaking.py +++ b/app/services/impl/speaking.py @@ -3,7 +3,7 @@ import os import re import uuid import random -from typing import Dict, List +from typing import Dict, List, Optional from app.repositories.abc import IFileStorage, IDocumentStore from app.services.abc import ISpeakingService, ILLMService, IVideoGeneratorService, ISpeechToTextService @@ -27,29 +27,49 @@ class SpeakingService(ISpeakingService): self._document_store = document_store self._stt = stt self._logger = logging.getLogger(__name__) + + # TODO: Is the difficulty in the prompts supposed to be hardcoded? The response is set with + # either the difficulty in the request or a random one yet the prompt doesn't change self._tasks = { "task_1": { "get": { - "json_template": ( - '{"topic": "topic", "question": "question"}' - ), + "json_template": { + "first_topic": "topic 1", + "second_topic": "topic 2", + "questions": [ + ( + "Introductory question about the first topic, starting the topic with " + "'Let's talk about x' and then the question." 
+ ), + "Follow up question about the first topic", + "Follow up question about the first topic", + "Question about second topic", + "Follow up question about the second topic", + ] + }, "prompt": ( - 'Craft a thought-provoking question of {difficulty} difficulty for IELTS Speaking Part 1 ' + 'Craft 5 simple and single questions of easy difficulty for IELTS Speaking Part 1 ' 'that encourages candidates to delve deeply into personal experiences, preferences, or ' - 'insights on the topic of "{topic}". Instruct the candidate to offer not only detailed ' - 'descriptions but also provide nuanced explanations, examples, or anecdotes to enrich ' - 'their response. Make sure that the generated question does not contain forbidden subjects in ' + 'insights on the topic of "{first_topic}" and the topic of "{second_topic}". ' + 'Make sure that the generated question does not contain forbidden subjects in ' 'muslim countries.' ) } }, "task_2": { "get": { - "json_template": ( - '{"topic": "topic", "question": "question", "prompts": ["prompt_1", "prompt_2", "prompt_3"]}' - ), + "json_template": { + "topic": "topic", + "question": "question", + "prompts": [ + "prompt_1", + "prompt_2", + "prompt_3" + ], + "suffix": "And explain why..." + }, "prompt": ( - 'Create a question of {difficulty} difficulty for IELTS Speaking Part 2 ' + 'Create a question of medium difficulty for IELTS Speaking Part 2 ' 'that encourages candidates to narrate a personal experience or story related to the topic ' 'of "{topic}". 
Include 3 prompts that guide the candidate to describe ' 'specific aspects of the experience, such as details about the situation, ' @@ -60,11 +80,18 @@ class SpeakingService(ISpeakingService): }, "task_3": { "get": { - "json_template": ( - '{"topic": "topic", "questions": ["question", "question", "question"]}' - ), + "json_template": { + "topic": "topic", + "questions": [ + "Introductory question about the topic.", + "Follow up question about the topic", + "Follow up question about the topic", + "Follow up question about the topic", + "Follow up question about the topic" + ] + }, "prompt": ( - 'Formulate a set of 3 questions of {difficulty} difficulty for IELTS Speaking Part 3 ' + 'Formulate a set of 5 single questions of hard difficulty for IELTS Speaking Part 3' 'that encourage candidates to engage in a meaningful discussion on the topic of "{topic}". ' 'Provide inquiries, ensuring they explore various aspects, perspectives, and implications ' 'related to the topic. Make sure that the generated question does not contain forbidden ' @@ -74,28 +101,57 @@ class SpeakingService(ISpeakingService): }, } - async def get_speaking_task(self, task_id: int, topic: str, difficulty: str): - task_values = self._tasks[f'task_{task_id}']['get'] + async def get_speaking_part( + self, part: int, topic: str, difficulty: str, second_topic: Optional[str] = None + ) -> Dict: + task_values = self._tasks[f'task_{part}']['get'] + + if part == 1: + task_prompt = task_values["prompt"].format(first_topic=topic, second_topic=second_topic) + else: + task_prompt = task_values["prompt"].format(topic=topic) + messages = [ { "role": "system", "content": ( - 'You are a helpful assistant designed to output JSON on this format: ' + - task_values["json_template"] + 'You are a helpful assistant designed to output JSON on this format: ' + f'{task_values["json_template"]}' ) }, { "role": "user", - "content": str(task_values["prompt"]).format(topic=topic, difficulty=difficulty) + "content": task_prompt 
} ] + part_specific = { + "1": 'The questions should lead to the usage of 4 verb tenses (present perfect, present, past and future).', + "2": ( + 'The prompts must not be questions. Also include a suffix like the ones in the IELTS exams ' + 'that start with "And explain why".' + ) + } + + if part in {1, 2}: + messages.append({ + "role": "user", + "content": part_specific[str(part)] + }) + + if part in {1, 3}: + messages.append({ + "role": "user", + "content": 'They must be 1 single question each and not be double-barreled questions.' + }) + + fields_to_check = ["first_topic"] if part == 1 else FieldsAndExercises.GEN_FIELDS + response = await self._llm.prediction( - GPTModels.GPT_4_O, messages, FieldsAndExercises.GEN_FIELDS, TemperatureSettings.GEN_QUESTION_TEMPERATURE + GPTModels.GPT_4_O, messages, fields_to_check, TemperatureSettings.GEN_QUESTION_TEMPERATURE ) - # TODO: this was on GET /speaking_task_3 don't know if it is intentional only for 3 - if task_id == 3: + if part == 3: # Remove the numbers from the questions only if the string starts with a number response["questions"] = [ re.sub(r"^\d+\.\s*", "", question) @@ -103,117 +159,15 @@ class SpeakingService(ISpeakingService): for question in response["questions"] ] - response["type"] = task_id + response["type"] = part response["difficulty"] = difficulty - response["topic"] = topic + + if part in {2, 3}: + response["topic"] = topic + return response - async def grade_speaking_task_1_and_2( - self, task: int, question: str, answer_firebase_path: str, sound_file_name: str - ): - request_id = uuid.uuid4() - req_data = { - "question": question, - "answer": answer_firebase_path - } - self._logger.info( - f'POST - speaking_task_{task} - Received request to grade speaking task {task}. 
' - f'Use this id to track the logs: {str(request_id)} - Request data: {str(req_data)}' - ) - - self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Downloading file {answer_firebase_path}') - - await self._file_storage.download_firebase_file(answer_firebase_path, sound_file_name) - - self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Downloaded file {answer_firebase_path} to {sound_file_name}') - - answer = await self._stt.speech_to_text(sound_file_name) - - self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Transcripted answer: {answer}') - - if TextHelper.has_x_words(answer, 20): - messages = [ - { - "role": "system", - "content": ( - 'You are a helpful assistant designed to output JSON on this format: ' - '{"comment": "comment about answer quality", "overall": 0.0, ' - '"task_response": {"Fluency and Coherence": 0.0, "Lexical Resource": 0.0, ' - '"Grammatical Range and Accuracy": 0.0, "Pronunciation": 0.0}}') - }, - { - "role": "user", - "content": ( - f'Evaluate the given Speaking Part {task} response based on the IELTS grading system, ensuring a ' - 'strict assessment that penalizes errors. Deduct points for deviations from the task, and ' - 'assign a score of 0 if the response fails to address the question. Additionally, provide ' - 'detailed commentary highlighting both strengths and weaknesses in the response.' 
- f'\n Question: "{question}" \n Answer: "{answer}"') - } - ] - - self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Requesting grading of the answer.') - - response = await self._llm.prediction( - GPTModels.GPT_3_5_TURBO, - messages, - ["comment"], - TemperatureSettings.GRADING_TEMPERATURE - ) - - self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Answer graded: {str(response)}') - - perfect_answer_messages = [ - { - "role": "system", - "content": ( - 'You are a helpful assistant designed to output JSON on this format: ' - '{"answer": "perfect answer"}' - ) - }, - { - "role": "user", - "content": ( - 'Provide a perfect answer according to ielts grading system to the following ' - f'Speaking Part {task} question: "{question}"') - } - ] - - self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Requesting perfect answer.') - - response = await self._llm.prediction( - GPTModels.GPT_3_5_TURBO, - perfect_answer_messages, - ["answer"], - TemperatureSettings.GEN_QUESTION_TEMPERATURE - ) - response['perfect_answer'] = response["answer"] - - self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Perfect answer: ' + response['perfect_answer']) - - response['transcript'] = answer - - self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Requesting fixed text.') - - response['fixed_text'] = await self._get_speaking_corrections(answer) - - self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Fixed text: ' + response['fixed_text']) - - if response["overall"] == "0.0" or response["overall"] == 0.0: - response["overall"] = self._calculate_overall(response) - - self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Final response: {str(response)}') - return response - else: - self._logger.info( - f'POST - speaking_task_{task} - {str(request_id)} - ' - f'The answer had less words than threshold 20 to be graded. 
Answer: {answer}' - ) - - return self._zero_rating("The audio recorded does not contain enough english words to be graded.") - - # TODO: When there's more time grade_speaking_task_1_2 can be merged with this, when there's more time - async def grade_speaking_task_3(self, answers: Dict, task: int = 3): + async def grade_speaking_task(self, task: int, answers: List[Dict]) -> Dict: request_id = uuid.uuid4() self._logger.info( f'POST - speaking_task_{task} - Received request to grade speaking task {task}. ' @@ -222,157 +176,219 @@ class SpeakingService(ISpeakingService): text_answers = [] perfect_answers = [] - self._logger.info( - f'POST - speaking_task_{task} - {str(request_id)} - Received {str(len(answers))} total answers.' - ) + + if task != 2: + self._logger.info( + f'POST - speaking_task_{task} - {str(request_id)} - Received {str(len(answers))} total answers.' + ) + for item in answers: sound_file_name = FilePaths.AUDIO_FILES_PATH + str(uuid.uuid4()) - self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Downloading file {item["answer"]}') + self._logger.info(f'POST - speaking_task_{task} - {request_id} - Downloading file {item["answer"]}') await self._file_storage.download_firebase_file(item["answer"], sound_file_name) self._logger.info( - f'POST - speaking_task_{task} - {str(request_id)} - ' - 'Downloaded file ' + item["answer"] + f' to {sound_file_name}' + f'POST - speaking_task_{task} - {request_id} - ' + f'Downloaded file {item["answer"]} to {sound_file_name}' ) answer_text = await self._stt.speech_to_text(sound_file_name) - self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Transcripted answer: {answer_text}') + self._logger.info(f'POST - speaking_task_{task} - {request_id} - Transcripted answer: {answer_text}') text_answers.append(answer_text) item["answer"] = answer_text os.remove(sound_file_name) + # TODO: This will end the grading of all answers if a single one does not have enough words + # don't know if this is 
intended if not TextHelper.has_x_words(answer_text, 20): self._logger.info( - f'POST - speaking_task_{task} - {str(request_id)} - ' - f'The answer had less words than threshold 20 to be graded. Answer: {answer_text}') + f'POST - speaking_task_{task} - {request_id} - ' + f'The answer had less words than threshold 20 to be graded. Answer: {answer_text}' + ) return self._zero_rating("The audio recorded does not contain enough english words to be graded.") - perfect_answer_messages = [ - { - "role": "system", - "content": ( - 'You are a helpful assistant designed to output JSON on this format: ' - '{"answer": "perfect answer"}' - ) - }, - { - "role": "user", - "content": ( - 'Provide a perfect answer according to ielts grading system to the following ' - f'Speaking Part {task} question: "{item["question"]}"' - ) - } - ] self._logger.info( - f'POST - speaking_task_{task} - {str(request_id)} - ' + f'POST - speaking_task_{task} - {request_id} - ' f'Requesting perfect answer for question: {item["question"]}' ) + perfect_answers.append(await self._get_perfect_answer(task, item["question"])) - perfect_answers.append( - await self._llm.prediction( - GPTModels.GPT_3_5_TURBO, - perfect_answer_messages, - ["answer"], - TemperatureSettings.GEN_QUESTION_TEMPERATURE - ) + if task in {1, 3}: + self._logger.info( + f'POST - speaking_task_{task} - {request_id} - Formatting answers and questions for prompt.' ) - messages = [ - { - "role": "system", - "content": ( - 'You are a helpful assistant designed to output JSON on this format: ' - '{"comment": "comment about answer quality", "overall": 0.0, ' - '"task_response": {"Fluency and Coherence": 0.0, "Lexical Resource": 0.0, ' - '"Grammatical Range and Accuracy": 0.0, "Pronunciation": 0.0}}') - } - ] - message = ( - f"Evaluate the given Speaking Part {task} response based on the IELTS grading system, ensuring a " - "strict assessment that penalizes errors. 
Deduct points for deviations from the task, and " - "assign a score of 0 if the response fails to address the question. Additionally, provide detailed " - "commentary highlighting both strengths and weaknesses in the response." - "\n\n The questions and answers are: \n\n'") + formatted_text = "" + for i, entry in enumerate(answers, start=1): + formatted_text += f"**Question {i}:**\n{entry['question']}\n\n" + formatted_text += f"**Answer {i}:**\n{entry['answer']}\n\n" - self._logger.info( - f'POST - speaking_task_{task} - {str(request_id)} - Formatting answers and questions for prompt.' - ) + self._logger.info( + f'POST - speaking_task_{task} - {request_id} - ' + f'Formatted answers and questions for prompt: {formatted_text}' + ) + questions_and_answers = f'\n\n The questions and answers are: \n\n{formatted_text}' + else: + questions_and_answers = f'\n Question: "{answers[0]["question"]}" \n Answer: "{answers[0]["answer"]}"' - formatted_text = "" - for i, entry in enumerate(answers, start=1): - formatted_text += f"**Question {i}:**\n{entry['question']}\n\n" - formatted_text += f"**Answer {i}:**\n{entry['answer']}\n\n" + self._logger.info(f'POST - speaking_task_{task} - {request_id} - Requesting grading of the answer(s).') + response = await self._grade_task(task, questions_and_answers) - self._logger.info( - f'POST - speaking_task_{task} - {str(request_id)} - Formatted answers and questions for prompt: {formatted_text}' - ) + self._logger.info(f'POST - speaking_task_{task} - {request_id} - Answer(s) graded: {response}') - message += formatted_text + if task in {1, 3}: + self._logger.info( + f'POST - speaking_task_{task} - {request_id} - Adding perfect answer(s) to response.') - messages.append({ - "role": "user", - "content": message - }) + # TODO: check if it is answer["answer"] instead + for i, answer in enumerate(perfect_answers, start=1): + response['perfect_answer_' + str(i)] = answer - self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - 
Requesting grading of the answers.') + self._logger.info( + f'POST - speaking_task_{task} - {request_id} - Adding transcript and fixed texts to response.' + ) - response = await self._llm.prediction( - GPTModels.GPT_3_5_TURBO, messages, ["comment"], TemperatureSettings.GRADING_TEMPERATURE - ) + for i, answer in enumerate(text_answers, start=1): + response['transcript_' + str(i)] = answer + response['fixed_text_' + str(i)] = await self._get_speaking_corrections(answer) + else: + response['transcript'] = answers[0]["answer"] - self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Answers graded: {str(response)}') + self._logger.info(f'POST - speaking_task_{task} - {request_id} - Requesting fixed text.') + response['fixed_text'] = await self._get_speaking_corrections(answers[0]["answer"]) + self._logger.info(f'POST - speaking_task_{task} - {request_id} - Fixed text: {response["fixed_text"]}') - self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Adding perfect answers to response.') - - for i, answer in enumerate(perfect_answers, start=1): - response['perfect_answer_' + str(i)] = answer - - self._logger.info( - f'POST - speaking_task_{task} - {str(request_id)} - Adding transcript and fixed texts to response.' 
- ) - - for i, answer in enumerate(text_answers, start=1): - response['transcript_' + str(i)] = answer - response['fixed_text_' + str(i)] = await self._get_speaking_corrections(answer) - - if response["overall"] == "0.0" or response["overall"] == 0.0: - response["overall"] = self._calculate_overall(response) - - self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Final response: {str(response)}') + response['perfect_answer'] = perfect_answers[0]["answer"] + response["overall"] = self._fix_speaking_overall(response["overall"], response["task_response"]) + self._logger.info(f'POST - speaking_task_{task} - {request_id} - Final response: {response}') return response # ================================================================================================================== # grade_speaking_task helpers # ================================================================================================================== + async def _get_perfect_answer(self, task: int, question: str): + messages = [ + { + "role": "system", + "content": ( + 'You are a helpful assistant designed to output JSON on this format: {"answer": "perfect answer"}' + ) + }, + { + "role": "user", + "content": ( + 'Provide a perfect answer according to ielts grading system to the following ' + f'Speaking Part {task} question: "{question}"' + ) + } + ] + + if task == 1: + messages.append({ + "role": "user", + "content": 'The answer must be 2 or 3 sentences long.' 
+ }) + + gpt_model = GPTModels.GPT_4_O if task == 1 else GPTModels.GPT_3_5_TURBO + + return await self._llm.prediction( + gpt_model, messages, ["answer"], TemperatureSettings.GRADING_TEMPERATURE + ) + + async def _grade_task(self, task: int, questions_and_answers: str) -> Dict: + messages = [ + { + "role": "system", + "content": ( + f'You are a helpful assistant designed to output JSON on this format: {self._grade_template()}' + ) + }, + { + "role": "user", + "content": ( + f'Evaluate the given Speaking Part {task} response based on the IELTS grading system, ensuring a ' + 'strict assessment that penalizes errors. Deduct points for deviations from the task, and ' + 'assign a score of 0 if the response fails to address the question. Additionally, provide ' + 'detailed commentary highlighting both strengths and weaknesses in the response.' + ) + questions_and_answers + } + ] + + task_specific = { + "1": ( + 'Address the student as "you". If the answers are not 2 or 3 sentences long, warn the ' + 'student that they should be.' + ), + "2": 'Address the student as "you"', + "3": 'Address the student as "you" and pay special attention to coherence between the answers.' + } + + messages.append({ + "role": "user", + "content": task_specific[str(task)] + }) + + if task in {1, 3}: + messages.extend([ + { + "role": "user", + "content": ( + 'For pronunciations act as if you heard the answers and they were transcripted ' + 'as you heard them.' + ) + }, + { + "role": "user", + "content": 'The comments must be long, detailed, justify the grading and suggest improvements.' 
+ } + ]) + + return await self._llm.prediction( + GPTModels.GPT_4_O, messages, ["comment"], TemperatureSettings.GRADING_TEMPERATURE + ) + + @staticmethod + def _fix_speaking_overall(overall: float, task_response: dict): + grades = [category["grade"] for category in task_response.values()] + + if overall > max(grades) or overall < min(grades): + total_sum = sum(grades) + average = total_sum / len(grades) + rounded_average = round(average, 0) + return rounded_average + + return overall + @staticmethod def _zero_rating(comment: str): return { "comment": comment, "overall": 0, "task_response": { - "Fluency and Coherence": 0, - "Lexical Resource": 0, - "Grammatical Range and Accuracy": 0, - "Pronunciation": 0 + "Fluency and Coherence": { + "grade": 0.0, + "comment": "" + }, + "Lexical Resource": { + "grade": 0.0, + "comment": "" + }, + "Grammatical Range and Accuracy": { + "grade": 0.0, + "comment": "" + }, + "Pronunciation": { + "grade": 0.0, + "comment": "" + } } } - @staticmethod - def _calculate_overall(response: Dict): - return round( - ( - response["task_response"]["Fluency and Coherence"] + - response["task_response"]["Lexical Resource"] + - response["task_response"]["Grammatical Range and Accuracy"] + - response["task_response"]["Pronunciation"] - ) / 4, 1 - ) - async def _get_speaking_corrections(self, text): messages = [ { @@ -409,6 +425,7 @@ class SpeakingService(ISpeakingService): self._logger.info(f'Saved speaking to DB with id {req_id} : {str(template)}') async def _create_video_per_part(self, exercises: List[Dict], template: Dict, part: int): + avatar = (random.choice(list(AvatarEnum))).value template_index = part - 1 # Using list comprehension to find the element with the desired value in the 'type' field @@ -418,26 +435,12 @@ class SpeakingService(ISpeakingService): if found_exercises: exercise = found_exercises[0] self._logger.info(f'Creating video for speaking part {part}') - if part in {1, 2}: - result = await self._create_video( - 
exercise["question"], - (random.choice(list(AvatarEnum))).value, - f'Failed to create video for part {part} question: {str(exercise["question"])}' - ) - if result is not None: - if part == 2: - template["exercises"][template_index]["prompts"] = exercise["prompts"] - - template["exercises"][template_index]["text"] = exercise["question"] - template["exercises"][template_index]["title"] = exercise["topic"] - template["exercises"][template_index]["video_url"] = result["video_url"] - template["exercises"][template_index]["video_path"] = result["video_path"] - else: + if part in {1, 3}: questions = [] for question in exercise["questions"]: result = await self._create_video( question, - (random.choice(list(AvatarEnum))).value, + avatar, f'Failed to create video for part {part} question: {str(exercise["question"])}' ) if result is not None: @@ -449,63 +452,139 @@ class SpeakingService(ISpeakingService): questions.append(video) template["exercises"][template_index]["prompts"] = questions - template["exercises"][template_index]["title"] = exercise["topic"] + if part == 1: + template["exercises"][template_index]["first_title"] = exercise["first_topic"] + template["exercises"][template_index]["second_title"] = exercise["second_topic"] + else: + template["exercises"][template_index]["title"] = exercise["topic"] + else: + result = await self._create_video( + exercise["question"], + avatar, + f'Failed to create video for part {part} question: {str(exercise["question"])}' + ) + if result is not None: + template["exercises"][template_index]["prompts"] = exercise["prompts"] + template["exercises"][template_index]["text"] = exercise["question"] + template["exercises"][template_index]["title"] = exercise["topic"] + template["exercises"][template_index]["video_url"] = result["video_url"] + template["exercises"][template_index]["video_path"] = result["video_path"] if not found_exercises: template["exercises"].pop(template_index) return template - # TODO: Check if it is intended to log 
the original question - async def generate_speaking_video(self, original_question: str, topic: str, avatar: str, prompts: List[str]): - if len(prompts) > 0: - question = original_question + " In your answer you should consider: " + " ".join(prompts) - else: - question = original_question - - error_msg = f'Failed to create video for part 1 question: {original_question}' - - result = await self._create_video( - question, - avatar, - error_msg + async def generate_video( + self, part: int, avatar: str, topic: str, questions: list[str], + *, + second_topic: Optional[str] = None, + prompts: Optional[list[str]] = None, + suffix: Optional[str] = None, + ): + request_id = str(uuid.uuid4()) + # TODO: request data + self._logger.info( + f'POST - generate_video_{part} - Received request to generate video {part}. ' + f'Use this id to track the logs: {request_id} - Request data: " + str(request.get_json())' ) - if result is not None: - return { - "text": original_question, - "prompts": prompts, - "title": topic, - **result, - "type": "speaking", - "id": uuid.uuid4() - } - else: - return str(error_msg) + part_questions = self._get_part_questions(part, questions, avatar) + videos = [] - async def generate_interactive_video(self, questions: List[str], avatar: str, topic: str): - sp_questions = [] - self._logger.info('Creating videos for speaking part 3') - for question in questions: + self._logger.info(f'POST - generate_video_{part} - {request_id} - Creating videos for speaking part {part}.') + for question in part_questions: + self._logger.info(f'POST - generate_video_{part} - {request_id} - Creating video for question: {question}') result = await self._create_video( question, avatar, - f'Failed to create video for part 3 question: {question}' + 'POST - generate_video_{p} - {r} - Failed to create video for part {p} question: {q}'.format( + p=part, r=request_id, q=question + ) ) - if result is not None: + self._logger.info(f'POST - generate_video_{part} - {request_id} - Video 
created') + self._logger.info( + f'POST - generate_video_{part} - {request_id} - Uploaded video to firebase: {result["video_url"]}' + ) video = { "text": question, - **result + "video_path": result["video_path"], + "video_url": result["video_url"] } - sp_questions.append(video) + videos.append(video) - return { - "prompts": sp_questions, - "title": topic, - "type": "interactiveSpeaking", - "id": uuid.uuid4() - } + if part == 2 and len(videos) == 0: + raise Exception(f'Failed to create video for part 2 question: {questions[0]}') + + return self._get_part_response(part, topic, videos, second_topic, prompts, suffix) + + @staticmethod + def _get_part_questions(part: int, questions: list[str], avatar: str): + part_questions: list[str] = [] + + if part == 1: + id_to_name = { + "5912afa7c77c47d3883af3d874047aaf": "MATTHEW", + "9e58d96a383e4568a7f1e49df549e0e4": "VERA", + "d2cdd9c0379a4d06ae2afb6e5039bd0c": "EDWARD", + "045cb5dcd00042b3a1e4f3bc1c12176b": "TANYA", + "1ae1e5396cc444bfad332155fdb7a934": "KAYLA", + "0ee6aa7cc1084063a630ae514fccaa31": "JEROME", + "5772cff935844516ad7eeff21f839e43": "TYLER", + + } + part_questions.extend( + [ + "Hello my name is " + id_to_name.get(avatar) + ", what is yours?", + "Do you work or do you study?", + *questions + ] + ) + elif part == 2: + # Removed as the examiner should not say what is on the card. 
+ # question = question + " In your answer you should consider: " + " ".join(prompts) + suffix + part_questions.append(f'{questions[0]}\nYou have 1 minute to take notes.') + elif part == 3: + part_questions = questions + + return part_questions + + @staticmethod + def _get_part_response( + part: int, + topic: str, + videos: list[dict], + second_topic: Optional[str], + prompts: Optional[list[str]], + suffix: Optional[str] + ): + response = {} + if part == 1: + response = { + "prompts": videos, + "first_title": topic, + "second_title": second_topic, + "type": "interactiveSpeaking" + } + if part == 2: + response = { + "prompts": prompts, + "title": topic, + "suffix": suffix, + "type": "speaking", + # includes text, video_url and video_path + **videos[0] + } + if part == 3: + response = { + "prompts": videos, + "title": topic, + "type": "interactiveSpeaking", + } + + response["id"] = str(uuid.uuid4()) + return response async def _create_video(self, question: str, avatar: str, error_message: str): result = await self._vid_gen.create_video(question, avatar) @@ -519,3 +598,36 @@ class SpeakingService(ISpeakingService): } self._logger.error(error_message) return None + + @staticmethod + def _grade_template(): + return { + "comment": "extensive comment about answer quality", + "overall": 0.0, + "task_response": { + "Fluency and Coherence": { + "grade": 0.0, + "comment": ( + "extensive comment about fluency and coherence, use examples to justify the grade awarded." + ) + }, + "Lexical Resource": { + "grade": 0.0, + "comment": "extensive comment about lexical resource, use examples to justify the grade awarded." + }, + "Grammatical Range and Accuracy": { + "grade": 0.0, + "comment": ( + "extensive comment about grammatical range and accuracy, use examples to justify the " + "grade awarded." + ) + }, + "Pronunciation": { + "grade": 0.0, + "comment": ( + "extensive comment about pronunciation on the transcribed answer, use examples to justify the " + "grade awarded." 
+ ) + } + } + } \ No newline at end of file diff --git a/app/services/impl/third_parties/openai.py b/app/services/impl/third_parties/openai.py index 3c7eed2..e049d93 100644 --- a/app/services/impl/third_parties/openai.py +++ b/app/services/impl/third_parties/openai.py @@ -1,13 +1,16 @@ import json import re import logging -from typing import List, Optional +from typing import List, Optional, Callable, TypeVar from openai import AsyncOpenAI from openai.types.chat import ChatCompletionMessageParam from app.services.abc import ILLMService from app.helpers import count_tokens from app.configs.constants import BLACKLISTED_WORDS +from pydantic import BaseModel + +T = TypeVar('T', bound=BaseModel) class OpenAI(ILLMService): @@ -18,6 +21,7 @@ class OpenAI(ILLMService): def __init__(self, client: AsyncOpenAI): self._client = client self._logger = logging.getLogger(__name__) + self._default_model = "gpt-4o-2024-08-06" async def prediction( self, @@ -94,4 +98,53 @@ class OpenAI(ILLMService): @staticmethod def _check_fields(obj, fields): - return all(field in obj for field in fields) \ No newline at end of file + return all(field in obj for field in fields) + + async def pydantic_prediction( + self, + messages: List[ChatCompletionMessageParam], + map_to_model: Callable, + json_scheme: str, + *, + model: Optional[str] = None, + temperature: Optional[float] = None, + max_retries: int = 3 + ) -> List[T] | T | None: + params = { + "messages": messages, + "response_format": {"type": "json_object"}, + "model": model if model else self._default_model + } + + if temperature: + params["temperature"] = temperature + + attempt = 0 + while attempt < max_retries: + result = await self._client.chat.completions.create(**params) + result_content = result.choices[0].message.content + try: + result_json = json.loads(result_content) + return map_to_model(result_json) + except Exception as e: + attempt += 1 + self._logger.info(f"GPT returned malformed response: {result_content}\n {str(e)}") + 
params["messages"] = [ + { + "role": "user", + "content": ( + "Your previous response wasn't in the json format I've explicitly told you to output. " + f"In your next response, you will fix it and return me just the json I've asked." + ) + }, + { + "role": "user", + "content": ( + f"Previous response: {result_content}\n" + f"JSON format: {json_scheme}" + ) + } + ] + if attempt >= max_retries: + self._logger.error(f"Max retries exceeded!") + return None diff --git a/app/services/impl/training.py b/app/services/impl/training.py deleted file mode 100644 index d74abcf..0000000 --- a/app/services/impl/training.py +++ /dev/null @@ -1,68 +0,0 @@ -import re -from functools import reduce - -from app.configs.constants import TemperatureSettings, GPTModels -from app.helpers import count_tokens -from app.services.abc import ILLMService, ITrainingService - - -class TrainingService(ITrainingService): - - def __init__(self, llm: ILLMService): - self._llm = llm - - async def fetch_tips(self, context: str, question: str, answer: str, correct_answer: str): - messages = self._get_question_tips(question, answer, correct_answer, context) - - token_count = reduce(lambda count, item: count + count_tokens(item)['n_tokens'], - map(lambda x: x["content"], filter(lambda x: "content" in x, messages)), 0) - - response = await self._llm.prediction( - GPTModels.GPT_3_5_TURBO, - messages, - None, - TemperatureSettings.TIPS_TEMPERATURE, - token_count=token_count - ) - - if isinstance(response, str): - response = re.sub(r"^[a-zA-Z0-9_]+\:\s*", "", response) - - return response - - @staticmethod - def _get_question_tips(question: str, answer: str, correct_answer: str, context: str = None): - messages = [ - { - "role": "user", - "content": ( - "You are a IELTS exam program that analyzes incorrect answers to questions and gives tips to " - "help students understand why it was a wrong answer and gives helpful insight for the future. " - "The tip should refer to the context and question." 
- ), - } - ] - - if not (context is None or context == ""): - messages.append({ - "role": "user", - "content": f"This is the context for the question: {context}", - }) - - messages.extend([ - { - "role": "user", - "content": f"This is the question: {question}", - }, - { - "role": "user", - "content": f"This is the answer: {answer}", - }, - { - "role": "user", - "content": f"This is the correct answer: {correct_answer}", - } - ]) - - return messages - diff --git a/app/services/impl/training/__init__.py b/app/services/impl/training/__init__.py new file mode 100644 index 0000000..8ea231b --- /dev/null +++ b/app/services/impl/training/__init__.py @@ -0,0 +1,7 @@ +from .training import TrainingService +from .kb import TrainingContentKnowledgeBase + +__all__ = [ + "TrainingService", + "TrainingContentKnowledgeBase" +] diff --git a/app/services/impl/training/kb.py b/app/services/impl/training/kb.py new file mode 100644 index 0000000..dce316e --- /dev/null +++ b/app/services/impl/training/kb.py @@ -0,0 +1,88 @@ +import json +import os +from logging import getLogger +from typing import Dict, List + +import faiss +import pickle + +from app.services.abc import IKnowledgeBase + + +class TrainingContentKnowledgeBase(IKnowledgeBase): + + def __init__(self, embeddings, path: str = 'pathways_2_rw_with_ids.json'): + self._embedding_model = embeddings + self._tips = None # self._read_json(path) + self._category_metadata = None + self._indices = None + self.load_indices_and_metadata() + self._logger = getLogger(__name__) + + @staticmethod + def _read_json(path: str) -> Dict[str, any]: + with open(path, 'r', encoding="utf-8") as json_file: + return json.loads(json_file.read()) + + def print_category_count(self): + category_tips = {} + for unit in self._tips['units']: + for page in unit['pages']: + for tip in page['tips']: + category = tip['category'].lower().replace(" ", "_") + if category not in category_tips: + category_tips[category] = 0 + else: + category_tips[category] = 
category_tips[category] + 1 + print(category_tips) + + def create_embeddings_and_save_them(self) -> None: + category_embeddings = {} + category_metadata = {} + + for unit in self._tips['units']: + for page in unit['pages']: + for tip in page['tips']: + category = tip['category'].lower().replace(" ", "_") + if category not in category_embeddings: + category_embeddings[category] = [] + category_metadata[category] = [] + + category_embeddings[category].append(tip['embedding']) + category_metadata[category].append({"id": tip['id'], "text": tip['text']}) + + category_indices = {} + for category, embeddings in category_embeddings.items(): + embeddings_array = self._embedding_model.encode(embeddings) + index = faiss.IndexFlatL2(embeddings_array.shape[1]) + index.add(embeddings_array) + category_indices[category] = index + + faiss.write_index(index, f"./faiss/{category}_tips_index.faiss") + + with open("./faiss/tips_metadata.pkl", "wb") as f: + pickle.dump(category_metadata, f) + + def load_indices_and_metadata( + self, + directory: str = './faiss', + suffix: str = '_tips_index.faiss', + metadata_path: str = './faiss/tips_metadata.pkl' + ): + files = os.listdir(directory) + self._indices = {} + for file in files: + if file.endswith(suffix): + self._indices[file[:-len(suffix)]] = faiss.read_index(f'{directory}/{file}') + self._logger.info(f'Loaded embeddings for {file[:-len(suffix)]} category.') + + with open(metadata_path, 'rb') as f: + self._category_metadata = pickle.load(f) + self._logger.info("Loaded tips metadata") + + def query_knowledge_base(self, query: str, category: str, top_k: int = 5) -> List[Dict[str, str]]: + query_embedding = self._embedding_model.encode([query]) + index = self._indices[category] + D, I = index.search(query_embedding, top_k) + results = [self._category_metadata[category][i] for i in I[0]] + return results diff --git a/app/services/impl/training/training.py b/app/services/impl/training/training.py new file mode 100644 index 0000000..53f897b 
--- /dev/null +++ b/app/services/impl/training/training.py @@ -0,0 +1,459 @@ +import re +from datetime import datetime +from functools import reduce +from logging import getLogger + +from typing import Dict, List + +from app.configs.constants import TemperatureSettings, GPTModels +from app.helpers import count_tokens +from app.repositories.abc import IDocumentStore +from app.services.abc import ILLMService, ITrainingService, IKnowledgeBase +from app.dtos.training import * + + +class TrainingService(ITrainingService): + TOOLS = [ + 'critical_thinking', + 'language_for_writing', + 'reading_skills', + 'strategy', + 'words', + 'writing_skills' + ] + # strategy word_link ct_focus reading_skill word_partners writing_skill language_for_writing + + def __init__(self, llm: ILLMService, firestore: IDocumentStore, training_kb: IKnowledgeBase): + self._llm = llm + self._db = firestore + self._kb = training_kb + self._logger = getLogger(__name__) + + async def fetch_tips(self, context: str, question: str, answer: str, correct_answer: str): + messages = self._get_question_tips(question, answer, correct_answer, context) + + token_count = reduce(lambda count, item: count + count_tokens(item)['n_tokens'], + map(lambda x: x["content"], filter(lambda x: "content" in x, messages)), 0) + + response = await self._llm.prediction( + GPTModels.GPT_3_5_TURBO, + messages, + None, + TemperatureSettings.TIPS_TEMPERATURE, + token_count=token_count + ) + + if isinstance(response, str): + response = re.sub(r"^[a-zA-Z0-9_]+\:\s*", "", response) + + return response + + @staticmethod + def _get_question_tips(question: str, answer: str, correct_answer: str, context: str = None): + messages = [ + { + "role": "user", + "content": ( + "You are a IELTS exam program that analyzes incorrect answers to questions and gives tips to " + "help students understand why it was a wrong answer and gives helpful insight for the future. " + "The tip should refer to the context and question." 
+ ), + } + ] + + if not (context is None or context == ""): + messages.append({ + "role": "user", + "content": f"This is the context for the question: {context}", + }) + + messages.extend([ + { + "role": "user", + "content": f"This is the question: {question}", + }, + { + "role": "user", + "content": f"This is the answer: {answer}", + }, + { + "role": "user", + "content": f"This is the correct answer: {correct_answer}", + } + ]) + + return messages + + async def get_training_content(self, training_content: Dict) -> Dict: + user, stats = training_content["userID"], training_content["stats"] + exam_data, exam_map = await self._sort_out_solutions(stats) + training_content = await self._get_exam_details_and_tips(exam_data) + tips = self._query_kb(training_content.queries) + usefull_tips = await self._get_usefull_tips(exam_data, tips) + exam_map = self._merge_exam_map_with_details(exam_map, training_content.details) + + weak_areas = {"weak_areas": []} + for area in training_content.weak_areas: + weak_areas["weak_areas"].append(area.dict()) + + training_doc = { + 'created_at': int(datetime.now().timestamp() * 1000), + **exam_map, + **usefull_tips.dict(), + **weak_areas, + "user": user + } + doc_id = await self._db.save_to_db('training', training_doc) + return { + "id": doc_id + } + + @staticmethod + def _merge_exam_map_with_details(exam_map: Dict[str, any], details: List[DetailsDTO]): + new_exam_map = {"exams": []} + for detail in details: + new_exam_map["exams"].append({ + "id": detail.exam_id, + "date": detail.date, + "performance_comment": detail.performance_comment, + "detailed_summary": detail.detailed_summary, + **exam_map[detail.exam_id] + }) + return new_exam_map + + def _query_kb(self, queries: List[QueryDTO]): + map_categories = { + "critical_thinking": "ct_focus", + "language_for_writing": "language_for_writing", + "reading_skills": "reading_skill", + "strategy": "strategy", + "writing_skills": "writing_skill" + } + + tips = {"tips": []} + for query in 
queries: + if query.category == "words": + tips["tips"].extend( + self._kb.query_knowledge_base(query.text, "word_link") + ) + tips["tips"].extend( + self._kb.query_knowledge_base(query.text, "word_partners") + ) + else: + if query.category in map_categories: + tips["tips"].extend( + self._kb.query_knowledge_base(query.text, map_categories[query.category]) + ) + else: + self._logger.info(f"GTP tried to query knowledge base for {query.category} and it doesn't exist.") + return tips + + async def _get_exam_details_and_tips(self, exam_data: Dict[str, any]) -> TrainingContentDTO: + json_schema = ( + '{ "details": [{"exam_id": "", "date": 0, "performance_comment": "", "detailed_summary": ""}],' + ' "weak_areas": [{"area": "", "comment": ""}], "queries": [{"text": "", "category": ""}] }' + ) + messages = [ + { + "role": "user", + "content": ( + f"I'm going to provide you with exam data, you will take the exam data and fill this json " + f'schema : {json_schema}. "performance_comment" is a short sentence that describes the ' + 'students\'s performance and main mistakes in a single exam, "detailed_summary" is a detailed ' + 'summary of the student\'s performance, "weak_areas" are identified areas' + ' across all exams which need to be improved upon, for example, area "Grammar and Syntax" comment "Issues' + ' with sentence structure and punctuation.", the "queries" field is where you will write queries ' + 'for tips that will be displayed to the student, the category attribute is a collection of ' + 'embeddings and the text will be the text used to query the knowledge base. The categories are ' + f'the following [{", ".join(self.TOOLS)}]. The exam data will be a json where the key of the field ' + '"exams" is the exam id, an exam can be composed of multiple modules or single modules. The student' + ' will see your response so refrain from using phrasing like "The student" did x, y and z. 
If the ' + 'field "answer" in a question is an empty array "[]", then the student didn\'t answer any question ' + 'and you must address that in your response. Also questions aren\'t modules, the only modules are: ' + 'level, speaking, writing, reading and listening. The details array needs to be tailored to the ' + 'exam attempt, even if you receive the same exam you must treat as different exams by their id.' + 'Don\'t make references to an exam by it\'s id, the GUI will handle that so the student knows ' + 'which is the exam your comments and summary are referencing too. Even if the student hasn\'t ' + 'submitted no answers for an exam, you must still fill the details structure addressing that fact.' + ) + }, + { + "role": "user", + "content": f'Exam Data: {str(exam_data)}' + } + ] + return await self._llm.pydantic_prediction(messages, self._map_gpt_response, json_schema) + + async def _get_usefull_tips(self, exam_data: Dict[str, any], tips: Dict[str, any]) -> TipsDTO: + json_schema = ( + '{ "tip_ids": [] }' + ) + messages = [ + { + "role": "user", + "content": ( + f"I'm going to provide you with tips and I want you to return to me the tips that " + f"can be usefull for the student that made the exam that I'm going to send you, return " + f"me the tip ids in this json format {json_schema}." 
+ ) + }, + { + "role": "user", + "content": f'Exam Data: {str(exam_data)}' + }, + { + "role": "user", + "content": f'Tips: {str(tips)}' + } + ] + return await self._llm.pydantic_prediction(messages, lambda response: TipsDTO(**response), json_schema) + + @staticmethod + def _map_gpt_response(response: Dict[str, any]) -> TrainingContentDTO: + parsed_response = { + "details": [DetailsDTO(**detail) for detail in response["details"]], + "weak_areas": [WeakAreaDTO(**area) for area in response["weak_areas"]], + "queries": [QueryDTO(**query) for query in response["queries"]] + } + return TrainingContentDTO(**parsed_response) + + async def _sort_out_solutions(self, stats): + grouped_stats = {} + for stat in stats: + session_key = f'{str(stat["date"])}-{stat["user"]}' + module = stat["module"] + exam_id = stat["exam"] + + if session_key not in grouped_stats: + grouped_stats[session_key] = {} + if module not in grouped_stats[session_key]: + grouped_stats[session_key][module] = { + "stats": [], + "exam_id": exam_id + } + grouped_stats[session_key][module]["stats"].append(stat) + + exercises = {} + exam_map = {} + for session_key, modules in grouped_stats.items(): + exercises[session_key] = {} + for module, module_stats in modules.items(): + exercises[session_key][module] = {} + + exam_id = module_stats["exam_id"] + if exam_id not in exercises[session_key][module]: + exercises[session_key][module][exam_id] = {"date": None, "exercises": []} + + exam_total_questions = 0 + exam_total_correct = 0 + + for stat in module_stats["stats"]: + exam_total_questions += stat["score"]["total"] + exam_total_correct += stat["score"]["correct"] + exercises[session_key][module][exam_id]["date"] = stat["date"] + + if session_key not in exam_map: + exam_map[session_key] = {"stat_ids": [], "score": 0} + exam_map[session_key]["stat_ids"].append(stat["id"]) + + exam = await self._db.get_doc_by_id(module, exam_id) + if module == "listening": + 
exercises[session_key][module][exam_id]["exercises"].extend( + self._get_listening_solutions(stat, exam)) + elif module == "reading": + exercises[session_key][module][exam_id]["exercises"].extend( + self._get_reading_solutions(stat, exam)) + elif module == "writing": + exercises[session_key][module][exam_id]["exercises"].extend( + self._get_writing_prompts_and_answers(stat, exam) + ) + elif module == "speaking": + exercises[session_key][module][exam_id]["exercises"].extend( + self._get_speaking_solutions(stat, exam) + ) + elif module == "level": + exercises[session_key][module][exam_id]["exercises"].extend( + self._get_level_solutions(stat, exam) + ) + + exam_map[session_key]["score"] = round((exam_total_correct / exam_total_questions) * 100) + exam_map[session_key]["module"] = module + + return {"exams": exercises}, exam_map + + def _get_writing_prompts_and_answers(self, stat, exam): + result = [] + try: + exercises = [] + for solution in stat['solutions']: + answer = solution['solution'] + exercise_id = solution['id'] + exercises.append({ + "exercise_id": exercise_id, + "answer": answer + }) + for exercise in exercises: + for exam_exercise in exam["exercises"]: + if exam_exercise["id"] == exercise["exercise_id"]: + result.append({ + "exercise": exam_exercise["prompt"], + "answer": exercise["answer"] + }) + + except KeyError as e: + self._logger.warning(f"Malformed stat object: {str(e)}") + + return result + + @staticmethod + def _get_mc_question(exercise, stat): + shuffle_maps = stat.get("shuffleMaps", []) + answer = stat["solutions"] if len(shuffle_maps) == 0 else [] + if len(shuffle_maps) != 0: + for solution in stat["solutions"]: + shuffle_map = [ + item["map"] for item in shuffle_maps + if item["questionID"] == solution["question"] + ] + answer.append({ + "question": solution["question"], + "option": shuffle_map[solution["option"]] + }) + return { + "question": exercise["prompt"], + "exercise": exercise["questions"], + "answer": stat["solutions"] + } + + 
@staticmethod + def _swap_key_name(d, original_key, new_key): + d[new_key] = d.pop(original_key) + return d + + def _get_level_solutions(self, stat, exam): + result = [] + try: + for part in exam["parts"]: + for exercise in part["exercises"]: + if exercise["id"] == stat["exercise"]: + if stat["type"] == "fillBlanks": + result.append({ + "prompt": exercise["prompt"], + "template": exercise["text"], + "words": exercise["words"], + "solutions": exercise["solutions"], + "answer": [ + self._swap_key_name(item, 'solution', 'option') + for item in stat["solutions"] + ] + }) + elif stat["type"] == "multipleChoice": + result.append(self._get_mc_question(exercise, stat)) + except KeyError as e: + self._logger.warning(f"Malformed stat object: {str(e)}") + return result + + def _get_listening_solutions(self, stat, exam): + result = [] + try: + for part in exam["parts"]: + for exercise in part["exercises"]: + if exercise["id"] == stat["exercise"]: + if stat["type"] == "writeBlanks": + result.append({ + "question": exercise["prompt"], + "template": exercise["text"], + "solution": exercise["solutions"], + "answer": stat["solutions"] + }) + elif stat["type"] == "fillBlanks": + result.append({ + "question": exercise["prompt"], + "template": exercise["text"], + "words": exercise["words"], + "solutions": exercise["solutions"], + "answer": stat["solutions"] + }) + elif stat["type"] == "multipleChoice": + result.append(self._get_mc_question(exercise, stat)) + + except KeyError as e: + self._logger.warning(f"Malformed stat object: {str(e)}") + return result + + @staticmethod + def _find_shuffle_map(shuffle_maps, question_id): + return next((item["map"] for item in shuffle_maps if item["questionID"] == question_id), None) + + def _get_speaking_solutions(self, stat, exam): + result = {} + try: + result = { + "comments": { + key: value['comment'] for key, value in stat['solutions'][0]['evaluation']['task_response'].items()} + , + "exercises": {} + } + + for exercise in exam["exercises"]: + 
if exercise["id"] == stat["exercise"]: + if stat["type"] == "interactiveSpeaking": + for i in range(len(exercise["prompts"])): + result["exercises"][f"exercise_{i+1}"] = { + "question": exercise["prompts"][i]["text"] + } + for i in range(len(exercise["prompts"])): + answer = stat['solutions'][0]["evaluation"].get(f'transcript_{i+1}', '') + result["exercises"][f"exercise_{i+1}"]["answer"] = answer + elif stat["type"] == "speaking": + result["exercises"]["exercise_1"] = { + "question": exercise["text"], + "answer": stat['solutions'][0]["evaluation"].get(f'transcript', '') + } + except KeyError as e: + self._logger.warning(f"Malformed stat object: {str(e)}") + return [result] + + def _get_reading_solutions(self, stat, exam): + result = [] + try: + for part in exam["parts"]: + text = part["text"] + for exercise in part["exercises"]: + if exercise["id"] == stat["exercise"]: + if stat["type"] == "fillBlanks": + result.append({ + "text": text, + "question": exercise["prompt"], + "template": exercise["text"], + "words": exercise["words"], + "solutions": exercise["solutions"], + "answer": stat["solutions"] + }) + elif stat["type"] == "writeBlanks": + result.append({ + "text": text, + "question": exercise["prompt"], + "template": exercise["text"], + "solutions": exercise["solutions"], + "answer": stat["solutions"] + }) + elif stat["type"] == "trueFalse": + result.append({ + "text": text, + "questions": exercise["questions"], + "answer": stat["solutions"] + }) + elif stat["type"] == "matchSentences": + result.append({ + "text": text, + "question": exercise["prompt"], + "sentences": exercise["sentences"], + "options": exercise["options"], + "answer": stat["solutions"] + }) + except KeyError as e: + self._logger.warning(f"Malformed stat object: {str(e)}") + return result + + diff --git a/app/services/impl/writing.py b/app/services/impl/writing.py index 3425cd3..9bf19ff 100644 --- a/app/services/impl/writing.py +++ b/app/services/impl/writing.py @@ -1,5 +1,7 @@ +from typing 
import List, Dict + from app.services.abc import IWritingService, ILLMService, IAIDetectorService -from app.configs.constants import GPTModels, TemperatureSettings +from app.configs.constants import GPTModels, TemperatureSettings, FieldsAndExercises from app.helpers import TextHelper, ExercisesHelper @@ -17,10 +19,7 @@ class WritingService(IWritingService): 'You are a helpful assistant designed to output JSON on this format: {"prompt": "prompt content"}' ) }, - { - "role": "user", - "content": self._get_writing_prompt(task, topic, difficulty) - } + *self._get_writing_messages(task, topic, difficulty) ] llm_model = GPTModels.GPT_3_5_TURBO if task == 1 else GPTModels.GPT_4_O @@ -32,15 +31,18 @@ class WritingService(IWritingService): TemperatureSettings.GEN_QUESTION_TEMPERATURE ) + question = response["prompt"].strip() + return { - "question": response["prompt"].strip(), + "question": self._add_newline_before_hyphen(question) if task == 1 else question, "difficulty": difficulty, "topic": topic } @staticmethod - def _get_writing_prompt(task: int, topic: str, difficulty: str): - return ( + def _get_writing_messages(task: int, topic: str, difficulty: str) -> List[Dict]: + # TODO: Should the muslim disclaimer be added to task 2? + task_prompt = ( 'Craft a prompt for an IELTS Writing Task 1 General Training exercise that instructs the ' 'student to compose a letter. The prompt should present a specific scenario or situation, ' f'based on the topic of "{topic}", requiring the student to provide information, ' @@ -52,32 +54,41 @@ class WritingService(IWritingService): f'analysis of contrasting perspectives on the topic of "{topic}".' ) + task_instructions = ( + 'The prompt should end with "In the letter you should" followed by 3 bullet points of what ' + 'the answer should include.' + ) if task == 1 else ( + 'The question should lead to an answer with either "theories", "complicated information" or ' + 'be "very descriptive" on the topic.' 
+ ) + + messages = [ + { + "role": "user", + "content": task_prompt + }, + { + "role": "user", + "content": task_instructions + } + ] + + return messages + async def grade_writing_task(self, task: int, question: str, answer: str): bare_minimum = 100 if task == 1 else 180 - minimum = 150 if task == 1 else 250 - - # TODO: left as is, don't know if this is intended or not - llm_model = GPTModels.GPT_3_5_TURBO if task == 1 else GPTModels.GPT_4_O - temperature = ( - TemperatureSettings.GRADING_TEMPERATURE - if task == 1 else - TemperatureSettings.GEN_QUESTION_TEMPERATURE - ) if not TextHelper.has_words(answer): return self._zero_rating("The answer does not contain enough english words.") elif not TextHelper.has_x_words(answer, bare_minimum): return self._zero_rating("The answer is insufficient and too small to be graded.") else: + template = self._get_writing_template() messages = [ { "role": "system", "content": ( - 'You are a helpful assistant designed to output JSON on this format: ' - '{"perfect_answer": "example perfect answer", "comment": ' - '"comment about answer quality", "overall": 0.0, "task_response": ' - '{"Task Achievement": 0.0, "Coherence and Cohesion": 0.0, ' - '"Lexical Resource": 0.0, "Grammatical Range and Accuracy": 0.0 }' + f'You are a helpful assistant designed to output JSON on this format: {template}' ) }, { @@ -86,16 +97,28 @@ class WritingService(IWritingService): f'Evaluate the given Writing Task {task} response based on the IELTS grading system, ' 'ensuring a strict assessment that penalizes errors. Deduct points for deviations ' 'from the task, and assign a score of 0 if the response fails to address the question. ' - f'Additionally, provide an exemplary answer with a minimum of {minimum} words, along with a ' - 'detailed commentary highlighting both strengths and weaknesses in the response. ' + 'Additionally, provide a detailed commentary highlighting both strengths and ' + 'weaknesses in the response. 
' f'\n Question: "{question}" \n Answer: "{answer}"') - }, - { - "role": "user", - "content": f'The perfect answer must have at least {minimum} words.' } ] + if task == 1: + messages.append({ + "role": "user", + "content": ( + 'Refer to the parts of the letter as: "Greeting Opener", "bullet 1", "bullet 2", ' + '"bullet 3", "closer (restate the purpose of the letter)", "closing greeting"' + ) + }) + + llm_model = GPTModels.GPT_3_5_TURBO if task == 1 else GPTModels.GPT_4_O + temperature = ( + TemperatureSettings.GRADING_TEMPERATURE + if task == 1 else + TemperatureSettings.GEN_QUESTION_TEMPERATURE + ) + response = await self._llm.prediction( llm_model, messages, @@ -103,6 +126,10 @@ class WritingService(IWritingService): temperature ) + perfect_answer_minimum = 150 if task == 1 else 250 + perfect_answer = await self._get_perfect_answer(question, perfect_answer_minimum) + + response["perfect_answer"] = perfect_answer["perfect_answer"] response["overall"] = ExercisesHelper.fix_writing_overall(response["overall"], response["task_response"]) response['fixed_text'] = await self._get_fixed_text(answer) @@ -114,13 +141,20 @@ class WritingService(IWritingService): async def _get_fixed_text(self, text): messages = [ - {"role": "system", "content": ('You are a helpful assistant designed to output JSON on this format: ' - '{"fixed_text": "fixed test with no misspelling errors"}') - }, - {"role": "user", "content": ( + { + "role": "system", + "content": ( + 'You are a helpful assistant designed to output JSON on this format: ' + '{"fixed_text": "fixed test with no misspelling errors"}' + ) + }, + { + "role": "user", + "content": ( 'Fix the errors in the given text and put it in a JSON. ' - f'Do not complete the answer, only replace what is wrong. \n The text: "{text}"') - } + f'Do not complete the answer, only replace what is wrong. 
\n The text: "{text}"' + ) + } ] response = await self._llm.prediction( @@ -132,16 +166,83 @@ class WritingService(IWritingService): ) return response["fixed_text"] + async def _get_perfect_answer(self, question: str, size: int) -> Dict: + messages = [ + { + "role": "system", + "content": ( + 'You are a helpful assistant designed to output JSON on this format: ' + '{"perfect_answer": "perfect answer for the question"}' + ) + }, + { + "role": "user", + "content": f'Write a perfect answer for this writing exercise of a IELTS exam. Question: {question}' + + }, + { + "role": "user", + "content": f'The answer must have at least {size} words' + } + ] + return await self._llm.prediction( + GPTModels.GPT_4_O, + messages, + ["perfect_answer"], + TemperatureSettings.GEN_QUESTION_TEMPERATURE + ) + @staticmethod def _zero_rating(comment: str): return { 'comment': comment, 'overall': 0, 'task_response': { - 'Coherence and Cohesion': 0, - 'Grammatical Range and Accuracy': 0, - 'Lexical Resource': 0, - 'Task Achievement': 0 + 'Task Achievement': { + "grade": 0.0, + "comment": "" + }, + 'Coherence and Cohesion': { + "grade": 0.0, + "comment": "" + }, + 'Lexical Resource': { + "grade": 0.0, + "comment": "" + }, + 'Grammatical Range and Accuracy': { + "grade": 0.0, + "comment": "" + } } } + @staticmethod + def _get_writing_template(): + return { + "comment": "comment about student's response quality", + "overall": 0.0, + "task_response": { + "Task Achievement": { + "grade": 0.0, + "comment": "comment about Task Achievement of the student's response" + }, + "Coherence and Cohesion": { + "grade": 0.0, + "comment": "comment about Coherence and Cohesion of the student's response" + }, + "Lexical Resource": { + "grade": 0.0, + "comment": "comment about Lexical Resource of the student's response" + }, + "Grammatical Range and Accuracy": { + "grade": 0.0, + "comment": "comment about Grammatical Range and Accuracy of the student's response" + } + } + } + + @staticmethod + def 
_add_newline_before_hyphen(s): + return s.replace(" -", "\n-") + diff --git a/app/utils/__init__.py b/app/utils/__init__.py new file mode 100644 index 0000000..f366ec6 --- /dev/null +++ b/app/utils/__init__.py @@ -0,0 +1,5 @@ +from .handle_exception import handle_exception + +__all__ = [ + "handle_exception" +] diff --git a/app/utils/handle_exception.py b/app/utils/handle_exception.py new file mode 100644 index 0000000..1c340d6 --- /dev/null +++ b/app/utils/handle_exception.py @@ -0,0 +1,15 @@ +import functools +from typing import Callable, Any +from fastapi import Response + + +def handle_exception(status_code: int = 500): + def decorator(func: Callable) -> Callable: + @functools.wraps(func) + async def wrapper(*args: Any, **kwargs: Any) -> Any: + try: + return await func(*args, **kwargs) + except Exception as e: + return Response(content=str(e), status_code=status_code) + return wrapper + return decorator diff --git a/pyproject.toml b/pyproject.toml index 25243aa..bc5026a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [tool.poetry] -name = "encoach-be" +name = "ielts-be" version = "0.1.0" description = "" authors = ["Ecrop Devteam "] @@ -17,6 +17,14 @@ firebase-admin = "^6.5.0" wonderwords = "^2.2.0" dependency-injector = "^4.41.0" openai = "^1.37.0" +python-multipart = "0.0.9" +faiss-cpu = "1.8.0.post1" +pypandoc = "1.13" +pdfplumber = "0.11.3" +numpy = "1.26.4" +pillow = "10.4.0" +sentence-transformers = "3.0.1" +openai-whisper = "20231117" [build-system] diff --git a/tmp/placeholder.txt b/tmp/placeholder.txt new file mode 100644 index 0000000..f89d219 --- /dev/null +++ b/tmp/placeholder.txt @@ -0,0 +1 @@ +THIS FILE ONLY EXISTS TO KEEP THIS FOLDER IN THE REPO \ No newline at end of file