diff --git a/app.py b/app.py index 2fe2384..c3cd735 100644 --- a/app.py +++ b/app.py @@ -419,72 +419,56 @@ def get_writing_task_2_general_question(): def grade_speaking_task_1(): request_id = uuid.uuid4() delete_files_older_than_one_day(AUDIO_FILES_PATH) - sound_file_name = AUDIO_FILES_PATH + str(uuid.uuid4()) logging.info("POST - speaking_task_1 - Received request to grade speaking task 1. " "Use this id to track the logs: " + str(request_id) + " - Request data: " + str(request.get_json())) try: data = request.get_json() - question = data.get('question') - answer_firebase_path = data.get('answer') - - logging.info("POST - speaking_task_1 - " + str(request_id) + " - Downloading file " + answer_firebase_path) - download_firebase_file(FIREBASE_BUCKET, answer_firebase_path, sound_file_name) + answers = data.get('answers') + text_answers = [] + perfect_answers = [] logging.info("POST - speaking_task_1 - " + str( - request_id) + " - Downloaded file " + answer_firebase_path + " to " + sound_file_name) + request_id) + " - Received " + str(len(answers)) + " total answers.") - answer = speech_to_text(sound_file_name) - logging.info("POST - speaking_task_1 - " + str(request_id) + " - Transcripted answer: " + answer) + for item in answers: + sound_file_name = AUDIO_FILES_PATH + str(uuid.uuid4()) - json_format = { - "comment": "extensive comment about answer quality", - "overall": 0.0, - "task_response": { - "Fluency and Coherence": { - "grade": 0.0, - "comment": "extensive comment about fluency and coherence, use examples to justify the grade awarded." - }, - "Lexical Resource": { - "grade": 0.0, - "comment": "extensive comment about lexical resource, use examples to justify the grade awarded." - }, - "Grammatical Range and Accuracy": { - "grade": 0.0, - "comment": "extensive comment about grammatical range and accuracy, use examples to justify the grade awarded." - }, - "Pronunciation": { - "grade": 0.0, - "comment": "extensive comment about pronunciation on the transcribed answer, use examples to justify the grade awarded." + logging.info("POST - speaking_task_1 - " + str(request_id) + " - Downloading file " + item["answer"]) + download_firebase_file(FIREBASE_BUCKET, item["answer"], sound_file_name) + logging.info("POST - speaking_task_1 - " + str( + request_id) + " - Downloaded file " + item["answer"] + " to " + sound_file_name) + + answer_text = speech_to_text(sound_file_name) + logging.info("POST - speaking_task_1 - " + str(request_id) + " - Transcripted answer: " + answer_text) + + text_answers.append(answer_text) + item["answer"] = answer_text + os.remove(sound_file_name) + + if not has_x_words(answer_text, 20): + logging.info("POST - speaking_task_1 - " + str( + request_id) + " - The answer had less words than threshold 20 to be graded. Answer: " + answer_text) + return { + "comment": "The audio recorded does not contain enough english words to be graded.", + "overall": 0, + "task_response": { + "Fluency and Coherence": { + "grade": 0.0, + "comment": "" + }, + "Lexical Resource": { + "grade": 0.0, + "comment": "" + }, + "Grammatical Range and Accuracy": { + "grade": 0.0, + "comment": "" + }, + "Pronunciation": { + "grade": 0.0, + "comment": "" + } + } } - } - } - - if has_x_words(answer, 20): - messages = [ - { - "role": "system", - "content": ( - 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format)) - }, - { - "role": "user", - "content": ( - 'Evaluate the given Speaking Part 1 response based on the IELTS grading system, ensuring a ' - 'strict assessment that penalizes errors. Deduct points for deviations from the task, and ' - 'assign a score of 0 if the response fails to address the question. Additionally, provide ' - 'detailed commentary highlighting both strengths and weaknesses in the response.' - '\n Question: "' + question + '" \n Answer: "' + answer + '"') - }, - { - "role": "user", - "content": 'Address the student as "you"' - } - ] - token_count = count_total_tokens(messages) - - logging.info("POST - speaking_task_1 - " + str(request_id) + " - Requesting grading of the answer.") - response = make_openai_call(GPT_3_5_TURBO, messages, token_count, ["comment"], - GRADING_TEMPERATURE) - logging.info("POST - speaking_task_1 - " + str(request_id) + " - Answer graded: " + str(response)) perfect_answer_messages = [ { @@ -496,61 +480,111 @@ def grade_speaking_task_1(): "role": "user", "content": ( 'Provide a perfect answer according to ielts grading system to the following ' - 'Speaking Part 1 question: "' + question + '"') + 'Speaking Part 1 question: "' + item["question"] + '"') + }, + { + "role": "user", + "content": 'The answer must be 2 or 3 sentences long.' } ] + token_count = count_total_tokens(perfect_answer_messages) - - logging.info("POST - speaking_task_1 - " + str(request_id) + " - Requesting perfect answer.") - response['perfect_answer'] = make_openai_call(GPT_3_5_TURBO, - perfect_answer_messages, - token_count, - ["answer"], - GEN_QUESTION_TEMPERATURE)["answer"] logging.info("POST - speaking_task_1 - " + str( - request_id) + " - Perfect answer: " + response['perfect_answer']) + request_id) + " - Requesting perfect answer for question: " + item["question"]) + perfect_answers.append(make_openai_call(GPT_4_O, + perfect_answer_messages, + token_count, + ["answer"], + GEN_QUESTION_TEMPERATURE)) - response['transcript'] = answer - - logging.info("POST - speaking_task_1 - " + str(request_id) + " - Requesting fixed text.") - response['fixed_text'] = get_speaking_corrections(answer) - logging.info("POST - speaking_task_1 - " + str(request_id) + " - Fixed text: " + response['fixed_text']) - - if response["overall"] == "0.0" or response["overall"] == 0.0: - response["overall"] = round((response["task_response"]["Fluency and Coherence"] + - response["task_response"]["Lexical Resource"] + response["task_response"][ - "Grammatical Range and Accuracy"] + response["task_response"][ - "Pronunciation"]) / 4, 1) - - logging.info("POST - speaking_task_1 - " + str(request_id) + " - Final response: " + str(response)) - return response - else: - logging.info("POST - speaking_task_1 - " + str( - request_id) + " - The answer had less words than threshold 20 to be graded. Answer: " + answer) - return { - "comment": "The audio recorded does not contain enough english words to be graded.", - "overall": 0, - "task_response": { - "Fluency and Coherence": { - "grade": 0.0, - "comment": "" - }, - "Lexical Resource": { - "grade": 0.0, - "comment": "" - }, - "Grammatical Range and Accuracy": { - "grade": 0.0, - "comment": "" - }, - "Pronunciation": { - "grade": 0.0, - "comment": "" - } + json_format = { + "comment": "comment about answers quality", + "overall": 0.0, + "task_response": { + "Fluency and Coherence": { + "grade": 0.0, + "comment": "comment about fluency and coherence" + }, + "Lexical Resource": { + "grade": 0.0, + "comment": "comment about lexical resource" + }, + "Grammatical Range and Accuracy": { + "grade": 0.0, + "comment": "comment about grammatical range and accuracy" + }, + "Pronunciation": { + "grade": 0.0, + "comment": "comment about pronunciation on the transcribed answers" } } + } + + logging.info("POST - speaking_task_1 - " + str(request_id) + " - Formatting answers and questions for prompt.") + formatted_text = "" + for i, entry in enumerate(answers, start=1): + formatted_text += f"**Question {i}:**\n{entry['question']}\n\n" + formatted_text += f"**Answer {i}:**\n{entry['answer']}\n\n" + logging.info("POST - speaking_task_1 - " + str( + request_id) + " - Formatted answers and questions for prompt: " + formatted_text) + + grade_message = ( + 'Evaluate the given Speaking Part 1 response based on the IELTS grading system, ensuring a ' + 'strict assessment that penalizes errors. Deduct points for deviations from the task, and ' + 'assign a score of 0 if the response fails to address the question. Additionally, provide ' + 'detailed commentary highlighting both strengths and weaknesses in the response.' + "\n\n The questions and answers are: \n\n'" + formatted_text) + + messages = [ + { + "role": "system", + "content": ( + 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format)) + }, + { + "role": "user", + "content": grade_message + }, + { + "role": "user", + "content": 'Address the student as "you". If the answers are not 2 or 3 sentences long, warn the ' + 'student that they should be.' + }, + { + "role": "user", + "content": 'For pronunciations act as if you heard the answers and they were transcripted as you heard them.' + }, + { + "role": "user", + "content": 'The comments must be long, detailed, justify the grading and suggest improvements.' + } + ] + token_count = count_total_tokens(messages) + + logging.info("POST - speaking_task_1 - " + str(request_id) + " - Requesting grading of the answer.") + response = make_openai_call(GPT_4_O, messages, token_count, ["comment"], + GRADING_TEMPERATURE) + logging.info("POST - speaking_task_1 - " + str(request_id) + " - Answers graded: " + str(response)) + + logging.info("POST - speaking_task_1 - " + str(request_id) + " - Adding perfect answers to response.") + for i, answer in enumerate(perfect_answers, start=1): + response['perfect_answer_' + str(i)] = answer + + logging.info("POST - speaking_task_1 - " + str( + request_id) + " - Adding transcript and fixed texts to response.") + for i, answer in enumerate(text_answers, start=1): + response['transcript_' + str(i)] = answer + response['fixed_text_' + str(i)] = get_speaking_corrections(answer) + + if response["overall"] == "0.0" or response["overall"] == 0.0: + response["overall"] = round((response["task_response"]["Fluency and Coherence"] + + response["task_response"]["Lexical Resource"] + response["task_response"][ + "Grammatical Range and Accuracy"] + response["task_response"][ + "Pronunciation"]) / 4, 1) + + logging.info("POST - speaking_task_1 - " + str(request_id) + " - Final response: " + str(response)) + return response except Exception as e: - os.remove(sound_file_name) return str(e), 400 @@ -558,37 +592,53 @@ def grade_speaking_task_1(): @jwt_required() def get_speaking_task_1_question(): difficulty = request.args.get("difficulty", default=random.choice(difficulties)) - topic = request.args.get("topic", default=random.choice(mti_topics)) + first_topic = request.args.get("first_topic", default=random.choice(mti_topics)) + second_topic = request.args.get("second_topic", default=random.choice(mti_topics)) + + json_format = { + "first_topic": "topic 1", + "second_topic": "topic 2", + "questions": [ + "Introductory question, should start with a greeting and introduce a question about the first topic.", + "Follow up question about the first topic", + "Follow up question about the first topic", + "Question about second topic", + "Follow up question about the second topic", + ] + } + try: messages = [ { "role": "system", "content": ( - 'You are a helpful assistant designed to output JSON on this format: ' - '{"topic": "topic", "question": "question"}') + 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format)) }, { "role": "user", "content": ( - 'Craft a thought-provoking question of ' + difficulty + ' difficulty for IELTS Speaking Part 1 ' - 'that encourages candidates to delve deeply into ' - 'personal experiences, preferences, or insights on the topic ' - 'of "' + topic + '". Instruct the candidate ' - 'to offer not only detailed ' - 'descriptions but also provide ' - 'nuanced explanations, examples, ' - 'or anecdotes to enrich their response. ' - 'Make sure that the generated question ' - 'does not contain forbidden subjects in ' - 'muslim countries.') + 'Craft 5 thought-provoking questions of ' + difficulty + ' difficulty for IELTS Speaking Part 1 ' + 'that encourages candidates to delve deeply into ' + 'personal experiences, preferences, or insights on the topic ' + 'of "' + first_topic + '" and the topic of "' + second_topic + '". Instruct the candidate ' + 'to offer not only detailed ' + 'descriptions but also provide ' + 'nuanced explanations, examples, ' + 'or anecdotes to enrich their response. ' + 'Make sure that the generated question ' + 'does not contain forbidden subjects in ' + 'muslim countries.') + }, + { + "role": "user", + "content": 'The questions should lead to the usage of 4 verb tenses (present perfect, present, past and future).' } ] token_count = count_total_tokens(messages) - response = make_openai_call(GPT_4_O, messages, token_count, ["topic"], + response = make_openai_call(GPT_4_O, messages, token_count, ["first_topic"], GEN_QUESTION_TEMPERATURE) response["type"] = 1 response["difficulty"] = difficulty - response["topic"] = topic return response except Exception as e: return str(e) @@ -751,16 +801,16 @@ def get_speaking_task_2_question(): "role": "user", "content": ( 'Create a question of ' + difficulty + ' difficulty for IELTS Speaking Part 2 ' - 'that encourages candidates to narrate a ' - 'personal experience or story related to the topic ' - 'of "' + topic + '". Include 3 prompts that ' - 'guide the candidate to describe ' - 'specific aspects of the experience, ' - 'such as details about the situation, ' - 'their actions, and the reasons it left a ' - 'lasting impression. Make sure that the ' - 'generated question does not contain ' - 'forbidden subjects in muslim countries.') + 'that encourages candidates to narrate a ' + 'personal experience or story related to the topic ' + 'of "' + topic + '". Include 3 prompts that ' + 'guide the candidate to describe ' + 'specific aspects of the experience, ' + 'such as details about the situation, ' + 'their actions, and the reasons it left a ' + 'lasting impression. Make sure that the ' + 'generated question does not contain ' + 'forbidden subjects in muslim countries.') } ] token_count = count_total_tokens(messages) @@ -884,6 +934,7 @@ def grade_speaking_task_3(): token_count, ["answer"], GEN_QUESTION_TEMPERATURE)) + json_format = { "comment": "extensive comment about answer quality", "overall": 0.0, @@ -907,20 +958,6 @@ def grade_speaking_task_3(): } } - messages = [ - { - "role": "system", - "content": ( - 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format)) - } - ] - message = ( - "Evaluate the given Speaking Part 3 response based on the IELTS grading system, ensuring a " - "strict assessment that penalizes errors. Deduct points for deviations from the task, and " - "assign a score of 0 if the response fails to address the question. Additionally, provide detailed " - "commentary highlighting both strengths and weaknesses in the response." - "\n\n The questions and answers are: \n\n'") - logging.info("POST - speaking_task_3 - " + str(request_id) + " - Formatting answers and questions for prompt.") formatted_text = "" for i, entry in enumerate(answers, start=1): @@ -929,17 +966,36 @@ def grade_speaking_task_3(): logging.info("POST - speaking_task_3 - " + str( request_id) + " - Formatted answers and questions for prompt: " + formatted_text) - message += formatted_text + grade_message = ( + "Evaluate the given Speaking Part 3 response based on the IELTS grading system, ensuring a " + "strict assessment that penalizes errors. Deduct points for deviations from the task, and " + "assign a score of 0 if the response fails to address the question. Additionally, provide detailed " + "commentary highlighting both strengths and weaknesses in the response." + "\n\n The questions and answers are: \n\n'") - messages.append({ - "role": "user", - "content": message - }) - - messages.append({ - "role": "user", - "content": 'Address the student as "you"' - }) + messages = [ + { + "role": "system", + "content": ( + 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format)) + }, + { + "role": "user", + "content": grade_message + }, + { + "role": "user", + "content": 'Address the student as "you".' + }, + { + "role": "user", + "content": 'For pronunciations act as if you heard the answers and they were transcripted as you heard them.' + }, + { + "role": "user", + "content": 'The comments must be long, detailed, justify the grading and suggest improvements.' + } + ] token_count = count_total_tokens(messages) diff --git a/helper/heygen_api.py b/helper/heygen_api.py index 149ed70..d0e2d8c 100644 --- a/helper/heygen_api.py +++ b/helper/heygen_api.py @@ -29,26 +29,32 @@ GET_HEADER = { def create_videos_and_save_to_db(exercises, template, id): + avatar = random.choice(list(AvatarEnum)) # Speaking 1 # Using list comprehension to find the element with the desired value in the 'type' field found_exercises_1 = [element for element in exercises if element.get('type') == 1] # Check if any elements were found if found_exercises_1: exercise_1 = found_exercises_1[0] + sp1_questions = [] app.app.logger.info('Creating video for speaking part 1') - sp1_result = create_video(exercise_1["question"], random.choice(list(AvatarEnum))) - if sp1_result is not None: - sound_file_path = VIDEO_FILES_PATH + sp1_result - firebase_file_path = FIREBASE_SPEAKING_VIDEO_FILES_PATH + sp1_result - url = upload_file_firebase_get_url(FIREBASE_BUCKET, firebase_file_path, sound_file_path) - sp1_video_path = firebase_file_path - sp1_video_url = url - template["exercises"][0]["text"] = exercise_1["question"] - template["exercises"][0]["title"] = exercise_1["topic"] - template["exercises"][0]["video_url"] = sp1_video_url - template["exercises"][0]["video_path"] = sp1_video_path - else: - app.app.logger.error("Failed to create video for part 1 question: " + exercise_1["question"]) + for question in exercise_1["questions"]: + sp1_result = create_video(question, avatar) + if sp1_result is not None: + sound_file_path = VIDEO_FILES_PATH + sp1_result + firebase_file_path = FIREBASE_SPEAKING_VIDEO_FILES_PATH + sp1_result + url = upload_file_firebase_get_url(FIREBASE_BUCKET, firebase_file_path, sound_file_path) + video = { + "text": question, + "video_path": firebase_file_path, + "video_url": url + } + sp1_questions.append(video) + else: + app.app.logger.error("Failed to create video for part 1 question: " + exercise_1["question"]) + template["exercises"][0]["prompts"] = sp1_questions + template["exercises"][0]["first_title"] = exercise_1["first_topic"] + template["exercises"][0]["second_title"] = exercise_1["second_topic"] # Speaking 2 # Using list comprehension to find the element with the desired value in the 'type' field @@ -57,7 +63,7 @@ def create_videos_and_save_to_db(exercises, template, id): if found_exercises_2: exercise_2 = found_exercises_2[0] app.app.logger.info('Creating video for speaking part 2') - sp2_result = create_video(exercise_2["question"], random.choice(list(AvatarEnum))) + sp2_result = create_video(exercise_2["question"], avatar) if sp2_result is not None: sound_file_path = VIDEO_FILES_PATH + sp2_result firebase_file_path = FIREBASE_SPEAKING_VIDEO_FILES_PATH + sp2_result @@ -79,7 +85,6 @@ def create_videos_and_save_to_db(exercises, template, id): if found_exercises_3: exercise_3 = found_exercises_3[0] sp3_questions = [] - avatar = random.choice(list(AvatarEnum)) app.app.logger.info('Creating videos for speaking part 3') for question in exercise_3["questions"]: result = create_video(question, avatar) diff --git a/helper/question_templates.py b/helper/question_templates.py index b065626..a6edfa8 100644 --- a/helper/question_templates.py +++ b/helper/question_templates.py @@ -1136,12 +1136,11 @@ def getSpeakingTemplate(): "exercises": [ { "id": str(uuid.uuid4()), - "prompts": [], - "text": "text", - "title": "topic", - "video_url": "sp1_video_url", - "video_path": "sp1_video_path", - "type": "speaking" + "prompts": ["questions"], + "text": "Listen carefully and respond.", + "first_title": "first_topic", + "second_title": "second_topic", + "type": "interactiveSpeaking" }, { "id": str(uuid.uuid4()),