From bc2cedb8218318a2e834f3534bc6cb82e52004b2 Mon Sep 17 00:00:00 2001 From: Cristiano Ferreira Date: Tue, 23 Jan 2024 23:23:17 +0000 Subject: [PATCH] Improve grading to be more strict and give 0 if the question is not addressed. --- app.py | 82 ++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 46 insertions(+), 36 deletions(-) diff --git a/app.py b/app.py index db865e1..d1a36a9 100644 --- a/app.py +++ b/app.py @@ -207,14 +207,15 @@ def grade_writing_task_1(): question = data.get('question') answer = data.get('answer') if has_words(answer): - message = ( - "Grade this Writing Task 1 answer according to ielts grading system and provide an example of a perfect " - "answer (min 150 words) and an elaborated comment where you deep dive into what is wrong and right" - " about the answer. Provide your answer on the following json format: {'perfect_answer': 'example " - "perfect answer', 'comment': 'comment about answer quality', 'overall': 7.0, " - "'task_response': {'Task Achievement': 0.0, 'Coherence and Cohesion': 0.0, 'Lexical Resource': 0.0, " - "'Grammatical Range and Accuracy': 0.0}}\n The question was '" + question + "' " - "and the answer was '" + answer + "'") + message = ("Evaluate the given Writing Task 1 response based on the IELTS grading system, ensuring a " + "strict assessment that penalizes errors. Deduct points for deviations from the task, and " + "assign a score of 0 if the response fails to address the question. Additionally, provide an " + "exemplary answer with a minimum of 150 words, along with a detailed commentary highlighting " + "both strengths and weaknesses in the response. 
Present your evaluation in JSON format with " + "the following structure: {'perfect_answer': 'example perfect answer', 'comment': " + "'comment about answer quality', 'overall': 0.0, 'task_response': {'Task Achievement': 0.0, " + "'Coherence and Cohesion': 0.0, 'Lexical Resource': 0.0, 'Grammatical Range and Accuracy': " + "0.0}}\n Question: '" + question + "' \n Answer: '" + answer + "'") token_count = count_tokens(message)["n_tokens"] response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count, ["comment"], @@ -261,14 +262,15 @@ def grade_writing_task_2(): question = data.get('question') answer = data.get('answer') if has_words(answer): - message = ( - "Grade this Writing Task 2 answer according to ielts grading system and provide an example of a perfect " - "answer (min 250 words) and an elaborated comment where you deep dive into what is wrong and right " - "about the answer. Provide your answer on the following json format: {'perfect_answer': 'example " - "perfect answer', 'comment': 'comment about answer quality', 'overall': 7.0, " - "'task_response': {'Task Achievement': 0.0, 'Coherence and Cohesion': 0.0, 'Lexical Resource': 0.0, " - "'Grammatical Range and Accuracy': 0.0}}\n The question was '" + question + "' " - "and the answer was '" + answer + "'") + message = ("Evaluate the given Writing Task 2 response based on the IELTS grading system, ensuring a " + "strict assessment that penalizes errors. Deduct points for deviations from the task, and " + "assign a score of 0 if the response fails to address the question. Additionally, provide an " + "exemplary answer with a minimum of 250 words, along with a detailed commentary highlighting " + "both strengths and weaknesses in the response. 
Present your evaluation in JSON format with " + "the following structure: {'perfect_answer': 'example perfect answer', 'comment': " + "'comment about answer quality', 'overall': 0.0, 'task_response': {'Task Achievement': 0.0, " + "'Coherence and Cohesion': 0.0, 'Lexical Resource': 0.0, 'Grammatical Range and Accuracy': " + "0.0}}\n Question: '" + question + "' \n Answer: '" + answer + "'") token_count = count_tokens(message)["n_tokens"] response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count, ["comment"], @@ -308,6 +310,7 @@ def get_writing_task_2_general_question(): except Exception as e: return str(e) + # THE SAVING OF WRITING IS DONE WITHOUT THE API ON THE FRONTEND # @app.route('/writing', methods=['POST']) # @jwt_required() @@ -342,13 +345,14 @@ def grade_speaking_task_1(): download_firebase_file(FIREBASE_BUCKET, answer_firebase_path, sound_file_name) answer = speech_to_text(sound_file_name) if has_10_words(answer): - message = ( - "Grade this Speaking Part 1 answer according to ielts grading system and provide an elaborated " - "comment where you deep dive into what is wrong and right about the answer." - "Please assign a grade of 0 if the answer provided does not address the question." - "Provide your answer on the following json format: {'comment': 'comment about answer quality', 'overall': 0.0, " - "'task_response': {'Fluency and Coherence': 0.0, 'Lexical Resource': 0.0, 'Grammatical Range and Accuracy': 0.0, " - "'Pronunciation': 0.0}}\n The question was '" + question + "' and the answer was '" + answer + "'") + message = ("Evaluate the given Speaking Part 1 response based on the IELTS grading system, ensuring a " + "strict assessment that penalizes errors. Deduct points for deviations from the task, and " + "assign a score of 0 if the response fails to address the question. Additionally, provide " + "detailed commentary highlighting both strengths and weaknesses in the response. 
Present your " + "evaluation in JSON format with " + "the following structure: {'comment': 'comment about answer quality', 'overall': 0.0, " + "'task_response': {'Fluency and Coherence': 0.0, 'Lexical Resource': 0.0, 'Grammatical Range " + "and Accuracy': 0.0, 'Pronunciation': 0.0}}\n Question: '" + question + "' \n Answer: '" + answer + "'") token_count = count_tokens(message)["n_tokens"] response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count, ["comment"], @@ -411,13 +415,15 @@ def grade_speaking_task_2(): download_firebase_file(FIREBASE_BUCKET, answer_firebase_path, sound_file_name) answer = speech_to_text(sound_file_name) if has_10_words(answer): - message = ( - "Grade this Speaking Part 2 answer according to ielts grading system and provide an elaborated " - "comment where you deep dive into what is wrong and right about the answer." - "Please assign a grade of 0 if the answer provided does not address the question." - "Provide your answer on the following json format: {'comment': 'comment about answer quality', 'overall': 0.0, " - "'task_response': {'Fluency and Coherence': 0.0, 'Lexical Resource': 0.0, 'Grammatical Range and Accuracy': 0.0, " - "'Pronunciation': 0.0}}\n The question was '" + question + "' and the answer was '" + answer + "'") + message = ("Evaluate the given Speaking Part 2 response based on the IELTS grading system, ensuring a " + "strict assessment that penalizes errors. Deduct points for deviations from the task, and " + "assign a score of 0 if the response fails to address the question. Additionally, provide " + "detailed commentary highlighting both strengths and weaknesses in the response. 
Present your " + "evaluation in JSON format with " + "the following structure: {'comment': 'comment about answer quality', 'overall': 0.0, " + "'task_response': {'Fluency and Coherence': 0.0, 'Lexical Resource': 0.0, 'Grammatical Range " + "and Accuracy': 0.0, " + "'Pronunciation': 0.0}}\n Question: '" + question + "' \n Answer: '" + answer + "'") token_count = count_tokens(message)["n_tokens"] response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count, ["comment"], @@ -525,9 +531,10 @@ def grade_speaking_task_3(): None, GEN_QUESTION_TEMPERATURE)) message = ( - "Grade this Speaking Part 3 answer according to ielts grading system and provide " - "an elaborated comment where you deep dive into what is wrong and right about the answers." - "Please assign a grade of 0 if the answer provided does not address the question." + "Evaluate the given Speaking Part 3 response based on the IELTS grading system, ensuring a " + "strict assessment that penalizes errors. Deduct points for deviations from the task, and " + "assign a score of 0 if the response fails to address the question. Additionally, provide detailed " + "commentary highlighting both strengths and weaknesses in the response."
"\n\n The questions and answers are: \n\n'") formatted_text = "" @@ -587,6 +594,7 @@ def save_speaking(): except Exception as e: return str(e) + @app.route("/speaking/generate_speaking_video", methods=['POST']) @jwt_required() def generate_speaking_video(): @@ -602,7 +610,7 @@ def generate_speaking_video(): return { "text": data["question"], - "prompts": data["prompts"] if "prompts" in data else [], + "prompts": data["prompts"] if "prompts" in data else [], "title": data["topic"], "video_url": sp1_video_url, "video_path": sp1_video_path, @@ -612,10 +620,11 @@ def generate_speaking_video(): else: app.logger.error("Failed to create video for part 1 question: " + data["question"]) return str("Failed to create video for part 1 question: " + data["question"]) - + except Exception as e: return str(e) + @app.route("/speaking/generate_interactive_video", methods=['POST']) @jwt_required() def generate_interactive_video(): @@ -645,10 +654,11 @@ def generate_interactive_video(): "title": data["topic"], "type": "interactiveSpeaking", "id": uuid.uuid4() - } + } except Exception as e: return str(e) + @app.route('/reading_passage_1', methods=['GET']) @jwt_required() def get_reading_passage_1_question():