From b4dc6be92779740a4f818c25c28614d42b8ff687 Mon Sep 17 00:00:00 2001 From: Cristiano Ferreira Date: Tue, 16 Jul 2024 21:35:36 +0100 Subject: [PATCH] Add comment to grading of writing. --- app.py | 165 +++++++++++++++++++++++++++++++++----------- helper/exercises.py | 21 ++++++ 2 files changed, 145 insertions(+), 41 deletions(-) diff --git a/app.py b/app.py index 98a71b4..24ecb52 100644 --- a/app.py +++ b/app.py @@ -222,10 +222,22 @@ def grade_writing_task_1(): 'comment': "The answer does not contain enough english words.", 'overall': 0, 'task_response': { - 'Coherence and Cohesion': 0, - 'Grammatical Range and Accuracy': 0, - 'Lexical Resource': 0, - 'Task Achievement': 0 + 'Coherence and Cohesion': { + "grade": 0.0, + "comment": "" + }, + 'Grammatical Range and Accuracy': { + "grade": 0.0, + "comment": "" + }, + 'Lexical Resource': { + "grade": 0.0, + "comment": "" + }, + 'Task Achievement': { + "grade": 0.0, + "comment": "" + } } } elif not has_x_words(answer, 100): @@ -233,40 +245,68 @@ def grade_writing_task_1(): 'comment': "The answer is insufficient and too small to be graded.", 'overall': 0, 'task_response': { - 'Coherence and Cohesion': 0, - 'Grammatical Range and Accuracy': 0, - 'Lexical Resource': 0, - 'Task Achievement': 0 + 'Coherence and Cohesion': { + "grade": 0.0, + "comment": "" + }, + 'Grammatical Range and Accuracy': { + "grade": 0.0, + "comment": "" + }, + 'Lexical Resource': { + "grade": 0.0, + "comment": "" + }, + 'Task Achievement': { + "grade": 0.0, + "comment": "" + } } } else: + json_format = { + "comment": "comment about student's response quality", + "overall": 0.0, + "task_response": { + "Coherence and Cohesion": { + "grade": 0.0, + "comment": "comment about Coherence and Cohesion of the student's response" + }, + "Grammatical Range and Accuracy": { + "grade": 0.0, + "comment": "comment about Grammatical Range and Accuracy of the student's response" + }, + "Lexical Resource": { + "grade": 0.0, + "comment": "comment about Lexical Resource of the student's response" + }, + "Task Achievement": { + "grade": 0.0, + "comment": "comment about Task Achievement of the student's response" + } + } + } + messages = [ { "role": "system", - "content": ('You are a helpful assistant designed to output JSON on this format: ' - '{"perfect_answer": "example perfect answer", "comment": ' - '"comment about answer quality", "overall": 0.0, "task_response": ' - '{"Task Achievement": 0.0, "Coherence and Cohesion": 0.0, ' - '"Lexical Resource": 0.0, "Grammatical Range and Accuracy": 0.0 }') + "content": ('You are a helpful assistant designed to output JSON on this format: ' + str(json_format)) }, { "role": "user", "content": ('Evaluate the given Writing Task 1 response based on the IELTS grading system, ' 'ensuring a strict assessment that penalizes errors. Deduct points for deviations ' 'from the task, and assign a score of 0 if the response fails to address the question. ' - 'Additionally, provide an exemplary answer with a minimum of 150 words, along with a ' - 'detailed commentary highlighting both strengths and weaknesses in the response. ' + 'Additionally, provide a detailed commentary highlighting both strengths and ' + 'weaknesses in the response. ' '\n Question: "' + question + '" \n Answer: "' + answer + '"') - }, - { - "role": "user", - "content": 'The perfect answer must have at least 150 words.' } ] token_count = count_total_tokens(messages) response = make_openai_call(GPT_3_5_TURBO, messages, token_count, ["comment"], GRADING_TEMPERATURE) + response["perfect_answer"] = get_perfect_answer(question, 150)["perfect_answer"] response["overall"] = fix_writing_overall(response["overall"], response["task_response"]) response['fixed_text'] = get_fixed_text(answer) return response @@ -322,10 +362,22 @@ def grade_writing_task_2(): 'comment': "The answer does not contain enough english words.", 'overall': 0, 'task_response': { - 'Coherence and Cohesion': 0, - 'Grammatical Range and Accuracy': 0, - 'Lexical Resource': 0, - 'Task Achievement': 0 + 'Coherence and Cohesion': { + "grade": 0.0, + "comment": "" + }, + 'Grammatical Range and Accuracy': { + "grade": 0.0, + "comment": "" + }, + 'Lexical Resource': { + "grade": 0.0, + "comment": "" + }, + 'Task Achievement': { + "grade": 0.0, + "comment": "" + } } } elif not has_x_words(answer, 180): @@ -333,40 +385,68 @@ def grade_writing_task_2(): 'comment': "The answer is insufficient and too small to be graded.", 'overall': 0, 'task_response': { - 'Coherence and Cohesion': 0, - 'Grammatical Range and Accuracy': 0, - 'Lexical Resource': 0, - 'Task Achievement': 0 + 'Coherence and Cohesion': { + "grade": 0.0, + "comment": "" + }, + 'Grammatical Range and Accuracy': { + "grade": 0.0, + "comment": "" + }, + 'Lexical Resource': { + "grade": 0.0, + "comment": "" + }, + 'Task Achievement': { + "grade": 0.0, + "comment": "" + } } } else: + json_format = { + "comment": "comment about student's response quality", + "overall": 0.0, + "task_response": { + "Coherence and Cohesion": { + "grade": 0.0, + "comment": "comment about Coherence and Cohesion of the student's response" + }, + "Grammatical Range and Accuracy": { + "grade": 0.0, + "comment": "comment about Grammatical Range and Accuracy of the student's response" + }, + "Lexical Resource": { + "grade": 0.0, + "comment": "comment about Lexical Resource of the student's response" + }, + "Task Achievement": { + "grade": 0.0, + "comment": "comment about Task Achievement of the student's response" + } + } + } + messages = [ { "role": "system", - "content": ('You are a helpful assistant designed to output JSON on this format: ' - '{"perfect_answer": "example perfect answer", "comment": ' - '"comment about answer quality", "overall": 0.0, "task_response": ' - '{"Task Achievement": 0.0, "Coherence and Cohesion": 0.0, ' - '"Lexical Resource": 0.0, "Grammatical Range and Accuracy": 0.0 }') + "content": ('You are a helpful assistant designed to output JSON on this format: ' + str(json_format)) }, { "role": "user", "content": ( 'Evaluate the given Writing Task 2 response based on the IELTS grading system, ensuring a ' 'strict assessment that penalizes errors. Deduct points for deviations from the task, and ' - 'assign a score of 0 if the response fails to address the question. Additionally, provide an ' - 'exemplary answer with a minimum of 250 words, along with a detailed commentary highlighting ' + 'assign a score of 0 if the response fails to address the question. Additionally, provide' + ' a detailed commentary highlighting ' 'both strengths and weaknesses in the response.' '\n Question: "' + question + '" \n Answer: "' + answer + '"') - }, - { - "role": "user", - "content": 'The perfect answer must have at least 250 words.' } ] token_count = count_total_tokens(messages) response = make_openai_call(GPT_4_O, messages, token_count, ["comment"], GEN_QUESTION_TEMPERATURE) + response["perfect_answer"] = get_perfect_answer(question, 250)["perfect_answer"] response["overall"] = fix_writing_overall(response["overall"], response["task_response"]) response['fixed_text'] = get_fixed_text(answer) return response @@ -375,11 +455,14 @@ def grade_writing_task_2(): def fix_writing_overall(overall: float, task_response: dict): - if overall > max(task_response.values()) or overall < min(task_response.values()): - total_sum = sum(task_response.values()) - average = total_sum / len(task_response.values()) + grades = [category["grade"] for category in task_response.values()] + + if overall > max(grades) or overall < min(grades): + total_sum = sum(grades) + average = total_sum / len(grades) rounded_average = round(average, 0) return rounded_average + return overall diff --git a/helper/exercises.py b/helper/exercises.py index 85d187c..776c0cb 100644 --- a/helper/exercises.py +++ b/helper/exercises.py @@ -238,6 +238,27 @@ def build_write_blanks_solutions_listening(words: [], start_id): ) return solutions +def get_perfect_answer(question: str, size: int): + messages = [ + { + "role": "system", + "content": ( + 'You are a helpful assistant designed to output JSON on this format: ' + '{"perfect_answer": "perfect answer for the question"}') + }, + { + "role": "user", + "content": ('Write a perfect answer for this writing exercise of a IELTS exam. Question: ' + question) + + }, + { + "role": "user", + "content": ('The answer must have at least ' + str(size) + ' words') + + } + ] + token_count = count_total_tokens(messages) + return make_openai_call(GPT_4_O, messages, token_count, GEN_TEXT_FIELDS, GEN_QUESTION_TEMPERATURE) def generate_reading_passage(type: QuestionType, topic: str): messages = [