From b4dc6be92779740a4f818c25c28614d42b8ff687 Mon Sep 17 00:00:00 2001
From: Cristiano Ferreira <cristiano.ferreira@ecrop.dev>
Date: Tue, 16 Jul 2024 21:35:36 +0100
Subject: [PATCH] Add per-criterion comments to writing grading.

---
 app.py              | 165 +++++++++++++++++++++++++++++++++-----------
 helper/exercises.py |  21 ++++++
 2 files changed, 145 insertions(+), 41 deletions(-)

diff --git a/app.py b/app.py
index 98a71b4..24ecb52 100644
--- a/app.py
+++ b/app.py
@@ -222,10 +222,22 @@ def grade_writing_task_1():
                 'comment': "The answer does not contain enough english words.",
                 'overall': 0,
                 'task_response': {
-                    'Coherence and Cohesion': 0,
-                    'Grammatical Range and Accuracy': 0,
-                    'Lexical Resource': 0,
-                    'Task Achievement': 0
+                    'Coherence and Cohesion': {
+                            "grade": 0.0,
+                            "comment": ""
+                        },
+                    'Grammatical Range and Accuracy': {
+                            "grade": 0.0,
+                            "comment": ""
+                        },
+                    'Lexical Resource': {
+                            "grade": 0.0,
+                            "comment": ""
+                        },
+                    'Task Achievement': {
+                            "grade": 0.0,
+                            "comment": ""
+                        }
                 }
             }
         elif not has_x_words(answer, 100):
@@ -233,40 +245,68 @@ def grade_writing_task_1():
                 'comment': "The answer is insufficient and too small to be graded.",
                 'overall': 0,
                 'task_response': {
-                    'Coherence and Cohesion': 0,
-                    'Grammatical Range and Accuracy': 0,
-                    'Lexical Resource': 0,
-                    'Task Achievement': 0
+                    'Coherence and Cohesion': {
+                            "grade": 0.0,
+                            "comment": ""
+                        },
+                    'Grammatical Range and Accuracy': {
+                            "grade": 0.0,
+                            "comment": ""
+                        },
+                    'Lexical Resource': {
+                            "grade": 0.0,
+                            "comment": ""
+                        },
+                    'Task Achievement': {
+                            "grade": 0.0,
+                            "comment": ""
+                        }
                 }
             }
         else:
+            json_format = {
+                "comment": "comment about student's response quality",
+                "overall": 0.0,
+                "task_response": {
+                    "Coherence and Cohesion": {
+                        "grade": 0.0,
+                        "comment": "comment about Coherence and Cohesion of the student's response"
+                    },
+                    "Grammatical Range and Accuracy": {
+                        "grade": 0.0,
+                        "comment": "comment about Grammatical Range and Accuracy of the student's response"
+                    },
+                    "Lexical Resource": {
+                        "grade": 0.0,
+                        "comment": "comment about Lexical Resource of the student's response"
+                    },
+                    "Task Achievement": {
+                        "grade": 0.0,
+                        "comment": "comment about Task Achievement of the student's response"
+                    }
+                }
+            }
+
             messages = [
                 {
                     "role": "system",
-                    "content": ('You are a helpful assistant designed to output JSON on this format: '
-                                '{"perfect_answer": "example perfect answer", "comment": '
-                                '"comment about answer quality", "overall": 0.0, "task_response": '
-                                '{"Task Achievement": 0.0, "Coherence and Cohesion": 0.0, '
-                                '"Lexical Resource": 0.0, "Grammatical Range and Accuracy": 0.0 }')
+                    "content": ('You are a helpful assistant designed to output JSON on this format: ' + str(json_format))
                 },
                 {
                     "role": "user",
                     "content": ('Evaluate the given Writing Task 1 response based on the IELTS grading system, '
                                 'ensuring a strict assessment that penalizes errors. Deduct points for deviations '
                                 'from the task, and assign a score of 0 if the response fails to address the question. '
-                                'Additionally, provide an exemplary answer with a minimum of 150 words, along with a '
-                                'detailed commentary highlighting both strengths and weaknesses in the response. '
+                                'Additionally, provide a detailed commentary highlighting both strengths and '
+                                'weaknesses in the response. '
                                 '\n Question: "' + question + '" \n Answer: "' + answer + '"')
-                },
-                {
-                    "role": "user",
-                    "content": 'The perfect answer must have at least 150 words.'
                 }
             ]
             token_count = count_total_tokens(messages)
             response = make_openai_call(GPT_3_5_TURBO, messages, token_count,
                                         ["comment"],
                                         GRADING_TEMPERATURE)
+            response["perfect_answer"] = get_perfect_answer(question, 150)["perfect_answer"]
             response["overall"] = fix_writing_overall(response["overall"], response["task_response"])
             response['fixed_text'] = get_fixed_text(answer)
             return response
@@ -322,10 +362,22 @@ def grade_writing_task_2():
                 'comment': "The answer does not contain enough english words.",
                 'overall': 0,
                 'task_response': {
-                    'Coherence and Cohesion': 0,
-                    'Grammatical Range and Accuracy': 0,
-                    'Lexical Resource': 0,
-                    'Task Achievement': 0
+                    'Coherence and Cohesion': {
+                            "grade": 0.0,
+                            "comment": ""
+                        },
+                    'Grammatical Range and Accuracy': {
+                            "grade": 0.0,
+                            "comment": ""
+                        },
+                    'Lexical Resource': {
+                            "grade": 0.0,
+                            "comment": ""
+                        },
+                    'Task Achievement': {
+                            "grade": 0.0,
+                            "comment": ""
+                        }
                 }
             }
         elif not has_x_words(answer, 180):
@@ -333,40 +385,68 @@ def grade_writing_task_2():
                 'comment': "The answer is insufficient and too small to be graded.",
                 'overall': 0,
                 'task_response': {
-                    'Coherence and Cohesion': 0,
-                    'Grammatical Range and Accuracy': 0,
-                    'Lexical Resource': 0,
-                    'Task Achievement': 0
+                    'Coherence and Cohesion': {
+                            "grade": 0.0,
+                            "comment": ""
+                        },
+                    'Grammatical Range and Accuracy': {
+                            "grade": 0.0,
+                            "comment": ""
+                        },
+                    'Lexical Resource': {
+                            "grade": 0.0,
+                            "comment": ""
+                        },
+                    'Task Achievement': {
+                            "grade": 0.0,
+                            "comment": ""
+                        }
                 }
             }
         else:
+            json_format = {
+                "comment": "comment about student's response quality",
+                "overall": 0.0,
+                "task_response": {
+                    "Coherence and Cohesion": {
+                        "grade": 0.0,
+                        "comment": "comment about Coherence and Cohesion of the student's response"
+                    },
+                    "Grammatical Range and Accuracy": {
+                        "grade": 0.0,
+                        "comment": "comment about Grammatical Range and Accuracy of the student's response"
+                    },
+                    "Lexical Resource": {
+                        "grade": 0.0,
+                        "comment": "comment about Lexical Resource of the student's response"
+                    },
+                    "Task Achievement": {
+                        "grade": 0.0,
+                        "comment": "comment about Task Achievement of the student's response"
+                    }
+                }
+            }
+
             messages = [
                 {
                     "role": "system",
-                    "content": ('You are a helpful assistant designed to output JSON on this format: '
-                                '{"perfect_answer": "example perfect answer", "comment": '
-                                '"comment about answer quality", "overall": 0.0, "task_response": '
-                                '{"Task Achievement": 0.0, "Coherence and Cohesion": 0.0, '
-                                '"Lexical Resource": 0.0, "Grammatical Range and Accuracy": 0.0 }')
+                    "content": ('You are a helpful assistant designed to output JSON on this format: ' + str(json_format))
                 },
                 {
                     "role": "user",
                     "content": (
                             'Evaluate the given Writing Task 2 response based on the IELTS grading system, ensuring a '
                             'strict assessment that penalizes errors. Deduct points for deviations from the task, and '
-                            'assign a score of 0 if the response fails to address the question. Additionally, provide an '
-                            'exemplary answer with a minimum of 250 words, along with a detailed commentary highlighting '
+                            'assign a score of 0 if the response fails to address the question. Additionally, provide'
+                            ' a detailed commentary highlighting '
                             'both strengths and weaknesses in the response.'
                             '\n Question: "' + question + '" \n Answer: "' + answer + '"')
-                },
-                {
-                    "role": "user",
-                    "content": 'The perfect answer must have at least 250 words.'
                 }
             ]
             token_count = count_total_tokens(messages)
             response = make_openai_call(GPT_4_O, messages, token_count, ["comment"],
                                         GEN_QUESTION_TEMPERATURE)
+            response["perfect_answer"] = get_perfect_answer(question, 250)["perfect_answer"]
             response["overall"] = fix_writing_overall(response["overall"], response["task_response"])
             response['fixed_text'] = get_fixed_text(answer)
             return response
@@ -375,11 +455,14 @@ def grade_writing_task_2():
 
 
 def fix_writing_overall(overall: float, task_response: dict):
-    if overall > max(task_response.values()) or overall < min(task_response.values()):
-        total_sum = sum(task_response.values())
-        average = total_sum / len(task_response.values())
+    """Return overall, or the rounded mean of the category grades when overall is outside their range."""
+    # Tolerate bare numeric entries alongside {"grade": ...} dicts; an empty task_response keeps overall.
+    grades = [entry["grade"] if isinstance(entry, dict) else entry for entry in task_response.values()]
+    if grades and (overall > max(grades) or overall < min(grades)):
+        average = sum(grades) / len(grades)
         rounded_average = round(average, 0)
         return rounded_average
+
     return overall
 
 
diff --git a/helper/exercises.py b/helper/exercises.py
index 85d187c..776c0cb 100644
--- a/helper/exercises.py
+++ b/helper/exercises.py
@@ -238,6 +238,27 @@ def build_write_blanks_solutions_listening(words: [], start_id):
         )
     return solutions
 
+def get_perfect_answer(question: str, size: int):
+    """Request a model answer of at least `size` words; callers read the reply's "perfect_answer" key."""
+    messages = [
+        {
+            "role": "system",
+            "content": ('You are a helpful assistant designed to output JSON on this format: '
+                        '{"perfect_answer": "perfect answer for the question"}')
+        },
+        {
+            "role": "user",
+            "content": 'Write a perfect answer for this writing exercise of an IELTS exam. Question: ' + question
+        },
+        {
+            "role": "user",
+            "content": 'The answer must have at least ' + str(size) + ' words'
+        }
+    ]
+    # NOTE(review): passes the key requested above instead of GEN_TEXT_FIELDS, assuming this argument
+    # lists the keys expected in the reply (confirm against make_openai_call).
+    token_count = count_total_tokens(messages)
+    return make_openai_call(GPT_4_O, messages, token_count, ["perfect_answer"], GEN_QUESTION_TEMPERATURE)
 
 def generate_reading_passage(type: QuestionType, topic: str):
     messages = [