Merged in ft-cf-2-add-writing-task-1-grading (pull request #2)

Add writing task 1 grading.
2023-06-29 21:06:31 +00:00
parent a784400568 7a1dbb76de
commit 0b661fe108
3 changed files with 155 additions and 10 deletions
--- a/app.py
+++ b/app.py
@@ -26,16 +26,31 @@ firebase_admin.initialize_app(cred)
 GRADING_TEMPERATURE = 0.1
 GEN_QUESTION_TEMPERATURE = 0.7
-WRITING_TASK_2_POST_FIELDS = ['overall', 'comment', 'task_response']
+GRADING_FIELDS = ['overall', 'comment', 'task_response']
-WRITING_TASK_2_GET_FIELDS = ['question']
+GEN_FIELDS = ['question']
 FIREBASE_BUCKET = 'mti-ielts.appspot.com'
 AUDIO_FILES_PATH = 'download-audio/'
@app.route('/writing_task1', methods=['POST'])
@jwt_required()
def grade_writing_task_1():
    """Grade a Writing Task 1 answer submitted as a JSON request body.

    Reads the ``question``, ``context`` and ``answer`` keys from the JSON
    payload, builds the grading messages for ``QuestionType.WRITING_TASK_1``,
    adds up the ``n_tokens`` reported by ``count_tokens`` for every message
    that has a ``content`` entry, and forwards everything to
    ``make_openai_call`` with ``GRADING_FIELDS`` and ``GRADING_TEMPERATURE``.

    Returns:
        Whatever ``make_openai_call`` returns; if any exception is raised
        along the way, the exception's string form is returned instead.
    """
    try:
        payload = request.get_json()
        question = payload.get('question')
        context = payload.get('context')
        answer = payload.get('answer')

        messages = get_grading_messages(QuestionType.WRITING_TASK_1, question, answer, context)

        # Only messages that actually carry a "content" entry are counted.
        token_count = sum(
            count_tokens(message["content"])['n_tokens']
            for message in messages
            if "content" in message
        )

        return make_openai_call(messages, token_count, GRADING_FIELDS, GRADING_TEMPERATURE)
    except Exception as error:
        # Failures are reported to the caller as the exception text.
        return str(error)
@app.route('/writing_task2', methods=['POST'])
@jwt_required()
-def grade_writing_task():
+def grade_writing_task_2():
    try:
        data = request.get_json()
        question = data.get('question')
@@ -43,7 +58,7 @@ def grade_writing_task():
        messages = get_grading_messages(QuestionType.WRITING_TASK_2, question, answer)
        token_count = reduce(lambda count, item: count + count_tokens(item)['n_tokens'],
                             map(lambda x: x["content"], filter(lambda x: "content" in x, messages)), 0)
-        response = make_openai_call(messages, token_count, WRITING_TASK_2_POST_FIELDS, GRADING_TEMPERATURE)
+        response = make_openai_call(messages, token_count, GRADING_FIELDS, GRADING_TEMPERATURE)
        return response
    except Exception as e:
        return str(e)
@@ -51,12 +66,12 @@ def grade_writing_task():
@app.route('/writing_task2', methods=['GET'])
@jwt_required()
-def get_writing_task_question():
+def get_writing_task_2_question():
    try:
        messages = get_question_gen_messages(QuestionType.WRITING_TASK_2)
        token_count = reduce(lambda count, item: count + count_tokens(item)['n_tokens'],
                             map(lambda x: x["content"], filter(lambda x: "content" in x, messages)), 0)
-        response = make_openai_call(messages, token_count, WRITING_TASK_2_GET_FIELDS, GEN_QUESTION_TEMPERATURE)
+        response = make_openai_call(messages, token_count, GEN_FIELDS, GEN_QUESTION_TEMPERATURE)
        return response
    except Exception as e:
        return str(e)
@@ -76,7 +91,7 @@ def grade_speaking_task():
        messages = get_grading_messages(QuestionType.SPEAKING, question, answer)
        token_count = reduce(lambda count, item: count + count_tokens(item)['n_tokens'],
                             map(lambda x: x["content"], filter(lambda x: "content" in x, messages)), 0)
-        response = make_openai_call(messages, token_count, WRITING_TASK_2_POST_FIELDS, GRADING_TEMPERATURE)
+        response = make_openai_call(messages, token_count, GRADING_FIELDS, GRADING_TEMPERATURE)
        os.remove(sound_file_name)
        return response
    except Exception as e:
@@ -90,7 +105,7 @@ def get_speaking_task_question():
        messages = get_question_gen_messages(QuestionType.SPEAKING)
        token_count = reduce(lambda count, item: count + count_tokens(item)['n_tokens'],
                             map(lambda x: x["content"], filter(lambda x: "content" in x, messages)), 0)
-        response = make_openai_call(messages, token_count, WRITING_TASK_2_GET_FIELDS, GEN_QUESTION_TEMPERATURE)
+        response = make_openai_call(messages, token_count, GEN_FIELDS, GEN_QUESTION_TEMPERATURE)
        return response
    except Exception as e:
        return str(e)
--- a/helper/api_messages.py
+++ b/helper/api_messages.py
@@ -2,12 +2,60 @@ from enum import Enum
 class QuestionType(Enum):
    """Task kinds that the message-building helpers branch on.

    Each member's value is a human-readable label for the task.
    """
    WRITING_TASK_1 = "Writing Task 1"
    WRITING_TASK_2 = "Writing Task 2"
    SPEAKING = "Speaking Task"
-def get_grading_messages(question_type: QuestionType, question: str, answer: str):
+def get_grading_messages(question_type: QuestionType, question: str, answer: str, context: str = None):
-    if QuestionType.WRITING_TASK_2 == question_type:
+    if QuestionType.WRITING_TASK_1 == question_type:
        messages = [
            {
                "role": "user",
                "content": "You are a IELTS examiner.",
            },
            {
                "role": "user",
                "content": f"The question you have to grade is of type Writing Task 1 and is the following: {question}",
            }
        ]
        if not (context is None or context == ""):
            messages.append({
                "role": "user",
                "content": f"To grade the previous question, bear in mind the following context: {context}",
            })
        messages.extend([
            {
                "role": "user",
                "content": "It is mandatory for you to provide your response with the overall grade and breakdown grades, "
                           "with just the following json format: {'comment': 'comment about answer quality', 'overall': 7.0, "
                           "'task_response': {'Task Achievement': 8.0, 'Coherence and Cohesion': 6.5, 'Lexical Resource': 7.5, "
                           "'Grammatical Range and Accuracy': 6.0}}",
            },
            {
                "role": "user",
                "content": "Example output: { 'comment': 'Overall, the response is good but there are some areas that need "
                           "improvement.\n\nIn terms of Task Achievement, the writer has addressed all parts of the question "
                           "and has provided a clear opinion on the topic. However, some of the points made are not fully "
                           "developed or supported with examples.\n\nIn terms of Coherence and Cohesion, there is a clear "
                           "structure to the response with an introduction, body paragraphs and conclusion. However, there "
                           "are some issues with cohesion as some sentences do not flow smoothly from one to another.\n\nIn "
                           "terms of Lexical Resource, there is a good range of vocabulary used throughout the response and "
                           "some less common words have been used effectively.\n\nIn terms of Grammatical Range and Accuracy, "
                           "there are some errors in grammar and sentence structure which affect clarity in places.\n\nOverall, "
                           "this response would score a band 6.5.', 'overall': 6.5, 'task_response': "
                           "{ 'Coherence and Cohesion': 6.5, 'Grammatical Range and Accuracy': 6.0, 'Lexical Resource': 7.0, "
                           "'Task Achievement': 7.0}}",
            },
            {
                "role": "user",
                "content": f"Evaluate this answer according to ielts grading system: {answer}",
            },
        ])
        return messages
    elif QuestionType.WRITING_TASK_2 == question_type:
        return [
            {
                "role": "user",
--- a/postman/ielts.postman_collection.json
+++ b/postman/ielts.postman_collection.json
@@ -6,6 +6,88 @@
 		"_exporter_id": "26107457"
 	},
 	"item": [
 		{
 			"name": "Grade Answer Writing Task 1 With Context",
 			"request": {
 				"auth": {
 					"type": "bearer",
 					"bearer": [
 						{
 							"key": "token",
 							"value": "{{jwt_token}}",
 							"type": "string"
 						}
 					]
 				},
 				"method": "POST",
 				"header": [],
 				"body": {
 					"mode": "raw",
 					"raw": "{\r\n    \"question\": \"The chart below shows the amount of money per week spent on fast foods in Britain. The graph shows the trends in consumption of fast-foods. Write a report for a university lecturer describing the information shown below.\",\r\n    \"answer\": \"The chart shows that high income earners consumed considerably more fast foods than the other income groups, spending more than twice as much on hamburgers (43 pence per person per week) than on fish and chips or pizza (both under 20 pence). Average income earners also favoured hamburgers, spending 33 pence per person per week, followed by fish and chips at 24 pence, then pizza at 11 pence. Low income earners appear to spend less than other income groups on fast foods, though fish and chips remains their most popular fast food, followed by hamburgers and then pizza. From the graph we can see that in 1970, fish and chips were twice as popular as burgers, pizza being at that time the least popular fast food. The consumption of hamburgers and pizza has risen steadily over the 20 year period to 1990 while the consumption of fish and chips has been in decline over that same period with a slight increase in popularity since 1985.\"\r\n}",
 					"options": {
 						"raw": {
 							"language": "json"
 						}
 					}
 				},
 				"url": {
 					"raw": "http://127.0.0.1:5000/writing_task1",
 					"protocol": "http",
 					"host": [
 						"127",
 						"0",
 						"0",
 						"1"
 					],
 					"port": "5000",
 					"path": [
 						"writing_task1"
 					]
 				}
 			},
 			"response": []
 		},
 		{
 			"name": "Grade Answer Writing Task 1 NO Context",
 			"request": {
 				"auth": {
 					"type": "bearer",
 					"bearer": [
 						{
 							"key": "token",
 							"value": "{{jwt_token}}",
 							"type": "string"
 						}
 					]
 				},
 				"method": "POST",
 				"header": [],
 				"body": {
 					"mode": "raw",
 					"raw": "{\r\n    \"question\": \"The chart below shows the amount of money per week spent on fast foods in Britain. The graph shows the trends in consumption of fast-foods. Write a report for a university lecturer describing the information shown below.\",\r\n    \"answer\": \"The chart shows that high income earners consumed considerably more fast foods than the other income groups, spending more than twice as much on hamburgers (43 pence per person per week) than on fish and chips or pizza (both under 20 pence). Average income earners also favoured hamburgers, spending 33 pence per person per week, followed by fish and chips at 24 pence, then pizza at 11 pence. Low income earners appear to spend less than other income groups on fast foods, though fish and chips remains their most popular fast food, followed by hamburgers and then pizza. From the graph we can see that in 1970, fish and chips were twice as popular as burgers, pizza being at that time the least popular fast food. The consumption of hamburgers and pizza has risen steadily over the 20 year period to 1990 while the consumption of fish and chips has been in decline over that same period with a slight increase in popularity since 1985.\"\r\n}",
 					"options": {
 						"raw": {
 							"language": "json"
 						}
 					}
 				},
 				"url": {
 					"raw": "http://127.0.0.1:5000/writing_task1",
 					"protocol": "http",
 					"host": [
 						"127",
 						"0",
 						"0",
 						"1"
 					],
 					"port": "5000",
 					"path": [
 						"writing_task1"
 					]
 				}
 			},
 			"response": []
 		},
 		{
 			"name": "Grade Answer Writing Task 2",
 			"request": {