Merged in ft-cf-2-add-writing-task-1-grading (pull request #2)
Add writing task 1 grading.
This commit is contained in:
31
app.py
31
app.py
@@ -26,16 +26,31 @@ firebase_admin.initialize_app(cred)
|
|||||||
|
|
||||||
GRADING_TEMPERATURE = 0.1
|
GRADING_TEMPERATURE = 0.1
|
||||||
GEN_QUESTION_TEMPERATURE = 0.7
|
GEN_QUESTION_TEMPERATURE = 0.7
|
||||||
WRITING_TASK_2_POST_FIELDS = ['overall', 'comment', 'task_response']
|
GRADING_FIELDS = ['overall', 'comment', 'task_response']
|
||||||
WRITING_TASK_2_GET_FIELDS = ['question']
|
GEN_FIELDS = ['question']
|
||||||
|
|
||||||
FIREBASE_BUCKET = 'mti-ielts.appspot.com'
|
FIREBASE_BUCKET = 'mti-ielts.appspot.com'
|
||||||
AUDIO_FILES_PATH = 'download-audio/'
|
AUDIO_FILES_PATH = 'download-audio/'
|
||||||
|
|
||||||
|
@app.route('/writing_task1', methods=['POST'])
@jwt_required()
def grade_writing_task_1():
    """Grade an IELTS Writing Task 1 answer.

    Reads a JSON object from the request body with the keys ``question``
    and ``answer`` (both required) and ``context`` (optional), builds the
    grading prompt with ``get_grading_messages`` and returns the result of
    ``make_openai_call`` for ``GRADING_FIELDS`` at ``GRADING_TEMPERATURE``.

    Any failure, including a malformed or incomplete body, is reported the
    same way as in the other endpoints of this file: the exception text is
    returned as the response body.
    """
    try:
        data = request.get_json()
        # A body such as ``null`` or a JSON list has no ``.get``; fail with a
        # clear message instead of exposing an AttributeError to the client.
        if not isinstance(data, dict):
            raise ValueError("Request body must be a JSON object.")

        question = data.get('question')
        answer = data.get('answer')
        context = data.get('context')  # optional; may be None or ""

        # Without this check a missing field is interpolated into the prompt
        # as the literal text "None" and is still sent off for grading.
        missing = [name
                   for name, value in (('question', question), ('answer', answer))
                   if not value]
        if missing:
            raise ValueError(f"Missing required field(s): {', '.join(missing)}")

        messages = get_grading_messages(QuestionType.WRITING_TASK_1, question, answer, context)
        # Total the token counts of every message that carries a "content" key.
        token_count = sum(count_tokens(message["content"])['n_tokens']
                          for message in messages if "content" in message)
        response = make_openai_call(messages, token_count, GRADING_FIELDS, GRADING_TEMPERATURE)
        return response
    except Exception as e:
        return str(e)
|
||||||
|
|
||||||
@app.route('/writing_task2', methods=['POST'])
|
@app.route('/writing_task2', methods=['POST'])
|
||||||
@jwt_required()
|
@jwt_required()
|
||||||
def grade_writing_task():
|
def grade_writing_task_2():
|
||||||
try:
|
try:
|
||||||
data = request.get_json()
|
data = request.get_json()
|
||||||
question = data.get('question')
|
question = data.get('question')
|
||||||
@@ -43,7 +58,7 @@ def grade_writing_task():
|
|||||||
messages = get_grading_messages(QuestionType.WRITING_TASK_2, question, answer)
|
messages = get_grading_messages(QuestionType.WRITING_TASK_2, question, answer)
|
||||||
token_count = reduce(lambda count, item: count + count_tokens(item)['n_tokens'],
|
token_count = reduce(lambda count, item: count + count_tokens(item)['n_tokens'],
|
||||||
map(lambda x: x["content"], filter(lambda x: "content" in x, messages)), 0)
|
map(lambda x: x["content"], filter(lambda x: "content" in x, messages)), 0)
|
||||||
response = make_openai_call(messages, token_count, WRITING_TASK_2_POST_FIELDS, GRADING_TEMPERATURE)
|
response = make_openai_call(messages, token_count, GRADING_FIELDS, GRADING_TEMPERATURE)
|
||||||
return response
|
return response
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return str(e)
|
return str(e)
|
||||||
@@ -51,12 +66,12 @@ def grade_writing_task():
|
|||||||
|
|
||||||
@app.route('/writing_task2', methods=['GET'])
|
@app.route('/writing_task2', methods=['GET'])
|
||||||
@jwt_required()
|
@jwt_required()
|
||||||
def get_writing_task_question():
|
def get_writing_task_2_question():
|
||||||
try:
|
try:
|
||||||
messages = get_question_gen_messages(QuestionType.WRITING_TASK_2)
|
messages = get_question_gen_messages(QuestionType.WRITING_TASK_2)
|
||||||
token_count = reduce(lambda count, item: count + count_tokens(item)['n_tokens'],
|
token_count = reduce(lambda count, item: count + count_tokens(item)['n_tokens'],
|
||||||
map(lambda x: x["content"], filter(lambda x: "content" in x, messages)), 0)
|
map(lambda x: x["content"], filter(lambda x: "content" in x, messages)), 0)
|
||||||
response = make_openai_call(messages, token_count, WRITING_TASK_2_GET_FIELDS, GEN_QUESTION_TEMPERATURE)
|
response = make_openai_call(messages, token_count, GEN_FIELDS, GEN_QUESTION_TEMPERATURE)
|
||||||
return response
|
return response
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return str(e)
|
return str(e)
|
||||||
@@ -76,7 +91,7 @@ def grade_speaking_task():
|
|||||||
messages = get_grading_messages(QuestionType.SPEAKING, question, answer)
|
messages = get_grading_messages(QuestionType.SPEAKING, question, answer)
|
||||||
token_count = reduce(lambda count, item: count + count_tokens(item)['n_tokens'],
|
token_count = reduce(lambda count, item: count + count_tokens(item)['n_tokens'],
|
||||||
map(lambda x: x["content"], filter(lambda x: "content" in x, messages)), 0)
|
map(lambda x: x["content"], filter(lambda x: "content" in x, messages)), 0)
|
||||||
response = make_openai_call(messages, token_count, WRITING_TASK_2_POST_FIELDS, GRADING_TEMPERATURE)
|
response = make_openai_call(messages, token_count, GRADING_FIELDS, GRADING_TEMPERATURE)
|
||||||
os.remove(sound_file_name)
|
os.remove(sound_file_name)
|
||||||
return response
|
return response
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -90,7 +105,7 @@ def get_speaking_task_question():
|
|||||||
messages = get_question_gen_messages(QuestionType.SPEAKING)
|
messages = get_question_gen_messages(QuestionType.SPEAKING)
|
||||||
token_count = reduce(lambda count, item: count + count_tokens(item)['n_tokens'],
|
token_count = reduce(lambda count, item: count + count_tokens(item)['n_tokens'],
|
||||||
map(lambda x: x["content"], filter(lambda x: "content" in x, messages)), 0)
|
map(lambda x: x["content"], filter(lambda x: "content" in x, messages)), 0)
|
||||||
response = make_openai_call(messages, token_count, WRITING_TASK_2_GET_FIELDS, GEN_QUESTION_TEMPERATURE)
|
response = make_openai_call(messages, token_count, GEN_FIELDS, GEN_QUESTION_TEMPERATURE)
|
||||||
return response
|
return response
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return str(e)
|
return str(e)
|
||||||
|
|||||||
@@ -2,12 +2,60 @@ from enum import Enum
|
|||||||
|
|
||||||
|
|
||||||
class QuestionType(Enum):
|
class QuestionType(Enum):
|
||||||
|
WRITING_TASK_1 = "Writing Task 1"
|
||||||
WRITING_TASK_2 = "Writing Task 2"
|
WRITING_TASK_2 = "Writing Task 2"
|
||||||
SPEAKING = "Speaking Task"
|
SPEAKING = "Speaking Task"
|
||||||
|
|
||||||
|
|
||||||
def get_grading_messages(question_type: QuestionType, question: str, answer: str):
|
def get_grading_messages(question_type: QuestionType, question: str, answer: str, context: str = None):
|
||||||
if QuestionType.WRITING_TASK_2 == question_type:
|
if QuestionType.WRITING_TASK_1 == question_type:
|
||||||
|
messages = [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "You are a IELTS examiner.",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": f"The question you have to grade is of type Writing Task 1 and is the following: {question}",
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
if not (context is None or context == ""):
|
||||||
|
messages.append({
|
||||||
|
"role": "user",
|
||||||
|
"content": f"To grade the previous question, bear in mind the following context: {context}",
|
||||||
|
})
|
||||||
|
|
||||||
|
messages.extend([
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "It is mandatory for you to provide your response with the overall grade and breakdown grades, "
|
||||||
|
"with just the following json format: {'comment': 'comment about answer quality', 'overall': 7.0, "
|
||||||
|
"'task_response': {'Task Achievement': 8.0, 'Coherence and Cohesion': 6.5, 'Lexical Resource': 7.5, "
|
||||||
|
"'Grammatical Range and Accuracy': 6.0}}",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "Example output: { 'comment': 'Overall, the response is good but there are some areas that need "
|
||||||
|
"improvement.\n\nIn terms of Task Achievement, the writer has addressed all parts of the question "
|
||||||
|
"and has provided a clear opinion on the topic. However, some of the points made are not fully "
|
||||||
|
"developed or supported with examples.\n\nIn terms of Coherence and Cohesion, there is a clear "
|
||||||
|
"structure to the response with an introduction, body paragraphs and conclusion. However, there "
|
||||||
|
"are some issues with cohesion as some sentences do not flow smoothly from one to another.\n\nIn "
|
||||||
|
"terms of Lexical Resource, there is a good range of vocabulary used throughout the response and "
|
||||||
|
"some less common words have been used effectively.\n\nIn terms of Grammatical Range and Accuracy, "
|
||||||
|
"there are some errors in grammar and sentence structure which affect clarity in places.\n\nOverall, "
|
||||||
|
"this response would score a band 6.5.', 'overall': 6.5, 'task_response': "
|
||||||
|
"{ 'Coherence and Cohesion': 6.5, 'Grammatical Range and Accuracy': 6.0, 'Lexical Resource': 7.0, "
|
||||||
|
"'Task Achievement': 7.0}}",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": f"Evaluate this answer according to ielts grading system: {answer}",
|
||||||
|
},
|
||||||
|
])
|
||||||
|
return messages
|
||||||
|
elif QuestionType.WRITING_TASK_2 == question_type:
|
||||||
return [
|
return [
|
||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
|
|||||||
@@ -6,6 +6,88 @@
|
|||||||
"_exporter_id": "26107457"
|
"_exporter_id": "26107457"
|
||||||
},
|
},
|
||||||
"item": [
|
"item": [
|
||||||
|
{
|
||||||
|
"name": "Grade Answer Writing Task 1 With Context",
|
||||||
|
"request": {
|
||||||
|
"auth": {
|
||||||
|
"type": "bearer",
|
||||||
|
"bearer": [
|
||||||
|
{
|
||||||
|
"key": "token",
|
||||||
|
"value": "{{jwt_token}}",
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"method": "POST",
|
||||||
|
"header": [],
|
||||||
|
"body": {
|
||||||
|
"mode": "raw",
|
||||||
|
"raw": "{\r\n \"question\": \"The chart below shows the amount of money per week spent on fast foods in Britain. The graph shows the trends in consumption of fast-foods. Write a report for a university lecturer describing the information shown below.\",\r\n \"answer\": \"The chart shows that high income earners consumed considerably more fast foods than the other income groups, spending more than twice as much on hamburgers (43 pence per person per week) than on fish and chips or pizza (both under 20 pence). Average income earners also favoured hamburgers, spending 33 pence per person per week, followed by fish and chips at 24 pence, then pizza at 11 pence. Low income earners appear to spend less than other income groups on fast foods, though fish and chips remains their most popular fast food, followed by hamburgers and then pizza. From the graph we can see that in 1970, fish and chips were twice as popular as burgers, pizza being at that time the least popular fast food. The consumption of hamburgers and pizza has risen steadily over the 20 year period to 1990 while the consumption of fish and chips has been in decline over that same period with a slight increase in popularity since 1985.\"\r\n}",
|
||||||
|
"options": {
|
||||||
|
"raw": {
|
||||||
|
"language": "json"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"url": {
|
||||||
|
"raw": "http://127.0.0.1:5000/writing_task1",
|
||||||
|
"protocol": "http",
|
||||||
|
"host": [
|
||||||
|
"127",
|
||||||
|
"0",
|
||||||
|
"0",
|
||||||
|
"1"
|
||||||
|
],
|
||||||
|
"port": "5000",
|
||||||
|
"path": [
|
||||||
|
"writing_task1"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"response": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Grade Answer Writing Task 1 NO Context",
|
||||||
|
"request": {
|
||||||
|
"auth": {
|
||||||
|
"type": "bearer",
|
||||||
|
"bearer": [
|
||||||
|
{
|
||||||
|
"key": "token",
|
||||||
|
"value": "{{jwt_token}}",
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"method": "POST",
|
||||||
|
"header": [],
|
||||||
|
"body": {
|
||||||
|
"mode": "raw",
|
||||||
|
"raw": "{\r\n \"question\": \"The chart below shows the amount of money per week spent on fast foods in Britain. The graph shows the trends in consumption of fast-foods. Write a report for a university lecturer describing the information shown below.\",\r\n \"answer\": \"The chart shows that high income earners consumed considerably more fast foods than the other income groups, spending more than twice as much on hamburgers (43 pence per person per week) than on fish and chips or pizza (both under 20 pence). Average income earners also favoured hamburgers, spending 33 pence per person per week, followed by fish and chips at 24 pence, then pizza at 11 pence. Low income earners appear to spend less than other income groups on fast foods, though fish and chips remains their most popular fast food, followed by hamburgers and then pizza. From the graph we can see that in 1970, fish and chips were twice as popular as burgers, pizza being at that time the least popular fast food. The consumption of hamburgers and pizza has risen steadily over the 20 year period to 1990 while the consumption of fish and chips has been in decline over that same period with a slight increase in popularity since 1985.\"\r\n}",
|
||||||
|
"options": {
|
||||||
|
"raw": {
|
||||||
|
"language": "json"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"url": {
|
||||||
|
"raw": "http://127.0.0.1:5000/writing_task1",
|
||||||
|
"protocol": "http",
|
||||||
|
"host": [
|
||||||
|
"127",
|
||||||
|
"0",
|
||||||
|
"0",
|
||||||
|
"1"
|
||||||
|
],
|
||||||
|
"port": "5000",
|
||||||
|
"path": [
|
||||||
|
"writing_task1"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"response": []
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "Grade Answer Writing Task 2",
|
"name": "Grade Answer Writing Task 2",
|
||||||
"request": {
|
"request": {
|
||||||
|
|||||||
Reference in New Issue
Block a user