Add logging to speaking grading.

This commit is contained in:
Cristiano Ferreira
2024-05-19 15:38:57 +01:00
parent 5f7fe23afd
commit c77f7178ae
2 changed files with 80 additions and 3 deletions

81
app.py
View File

@@ -365,15 +365,24 @@ def get_writing_task_2_general_question():
@app.route('/speaking_task_1', methods=['POST']) @app.route('/speaking_task_1', methods=['POST'])
@jwt_required() @jwt_required()
def grade_speaking_task_1(): def grade_speaking_task_1():
request_id = uuid.uuid4()
delete_files_older_than_one_day(AUDIO_FILES_PATH) delete_files_older_than_one_day(AUDIO_FILES_PATH)
sound_file_name = AUDIO_FILES_PATH + str(uuid.uuid4()) sound_file_name = AUDIO_FILES_PATH + str(uuid.uuid4())
logging.info("POST - speaking_task_1 - Received request to grade speaking task 1. "
"Use this id to track the logs: " + str(request_id))
try: try:
data = request.get_json() data = request.get_json()
question = data.get('question') question = data.get('question')
answer_firebase_path = data.get('answer') answer_firebase_path = data.get('answer')
logging.info("POST - speaking_task_1 - " + str(request_id) + " - Downloading file " + answer_firebase_path)
download_firebase_file(FIREBASE_BUCKET, answer_firebase_path, sound_file_name) download_firebase_file(FIREBASE_BUCKET, answer_firebase_path, sound_file_name)
logging.info("POST - speaking_task_1 - " + str(
request_id) + " - Downloaded file " + answer_firebase_path + " to " + sound_file_name)
answer = speech_to_text(sound_file_name) answer = speech_to_text(sound_file_name)
logging.info("POST - speaking_task_1 - " + str(request_id) + " - Transcripted answer: " + answer)
if has_x_words(answer, 20): if has_x_words(answer, 20):
message = ("Evaluate the given Speaking Part 1 response based on the IELTS grading system, ensuring a " message = ("Evaluate the given Speaking Part 1 response based on the IELTS grading system, ensuring a "
"strict assessment that penalizes errors. Deduct points for deviations from the task, and " "strict assessment that penalizes errors. Deduct points for deviations from the task, and "
@@ -384,26 +393,43 @@ def grade_speaking_task_1():
"'task_response': {'Fluency and Coherence': 0.0, 'Lexical Resource': 0.0, 'Grammatical Range " "'task_response': {'Fluency and Coherence': 0.0, 'Lexical Resource': 0.0, 'Grammatical Range "
"and Accuracy': 0.0, 'Pronunciation': 0.0}}\n Question: '" + question + "' \n Answer: '" + answer + "'") "and Accuracy': 0.0, 'Pronunciation': 0.0}}\n Question: '" + question + "' \n Answer: '" + answer + "'")
token_count = count_tokens(message)["n_tokens"] token_count = count_tokens(message)["n_tokens"]
logging.info("POST - speaking_task_1 - " + str(request_id) + " - Requesting grading of the answer.")
response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count, response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count,
["comment"], ["comment"],
GRADING_TEMPERATURE) GRADING_TEMPERATURE)
logging.info("POST - speaking_task_1 - " + str(request_id) + " - Answer graded: " + str(response))
perfect_answer_message = ("Provide a perfect answer according to ielts grading system to the following " perfect_answer_message = ("Provide a perfect answer according to ielts grading system to the following "
"Speaking Part 1 question: '" + question + "'") "Speaking Part 1 question: '" + question + "'")
token_count = count_tokens(perfect_answer_message)["n_tokens"] token_count = count_tokens(perfect_answer_message)["n_tokens"]
logging.info("POST - speaking_task_1 - " + str(request_id) + " - Requesting perfect answer.")
response['perfect_answer'] = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, response['perfect_answer'] = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT,
perfect_answer_message, perfect_answer_message,
token_count, token_count,
None, None,
GEN_QUESTION_TEMPERATURE) GEN_QUESTION_TEMPERATURE)
logging.info("POST - speaking_task_1 - " + str(
request_id) + " - Perfect answer: " + response['perfect_answer'])
response['transcript'] = answer response['transcript'] = answer
logging.info("POST - speaking_task_1 - " + str(request_id) + " - Requesting fixed_text.")
response['fixed_text'] = get_speaking_corrections(answer) response['fixed_text'] = get_speaking_corrections(answer)
logging.info("POST - speaking_task_1 - " + str(request_id) + " - Fixed text: " + response['fixed_text'])
if response["overall"] == "0.0" or response["overall"] == 0.0: if response["overall"] == "0.0" or response["overall"] == 0.0:
response["overall"] = round((response["task_response"]["Fluency and Coherence"] + response["overall"] = round((response["task_response"]["Fluency and Coherence"] +
response["task_response"]["Lexical Resource"] + response["task_response"][ response["task_response"]["Lexical Resource"] + response["task_response"][
"Grammatical Range and Accuracy"] + response["task_response"][ "Grammatical Range and Accuracy"] + response["task_response"][
"Pronunciation"]) / 4, 1) "Pronunciation"]) / 4, 1)
logging.info("POST - speaking_task_1 - " + str(request_id) + " - Final response: " + str(response))
return response return response
else: else:
logging.info("POST - speaking_task_1 - " + str(
request_id) + " - The answer had less words than threshold 20 to be graded. Answer: " + answer)
return { return {
"comment": "The audio recorded does not contain enough english words to be graded.", "comment": "The audio recorded does not contain enough english words to be graded.",
"overall": 0, "overall": 0,
@@ -444,15 +470,24 @@ def get_speaking_task_1_question():
@app.route('/speaking_task_2', methods=['POST']) @app.route('/speaking_task_2', methods=['POST'])
@jwt_required() @jwt_required()
def grade_speaking_task_2(): def grade_speaking_task_2():
request_id = uuid.uuid4()
delete_files_older_than_one_day(AUDIO_FILES_PATH) delete_files_older_than_one_day(AUDIO_FILES_PATH)
sound_file_name = AUDIO_FILES_PATH + str(uuid.uuid4()) sound_file_name = AUDIO_FILES_PATH + str(uuid.uuid4())
logging.info("POST - speaking_task_2 - Received request to grade speaking task 2. "
"Use this id to track the logs: " + str(request_id))
try: try:
data = request.get_json() data = request.get_json()
question = data.get('question') question = data.get('question')
answer_firebase_path = data.get('answer') answer_firebase_path = data.get('answer')
logging.info("POST - speaking_task_2 - " + str(request_id) + " - Downloading file " + answer_firebase_path)
download_firebase_file(FIREBASE_BUCKET, answer_firebase_path, sound_file_name) download_firebase_file(FIREBASE_BUCKET, answer_firebase_path, sound_file_name)
logging.info("POST - speaking_task_2 - " + str(
request_id) + " - Downloaded file " + answer_firebase_path + " to " + sound_file_name)
answer = speech_to_text(sound_file_name) answer = speech_to_text(sound_file_name)
logging.info("POST - speaking_task_2 - " + str(request_id) + " - Transcripted answer: " + answer)
if has_x_words(answer, 20): if has_x_words(answer, 20):
message = ("Evaluate the given Speaking Part 2 response based on the IELTS grading system, ensuring a " message = ("Evaluate the given Speaking Part 2 response based on the IELTS grading system, ensuring a "
"strict assessment that penalizes errors. Deduct points for deviations from the task, and " "strict assessment that penalizes errors. Deduct points for deviations from the task, and "
@@ -464,27 +499,43 @@ def grade_speaking_task_2():
"and Accuracy': 0.0, " "and Accuracy': 0.0, "
"'Pronunciation': 0.0}}\n Question: '" + question + "' \n Answer: '" + answer + "'") "'Pronunciation': 0.0}}\n Question: '" + question + "' \n Answer: '" + answer + "'")
token_count = count_tokens(message)["n_tokens"] token_count = count_tokens(message)["n_tokens"]
logging.info("POST - speaking_task_2 - " + str(request_id) + " - Requesting grading of the answer.")
response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count, response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count,
["comment"], ["comment"],
GRADING_TEMPERATURE) GRADING_TEMPERATURE)
logging.info("POST - speaking_task_2 - " + str(request_id) + " - Answer graded: " + str(response))
perfect_answer_message = ("Provide a perfect answer according to ielts grading system to the following " perfect_answer_message = ("Provide a perfect answer according to ielts grading system to the following "
"Speaking Part 2 question: '" + question + "'") "Speaking Part 2 question: '" + question + "'")
token_count = count_tokens(perfect_answer_message)["n_tokens"] token_count = count_tokens(perfect_answer_message)["n_tokens"]
logging.info("POST - speaking_task_2 - " + str(request_id) + " - Requesting perfect answer.")
response['perfect_answer'] = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, response['perfect_answer'] = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT,
perfect_answer_message, perfect_answer_message,
token_count, token_count,
None, None,
GEN_QUESTION_TEMPERATURE) GEN_QUESTION_TEMPERATURE)
logging.info("POST - speaking_task_2 - " + str(
request_id) + " - Perfect answer: " + response['perfect_answer'])
response['transcript'] = answer response['transcript'] = answer
logging.info("POST - speaking_task_2 - " + str(request_id) + " - Requesting fixed_text.")
response['fixed_text'] = get_speaking_corrections(answer) response['fixed_text'] = get_speaking_corrections(answer)
logging.info("POST - speaking_task_2 - " + str(request_id) + " - Fixed text: " + response['fixed_text'])
if response["overall"] == "0.0" or response["overall"] == 0.0: if response["overall"] == "0.0" or response["overall"] == 0.0:
response["overall"] = round((response["task_response"]["Fluency and Coherence"] + response["overall"] = round((response["task_response"]["Fluency and Coherence"] +
response["task_response"]["Lexical Resource"] + response["task_response"][ response["task_response"]["Lexical Resource"] + response["task_response"][
"Grammatical Range and Accuracy"] + response["task_response"][ "Grammatical Range and Accuracy"] + response["task_response"][
"Pronunciation"]) / 4, 1) "Pronunciation"]) / 4, 1)
logging.info("POST - speaking_task_2 - " + str(request_id) + " - Final response: " + str(response))
return response return response
else: else:
logging.info("POST - speaking_task_2 - " + str(
request_id) + " - The answer had less words than threshold 20 to be graded. Answer: " + answer)
return { return {
"comment": "The audio recorded does not contain enough english words to be graded.", "comment": "The audio recorded does not contain enough english words to be graded.",
"overall": 0, "overall": 0,
@@ -552,20 +603,34 @@ def get_speaking_task_3_question():
@app.route('/speaking_task_3', methods=['POST']) @app.route('/speaking_task_3', methods=['POST'])
@jwt_required() @jwt_required()
def grade_speaking_task_3(): def grade_speaking_task_3():
request_id = uuid.uuid4()
delete_files_older_than_one_day(AUDIO_FILES_PATH) delete_files_older_than_one_day(AUDIO_FILES_PATH)
logging.info("POST - speaking_task_3 - Received request to grade speaking task 3. "
"Use this id to track the logs: " + str(request_id))
try: try:
data = request.get_json() data = request.get_json()
answers = data.get('answers') answers = data.get('answers')
text_answers = [] text_answers = []
perfect_answers = [] perfect_answers = []
logging.info("POST - speaking_task_3 - " + str(
request_id) + " - Received " + str(len(answers)) + " total answers.")
for item in answers: for item in answers:
sound_file_name = AUDIO_FILES_PATH + str(uuid.uuid4()) sound_file_name = AUDIO_FILES_PATH + str(uuid.uuid4())
logging.info("POST - speaking_task_3 - " + str(request_id) + " - Downloading file " + item["answer"])
download_firebase_file(FIREBASE_BUCKET, item["answer"], sound_file_name) download_firebase_file(FIREBASE_BUCKET, item["answer"], sound_file_name)
logging.info("POST - speaking_task_3 - " + str(
request_id) + " - Downloaded file " + item["answer"] + " to " + sound_file_name)
answer_text = speech_to_text(sound_file_name) answer_text = speech_to_text(sound_file_name)
logging.info("POST - speaking_task_3 - " + str(request_id) + " - Transcripted answer: " + answer_text)
text_answers.append(answer_text) text_answers.append(answer_text)
item["answer"] = answer_text item["answer"] = answer_text
os.remove(sound_file_name) os.remove(sound_file_name)
if not has_x_words(answer_text, 20): if not has_x_words(answer_text, 20):
logging.info("POST - speaking_task_3 - " + str(
request_id) + " - The answer had less words than threshold 20 to be graded. Answer: " + answer_text)
return { return {
"comment": "The audio recorded does not contain enough english words to be graded.", "comment": "The audio recorded does not contain enough english words to be graded.",
"overall": 0, "overall": 0,
@@ -579,22 +644,27 @@ def grade_speaking_task_3():
perfect_answer_message = ("Provide a perfect answer according to ielts grading system to the following " perfect_answer_message = ("Provide a perfect answer according to ielts grading system to the following "
"Speaking Part 3 question: '" + item["question"] + "'") "Speaking Part 3 question: '" + item["question"] + "'")
token_count = count_tokens(perfect_answer_message)["n_tokens"] token_count = count_tokens(perfect_answer_message)["n_tokens"]
logging.info("POST - speaking_task_3 - " + str(
request_id) + " - Requesting perfect answer for question: " + item["question"])
perfect_answers.append(make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, perfect_answers.append(make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT,
perfect_answer_message, perfect_answer_message,
token_count, token_count,
None, None,
GEN_QUESTION_TEMPERATURE)) GEN_QUESTION_TEMPERATURE))
message = ( message = (
"Evaluate the given Speaking Part 2 response based on the IELTS grading system, ensuring a " "Evaluate the given Speaking Part 3 response based on the IELTS grading system, ensuring a "
"strict assessment that penalizes errors. Deduct points for deviations from the task, and " "strict assessment that penalizes errors. Deduct points for deviations from the task, and "
"assign a score of 0 if the response fails to address the question. Additionally, provide detailed " "assign a score of 0 if the response fails to address the question. Additionally, provide detailed "
"commentary highlighting both strengths and weaknesses in the response." "commentary highlighting both strengths and weaknesses in the response."
"\n\n The questions and answers are: \n\n'") "\n\n The questions and answers are: \n\n'")
logging.info("POST - speaking_task_3 - " + str(request_id) + " - Formatting answers and questions for prompt.")
formatted_text = "" formatted_text = ""
for i, entry in enumerate(answers, start=1): for i, entry in enumerate(answers, start=1):
formatted_text += f"**Question {i}:**\n{entry['question']}\n\n" formatted_text += f"**Question {i}:**\n{entry['question']}\n\n"
formatted_text += f"**Answer {i}:**\n{entry['answer']}\n\n" formatted_text += f"**Answer {i}:**\n{entry['answer']}\n\n"
logging.info("POST - speaking_task_3 - " + str(
request_id) + " - Formatted answers and questions for prompt: " + formatted_text)
message += formatted_text message += formatted_text
message += ( message += (
@@ -603,12 +673,19 @@ def grade_speaking_task_3():
"'Grammatical Range and Accuracy': 0.0, 'Pronunciation': 0.0}}") "'Grammatical Range and Accuracy': 0.0, 'Pronunciation': 0.0}}")
token_count = count_tokens(message)["n_tokens"] token_count = count_tokens(message)["n_tokens"]
logging.info("POST - speaking_task_3 - " + str(request_id) + " - Requesting grading of the answers.")
response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count, response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count,
["comment"], ["comment"],
GRADING_TEMPERATURE) GRADING_TEMPERATURE)
logging.info("POST - speaking_task_3 - " + str(request_id) + " - Answers graded: " + str(response))
logging.info("POST - speaking_task_3 - " + str(request_id) + " - Adding perfect answers to response.")
for i, answer in enumerate(perfect_answers, start=1): for i, answer in enumerate(perfect_answers, start=1):
response['perfect_answer_' + str(i)] = answer response['perfect_answer_' + str(i)] = answer
logging.info("POST - speaking_task_3 - " + str(
request_id) + " - Adding transcript and fixed texts to response.")
for i, answer in enumerate(text_answers, start=1): for i, answer in enumerate(text_answers, start=1):
response['transcript_' + str(i)] = answer response['transcript_' + str(i)] = answer
response['fixed_text_' + str(i)] = get_speaking_corrections(answer) response['fixed_text_' + str(i)] = get_speaking_corrections(answer)
@@ -616,7 +693,7 @@ def grade_speaking_task_3():
response["overall"] = round((response["task_response"]["Fluency and Coherence"] + response["task_response"][ response["overall"] = round((response["task_response"]["Fluency and Coherence"] + response["task_response"][
"Lexical Resource"] + response["task_response"]["Grammatical Range and Accuracy"] + "Lexical Resource"] + response["task_response"]["Grammatical Range and Accuracy"] +
response["task_response"]["Pronunciation"]) / 4, 1) response["task_response"]["Pronunciation"]) / 4, 1)
logging.info("POST - speaking_task_3 - " + str(request_id) + " - Final response: " + str(response))
return response return response
except Exception as e: except Exception as e:
return str(e), 400 return str(e), 400

View File

@@ -10,7 +10,7 @@ def download_firebase_file(bucket_name, source_blob_name, destination_file_name)
bucket = storage_client.bucket(bucket_name) bucket = storage_client.bucket(bucket_name)
blob = bucket.blob(source_blob_name) blob = bucket.blob(source_blob_name)
blob.download_to_filename(destination_file_name) blob.download_to_filename(destination_file_name)
logging.info(f"File uploaded to {destination_file_name}") logging.info(f"File downloaded to {destination_file_name}")
return destination_file_name return destination_file_name