diff --git a/app.py b/app.py index 20d0b0e..20de726 100644 --- a/app.py +++ b/app.py @@ -239,17 +239,27 @@ def grade_writing_task_1(): } } else: - message = ("Evaluate the given Writing Task 1 response based on the IELTS grading system, ensuring a " - "strict assessment that penalizes errors. Deduct points for deviations from the task, and " - "assign a score of 0 if the response fails to address the question. Additionally, provide an " - "exemplary answer with a minimum of 150 words, along with a detailed commentary highlighting " - "both strengths and weaknesses in the response. Present your evaluation in JSON format with " - "the following structure: {'perfect_answer': 'example perfect answer', 'comment': " - "'comment about answer quality', 'overall': 0.0, 'task_response': {'Task Achievement': 0.0, " - "'Coherence and Cohesion': 0.0, 'Lexical Resource': 0.0, 'Grammatical Range and Accuracy': " - "0.0}}\n Question: '" + question + "' \n Answer: '" + answer + "'") - token_count = count_tokens(message)["n_tokens"] - response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count, + messages = [ + { + "role": "system", + "content": ('You are a helpful assistant designed to output JSON on this format: ' + '{"perfect_answer": "example perfect answer", "comment": ' + '"comment about answer quality", "overall": 0.0, "task_response": ' + '{"Task Achievement": 0.0, "Coherence and Cohesion": 0.0, ' + '"Lexical Resource": 0.0, "Grammatical Range and Accuracy": 0.0}}') + }, + { + "role": "user", + "content": ('Evaluate the given Writing Task 1 response based on the IELTS grading system, ' + 'ensuring a strict assessment that penalizes errors. Deduct points for deviations ' + 'from the task, and assign a score of 0 if the response fails to address the question. ' + 'Additionally, provide an exemplary answer with a minimum of 150 words, along with a ' + 'detailed commentary highlighting both strengths and weaknesses in the response. 
' + '\n Question: "' + question + '" \n Answer: "' + answer + '"') + } + ] + token_count = count_total_tokens(messages) + response = make_openai_call(GPT_3_5_TURBO, messages, token_count, ["comment"], GRADING_TEMPERATURE) response["overall"] = fix_writing_overall(response["overall"], response["task_response"]) @@ -265,16 +275,29 @@ def get_writing_task_1_general_question(): difficulty = request.args.get("difficulty", default=random.choice(difficulties)) topic = request.args.get("topic", default=random.choice(mti_topics)) try: - gen_wt1_question = "Craft a prompt for an IELTS Writing Task 1 General Training exercise that instructs the " \ - "student to compose a letter. The prompt should present a specific scenario or situation, " \ - "based on the topic of '" + topic + "', " \ - "requiring the student to provide information, advice, or instructions within the letter. " \ - "Make sure that the generated prompt is of " + difficulty + " difficulty and does not contain forbidden subjects in muslim countries." - token_count = count_tokens(gen_wt1_question)["n_tokens"] - response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_wt1_question, token_count, None, + messages = [ + { + "role": "system", + "content": ('You are a helpful assistant designed to output JSON on this format: ' + '{"prompt": "prompt content"}') + }, + { + "role": "user", + "content": ('Craft a prompt for an IELTS Writing Task 1 General Training exercise that instructs the ' + 'student to compose a letter. The prompt should present a specific scenario or situation, ' + 'based on the topic of "' + topic + '", requiring the student to provide information, ' + 'advice, or instructions within the letter. 
' 'Make sure that the generated prompt is ' 'of ' + difficulty + ' difficulty and does not contain ' 'forbidden subjects in muslim ' 'countries.') } ] token_count = count_total_tokens(messages) response = make_openai_call(GPT_3_5_TURBO, messages, token_count, "prompt", GEN_QUESTION_TEMPERATURE) return { - "question": response.strip(), + "question": response["prompt"].strip(), "difficulty": difficulty, "topic": topic } @@ -312,18 +335,27 @@ def grade_writing_task_2(): } } else: - message = ("Evaluate the given Writing Task 2 response based on the IELTS grading system, ensuring a " - "strict assessment that penalizes errors. Deduct points for deviations from the task, and " - "assign a score of 0 if the response fails to address the question. Additionally, provide an " - "exemplary answer with a minimum of 250 words, along with a detailed commentary highlighting " - "both strengths and weaknesses in the response. Present your evaluation in JSON format with " - "the following structure: {'perfect_answer': 'example perfect answer', 'comment': " - "'comment about answer quality', 'overall': 0.0, 'task_response': {'Task Achievement': 0.0, " - "'Coherence and Cohesion': 0.0, 'Lexical Resource': 0.0, 'Grammatical Range and Accuracy': " - "0.0}}\n Question: '" + question + "' \n Answer: '" + answer + "'") - token_count = count_tokens(message)["n_tokens"] - response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count, - ["comment"], + messages = [ + { + "role": "system", + "content": ('You are a helpful assistant designed to output JSON on this format: ' + '{"perfect_answer": "example perfect answer", "comment": ' + '"comment about answer quality", "overall": 0.0, "task_response": ' + '{"Task Achievement": 0.0, "Coherence and Cohesion": 0.0, ' + '"Lexical Resource": 0.0, "Grammatical Range and Accuracy": 0.0}}') + }, + { + "role": "user", + "content": ('Evaluate the given Writing Task 2 response based on the IELTS grading system, ensuring 
a ' + 'strict assessment that penalizes errors. Deduct points for deviations from the task, and ' + 'assign a score of 0 if the response fails to address the question. Additionally, provide an ' + 'exemplary answer with a minimum of 250 words, along with a detailed commentary highlighting ' + 'both strengths and weaknesses in the response.' + '\n Question: "' + question + '" \n Answer: "' + answer + '"') + } + ] + token_count = count_total_tokens(messages) + response = make_openai_call(GPT_4_O, messages, token_count, ["comment"], GEN_QUESTION_TEMPERATURE) response["overall"] = fix_writing_overall(response["overall"], response["task_response"]) response['fixed_text'] = get_fixed_text(answer) @@ -345,16 +377,24 @@ def get_writing_task_2_general_question(): difficulty = request.args.get("difficulty", default=random.choice(difficulties)) topic = request.args.get("topic", default=random.choice(mti_topics)) try: - gen_wt2_question = "Craft a comprehensive question of " + difficulty + " difficulty for IELTS Writing Task 2 General Training that directs the candidate " \ - "to delve into an in-depth analysis of contrasting perspectives on the topic of '" + topic + "'. The candidate " \ - "should be asked to discuss the strengths and weaknesses of both viewpoints, provide evidence or " \ - "examples, and present a well-rounded argument before concluding with their personal opinion on the " \ - "subject." 
- token_count = count_tokens(gen_wt2_question)["n_tokens"] - response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_wt2_question, token_count, None, - GEN_QUESTION_TEMPERATURE) + messages = [ + { + "role": "system", + "content": ('You are a helpful assistant designed to output JSON on this format: ' + '{"prompt": "prompt content"}') + }, + { + "role": "user", + "content": ('Craft a comprehensive question of ' + difficulty + ' difficulty like the ones for IELTS Writing Task 2 General Training that directs the candidate ' + 'to delve into an in-depth analysis of contrasting perspectives on the topic of "' + topic + '". ' + 'The candidate should be asked to discuss the strengths and weaknesses of both viewpoints, provide evidence or ' + 'examples, and present a well-rounded argument before concluding with their personal opinion on the subject.') + } + ] + token_count = count_total_tokens(messages) + response = make_openai_call(GPT_4_O, messages, token_count, "prompt", GEN_QUESTION_TEMPERATURE) return { - "question": response.strip(), + "question": response["prompt"].strip(), "difficulty": difficulty, "topic": topic } @@ -384,32 +424,50 @@ def grade_speaking_task_1(): logging.info("POST - speaking_task_1 - " + str(request_id) + " - Transcripted answer: " + answer) if has_x_words(answer, 20): - message = ("Evaluate the given Speaking Part 1 response based on the IELTS grading system, ensuring a " - "strict assessment that penalizes errors. Deduct points for deviations from the task, and " - "assign a score of 0 if the response fails to address the question. Additionally, provide " - "detailed commentary highlighting both strengths and weaknesses in the response. 
Present your " - "evaluation in JSON format with " - "the following structure: {'comment': 'comment about answer quality', 'overall': 0.0, " - "'task_response': {'Fluency and Coherence': 0.0, 'Lexical Resource': 0.0, 'Grammatical Range " - "and Accuracy': 0.0, 'Pronunciation': 0.0}}\n Question: '" + question + "' \n Answer: '" + answer + "'") - token_count = count_tokens(message)["n_tokens"] - + messages = [ + { + "role": "system", + "content": ('You are a helpful assistant designed to output JSON on this format: ' + '{"comment": "comment about answer quality", "overall": 0.0, ' + '"task_response": {"Fluency and Coherence": 0.0, "Lexical Resource": 0.0, ' + '"Grammatical Range and Accuracy": 0.0, "Pronunciation": 0.0}}') + }, + { + "role": "user", + "content": ('Evaluate the given Speaking Part 1 response based on the IELTS grading system, ensuring a ' + 'strict assessment that penalizes errors. Deduct points for deviations from the task, and ' + 'assign a score of 0 if the response fails to address the question. Additionally, provide ' + 'detailed commentary highlighting both strengths and weaknesses in the response.' 
+ '\n Question: "'+ question + '" \n Answer: "'+ answer + '"') + } + ] + token_count = count_total_tokens(messages) logging.info("POST - speaking_task_1 - " + str(request_id) + " - Requesting grading of the answer.") - response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count, - ["comment"], + response = make_openai_call(GPT_3_5_TURBO, messages, token_count,["comment"], GRADING_TEMPERATURE) logging.info("POST - speaking_task_1 - " + str(request_id) + " - Answer graded: " + str(response)) - perfect_answer_message = ("Provide a perfect answer according to ielts grading system to the following " - "Speaking Part 1 question: '" + question + "'") - token_count = count_tokens(perfect_answer_message)["n_tokens"] + perfect_answer_messages = [ + { + "role": "system", + "content": ('You are a helpful assistant designed to output JSON on this format: ' + '{"answer": "perfect answer"}') + }, + { + "role": "user", + "content": ( + 'Provide a perfect answer according to ielts grading system to the following ' + 'Speaking Part 1 question: "' + question + '"') + } + ] + token_count = count_total_tokens(perfect_answer_messages) logging.info("POST - speaking_task_1 - " + str(request_id) + " - Requesting perfect answer.") - response['perfect_answer'] = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, - perfect_answer_message, - token_count, - None, - GEN_QUESTION_TEMPERATURE) + response['perfect_answer'] = make_openai_call(GPT_3_5_TURBO, + perfect_answer_messages, + token_count, + None, + GEN_QUESTION_TEMPERATURE)["answer"] logging.info("POST - speaking_task_1 - " + str( request_id) + " - Perfect answer: " + response['perfect_answer']) diff --git a/helper/constants.py b/helper/constants.py index 8c16c38..7e9b9b2 100644 --- a/helper/constants.py +++ b/helper/constants.py @@ -7,6 +7,8 @@ GRADING_TEMPERATURE = 0.1 TIPS_TEMPERATURE = 0.2 GEN_QUESTION_TEMPERATURE = 0.7 GPT_3_5_TURBO = "gpt-3.5-turbo" +GPT_4_TURBO = "gpt-4-turbo" +GPT_4_O = "gpt-4o" 
GPT_3_5_TURBO_16K = "gpt-3.5-turbo-16k" GPT_3_5_TURBO_INSTRUCT = "gpt-3.5-turbo-instruct" GPT_4_PREVIEW = "gpt-4-turbo-preview" diff --git a/helper/openai_interface.py b/helper/openai_interface.py index 8f0ece3..75d0609 100644 --- a/helper/openai_interface.py +++ b/helper/openai_interface.py @@ -1,15 +1,14 @@ import json import os -import re -import openai +from openai import OpenAI from dotenv import load_dotenv -from helper.constants import GPT_3_5_TURBO_INSTRUCT, BLACKLISTED_WORDS +from helper.constants import BLACKLISTED_WORDS, GPT_3_5_TURBO from helper.token_counter import count_tokens load_dotenv() -openai.api_key = os.getenv("OPENAI_API_KEY") +client = OpenAI(api_key=os.getenv('OPENAI_API_KEY')) MAX_TOKENS = 4097 TOP_P = 0.9 @@ -50,105 +49,20 @@ tools = [{ }] -### - -def process_response(input_string, quotation_check_field): - if '{' in input_string: - try: - # Find the index of the first occurrence of '{' - index = input_string.index('{') - # Extract everything after the first '{' (inclusive) - result = input_string[index:] - if re.search(r"'" + quotation_check_field + "':\s*'(.*?)'", result, re.DOTALL | re.MULTILINE) or \ - re.search(r"'" + quotation_check_field + "':\s*\[([^\]]+)]", result, re.DOTALL | re.MULTILINE): - json_obj = json.loads(parse_string(result)) - return json_obj - else: - if "title" in result: - parsed_string = result.replace("\n\n", "\n") - parsed_string = parsed_string.replace("\n", "**paragraph**") - else: - parsed_string = result.replace("\n\n", " ") - parsed_string = parsed_string.replace("\n", " ") - parsed_string = re.sub(r',\s*]', ']', parsed_string) - parsed_string = re.sub(r',\s*}', '}', parsed_string) - if (parsed_string.find('[') == -1) and (parsed_string.find(']') == -1): - parsed_string = parse_string_2(parsed_string) - return json.loads(parsed_string) - - return json.loads(parsed_string) - except Exception as e: - print(f"Invalid JSON string! 
Exception: {e}") - print(f"String: {input_string}") - print(f"Exception: {e}") - else: - return input_string - - -def parse_string(to_parse: str): - parsed_string = to_parse.replace("\"", "\\\"") - pattern = r"(?= TRY_LIMIT: try_count = 0 - return result + return json.loads(result) else: try_count = 0 - return processed_response + return json.loads(result) def make_openai_instruct_call(model, message: str, token_count, fields_to_check, temperature): global try_count - response = openai.Completion.create( - model=model, - prompt=message, - max_tokens=int(4097 - token_count - 300), - temperature=0.7 - )["choices"][0]["text"] - - if has_blacklisted_words(response) and try_count < TRY_LIMIT: - try_count = try_count + 1 - return make_openai_instruct_call(model, message, token_count, fields_to_check, temperature) - elif has_blacklisted_words(response) and try_count >= TRY_LIMIT: - try_count = 0 - return "" - - if fields_to_check is None: - try_count = 0 - return response.replace("\n\n", " ").strip() - - response = remove_special_characters_from_beginning(response) - if response[0] != "{" and response[0] != '"': - response = "{\"" + response - if not response.endswith("}"): - response = response + "}" - try: - processed_response = process_response(response, fields_to_check[0]) - reparagraphed_response = replace_expression_in_object(processed_response, "**paragraph**", "\n") - if check_fields(reparagraphed_response, fields_to_check) is False and try_count < TRY_LIMIT: - try_count = try_count + 1 - return make_openai_instruct_call(model, message, token_count, fields_to_check, temperature) - else: - try_count = 0 - return reparagraphed_response - except Exception as e: - return make_openai_instruct_call(model, message, token_count, fields_to_check, temperature) + return "" # GRADING SUMMARY @@ -254,7 +133,7 @@ def calculate_section_grade_summary(section): messages[2:2] = [{"role": "user", "content": "This section is s designed to assess the English language proficiency of 
individuals who want to study or work in English-speaking countries. The speaking section evaluates a candidate's ability to communicate effectively in spoken English."}] - res = openai.ChatCompletion.create( + res = client.chat.completions.create( model="gpt-3.5-turbo", max_tokens=chat_config['max_tokens'], temperature=chat_config['temperature'], @@ -298,20 +177,32 @@ def parse_bullet_points(bullet_points_str, grade): def get_fixed_text(text): - message = ('Fix the errors in the given text and put it in a JSON. Do not complete the answer, only replace what ' - 'is wrong. Sample JSON: {"fixed_text": "fixed test with no ' - 'misspelling errors"}] \n The text: "' + text + '"') - token_count = count_tokens(message)["n_tokens"] - response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count, ["fixed_text"], 0.2) + messages = [ + {"role": "system", "content": ('You are a helpful assistant designed to output JSON on this format: ' + '{"fixed_text": "fixed text with no misspelling errors"}') + }, + {"role": "user", "content": ( + 'Fix the errors in the given text and put it in a JSON. Do not complete the answer, only replace what ' + 'is wrong. \n The text: "' + text + '"') + } + ] + token_count = count_total_tokens(messages) + response = make_openai_call(GPT_3_5_TURBO, messages, token_count, ["fixed_text"], 0.2) return response["fixed_text"] def get_speaking_corrections(text): - message = ('Fix the errors in the provided transcription and put it in a JSON. Do not complete the answer, only ' - 'replace what is wrong. 
Sample JSON: {"fixed_text": "fixed ' - 'transcription with no misspelling errors"}] \n The text: "' + text + '"') - token_count = count_tokens(message)["n_tokens"] - response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count, ["fixed_text"], 0.2) + messages = [ + {"role": "system", "content": ('You are a helpful assistant designed to output JSON on this format: ' + '{"fixed_text": "fixed transcription with no misspelling errors"}') + }, + {"role": "user", "content": ( + 'Fix the errors in the provided transcription and put it in a JSON. Do not complete the answer, only ' + 'replace what is wrong. \n The text: "' + text + '"') + } + ] + token_count = count_total_tokens(messages) + response = make_openai_call(GPT_3_5_TURBO, messages, token_count, ["fixed_text"], 0.2) return response["fixed_text"] @@ -340,3 +231,9 @@ def replace_expression_in_object(obj, expression, replacement): elif isinstance(obj[key], dict): obj[key] = replace_expression_in_object(obj[key], expression, replacement) return obj + +def count_total_tokens(messages): + total_tokens = 0 + for message in messages: + total_tokens += count_tokens(message["content"])["n_tokens"] + return total_tokens diff --git a/requirements.txt b/requirements.txt index 58bd20d..e1acaf9 100644 Binary files a/requirements.txt and b/requirements.txt differ