Initial updates to most recent openai api version.

This commit is contained in:
Cristiano Ferreira
2024-05-19 14:37:50 +01:00
parent 070e8808b1
commit e568aff4e4
4 changed files with 162 additions and 205 deletions

178
app.py
View File

@@ -239,17 +239,27 @@ def grade_writing_task_1():
} }
} }
else: else:
message = ("Evaluate the given Writing Task 1 response based on the IELTS grading system, ensuring a " messages = [
"strict assessment that penalizes errors. Deduct points for deviations from the task, and " {
"assign a score of 0 if the response fails to address the question. Additionally, provide an " "role": "system",
"exemplary answer with a minimum of 150 words, along with a detailed commentary highlighting " "content": ('You are a helpful assistant designed to output JSON on this format: '
"both strengths and weaknesses in the response. Present your evaluation in JSON format with " '{"perfect_answer": "example perfect answer", "comment": '
"the following structure: {'perfect_answer': 'example perfect answer', 'comment': " '"comment about answer quality", "overall": 0.0, "task_response": '
"'comment about answer quality', 'overall': 0.0, 'task_response': {'Task Achievement': 0.0, " '{"Task Achievement": 0.0, "Coherence and Cohesion": 0.0, '
"'Coherence and Cohesion': 0.0, 'Lexical Resource': 0.0, 'Grammatical Range and Accuracy': " '"Lexical Resource": 0.0, "Grammatical Range and Accuracy": 0.0 }')
"0.0}}\n Question: '" + question + "' \n Answer: '" + answer + "'") },
token_count = count_tokens(message)["n_tokens"] {
response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count, "role": "user",
"content": ('Evaluate the given Writing Task 1 response based on the IELTS grading system, '
'ensuring a strict assessment that penalizes errors. Deduct points for deviations '
'from the task, and assign a score of 0 if the response fails to address the question. '
'Additionally, provide an exemplary answer with a minimum of 150 words, along with a '
'detailed commentary highlighting both strengths and weaknesses in the response. '
'\n Question: "' + question + '" \n Answer: "' + answer + '"')
}
]
token_count = count_total_tokens(messages)
response = make_openai_call(GPT_3_5_TURBO, messages, token_count,
["comment"], ["comment"],
GRADING_TEMPERATURE) GRADING_TEMPERATURE)
response["overall"] = fix_writing_overall(response["overall"], response["task_response"]) response["overall"] = fix_writing_overall(response["overall"], response["task_response"])
@@ -265,16 +275,29 @@ def get_writing_task_1_general_question():
difficulty = request.args.get("difficulty", default=random.choice(difficulties)) difficulty = request.args.get("difficulty", default=random.choice(difficulties))
topic = request.args.get("topic", default=random.choice(mti_topics)) topic = request.args.get("topic", default=random.choice(mti_topics))
try: try:
gen_wt1_question = "Craft a prompt for an IELTS Writing Task 1 General Training exercise that instructs the " \ messages = [
"student to compose a letter. The prompt should present a specific scenario or situation, " \ {
"based on the topic of '" + topic + "', " \ "role": "system",
"requiring the student to provide information, advice, or instructions within the letter. " \ "content": ('You are a helpful assistant designed to output JSON on this format: '
"Make sure that the generated prompt is of " + difficulty + " difficulty and does not contain forbidden subjects in muslim countries." '{"prompt": "prompt content"}')
token_count = count_tokens(gen_wt1_question)["n_tokens"] },
response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_wt1_question, token_count, None, {
"role": "user",
"content": ('Craft a prompt for an IELTS Writing Task 1 General Training exercise that instructs the '
'student to compose a letter. The prompt should present a specific scenario or situation, '
'based on the topic of "' + topic + '", requiring the student to provide information, '
'advice, or instructions within the letter. '
'Make sure that the generated prompt is '
'of ' + difficulty + 'difficulty and does not contain '
'forbidden subjects in muslim '
'countries.')
}
]
token_count = count_total_tokens(messages)
response = make_openai_call(GPT_3_5_TURBO, messages, token_count, "prompt",
GEN_QUESTION_TEMPERATURE) GEN_QUESTION_TEMPERATURE)
return { return {
"question": response.strip(), "question": response["prompt"].strip(),
"difficulty": difficulty, "difficulty": difficulty,
"topic": topic "topic": topic
} }
@@ -312,18 +335,27 @@ def grade_writing_task_2():
} }
} }
else: else:
message = ("Evaluate the given Writing Task 2 response based on the IELTS grading system, ensuring a " messages = [
"strict assessment that penalizes errors. Deduct points for deviations from the task, and " {
"assign a score of 0 if the response fails to address the question. Additionally, provide an " "role": "system",
"exemplary answer with a minimum of 250 words, along with a detailed commentary highlighting " "content": ('You are a helpful assistant designed to output JSON on this format: '
"both strengths and weaknesses in the response. Present your evaluation in JSON format with " '{"perfect_answer": "example perfect answer", "comment": '
"the following structure: {'perfect_answer': 'example perfect answer', 'comment': " '"comment about answer quality", "overall": 0.0, "task_response": '
"'comment about answer quality', 'overall': 0.0, 'task_response': {'Task Achievement': 0.0, " '{"Task Achievement": 0.0, "Coherence and Cohesion": 0.0, '
"'Coherence and Cohesion': 0.0, 'Lexical Resource': 0.0, 'Grammatical Range and Accuracy': " '"Lexical Resource": 0.0, "Grammatical Range and Accuracy": 0.0 }')
"0.0}}\n Question: '" + question + "' \n Answer: '" + answer + "'") },
token_count = count_tokens(message)["n_tokens"] {
response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count, "role": "user",
["comment"], "content": ('Evaluate the given Writing Task 2 response based on the IELTS grading system, ensuring a '
'strict assessment that penalizes errors. Deduct points for deviations from the task, and '
'assign a score of 0 if the response fails to address the question. Additionally, provide an '
'exemplary answer with a minimum of 250 words, along with a detailed commentary highlighting '
'both strengths and weaknesses in the response.'
'\n Question: "' + question + '" \n Answer: "' + answer + '"')
}
]
token_count = count_total_tokens(messages)
response = make_openai_call(GPT_4_O, messages, token_count, ["comment"],
GEN_QUESTION_TEMPERATURE) GEN_QUESTION_TEMPERATURE)
response["overall"] = fix_writing_overall(response["overall"], response["task_response"]) response["overall"] = fix_writing_overall(response["overall"], response["task_response"])
response['fixed_text'] = get_fixed_text(answer) response['fixed_text'] = get_fixed_text(answer)
@@ -345,16 +377,24 @@ def get_writing_task_2_general_question():
difficulty = request.args.get("difficulty", default=random.choice(difficulties)) difficulty = request.args.get("difficulty", default=random.choice(difficulties))
topic = request.args.get("topic", default=random.choice(mti_topics)) topic = request.args.get("topic", default=random.choice(mti_topics))
try: try:
gen_wt2_question = "Craft a comprehensive question of " + difficulty + " difficulty for IELTS Writing Task 2 General Training that directs the candidate " \ messages = [
"to delve into an in-depth analysis of contrasting perspectives on the topic of '" + topic + "'. The candidate " \ {
"should be asked to discuss the strengths and weaknesses of both viewpoints, provide evidence or " \ "role": "system",
"examples, and present a well-rounded argument before concluding with their personal opinion on the " \ "content": ('You are a helpful assistant designed to output JSON on this format: '
"subject." '{"prompt": "prompt content"}')
token_count = count_tokens(gen_wt2_question)["n_tokens"] },
response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_wt2_question, token_count, None, {
GEN_QUESTION_TEMPERATURE) "role": "user",
"content": ('Craft a comprehensive question of ' + difficulty + 'difficulty like the ones for IELTS Writing Task 2 General Training that directs the candidate '
'to delve into an in-depth analysis of contrasting perspectives on the topic of "' + topic + '". '
'The candidate should be asked to discuss the strengths and weaknesses of both viewpoints, provide evidence or '
'examples, and present a well-rounded argument before concluding with their personal opinion on the subject.')
}
]
token_count = count_total_tokens(messages)
response = make_openai_call(GPT_4_O, messages, token_count, "prompt", GEN_QUESTION_TEMPERATURE)
return { return {
"question": response.strip(), "question": response["prompt"].strip(),
"difficulty": difficulty, "difficulty": difficulty,
"topic": topic "topic": topic
} }
@@ -384,32 +424,50 @@ def grade_speaking_task_1():
logging.info("POST - speaking_task_1 - " + str(request_id) + " - Transcripted answer: " + answer) logging.info("POST - speaking_task_1 - " + str(request_id) + " - Transcripted answer: " + answer)
if has_x_words(answer, 20): if has_x_words(answer, 20):
message = ("Evaluate the given Speaking Part 1 response based on the IELTS grading system, ensuring a " messages = [
"strict assessment that penalizes errors. Deduct points for deviations from the task, and " {
"assign a score of 0 if the response fails to address the question. Additionally, provide " "role": "system",
"detailed commentary highlighting both strengths and weaknesses in the response. Present your " "content": ('You are a helpful assistant designed to output JSON on this format: '
"evaluation in JSON format with " '{"comment": "comment about answer quality", "overall": 0.0, '
"the following structure: {'comment': 'comment about answer quality', 'overall': 0.0, " '"task_response": {"Fluency and Coherence": 0.0, "Lexical Resource": 0.0, '
"'task_response': {'Fluency and Coherence': 0.0, 'Lexical Resource': 0.0, 'Grammatical Range " '"Grammatical Range and Accuracy": 0.0, "Pronunciation": 0.0}}')
"and Accuracy': 0.0, 'Pronunciation': 0.0}}\n Question: '" + question + "' \n Answer: '" + answer + "'") },
token_count = count_tokens(message)["n_tokens"] {
"role": "user",
"content": ('Evaluate the given Speaking Part 1 response based on the IELTS grading system, ensuring a '
'strict assessment that penalizes errors. Deduct points for deviations from the task, and '
'assign a score of 0 if the response fails to address the question. Additionally, provide '
'detailed commentary highlighting both strengths and weaknesses in the response.'
'\n Question: "'+ question + '" \n Answer: "'+ answer + '"')
}
]
token_count = count_total_tokens(messages)
logging.info("POST - speaking_task_1 - " + str(request_id) + " - Requesting grading of the answer.") logging.info("POST - speaking_task_1 - " + str(request_id) + " - Requesting grading of the answer.")
response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count, response = make_openai_call(GPT_3_5_TURBO, messages, token_count,["comment"],
["comment"],
GRADING_TEMPERATURE) GRADING_TEMPERATURE)
logging.info("POST - speaking_task_1 - " + str(request_id) + " - Answer graded: " + str(response)) logging.info("POST - speaking_task_1 - " + str(request_id) + " - Answer graded: " + str(response))
perfect_answer_message = ("Provide a perfect answer according to ielts grading system to the following " perfect_answer_messages = [
"Speaking Part 1 question: '" + question + "'") {
token_count = count_tokens(perfect_answer_message)["n_tokens"] "role": "system",
"content": ('You are a helpful assistant designed to output JSON on this format: '
'{"answer": "perfect answer"}')
},
{
"role": "user",
"content": (
'Provide a perfect answer according to ielts grading system to the following '
'Speaking Part 1 question: "' + question + '"')
}
]
token_count = count_total_tokens(perfect_answer_messages)
logging.info("POST - speaking_task_1 - " + str(request_id) + " - Requesting perfect answer.") logging.info("POST - speaking_task_1 - " + str(request_id) + " - Requesting perfect answer.")
response['perfect_answer'] = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, response['perfect_answer'] = make_openai_call(GPT_3_5_TURBO,
perfect_answer_message, perfect_answer_messages,
token_count, token_count,
None, None,
GEN_QUESTION_TEMPERATURE) GEN_QUESTION_TEMPERATURE)["answer"]
logging.info("POST - speaking_task_1 - " + str( logging.info("POST - speaking_task_1 - " + str(
request_id) + " - Perfect answer: " + response['perfect_answer']) request_id) + " - Perfect answer: " + response['perfect_answer'])

View File

@@ -7,6 +7,8 @@ GRADING_TEMPERATURE = 0.1
TIPS_TEMPERATURE = 0.2 TIPS_TEMPERATURE = 0.2
GEN_QUESTION_TEMPERATURE = 0.7 GEN_QUESTION_TEMPERATURE = 0.7
GPT_3_5_TURBO = "gpt-3.5-turbo" GPT_3_5_TURBO = "gpt-3.5-turbo"
GPT_4_TURBO = "gpt-4-turbo"
GPT_4_O = "gpt-4o"
GPT_3_5_TURBO_16K = "gpt-3.5-turbo-16k" GPT_3_5_TURBO_16K = "gpt-3.5-turbo-16k"
GPT_3_5_TURBO_INSTRUCT = "gpt-3.5-turbo-instruct" GPT_3_5_TURBO_INSTRUCT = "gpt-3.5-turbo-instruct"
GPT_4_PREVIEW = "gpt-4-turbo-preview" GPT_4_PREVIEW = "gpt-4-turbo-preview"

View File

@@ -1,15 +1,14 @@
import json import json
import os import os
import re
import openai from openai import OpenAI
from dotenv import load_dotenv from dotenv import load_dotenv
from helper.constants import GPT_3_5_TURBO_INSTRUCT, BLACKLISTED_WORDS from helper.constants import BLACKLISTED_WORDS, GPT_3_5_TURBO
from helper.token_counter import count_tokens from helper.token_counter import count_tokens
load_dotenv() load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY") client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
MAX_TOKENS = 4097 MAX_TOKENS = 4097
TOP_P = 0.9 TOP_P = 0.9
@@ -50,105 +49,20 @@ tools = [{
}] }]
###
def process_response(input_string, quotation_check_field):
    """Extract and decode the JSON object embedded in a text reply.

    Everything before the first ``{`` is discarded. If the remainder contains
    ``quotation_check_field`` as a single-quoted key followed by a
    single-quoted value or a bracketed list, the text is normalised with
    ``parse_string`` before decoding. Otherwise newlines are flattened,
    trailing commas before ``]`` / ``}`` are removed, and text containing
    neither ``[`` nor ``]`` is additionally passed through ``parse_string_2``.

    Args:
        input_string: Raw text that may contain a JSON-like object.
        quotation_check_field: Key name looked for in single quotes; it is
            matched literally (regex metacharacters are escaped).

    Returns:
        The decoded JSON value; ``input_string`` unchanged when it contains
        no ``{``; ``None`` when the text cannot be decoded.
    """
    if '{' not in input_string:
        return input_string

    try:
        # Keep everything from the first '{' onwards.
        result = input_string[input_string.index('{'):]

        # Raw-string fragments avoid invalid "\s" / "\[" escape sequences, and
        # escaping the field keeps a key such as "a(b" from breaking the pattern.
        field = re.escape(quotation_check_field)
        flags = re.DOTALL | re.MULTILINE
        if (re.search(r"'" + field + r"':\s*'(.*?)'", result, flags)
                or re.search(r"'" + field + r"':\s*\[([^\]]+)]", result, flags)):
            return json.loads(parse_string(result))

        if "title" in result:
            # Line breaks are kept as an explicit marker instead of being dropped.
            parsed_string = result.replace("\n\n", "\n").replace("\n", "**paragraph**")
        else:
            parsed_string = result.replace("\n\n", " ").replace("\n", " ")

        # Strip trailing commas, which json.loads rejects.
        parsed_string = re.sub(r',\s*]', ']', parsed_string)
        parsed_string = re.sub(r',\s*}', '}', parsed_string)

        if '[' not in parsed_string and ']' not in parsed_string:
            parsed_string = parse_string_2(parsed_string)
        return json.loads(parsed_string)
    except Exception as e:
        # Best-effort parser: report the failure and hand back None rather than raising.
        print(f"Invalid JSON string! Exception: {e}")
        print(f"String: {input_string}")
        return None
def parse_string(to_parse: str):
    """Rewrite single-quoted, JSON-like text to use double-quote delimiters.

    Double quotes already present end up as single quotes, delimiting single
    quotes become double quotes, blank-line breaks collapse to one space, and
    trailing commas before ``]`` or ``}`` are removed.
    """
    # Temporarily escape existing double quotes so they can be told apart
    # from the ones introduced by the swap below.
    text = to_parse.replace('"', '\\"')
    # A single quote counts as a delimiter unless it has a word character on
    # both sides (which leaves apostrophes such as "it's" untouched).
    text = re.sub(r"(?<!\w)'|'(?!\w)", '"', text)
    # The previously escaped double quotes become plain single quotes.
    text = text.replace('\\"', "'")
    text = text.replace("\n\n", " ")
    for pattern, closer in ((r',\s*]', ']'), (r',\s*}', '}')):
        text = re.sub(pattern, closer, text)
    return text
# Wrap the values of a flat, brace-delimited `"key": value` string in double
# quotes and return the rewritten text.
# NOTE(review): indentation is not visible in this view, so the nesting of the
# loops and branches below is inferred from syntax only — confirm against the
# real file before relying on these notes.
def parse_string_2(to_parse: str):
# Work on a copy with every brace removed so only key/value text remains.
keys_and_values_str = to_parse.replace("{", "").replace("}", "")
# Split on a comma or a colon that directly follows a double quote.
split_pattern = r'(?<="),|(?<="):'
keys_and_values = re.split(split_pattern, keys_and_values_str)
# `keys` is filled below but never read again within this function.
keys = []
values = []
# Pieces at even positions are collected as keys, odd positions as values.
for idx, x in enumerate(keys_and_values):
if (idx % 2) == 0:
keys.append(x)
else:
values.append(x)
parsed_values = []
# Drop any double quotes inside each value, trim it, then wrap it in quotes.
for value in values:
parsed_values.append(("\"" + value.replace("\"", "").strip() + "\""))
# str.replace swaps every occurrence of the raw value text, not just the
# first one, so a value that also appears elsewhere is rewritten there too.
for ind, parsed_value in enumerate(parsed_values):
to_parse = to_parse.replace(values[ind], parsed_values[ind])
# Adds a space after every colon, including colons inside values.
# NOTE(review): cannot tell from this view whether this line runs once after
# the loop or on every iteration — TODO confirm.
to_parse = to_parse.replace(":", ": ")
return to_parse
def remove_special_chars_and_escapes(input_string):
    """Return ``input_string`` reduced to ASCII letters, digits and whitespace.

    Backslash-escaped double quotes are first turned into single quotes and
    blank-line breaks into one space; then literal ``\\n`` / ``\\r`` / ``\\t``
    escape sequences and every other non-alphanumeric, non-whitespace
    character are removed.
    """
    normalised = input_string.replace("\\\"", "'").replace("\n\n", " ")
    # First alternative: a literal backslash followed by n, r or t.
    # Second alternative: any character that is not a letter, digit or whitespace.
    return re.sub(r'(\\[nrt])|[^a-zA-Z0-9\s]', '', normalised)
def check_fields(obj, fields): def check_fields(obj, fields):
return all(field in obj for field in fields) return all(field in obj for field in fields)
def make_openai_call(model, messages, token_count, fields_to_check, temperature): def make_openai_call(model, messages, token_count, fields_to_check, temperature):
global try_count global try_count
result = openai.ChatCompletion.create( result = client.chat.completions.create(
model=model, model=model,
max_tokens=int(MAX_TOKENS - token_count - 300), max_tokens=int(MAX_TOKENS - token_count - 300),
temperature=float(temperature), temperature=float(temperature),
top_p=float(TOP_P), messages=messages,
frequency_penalty=float(FREQUENCY_PENALTY), response_format={"type": "json_object"}
messages=messages )
)["choices"][0]["message"]["content"] result = result.choices[0].message.content
if has_blacklisted_words(result) and try_count < TRY_LIMIT: if has_blacklisted_words(result) and try_count < TRY_LIMIT:
try_count = try_count + 1 try_count = try_count + 1
return make_openai_call(model, messages, token_count, fields_to_check, temperature) return make_openai_call(model, messages, token_count, fields_to_check, temperature)
@@ -156,57 +70,22 @@ def make_openai_call(model, messages, token_count, fields_to_check, temperature)
return "" return ""
if fields_to_check is None: if fields_to_check is None:
return result.replace("\n\n", " ").strip() return json.loads(result)
processed_response = process_response(result, fields_to_check[0]) if check_fields(result, fields_to_check) is False and try_count < TRY_LIMIT:
if check_fields(processed_response, fields_to_check) is False and try_count < TRY_LIMIT:
try_count = try_count + 1 try_count = try_count + 1
return make_openai_call(model, messages, token_count, fields_to_check, temperature) return make_openai_call(model, messages, token_count, fields_to_check, temperature)
elif try_count >= TRY_LIMIT: elif try_count >= TRY_LIMIT:
try_count = 0 try_count = 0
return result return json.loads(result)
else: else:
try_count = 0 try_count = 0
return processed_response return json.loads(result)
def make_openai_instruct_call(model, message: str, token_count, fields_to_check, temperature): def make_openai_instruct_call(model, message: str, token_count, fields_to_check, temperature):
global try_count global try_count
response = openai.Completion.create( return ""
model=model,
prompt=message,
max_tokens=int(4097 - token_count - 300),
temperature=0.7
)["choices"][0]["text"]
if has_blacklisted_words(response) and try_count < TRY_LIMIT:
try_count = try_count + 1
return make_openai_instruct_call(model, message, token_count, fields_to_check, temperature)
elif has_blacklisted_words(response) and try_count >= TRY_LIMIT:
try_count = 0
return ""
if fields_to_check is None:
try_count = 0
return response.replace("\n\n", " ").strip()
response = remove_special_characters_from_beginning(response)
if response[0] != "{" and response[0] != '"':
response = "{\"" + response
if not response.endswith("}"):
response = response + "}"
try:
processed_response = process_response(response, fields_to_check[0])
reparagraphed_response = replace_expression_in_object(processed_response, "**paragraph**", "\n")
if check_fields(reparagraphed_response, fields_to_check) is False and try_count < TRY_LIMIT:
try_count = try_count + 1
return make_openai_instruct_call(model, message, token_count, fields_to_check, temperature)
else:
try_count = 0
return reparagraphed_response
except Exception as e:
return make_openai_instruct_call(model, message, token_count, fields_to_check, temperature)
# GRADING SUMMARY # GRADING SUMMARY
@@ -254,7 +133,7 @@ def calculate_section_grade_summary(section):
messages[2:2] = [{"role": "user", messages[2:2] = [{"role": "user",
"content": "This section is s designed to assess the English language proficiency of individuals who want to study or work in English-speaking countries. The speaking section evaluates a candidate's ability to communicate effectively in spoken English."}] "content": "This section is s designed to assess the English language proficiency of individuals who want to study or work in English-speaking countries. The speaking section evaluates a candidate's ability to communicate effectively in spoken English."}]
res = openai.ChatCompletion.create( res = client.chat.completions.create(
model="gpt-3.5-turbo", model="gpt-3.5-turbo",
max_tokens=chat_config['max_tokens'], max_tokens=chat_config['max_tokens'],
temperature=chat_config['temperature'], temperature=chat_config['temperature'],
@@ -298,20 +177,32 @@ def parse_bullet_points(bullet_points_str, grade):
def get_fixed_text(text): def get_fixed_text(text):
message = ('Fix the errors in the given text and put it in a JSON. Do not complete the answer, only replace what ' messages = [
'is wrong. Sample JSON: {"fixed_text": "fixed test with no ' {"role": "system", "content": ('You are a helpful assistant designed to output JSON on this format: '
'misspelling errors"}] \n The text: "' + text + '"') '{"fixed_text": "fixed test with no misspelling errors"}')
token_count = count_tokens(message)["n_tokens"] },
response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count, ["fixed_text"], 0.2) {"role": "user", "content": (
'Fix the errors in the given text and put it in a JSON. Do not complete the answer, only replace what '
'is wrong. \n The text: "' + text + '"')
}
]
token_count = count_total_tokens(messages)
response = make_openai_call(GPT_3_5_TURBO, messages, token_count, ["fixed_text"], 0.2)
return response["fixed_text"] return response["fixed_text"]
def get_speaking_corrections(text): def get_speaking_corrections(text):
message = ('Fix the errors in the provided transcription and put it in a JSON. Do not complete the answer, only ' messages = [
'replace what is wrong. Sample JSON: {"fixed_text": "fixed ' {"role": "system", "content": ('You are a helpful assistant designed to output JSON on this format: '
'transcription with no misspelling errors"}] \n The text: "' + text + '"') '{"fixed_text": "fixed transcription with no misspelling errors"}')
token_count = count_tokens(message)["n_tokens"] },
response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count, ["fixed_text"], 0.2) {"role": "user", "content": (
'Fix the errors in the provided transcription and put it in a JSON. Do not complete the answer, only '
'replace what is wrong. \n The text: "' + text + '"')
}
]
token_count = count_total_tokens(messages)
response = make_openai_call(GPT_3_5_TURBO, messages, token_count, ["fixed_text"], 0.2)
return response["fixed_text"] return response["fixed_text"]
@@ -340,3 +231,9 @@ def replace_expression_in_object(obj, expression, replacement):
elif isinstance(obj[key], dict): elif isinstance(obj[key], dict):
obj[key] = replace_expression_in_object(obj[key], expression, replacement) obj[key] = replace_expression_in_object(obj[key], expression, replacement)
return obj return obj
def count_total_tokens(messages):
    """Sum the ``n_tokens`` that ``count_tokens`` reports for each message's ``content``.

    Returns 0 for an empty ``messages`` sequence.
    """
    return sum(count_tokens(entry["content"])["n_tokens"] for entry in messages)

Binary file not shown.