Everything has been tested except speaking grading.

This commit is contained in:
Cristiano Ferreira
2024-05-22 21:07:48 +01:00
parent fe753fe72c
commit b7c18517de
4 changed files with 494 additions and 321 deletions

16
app.py
View File

@@ -57,12 +57,12 @@ def get_listening_section_1_question():
number_of_exercises_q = divide_number_into_parts(TOTAL_LISTENING_SECTION_1_EXERCISES, len(req_exercises))
unprocessed_conversation, processed_conversation = generate_listening_1_conversation(topic)
processed_conversation = generate_listening_1_conversation(topic)
app.logger.info("Generated conversation: " + str(processed_conversation))
start_id = 1
exercises = generate_listening_conversation_exercises(unprocessed_conversation, req_exercises,
exercises = generate_listening_conversation_exercises(parse_conversation(processed_conversation), req_exercises,
number_of_exercises_q,
start_id, difficulty)
return {
@@ -93,8 +93,8 @@ def get_listening_section_2_question():
app.logger.info("Generated monologue: " + str(monologue))
start_id = 11
exercises = generate_listening_monologue_exercises(monologue, req_exercises, number_of_exercises_q, start_id,
difficulty)
exercises = generate_listening_monologue_exercises(str(monologue), req_exercises, number_of_exercises_q,
start_id, difficulty)
return {
"exercises": exercises,
"text": monologue,
@@ -119,12 +119,12 @@ def get_listening_section_3_question():
number_of_exercises_q = divide_number_into_parts(TOTAL_LISTENING_SECTION_3_EXERCISES, len(req_exercises))
unprocessed_conversation, processed_conversation = generate_listening_3_conversation(topic)
processed_conversation = generate_listening_3_conversation(topic)
app.logger.info("Generated conversation: " + str(processed_conversation))
start_id = 21
exercises = generate_listening_conversation_exercises(unprocessed_conversation, req_exercises,
exercises = generate_listening_conversation_exercises(parse_conversation(processed_conversation), req_exercises,
number_of_exercises_q,
start_id, difficulty)
return {
@@ -155,8 +155,8 @@ def get_listening_section_4_question():
app.logger.info("Generated monologue: " + str(monologue))
start_id = 31
exercises = generate_listening_monologue_exercises(monologue, req_exercises, number_of_exercises_q, start_id,
difficulty)
exercises = generate_listening_monologue_exercises(str(monologue), req_exercises, number_of_exercises_q,
start_id, difficulty)
return {
"exercises": exercises,
"text": monologue,

View File

@@ -34,9 +34,9 @@ WRITING_MIN_TIMER_DEFAULT = 60
SPEAKING_MIN_TIMER_DEFAULT = 14
# List of lowercase words/phrases treated as blacklisted; how the list is consumed is not visible here.
# NOTE(review): this is a rendered diff without +/- markers, so removed and added continuation lines
# are shown back to back (the two "cocaine" lines and the two "jews" lines are presumably old/new pairs).
# NOTE(review): "discrimination" is listed twice (on the "discrimination", "politics" line and again
# on the "jews" line); the duplicate is redundant for membership checks.
BLACKLISTED_WORDS = ["jesus", "sex", "gay", "lesbian", "homosexual", "god", "angel", "pornography", "beer", "wine",
"cocaine", "drugs", "alcohol", "nudity", "lgbt", "casino", "gambling", "gaming", "catholicism",
"cocaine", "alcohol", "nudity", "lgbt", "casino", "gambling", "catholicism",
"discrimination", "politics", "politic", "christianity", "islam", "christian", "christians",
"jews", "jew", "policies", "human rights", "discrimination", "discriminatory"]
"jews", "jew", "discrimination", "discriminatory"]
EN_US_VOICES = [
{'Gender': 'Female', 'Id': 'Salli', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Name': 'Salli',
@@ -115,7 +115,6 @@ mti_topics = [
"Technology",
"Environment",
"Health and Fitness",
"Globalization",
"Engineering",
"Work and Careers",
"Travel and Tourism",
@@ -176,7 +175,6 @@ topics = [
"Cultural Diversity",
"Modern Technology Trends",
"Sustainable Agriculture",
"Globalization",
"Natural Disasters",
"Cybersecurity",
"Philosophy of Ethics",
@@ -184,7 +182,6 @@ topics = [
"Health and Wellness",
"Literature and Classics",
"World Geography",
"Music and Its Influence",
"Social Media Impact",
"Food Sustainability",
"Economics and Markets",
@@ -215,7 +212,6 @@ topics = [
"World Oceans",
"Social Networking",
"Sustainable Fashion",
"International Trade",
"Prehistoric Era",
"Democracy and Governance",
"Postcolonial Literature",
@@ -231,7 +227,6 @@ topics = [
"Artificial Life",
"Fitness and Nutrition",
"Classic Literature Adaptations",
"World History Wars",
"Ethical Dilemmas",
"Internet of Things (IoT)",
"Meditation Practices",
@@ -239,7 +234,6 @@ topics = [
"Marine Conservation",
"Social Justice Movements",
"Sustainable Tourism",
"International Finance",
"Ancient Philosophy",
"Cold War Era",
"Behavioral Economics",
@@ -442,7 +436,6 @@ social_monologue_contexts = [
"A monologue about the impact of technological advancements",
"An explanation of the process of wildlife rehabilitation",
"A presentation on the history of a famous explorer",
"An overview of traditional storytelling from different cultures",
"A lecture on the principles of effective marketing",
"A discussion about the challenges of environmental sustainability",
"A monologue about the influence of social entrepreneurship",

View File

@@ -10,8 +10,8 @@ from wonderwords import RandomWord
from helper.api_messages import QuestionType
from helper.constants import *
from helper.firebase_helper import get_all
from helper.openai_interface import make_openai_instruct_call, make_openai_call
from helper.token_counter import count_tokens
from helper.openai_interface import make_openai_call, count_total_tokens
from helper.speech_to_text_helper import has_x_words
nltk.download('words')
@@ -240,48 +240,63 @@ def build_write_blanks_solutions_listening(words: [], start_id):
# Builds a prompt asking for an IELTS reading passage of at least 1500 words about `topic`
# (for the section named by `type.value`) and returns whatever the OpenAI helper call returns.
# NOTE(review): this span is a rendered diff without +/- markers. The single-string prompt with
# count_tokens / make_openai_instruct_call appears to be the removed implementation; the `messages`
# list with count_total_tokens / make_openai_call appears to be its replacement — confirm against the repo.
# NOTE(review): the parameter name `type` shadows the Python builtin of the same name.
def generate_reading_passage(type: QuestionType, topic: str):
gen_reading_passage_1 = "Generate an extensive text for IELTS " + type.value + ", of at least 1500 words, on the topic " \
"of '" + topic + "'. The passage should offer a substantial amount of " \
"information, analysis, or narrative " \
"relevant to the chosen subject matter. This text passage aims to serve as the primary reading " \
"section of an IELTS test, providing an in-depth and comprehensive exploration of the topic. " \
"Make sure that the generated text does not contain forbidden subjects in muslim countries." \
"Provide your response in this json format: {\"title\": \"title of the text\", \"text\": \"generated text\"}"
token_count = count_tokens(gen_reading_passage_1)["n_tokens"]
return make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_reading_passage_1, token_count, GEN_TEXT_FIELDS,
GEN_QUESTION_TEMPERATURE)
# Chat-style request: the system message states the expected JSON shape ("title" and "text" keys),
# the user message carries the actual generation request with `topic` concatenated in verbatim.
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"title": "title of the text", "text": "generated text"}')
},
{
"role": "user",
"content": (
'Generate an extensive text for IELTS ' + type.value + ', of at least 1500 words, on the topic '
'of "' + topic + '". The passage should offer '
'a substantial amount of information, '
'analysis, or narrative relevant to the chosen '
'subject matter. This text passage aims to '
'serve as the primary reading section of an '
'IELTS test, providing an in-depth and '
'comprehensive exploration of the topic. '
'Make sure that the generated text does not '
'contain forbidden subjects in muslim countries.')
}
]
# GEN_TEXT_FIELDS is presumably the list of keys the response must contain — verify in helper.constants.
token_count = count_total_tokens(messages)
return make_openai_call(GPT_4_O, messages, token_count, GEN_TEXT_FIELDS, GEN_QUESTION_TEMPERATURE)
def generate_listening_1_conversation(topic: str):
gen_listening_1_conversation_2_people = "Compose an authentic conversation between two individuals in the everyday " \
"social context of '" + topic + "'. Please include random names and genders " \
"for the characters in your dialogue. " \
"Make sure that the generated conversation does not contain forbidden subjects in muslim countries."
token_count = count_tokens(gen_listening_1_conversation_2_people)["n_tokens"]
response = make_openai_instruct_call(
GPT_3_5_TURBO_INSTRUCT,
gen_listening_1_conversation_2_people,
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"conversation": [{"name": "name", "gender": "gender", "text": "text"}]}')
},
{
"role": "user",
"content": (
'Compose an authentic conversation between two individuals in the everyday social context '
'of "' + topic + '". Please include random names and genders for the characters in your dialogue. '
'Make sure that the generated conversation does not contain forbidden subjects in '
'muslim countries.')
}
]
token_count = count_total_tokens(messages)
response = make_openai_call(
GPT_4_O,
messages,
token_count,
None,
GEN_QUESTION_TEMPERATURE
)
conversation_json = '{"conversation": [{"name": "name", "gender": "gender", "text": "text"}]}'
parse_conversation = "Parse this conversation: '" + response + "' to the following json format: " + conversation_json
token_count = count_tokens(parse_conversation)["n_tokens"]
processed = make_openai_instruct_call(
GPT_3_5_TURBO_INSTRUCT,
parse_conversation,
token_count,
['conversation'],
["conversation"],
GEN_QUESTION_TEMPERATURE
)
chosen_voices = []
name_to_voice = {}
for segment in processed['conversation']:
for segment in response['conversation']:
if 'voice' not in segment:
name = segment['name']
if name in name_to_voice:
@@ -300,50 +315,66 @@ def generate_listening_1_conversation(topic: str):
chosen_voices.append(voice)
name_to_voice[name] = voice
segment['voice'] = voice
return response, processed
def generate_listening_2_monologue(topic: str):
gen_listening_2_monologue_social = "Generate a comprehensive monologue set in the social context of: '" + topic + "'. Make sure that the generated monologue does not contain forbidden subjects in muslim countries."
token_count = count_tokens(gen_listening_2_monologue_social)["n_tokens"]
response = make_openai_instruct_call(
GPT_3_5_TURBO_INSTRUCT,
gen_listening_2_monologue_social,
token_count,
None,
GEN_QUESTION_TEMPERATURE
)
return response
def generate_listening_3_conversation(topic: str):
gen_listening_3_conversation_4_people = "Compose an authentic and elaborate conversation between up to four individuals " \
"in the everyday social context of '" + topic + \
"'. Please include random names and genders for the characters in your dialogue. " \
"Make sure that the generated conversation does not contain forbidden subjects in muslim countries."
token_count = count_tokens(gen_listening_3_conversation_4_people)["n_tokens"]
response = make_openai_instruct_call(
GPT_3_5_TURBO_INSTRUCT,
gen_listening_3_conversation_4_people,
# Requests a monologue set in the social context `topic` from GPT_4_O via make_openai_call and
# returns the value stored under the "monologue" key of the response.
# NOTE(review): this span is a rendered diff without +/- markers, so a few removed lines are
# interleaved with the new body; see the notes below.
def generate_listening_2_monologue(topic: str):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"monologue": "monologue"}')
},
{
"role": "user",
"content": (
'Generate a comprehensive monologue set in the social context '
'of "' + topic + '". Make sure that the generated monologue does not contain forbidden subjects in '
'muslim countries.')
}
]
token_count = count_total_tokens(messages)
# NOTE(review): both `None` and ["monologue"] appear in the argument list below; presumably `None`
# is the removed line and ["monologue"] its replacement — confirm against the repo.
response = make_openai_call(
GPT_4_O,
messages,
token_count,
None,
["monologue"],
GEN_QUESTION_TEMPERATURE
)
# NOTE(review): the conversation_json assignment below is not used by this function; it looks like
# a removed line from the old generate_listening_3_conversation body.
conversation_json = '{"conversation": [{"name": "name", "gender": "gender", "text": "text"}]}'
return response["monologue"]
parse_conversation = "Parse this conversation: '" + response + "' to the following json format: " + conversation_json
token_count = count_tokens(parse_conversation)["n_tokens"]
processed = make_openai_instruct_call(
GPT_3_5_TURBO_INSTRUCT,
parse_conversation,
def generate_listening_3_conversation(topic: str):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"conversation": [{"name": "name", "gender": "gender", "text": "text"}]}')
},
{
"role": "user",
"content": (
'Compose an authentic and elaborate conversation between up to four individuals in the everyday '
'social context of "' + topic + '". Please include random names and genders for the characters in your dialogue. '
'Make sure that the generated conversation does not contain forbidden subjects in '
'muslim countries.')
}
]
token_count = count_total_tokens(messages)
response = make_openai_call(
GPT_4_O,
messages,
token_count,
['conversation'],
["conversation"],
GEN_QUESTION_TEMPERATURE
)
name_to_voice = {}
for segment in processed['conversation']:
for segment in response['conversation']:
if 'voice' not in segment:
name = segment['name']
if name in name_to_voice:
@@ -355,20 +386,35 @@ def generate_listening_3_conversation(topic: str):
voice = random.choice(FEMALE_NEURAL_VOICES)['Id']
name_to_voice[name] = voice
segment['voice'] = voice
return response, processed
return response
# Requests a monologue on the academic subject `topic` and returns the generated monologue.
# NOTE(review): this span is a rendered diff without +/- markers. The single-string prompt with
# count_tokens / make_openai_instruct_call appears to be the removed implementation; the `messages`
# list with count_total_tokens / make_openai_call appears to be its replacement — confirm against the repo.
def generate_listening_4_monologue(topic: str):
gen_listening_4_monologue_academic = "Generate a comprehensive monologue an academic subject of: '" + topic + "'. Make sure that the generated monologue does not contain forbidden subjects in muslim countries."
token_count = count_tokens(gen_listening_4_monologue_academic)["n_tokens"]
response = make_openai_instruct_call(
GPT_3_5_TURBO_INSTRUCT,
gen_listening_4_monologue_academic,
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"monologue": "monologue"}')
},
{
"role": "user",
"content": (
'Generate a comprehensive monologue on the academic subject '
'of: "' + topic + '". Make sure that the generated monologue does not contain forbidden subjects in '
'muslim countries.')
}
]
# NOTE(review): in the call below both `None` and ["monologue"] are listed; presumably `None`
# is the removed argument and ["monologue"] the added one — confirm.
token_count = count_total_tokens(messages)
response = make_openai_call(
GPT_4_O,
messages,
token_count,
None,
["monologue"],
GEN_QUESTION_TEMPERATURE
)
# NOTE(review): two returns are shown; `return response` is presumably the removed line and
# `return response["monologue"]` the current one, so callers receive only the "monologue" value.
return response
return response["monologue"]
def generate_reading_exercises(passage: str, req_exercises: list, number_of_exercises_q, start_id, difficulty):
@@ -392,7 +438,7 @@ def generate_reading_exercises(passage: str, req_exercises: list, number_of_exer
else:
exercises.append({})
print("Did not add write blanks because it did not respect word limit")
elif req_exercise == "matchSentences":
elif req_exercise == "paragraphMatch":
question = gen_paragraph_match_exercise(passage, number_of_exercises, start_id)
exercises.append(question)
print("Added paragraph match: " + str(question))
@@ -478,27 +524,27 @@ def generate_listening_monologue_exercises(monologue: str, req_exercises: list,
def gen_multiple_choice_exercise(text: str, quantity: int, start_id, difficulty):
gen_multiple_choice_for_text = "Generate " + str(
quantity) + " " + difficulty + " difficulty multiple choice questions for this text: " \
"'" + text + "'\n" \
"Use this format: \"questions\": [{\"id\": \"9\", \"options\": [{\"id\": \"A\", \"text\": " \
"\"Economic benefits\"}, {\"id\": \"B\", \"text\": \"Government regulations\"}, {\"id\": \"C\", \"text\": " \
"\"Concerns about climate change\"}, {\"id\": \"D\", \"text\": \"Technological advancement\"}], " \
"\"prompt\": \"What is the main reason for the shift towards renewable energy sources?\", " \
"\"solution\": \"C\", \"variant\": \"text\"}]"
token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"]
mc_questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_multiple_choice_for_text, token_count,
None,
GEN_QUESTION_TEMPERATURE)
parse_mc_questions = "Parse the questions into this json format: {\"questions\": [{\"id\": \"9\", \"options\": [{\"id\": \"A\", \"text\": " \
"\"Economic benefits\"}, {\"id\": \"B\", \"text\": \"Government regulations\"}, {\"id\": \"C\", \"text\": " \
"\"Concerns about climate change\"}, {\"id\": \"D\", \"text\": \"Technological advancement\"}], " \
"\"prompt\": \"What is the main reason for the shift towards renewable energy sources?\", " \
"\"solution\": \"C\", \"variant\": \"text\"}]}. \nThe questions: '" + mc_questions + "'"
token_count = count_tokens(parse_mc_questions)["n_tokens"]
question = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_mc_questions, token_count,
["questions"],
GEN_QUESTION_TEMPERATURE)
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"questions": [{"id": "9", "options": [{"id": "A", "text": "Economic benefits"}, {"id": "B", "text": '
'"Government regulations"}, {"id": "C", "text": "Concerns about climate change"}, {"id": "D", "text": '
'"Technological advancement"}], "prompt": "What is the main reason for the shift towards renewable '
'energy sources?", "solution": "C", "variant": "text"}]}')
},
{
"role": "user",
"content": (
'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty multiple choice questions '
'for this text:\n"' + text + '"')
}
]
token_count = count_total_tokens(messages)
question = make_openai_call(GPT_4_O, messages, token_count, ["questions"],
GEN_QUESTION_TEMPERATURE)
return {
"id": str(uuid.uuid4()),
"prompt": "Select the appropriate option.",
@@ -508,23 +554,34 @@ def gen_multiple_choice_exercise(text: str, quantity: int, start_id, difficulty)
def gen_summary_fill_blanks_exercise(text: str, quantity: int, start_id, difficulty):
gen_summary_for_text = "Summarize this text: " + text
token_count = count_tokens(gen_summary_for_text)["n_tokens"]
text_summary = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_summary_for_text, token_count,
None,
GEN_QUESTION_TEMPERATURE)
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{ "summary": "summary", "words": ["word_1", "word_2"] }')
},
{
"role": "user",
"content": ('Summarize this text: "'+ text + '"')
gen_words_to_replace = "Select " + str(
quantity) + " " + difficulty + " difficulty words, it must be words and not expressions, from the summary and respond in this " \
"JSON format: { \"words\": [\"word_1\", \"word_2\"] }. The summary is: " + text_summary
token_count = count_tokens(gen_words_to_replace)["n_tokens"]
words_to_replace = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_words_to_replace, token_count,
["words"],
GEN_QUESTION_TEMPERATURE)["words"]
},
{
"role": "user",
"content": ('Select ' + str(quantity) + ' ' + difficulty + ' difficulty words, it must be words and not '
'expressions, from the summary.')
replaced_summary = replace_first_occurrences_with_placeholders(text_summary, words_to_replace, start_id)
options_words = add_random_words_and_shuffle(words_to_replace, 5)
solutions = fillblanks_build_solutions_array(words_to_replace, start_id)
}
]
token_count = count_total_tokens(messages)
response = make_openai_call(GPT_4_O, messages, token_count,
["summary"],
GEN_QUESTION_TEMPERATURE)
replaced_summary = replace_first_occurrences_with_placeholders(response["summary"], response["words"], start_id)
options_words = add_random_words_and_shuffle(response["words"], 5)
solutions = fillblanks_build_solutions_array(response["words"], start_id)
return {
"allowRepetition": True,
@@ -540,20 +597,30 @@ def gen_summary_fill_blanks_exercise(text: str, quantity: int, start_id, difficu
def gen_true_false_not_given_exercise(text: str, quantity: int, start_id, difficulty):
gen_true_false_not_given = "Generate " + str(
quantity) + " " + difficulty + " difficulty statements in JSON format (True, False, or Not Given) " \
"based on the provided text. Ensure that your statements " \
"accurately represent information or inferences from the " \
"text, and provide a variety of responses, including, at least one of each True, " \
"False, and Not Given, as appropriate, in the JSON structure " \
"{\"prompts\":[{\"prompt\": \"statement_1\", \"solution\": " \
"\"true/false/not_given\"}, {\"prompt\": \"statement_2\", " \
"\"solution\": \"true/false/not_given\"}]}. Reference text: " + text
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"prompts":[{"prompt": "statement_1", "solution": "true/false/not_given"}, '
'{"prompt": "statement_2", "solution": "true/false/not_given"}]}')
},
{
"role": "user",
"content": (
'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty statements based on the provided text. '
'Ensure that your statements accurately represent '
'information or inferences from the text, and '
'provide a variety of responses, including, at '
'least one of each True, False, and Not Given, '
'as appropriate.\n\nReference text:\n\n ' + text)
token_count = count_tokens(gen_true_false_not_given)["n_tokens"]
questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_true_false_not_given, token_count,
["prompts"],
GEN_QUESTION_TEMPERATURE)["prompts"]
}
]
token_count = count_total_tokens(messages)
questions = make_openai_call(GPT_4_O, messages, token_count,["prompts"],
GEN_QUESTION_TEMPERATURE)["prompts"]
if len(questions) > quantity:
questions = remove_excess_questions(questions, len(questions) - quantity)
@@ -569,16 +636,25 @@ def gen_true_false_not_given_exercise(text: str, quantity: int, start_id, diffic
def gen_write_blanks_exercise(text: str, quantity: int, start_id, difficulty):
gen_short_answer_questions = "Generate " + str(
quantity) + " " + difficulty + " difficulty short answer questions, and the possible answers, " \
"must have maximum 3 words per answer, about this text: '" + text + "'. " \
"Provide your answer in this JSON format: {\"questions\": [{\"question\": question, " \
"\"possible_answers\": [\"answer_1\", \"answer_2\"]}]}"
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}')
},
{
"role": "user",
"content": (
'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty short answer questions, and the '
'possible answers, must have maximum 3 words '
'per answer, about this text:\n"' + text + '"')
token_count = count_tokens(gen_short_answer_questions)["n_tokens"]
questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_short_answer_questions, token_count,
["questions"],
GEN_QUESTION_TEMPERATURE)["questions"][:quantity]
}
]
token_count = count_total_tokens(messages)
questions = make_openai_call(GPT_4_O, messages, token_count,["questions"],
GEN_QUESTION_TEMPERATURE)["questions"][:quantity]
return {
"id": str(uuid.uuid4()),
@@ -592,15 +668,24 @@ def gen_write_blanks_exercise(text: str, quantity: int, start_id, difficulty):
def gen_paragraph_match_exercise(text: str, quantity: int, start_id):
paragraphs = assign_letters_to_paragraphs(text)
heading_prompt = (
'For every paragraph of the list generate a minimum 5 word heading for it. Provide your answer in this JSON format: '
'{"headings": [ {"heading": "first paragraph heading"}, {"heading": "second paragraph heading"}]}\n'
'The paragraphs are these: ' + str(paragraphs))
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"headings": [ {"heading": "first paragraph heading"}, {"heading": "second paragraph heading"}]}')
},
{
"role": "user",
"content": (
'For every paragraph of the list generate a minimum 5 word heading for it. The paragraphs are these: ' + str(paragraphs))
token_count = count_tokens(heading_prompt)["n_tokens"]
headings = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, heading_prompt, token_count,
["headings"],
GEN_QUESTION_TEMPERATURE)["headings"]
}
]
token_count = count_total_tokens(messages)
headings = make_openai_call(GPT_4_O, messages, token_count,["headings"],
GEN_QUESTION_TEMPERATURE)["headings"]
options = []
for i, paragraph in enumerate(paragraphs, start=0):
@@ -615,7 +700,7 @@ def gen_paragraph_match_exercise(text: str, quantity: int, start_id):
for i, paragraph in enumerate(paragraphs, start=start_id):
sentences.append({
"id": i,
"sentence": paragraph["heading"]["heading"],
"sentence": paragraph["heading"],
"solution": paragraph["letter"]
})
@@ -632,28 +717,34 @@ def gen_paragraph_match_exercise(text: str, quantity: int, start_id):
# Splits the text in `paragraphs` into paragraphs and returns a list of dicts, each holding the
# stripped paragraph under 'paragraph' and a consecutive uppercase letter (A, B, C, ...) under 'letter'.
# NOTE(review): this span is a rendered diff without +/- markers, so two loops are shown: the
# split("\n") loop is presumably the removed version and the split("\n\n") loop with the
# has_x_words filter the current one — confirm against the repo.
# NOTE(review): `letters` iterates over string.ascii_uppercase (26 letters), so next(letters)
# raises StopIteration once more than 26 paragraphs are lettered.
def assign_letters_to_paragraphs(paragraphs):
result = []
letters = iter(string.ascii_uppercase)
for paragraph in paragraphs.split("\n"):
result.append({'paragraph': paragraph.strip(), 'letter': next(letters)})
for paragraph in paragraphs.split("\n\n"):
# has_x_words comes from helper.speech_to_text_helper; presumably it keeps only chunks with
# at least 10 words, which would drop titles and blank fragments — verify the helper.
if has_x_words(paragraph, 10):
result.append({'paragraph': paragraph.strip(), 'letter': next(letters)})
return result
def gen_multiple_choice_exercise_listening_conversation(text: str, quantity: int, start_id, difficulty):
gen_multiple_choice_for_text = "Generate " + str(
quantity) + " " + difficulty + " difficulty multiple choice questions of 4 options of for this conversation: " \
"'" + text + "'"
token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"]
mc_questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_multiple_choice_for_text, token_count,
None,
GEN_QUESTION_TEMPERATURE)
parse_mc_questions = "Parse the questions into this json format: {\"questions\": [{\"id\": \"9\", \"options\": [{\"id\": \"A\", \"text\": " \
"\"Economic benefits\"}, {\"id\": \"B\", \"text\": \"Government regulations\"}, {\"id\": \"C\", \"text\": " \
"\"Concerns about climate change\"}, {\"id\": \"D\", \"text\": \"Technological advancement\"}], " \
"\"prompt\": \"What is the main reason for the shift towards renewable energy sources?\", " \
"\"solution\": \"C\", \"variant\": \"text\"}]}. \nThe questions: '" + mc_questions + "'"
token_count = count_tokens(parse_mc_questions)["n_tokens"]
question = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_mc_questions, token_count,
["questions"],
GEN_QUESTION_TEMPERATURE)
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"questions": [{"id": "9", "options": [{"id": "A", "text": "Economic benefits"}, {"id": "B", "text": '
'"Government regulations"}, {"id": "C", "text": "Concerns about climate change"}, {"id": "D", "text": '
'"Technological advancement"}], "prompt": "What is the main reason for the shift towards renewable '
'energy sources?", "solution": "C", "variant": "text"}]}')
},
{
"role": "user",
"content": (
'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty multiple choice questions of 4 options '
'of for this conversation:\n"' + text + '"')
}
]
token_count = count_total_tokens(messages)
question = make_openai_call(GPT_4_O, messages, token_count,["questions"], GEN_QUESTION_TEMPERATURE)
return {
"id": str(uuid.uuid4()),
"prompt": "Select the appropriate option.",
@@ -663,22 +754,28 @@ def gen_multiple_choice_exercise_listening_conversation(text: str, quantity: int
def gen_multiple_choice_exercise_listening_monologue(text: str, quantity: int, start_id, difficulty):
gen_multiple_choice_for_text = "Generate " + str(
quantity) + " " + difficulty + " difficulty multiple choice questions for this monologue: " \
"'" + text + "'"
token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"]
mc_questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_multiple_choice_for_text, token_count,
None,
GEN_QUESTION_TEMPERATURE)
parse_mc_questions = "Parse the questions into this json format: {\"questions\": [{\"id\": \"9\", \"options\": [{\"id\": \"A\", \"text\": " \
"\"Economic benefits\"}, {\"id\": \"B\", \"text\": \"Government regulations\"}, {\"id\": \"C\", \"text\": " \
"\"Concerns about climate change\"}, {\"id\": \"D\", \"text\": \"Technological advancement\"}], " \
"\"prompt\": \"What is the main reason for the shift towards renewable energy sources?\", " \
"\"solution\": \"C\", \"variant\": \"text\"}]}. \nThe questions: '" + mc_questions + "'"
token_count = count_tokens(parse_mc_questions)["n_tokens"]
question = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_mc_questions, token_count,
["questions"],
GEN_QUESTION_TEMPERATURE)
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"questions": [{"id": "9", "options": [{"id": "A", "text": "Economic benefits"}, {"id": "B", "text": '
'"Government regulations"}, {"id": "C", "text": "Concerns about climate change"}, {"id": "D", "text": '
'"Technological advancement"}], "prompt": "What is the main reason for the shift towards renewable '
'energy sources?", "solution": "C", "variant": "text"}]}')
},
{
"role": "user",
"content": (
'Generate ' + str(
quantity) + ' ' + difficulty + ' difficulty multiple choice questions of 4 options '
'of for this monologue:\n"' + text + '"')
}
]
token_count = count_total_tokens(messages)
question = make_openai_call(GPT_4_O, messages, token_count,["questions"], GEN_QUESTION_TEMPERATURE)
return {
"id": str(uuid.uuid4()),
"prompt": "Select the appropriate option.",
@@ -688,17 +785,26 @@ def gen_multiple_choice_exercise_listening_monologue(text: str, quantity: int, s
def gen_write_blanks_questions_exercise_listening_conversation(text: str, quantity: int, start_id, difficulty):
gen_write_blanks_questions = "Generate " + str(
quantity) + " " + difficulty + " difficulty short answer questions, and the possible answers " \
"(max 3 words per answer), about a monologue and" \
"respond in this JSON format: {\"questions\": [{\"question\": question, " \
"\"possible_answers\": [\"answer_1\", \"answer_2\"]}]}." \
"The monologue is this: '" + text + "'"
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}')
},
{
"role": "user",
"content": (
'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty short answer questions, and the '
'possible answers (max 3 words per answer), '
'about this conversation:\n"' + text + '"')
token_count = count_tokens(gen_write_blanks_questions)["n_tokens"]
questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_write_blanks_questions, token_count,
["questions"],
GEN_QUESTION_TEMPERATURE)["questions"][:quantity]
}
]
token_count = count_total_tokens(messages)
questions = make_openai_call(GPT_4_O, messages, token_count,["questions"],
GEN_QUESTION_TEMPERATURE)["questions"][:quantity]
return {
"id": str(uuid.uuid4()),
@@ -711,17 +817,26 @@ def gen_write_blanks_questions_exercise_listening_conversation(text: str, quanti
def gen_write_blanks_questions_exercise_listening_monologue(text: str, quantity: int, start_id, difficulty):
gen_write_blanks_questions = "Generate " + str(
quantity) + " " + difficulty + " difficulty short answer questions, and the possible answers " \
"(max 3 words per answer), about a monologue and" \
"respond in this JSON format: {\"questions\": [{\"question\": question, " \
"\"possible_answers\": [\"answer_1\", \"answer_2\"]}]}." \
"The monologue is this: '" + text + "'"
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}')
},
{
"role": "user",
"content": (
'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty short answer questions, and the '
'possible answers (max 3 words per answer), '
'about this monologue:\n"' + text + '"')
token_count = count_tokens(gen_write_blanks_questions)["n_tokens"]
questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_write_blanks_questions, token_count,
["questions"],
GEN_QUESTION_TEMPERATURE)["questions"][:quantity]
}
]
token_count = count_total_tokens(messages)
questions = make_openai_call(GPT_4_O, messages, token_count, ["questions"],
GEN_QUESTION_TEMPERATURE)["questions"][:quantity]
return {
"id": str(uuid.uuid4()),
@@ -734,20 +849,43 @@ def gen_write_blanks_questions_exercise_listening_monologue(text: str, quantity:
def gen_write_blanks_notes_exercise_listening_conversation(text: str, quantity: int, start_id, difficulty):
gen_write_blanks_notes = "Generate " + str(
quantity) + " " + difficulty + " difficulty notes taken from the conversation and and respond in this " \
"JSON format: { \"notes\": [\"note_1\", \"note_2\"] }. The monologue is this: '" + text + "'"
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"notes": ["note_1", "note_2"]}')
},
{
"role": "user",
"content": (
'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty notes taken from this '
'conversation:\n"' + text + '"')
}
]
token_count = count_total_tokens(messages)
questions = make_openai_call(GPT_4_O, messages, token_count, ["notes"],
GEN_QUESTION_TEMPERATURE)["notes"][:quantity]
token_count = count_tokens(gen_write_blanks_notes)["n_tokens"]
questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_write_blanks_notes, token_count,
["notes"],
GEN_QUESTION_TEMPERATURE)["notes"][:quantity]
formatted_phrases = "\n".join([f"{i + 1}. {phrase}" for i, phrase in enumerate(questions)])
gen_words_to_replace = "Select 1 word from each phrase in the list and respond in this " \
"JSON format: { \"words\": [\"word_1\", \"word_2\"] }. The phrases are: " + formatted_phrases
words = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_words_to_replace, token_count,
["words"],
GEN_QUESTION_TEMPERATURE)["words"][:quantity]
word_messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: {"words": ["word_1", "word_2"] }')
},
{
"role": "user",
"content": ('Select 1 word from each phrase in this list:\n"' + formatted_phrases + '"')
}
]
words = make_openai_call(GPT_4_O, word_messages, token_count,["words"],
GEN_QUESTION_TEMPERATURE)["words"][:quantity]
replaced_notes = replace_first_occurrences_with_placeholders_notes(questions, words, start_id)
return {
"id": str(uuid.uuid4()),
@@ -760,20 +898,42 @@ def gen_write_blanks_notes_exercise_listening_conversation(text: str, quantity:
def gen_write_blanks_notes_exercise_listening_monologue(text: str, quantity: int, start_id, difficulty):
gen_write_blanks_notes = "Generate " + str(
quantity) + " " + difficulty + " difficulty notes taken from the monologue and respond in this " \
"JSON format: { \"notes\": [\"note_1\", \"note_2\"] }. The monologue is this: '" + text + "'"
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"notes": ["note_1", "note_2"]}')
},
{
"role": "user",
"content": (
'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty notes taken from this '
'monologue:\n"' + text + '"')
}
]
token_count = count_total_tokens(messages)
questions = make_openai_call(GPT_4_O, messages, token_count, ["notes"],
GEN_QUESTION_TEMPERATURE)["notes"][:quantity]
token_count = count_tokens(gen_write_blanks_notes)["n_tokens"]
questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_write_blanks_notes, token_count,
["notes"],
GEN_QUESTION_TEMPERATURE)["notes"][:quantity]
formatted_phrases = "\n".join([f"{i + 1}. {phrase}" for i, phrase in enumerate(questions)])
gen_words_to_replace = "Select 1 word from each phrase in the list and respond in this " \
"JSON format: { \"words\": [\"word_1\", \"word_2\"] }. The phrases are: " + formatted_phrases
words = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_words_to_replace, token_count,
["words"],
GEN_QUESTION_TEMPERATURE)["words"][:quantity]
word_messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: {"words": ["word_1", "word_2"] }')
},
{
"role": "user",
"content": ('Select 1 word from each phrase in this list:\n"' + formatted_phrases + '"')
}
]
words = make_openai_call(GPT_4_O, word_messages, token_count, ["words"],
GEN_QUESTION_TEMPERATURE)["words"][:quantity]
replaced_notes = replace_first_occurrences_with_placeholders_notes(questions, words, start_id)
return {
"id": str(uuid.uuid4()),
@@ -786,18 +946,25 @@ def gen_write_blanks_notes_exercise_listening_monologue(text: str, quantity: int
def gen_write_blanks_form_exercise_listening_conversation(text: str, quantity: int, start_id, difficulty):
gen_write_blanks_form = "Generate a form with " + str(
quantity) + " " + difficulty + " difficulty key-value pairs about the conversation. " \
"The conversation is this: '" + text + "'"
token_count = count_tokens(gen_write_blanks_form)["n_tokens"]
form = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_write_blanks_form, token_count,
None,
GEN_QUESTION_TEMPERATURE)
parse_form = "Parse the form to this JSON format: { \"form\": [\"string\", \"string\"] }. The form is this: '" + form + "'"
token_count = count_tokens(parse_form)["n_tokens"]
parsed_form = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_form, token_count,
["form"],
GEN_QUESTION_TEMPERATURE)["form"][:quantity]
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"form": ["key: value", "key2: value"]}')
},
{
"role": "user",
"content": (
'Generate a form with ' + str(
quantity) + ' ' + difficulty + ' difficulty key-value pairs about this conversation:\n"' + text + '"')
}
]
token_count = count_total_tokens(messages)
parsed_form = make_openai_call(GPT_4_O, messages, token_count, ["form"],
GEN_QUESTION_TEMPERATURE)["form"][:quantity]
replaced_form, words = build_write_blanks_text_form(parsed_form, start_id)
return {
"id": str(uuid.uuid4()),
@@ -810,18 +977,25 @@ def gen_write_blanks_form_exercise_listening_conversation(text: str, quantity: i
def gen_write_blanks_form_exercise_listening_monologue(text: str, quantity: int, start_id, difficulty):
gen_write_blanks_form = "Generate a form with " + str(
quantity) + " " + difficulty + " difficulty key-value pairs about the monologue. " \
"The monologue is this: '" + text + "'"
token_count = count_tokens(gen_write_blanks_form)["n_tokens"]
form = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_write_blanks_form, token_count,
None,
GEN_QUESTION_TEMPERATURE)
parse_form = "Parse the form to this JSON format: { \"form\": [\"string\", \"string\"] }. The form is this: '" + form + "'"
token_count = count_tokens(parse_form)["n_tokens"]
parsed_form = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_form, token_count,
["form"],
GEN_QUESTION_TEMPERATURE)["form"][:quantity]
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"form": ["key: value", "key2: value"]}')
},
{
"role": "user",
"content": (
'Generate a form with ' + str(
quantity) + ' ' + difficulty + ' difficulty key-value pairs about this monologue:\n"' + text + '"')
}
]
token_count = count_total_tokens(messages)
parsed_form = make_openai_call(GPT_4_O, messages, token_count, ["form"],
GEN_QUESTION_TEMPERATURE)["form"][:quantity]
replaced_form, words = build_write_blanks_text_form(parsed_form, start_id)
return {
"id": str(uuid.uuid4()),
@@ -840,46 +1014,31 @@ def gen_multiple_choice_level(quantity: int, start_id=1):
"verb tense, subject-verb agreement, pronoun usage, sentence structure, and punctuation. Make sure " \
"every question only has 1 correct answer."
messages = [{
"role": "user",
"content": gen_multiple_choice_for_text
}]
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: {"questions": [{"id": "9", "options": '
'[{"id": "A", "text": '
'"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
'"Happy"}, {"id": "D", "text": "Jump"}], '
'"prompt": "Which of the following is a conjunction?", '
'"solution": "A", "variant": "text"}]}')
},
{
"role": "user",
"content": gen_multiple_choice_for_text
}
]
token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"] - 300
mc_questions = make_openai_call(GPT_4_PREVIEW, messages, token_count,
None,
token_count = count_total_tokens(messages)
question = make_openai_call(GPT_4_O, messages, token_count,
["questions"],
GEN_QUESTION_TEMPERATURE)
if not '25' in mc_questions:
if len(question["questions"]) != 25:
return gen_multiple_choice_level(quantity, start_id)
else:
split_mc_questions = mc_questions.split('13')
parse_mc_questions = ('Parse the questions into this json format: \n\'{"questions": [{"id": "9", "options": '
'[{"id": "A", "text": '
'"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
'"Happy"}, {"id": "D", "text": "Jump"}], '
'"prompt": "Which of the following is a conjunction?", '
'"solution": "A", "variant": "text"}]}\'\n '
'\nThe questions: "' + split_mc_questions[0] + '"')
token_count = count_tokens(parse_mc_questions, model_name=GPT_3_5_TURBO_INSTRUCT)["n_tokens"]
question = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_mc_questions, token_count,
["questions"],
GEN_QUESTION_TEMPERATURE)
print(question)
parse_mc_questions = ('Parse the questions into this json format: \n\'{"questions": [{"id": "9", "options": '
'[{"id": "A", "text": '
'"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
'"Happy"}, {"id": "D", "text": "Jump"}], '
'"prompt": "Which of the following is a conjunction?", '
'"solution": "A", "variant": "text"}]}\'\n '
'\nThe questions: "' + '13' + split_mc_questions[1] + '"')
token_count = count_tokens(parse_mc_questions, model_name=GPT_3_5_TURBO_INSTRUCT)["n_tokens"]
question_2 = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_mc_questions, token_count,
["questions"],
GEN_QUESTION_TEMPERATURE)
print(question_2)
question["questions"].extend(question_2["questions"])
all_exams = get_all("level")
seen_keys = set()
for i in range(len(question["questions"])):
@@ -916,23 +1075,37 @@ def replace_exercise_if_exists(all_exams, current_exercise, current_exam, seen_k
def generate_single_mc_level_question():
    """Request one multiple choice question for the English level exam.

    A single chat call is made: the system message pins the JSON shape of
    the question (id, four options, prompt, solution, variant) and the user
    message asks for the question itself, so no separate "parse into JSON"
    round trip is needed.

    Returns:
        The value returned by make_openai_call for this request, asked to
        check for the "options" field. NOTE(review): make_openai_call appears
        to return an empty string instead of parsed JSON when it gives up on
        blacklisted words -- confirm callers cope with that.
    """
    messages = [
        {
            "role": "system",
            "content": (
                'You are a helpful assistant designed to output JSON on this format: '
                '{"id": "9", "options": [{"id": "A", "text": "And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
                '"Happy"}, {"id": "D", "text": "Jump"}], "prompt": "Which of the following is a conjunction?", '
                '"solution": "A", "variant": "text"}')
        },
        {
            "role": "user",
            "content": ('Generate 1 multiple choice question of 4 options for an english level exam, it can be easy, '
                        'intermediate or advanced.')
        }
    ]

    # Token budget is computed over the whole message list, not a single prompt string.
    token_count = count_total_tokens(messages)
    return make_openai_call(GPT_4_O, messages, token_count, ["options"],
                            GEN_QUESTION_TEMPERATURE)
def parse_conversation(conversation_data):
    """Render a structured conversation as plain "name: text" lines.

    Args:
        conversation_data: Mapping with a "conversation" key that holds a
            list of message mappings, each with optional "name" and "text"
            keys. Any other value (None, or the empty string that
            make_openai_call returns when it gives up on a response) is
            treated as a conversation with no messages.

    Returns:
        One "name: text" line per message, joined with newlines. A missing
        name becomes "Unknown" and a missing text becomes an empty string.
        Returns an empty string when there are no messages.
    """
    if not isinstance(conversation_data, dict):
        return ""

    # `or []` also covers an explicit null stored under "conversation".
    conversation_list = conversation_data.get('conversation') or []

    return "\n".join(
        f"{message.get('name', 'Unknown')}: {message.get('text', '')}"
        for message in conversation_list
        if isinstance(message, dict)  # ignore malformed entries instead of crashing
    )

View File

@@ -1,5 +1,6 @@
import json
import os
import re
from openai import OpenAI
from dotenv import load_dotenv
@@ -63,10 +64,15 @@ def make_openai_call(model, messages, token_count, fields_to_check, temperature)
response_format={"type": "json_object"}
)
result = result.choices[0].message.content
if has_blacklisted_words(result) and try_count < TRY_LIMIT:
found_blacklisted_word = get_found_blacklisted_words(result)
if found_blacklisted_word is not None and try_count < TRY_LIMIT:
from app import app
app.logger.warning("Result contains blacklisted words: " + str(found_blacklisted_word))
try_count = try_count + 1
return make_openai_call(model, messages, token_count, fields_to_check, temperature)
elif has_blacklisted_words(result) and try_count >= TRY_LIMIT:
elif found_blacklisted_word is not None and try_count >= TRY_LIMIT:
return ""
if fields_to_check is None:
@@ -83,11 +89,6 @@ def make_openai_call(model, messages, token_count, fields_to_check, temperature)
return json.loads(result)
def make_openai_instruct_call(model, message: str, token_count, fields_to_check, temperature):
global try_count
return ""
# GRADING SUMMARY
def calculate_grading_summary(body):
extracted_sections = extract_existing_sections_from_body(body, section_keys)
@@ -210,6 +211,12 @@ def has_blacklisted_words(text: str):
text_lower = text.lower()
return any(word in text_lower for word in BLACKLISTED_WORDS)
def get_found_blacklisted_words(text: str):
    """Return the first blacklisted word that occurs in *text*, or None.

    The text is lower-cased before matching, and each entry of
    BLACKLISTED_WORDS is searched for as a literal with a word boundary on
    both sides, so an entry embedded inside a longer word does not count.
    Entries are tried in BLACKLISTED_WORDS order and only the first hit is
    reported.
    """
    lowered = text.lower()
    matches = (
        candidate
        for candidate in BLACKLISTED_WORDS
        if re.search(r'\b' + re.escape(candidate) + r'\b', lowered)
    )
    # The generator is lazy, so scanning stops at the first matching entry.
    return next(matches, None)
def remove_special_characters_from_beginning(string):
cleaned_string = string.lstrip('\n')