All tested except grading speaking.

This commit is contained in:
Cristiano Ferreira
2024-05-22 21:07:48 +01:00
parent fe753fe72c
commit b7c18517de
4 changed files with 494 additions and 321 deletions

16
app.py
View File

@@ -57,12 +57,12 @@ def get_listening_section_1_question():
number_of_exercises_q = divide_number_into_parts(TOTAL_LISTENING_SECTION_1_EXERCISES, len(req_exercises)) number_of_exercises_q = divide_number_into_parts(TOTAL_LISTENING_SECTION_1_EXERCISES, len(req_exercises))
unprocessed_conversation, processed_conversation = generate_listening_1_conversation(topic) processed_conversation = generate_listening_1_conversation(topic)
app.logger.info("Generated conversation: " + str(processed_conversation)) app.logger.info("Generated conversation: " + str(processed_conversation))
start_id = 1 start_id = 1
exercises = generate_listening_conversation_exercises(unprocessed_conversation, req_exercises, exercises = generate_listening_conversation_exercises(parse_conversation(processed_conversation), req_exercises,
number_of_exercises_q, number_of_exercises_q,
start_id, difficulty) start_id, difficulty)
return { return {
@@ -93,8 +93,8 @@ def get_listening_section_2_question():
app.logger.info("Generated monologue: " + str(monologue)) app.logger.info("Generated monologue: " + str(monologue))
start_id = 11 start_id = 11
exercises = generate_listening_monologue_exercises(monologue, req_exercises, number_of_exercises_q, start_id, exercises = generate_listening_monologue_exercises(str(monologue), req_exercises, number_of_exercises_q,
difficulty) start_id, difficulty)
return { return {
"exercises": exercises, "exercises": exercises,
"text": monologue, "text": monologue,
@@ -119,12 +119,12 @@ def get_listening_section_3_question():
number_of_exercises_q = divide_number_into_parts(TOTAL_LISTENING_SECTION_3_EXERCISES, len(req_exercises)) number_of_exercises_q = divide_number_into_parts(TOTAL_LISTENING_SECTION_3_EXERCISES, len(req_exercises))
unprocessed_conversation, processed_conversation = generate_listening_3_conversation(topic) processed_conversation = generate_listening_3_conversation(topic)
app.logger.info("Generated conversation: " + str(processed_conversation)) app.logger.info("Generated conversation: " + str(processed_conversation))
start_id = 21 start_id = 21
exercises = generate_listening_conversation_exercises(unprocessed_conversation, req_exercises, exercises = generate_listening_conversation_exercises(parse_conversation(processed_conversation), req_exercises,
number_of_exercises_q, number_of_exercises_q,
start_id, difficulty) start_id, difficulty)
return { return {
@@ -155,8 +155,8 @@ def get_listening_section_4_question():
app.logger.info("Generated monologue: " + str(monologue)) app.logger.info("Generated monologue: " + str(monologue))
start_id = 31 start_id = 31
exercises = generate_listening_monologue_exercises(monologue, req_exercises, number_of_exercises_q, start_id, exercises = generate_listening_monologue_exercises(str(monologue), req_exercises, number_of_exercises_q,
difficulty) start_id, difficulty)
return { return {
"exercises": exercises, "exercises": exercises,
"text": monologue, "text": monologue,

View File

@@ -34,9 +34,9 @@ WRITING_MIN_TIMER_DEFAULT = 60
SPEAKING_MIN_TIMER_DEFAULT = 14 SPEAKING_MIN_TIMER_DEFAULT = 14
BLACKLISTED_WORDS = ["jesus", "sex", "gay", "lesbian", "homosexual", "god", "angel", "pornography", "beer", "wine", BLACKLISTED_WORDS = ["jesus", "sex", "gay", "lesbian", "homosexual", "god", "angel", "pornography", "beer", "wine",
"cocaine", "drugs", "alcohol", "nudity", "lgbt", "casino", "gambling", "gaming", "catholicism", "cocaine", "alcohol", "nudity", "lgbt", "casino", "gambling", "catholicism",
"discrimination", "politics", "politic", "christianity", "islam", "christian", "christians", "discrimination", "politics", "politic", "christianity", "islam", "christian", "christians",
"jews", "jew", "policies", "human rights", "discrimination", "discriminatory"] "jews", "jew", "discrimination", "discriminatory"]
EN_US_VOICES = [ EN_US_VOICES = [
{'Gender': 'Female', 'Id': 'Salli', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Name': 'Salli', {'Gender': 'Female', 'Id': 'Salli', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Name': 'Salli',
@@ -115,7 +115,6 @@ mti_topics = [
"Technology", "Technology",
"Environment", "Environment",
"Health and Fitness", "Health and Fitness",
"Globalization",
"Engineering", "Engineering",
"Work and Careers", "Work and Careers",
"Travel and Tourism", "Travel and Tourism",
@@ -176,7 +175,6 @@ topics = [
"Cultural Diversity", "Cultural Diversity",
"Modern Technology Trends", "Modern Technology Trends",
"Sustainable Agriculture", "Sustainable Agriculture",
"Globalization",
"Natural Disasters", "Natural Disasters",
"Cybersecurity", "Cybersecurity",
"Philosophy of Ethics", "Philosophy of Ethics",
@@ -184,7 +182,6 @@ topics = [
"Health and Wellness", "Health and Wellness",
"Literature and Classics", "Literature and Classics",
"World Geography", "World Geography",
"Music and Its Influence",
"Social Media Impact", "Social Media Impact",
"Food Sustainability", "Food Sustainability",
"Economics and Markets", "Economics and Markets",
@@ -215,7 +212,6 @@ topics = [
"World Oceans", "World Oceans",
"Social Networking", "Social Networking",
"Sustainable Fashion", "Sustainable Fashion",
"International Trade",
"Prehistoric Era", "Prehistoric Era",
"Democracy and Governance", "Democracy and Governance",
"Postcolonial Literature", "Postcolonial Literature",
@@ -231,7 +227,6 @@ topics = [
"Artificial Life", "Artificial Life",
"Fitness and Nutrition", "Fitness and Nutrition",
"Classic Literature Adaptations", "Classic Literature Adaptations",
"World History Wars",
"Ethical Dilemmas", "Ethical Dilemmas",
"Internet of Things (IoT)", "Internet of Things (IoT)",
"Meditation Practices", "Meditation Practices",
@@ -239,7 +234,6 @@ topics = [
"Marine Conservation", "Marine Conservation",
"Social Justice Movements", "Social Justice Movements",
"Sustainable Tourism", "Sustainable Tourism",
"International Finance",
"Ancient Philosophy", "Ancient Philosophy",
"Cold War Era", "Cold War Era",
"Behavioral Economics", "Behavioral Economics",
@@ -442,7 +436,6 @@ social_monologue_contexts = [
"A monologue about the impact of technological advancements", "A monologue about the impact of technological advancements",
"An explanation of the process of wildlife rehabilitation", "An explanation of the process of wildlife rehabilitation",
"A presentation on the history of a famous explorer", "A presentation on the history of a famous explorer",
"An overview of traditional storytelling from different cultures",
"A lecture on the principles of effective marketing", "A lecture on the principles of effective marketing",
"A discussion about the challenges of environmental sustainability", "A discussion about the challenges of environmental sustainability",
"A monologue about the influence of social entrepreneurship", "A monologue about the influence of social entrepreneurship",

View File

@@ -10,8 +10,8 @@ from wonderwords import RandomWord
from helper.api_messages import QuestionType from helper.api_messages import QuestionType
from helper.constants import * from helper.constants import *
from helper.firebase_helper import get_all from helper.firebase_helper import get_all
from helper.openai_interface import make_openai_instruct_call, make_openai_call from helper.openai_interface import make_openai_call, count_total_tokens
from helper.token_counter import count_tokens from helper.speech_to_text_helper import has_x_words
nltk.download('words') nltk.download('words')
@@ -240,48 +240,63 @@ def build_write_blanks_solutions_listening(words: [], start_id):
def generate_reading_passage(type: QuestionType, topic: str): def generate_reading_passage(type: QuestionType, topic: str):
gen_reading_passage_1 = "Generate an extensive text for IELTS " + type.value + ", of at least 1500 words, on the topic " \ messages = [
"of '" + topic + "'. The passage should offer a substantial amount of " \ {
"information, analysis, or narrative " \ "role": "system",
"relevant to the chosen subject matter. This text passage aims to serve as the primary reading " \ "content": (
"section of an IELTS test, providing an in-depth and comprehensive exploration of the topic. " \ 'You are a helpful assistant designed to output JSON on this format: '
"Make sure that the generated text does not contain forbidden subjects in muslim countries." \ '{"title": "title of the text", "text": "generated text"}')
"Provide your response in this json format: {\"title\": \"title of the text\", \"text\": \"generated text\"}" },
token_count = count_tokens(gen_reading_passage_1)["n_tokens"] {
return make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_reading_passage_1, token_count, GEN_TEXT_FIELDS, "role": "user",
GEN_QUESTION_TEMPERATURE) "content": (
'Generate an extensive text for IELTS ' + type.value + ', of at least 1500 words, on the topic '
'of "' + topic + '". The passage should offer '
'a substantial amount of information, '
'analysis, or narrative relevant to the chosen '
'subject matter. This text passage aims to '
'serve as the primary reading section of an '
'IELTS test, providing an in-depth and '
'comprehensive exploration of the topic. '
'Make sure that the generated text does not '
'contain forbidden subjects in muslim countries.')
}
]
token_count = count_total_tokens(messages)
return make_openai_call(GPT_4_O, messages, token_count, GEN_TEXT_FIELDS, GEN_QUESTION_TEMPERATURE)
def generate_listening_1_conversation(topic: str): def generate_listening_1_conversation(topic: str):
gen_listening_1_conversation_2_people = "Compose an authentic conversation between two individuals in the everyday " \ messages = [
"social context of '" + topic + "'. Please include random names and genders " \ {
"for the characters in your dialogue. " \ "role": "system",
"Make sure that the generated conversation does not contain forbidden subjects in muslim countries." "content": (
token_count = count_tokens(gen_listening_1_conversation_2_people)["n_tokens"] 'You are a helpful assistant designed to output JSON on this format: '
response = make_openai_instruct_call( '{"conversation": [{"name": "name", "gender": "gender", "text": "text"}]}')
GPT_3_5_TURBO_INSTRUCT, },
gen_listening_1_conversation_2_people, {
"role": "user",
"content": (
'Compose an authentic conversation between two individuals in the everyday social context '
'of "' + topic + '". Please include random names and genders for the characters in your dialogue. '
'Make sure that the generated conversation does not contain forbidden subjects in '
'muslim countries.')
}
]
token_count = count_total_tokens(messages)
response = make_openai_call(
GPT_4_O,
messages,
token_count, token_count,
None, ["conversation"],
GEN_QUESTION_TEMPERATURE
)
conversation_json = '{"conversation": [{"name": "name", "gender": "gender", "text": "text"}]}'
parse_conversation = "Parse this conversation: '" + response + "' to the following json format: " + conversation_json
token_count = count_tokens(parse_conversation)["n_tokens"]
processed = make_openai_instruct_call(
GPT_3_5_TURBO_INSTRUCT,
parse_conversation,
token_count,
['conversation'],
GEN_QUESTION_TEMPERATURE GEN_QUESTION_TEMPERATURE
) )
chosen_voices = [] chosen_voices = []
name_to_voice = {} name_to_voice = {}
for segment in processed['conversation']: for segment in response['conversation']:
if 'voice' not in segment: if 'voice' not in segment:
name = segment['name'] name = segment['name']
if name in name_to_voice: if name in name_to_voice:
@@ -300,50 +315,66 @@ def generate_listening_1_conversation(topic: str):
chosen_voices.append(voice) chosen_voices.append(voice)
name_to_voice[name] = voice name_to_voice[name] = voice
segment['voice'] = voice segment['voice'] = voice
return response, processed
def generate_listening_2_monologue(topic: str):
gen_listening_2_monologue_social = "Generate a comprehensive monologue set in the social context of: '" + topic + "'. Make sure that the generated monologue does not contain forbidden subjects in muslim countries."
token_count = count_tokens(gen_listening_2_monologue_social)["n_tokens"]
response = make_openai_instruct_call(
GPT_3_5_TURBO_INSTRUCT,
gen_listening_2_monologue_social,
token_count,
None,
GEN_QUESTION_TEMPERATURE
)
return response return response
def generate_listening_3_conversation(topic: str): def generate_listening_2_monologue(topic: str):
gen_listening_3_conversation_4_people = "Compose an authentic and elaborate conversation between up to four individuals " \ messages = [
"in the everyday social context of '" + topic + \ {
"'. Please include random names and genders for the characters in your dialogue. " \ "role": "system",
"Make sure that the generated conversation does not contain forbidden subjects in muslim countries." "content": (
token_count = count_tokens(gen_listening_3_conversation_4_people)["n_tokens"] 'You are a helpful assistant designed to output JSON on this format: '
response = make_openai_instruct_call( '{"monologue": "monologue"}')
GPT_3_5_TURBO_INSTRUCT, },
gen_listening_3_conversation_4_people, {
"role": "user",
"content": (
'Generate a comprehensive monologue set in the social context '
'of "' + topic + '". Make sure that the generated monologue does not contain forbidden subjects in '
'muslim countries.')
}
]
token_count = count_total_tokens(messages)
response = make_openai_call(
GPT_4_O,
messages,
token_count, token_count,
None, ["monologue"],
GEN_QUESTION_TEMPERATURE GEN_QUESTION_TEMPERATURE
) )
conversation_json = '{"conversation": [{"name": "name", "gender": "gender", "text": "text"}]}' return response["monologue"]
parse_conversation = "Parse this conversation: '" + response + "' to the following json format: " + conversation_json
token_count = count_tokens(parse_conversation)["n_tokens"] def generate_listening_3_conversation(topic: str):
processed = make_openai_instruct_call( messages = [
GPT_3_5_TURBO_INSTRUCT, {
parse_conversation, "role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"conversation": [{"name": "name", "gender": "gender", "text": "text"}]}')
},
{
"role": "user",
"content": (
'Compose an authentic and elaborate conversation between up to four individuals in the everyday '
'social context of "' + topic + '". Please include random names and genders for the characters in your dialogue. '
'Make sure that the generated conversation does not contain forbidden subjects in '
'muslim countries.')
}
]
token_count = count_total_tokens(messages)
response = make_openai_call(
GPT_4_O,
messages,
token_count, token_count,
['conversation'], ["conversation"],
GEN_QUESTION_TEMPERATURE GEN_QUESTION_TEMPERATURE
) )
name_to_voice = {} name_to_voice = {}
for segment in processed['conversation']: for segment in response['conversation']:
if 'voice' not in segment: if 'voice' not in segment:
name = segment['name'] name = segment['name']
if name in name_to_voice: if name in name_to_voice:
@@ -355,20 +386,35 @@ def generate_listening_3_conversation(topic: str):
voice = random.choice(FEMALE_NEURAL_VOICES)['Id'] voice = random.choice(FEMALE_NEURAL_VOICES)['Id']
name_to_voice[name] = voice name_to_voice[name] = voice
segment['voice'] = voice segment['voice'] = voice
return response, processed return response
def generate_listening_4_monologue(topic: str): def generate_listening_4_monologue(topic: str):
gen_listening_4_monologue_academic = "Generate a comprehensive monologue an academic subject of: '" + topic + "'. Make sure that the generated monologue does not contain forbidden subjects in muslim countries." messages = [
token_count = count_tokens(gen_listening_4_monologue_academic)["n_tokens"] {
response = make_openai_instruct_call( "role": "system",
GPT_3_5_TURBO_INSTRUCT, "content": (
gen_listening_4_monologue_academic, 'You are a helpful assistant designed to output JSON on this format: '
'{"monologue": "monologue"}')
},
{
"role": "user",
"content": (
'Generate a comprehensive monologue on the academic subject '
'of: "' + topic + '". Make sure that the generated monologue does not contain forbidden subjects in '
'muslim countries.')
}
]
token_count = count_total_tokens(messages)
response = make_openai_call(
GPT_4_O,
messages,
token_count, token_count,
None, ["monologue"],
GEN_QUESTION_TEMPERATURE GEN_QUESTION_TEMPERATURE
) )
return response return response["monologue"]
def generate_reading_exercises(passage: str, req_exercises: list, number_of_exercises_q, start_id, difficulty): def generate_reading_exercises(passage: str, req_exercises: list, number_of_exercises_q, start_id, difficulty):
@@ -392,7 +438,7 @@ def generate_reading_exercises(passage: str, req_exercises: list, number_of_exer
else: else:
exercises.append({}) exercises.append({})
print("Did not add write blanks because it did not respect word limit") print("Did not add write blanks because it did not respect word limit")
elif req_exercise == "matchSentences": elif req_exercise == "paragraphMatch":
question = gen_paragraph_match_exercise(passage, number_of_exercises, start_id) question = gen_paragraph_match_exercise(passage, number_of_exercises, start_id)
exercises.append(question) exercises.append(question)
print("Added paragraph match: " + str(question)) print("Added paragraph match: " + str(question))
@@ -478,27 +524,27 @@ def generate_listening_monologue_exercises(monologue: str, req_exercises: list,
def gen_multiple_choice_exercise(text: str, quantity: int, start_id, difficulty): def gen_multiple_choice_exercise(text: str, quantity: int, start_id, difficulty):
gen_multiple_choice_for_text = "Generate " + str( messages = [
quantity) + " " + difficulty + " difficulty multiple choice questions for this text: " \ {
"'" + text + "'\n" \ "role": "system",
"Use this format: \"questions\": [{\"id\": \"9\", \"options\": [{\"id\": \"A\", \"text\": " \ "content": (
"\"Economic benefits\"}, {\"id\": \"B\", \"text\": \"Government regulations\"}, {\"id\": \"C\", \"text\": " \ 'You are a helpful assistant designed to output JSON on this format: '
"\"Concerns about climate change\"}, {\"id\": \"D\", \"text\": \"Technological advancement\"}], " \ '{"questions": [{"id": "9", "options": [{"id": "A", "text": "Economic benefits"}, {"id": "B", "text": '
"\"prompt\": \"What is the main reason for the shift towards renewable energy sources?\", " \ '"Government regulations"}, {"id": "C", "text": "Concerns about climate change"}, {"id": "D", "text": '
"\"solution\": \"C\", \"variant\": \"text\"}]" '"Technological advancement"}], "prompt": "What is the main reason for the shift towards renewable '
token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"] 'energy sources?", "solution": "C", "variant": "text"}]}')
mc_questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_multiple_choice_for_text, token_count, },
None, {
GEN_QUESTION_TEMPERATURE) "role": "user",
parse_mc_questions = "Parse the questions into this json format: {\"questions\": [{\"id\": \"9\", \"options\": [{\"id\": \"A\", \"text\": " \ "content": (
"\"Economic benefits\"}, {\"id\": \"B\", \"text\": \"Government regulations\"}, {\"id\": \"C\", \"text\": " \ 'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty multiple choice questions '
"\"Concerns about climate change\"}, {\"id\": \"D\", \"text\": \"Technological advancement\"}], " \ 'for this text:\n"' + text + '"')
"\"prompt\": \"What is the main reason for the shift towards renewable energy sources?\", " \
"\"solution\": \"C\", \"variant\": \"text\"}]}. \nThe questions: '" + mc_questions + "'" }
token_count = count_tokens(parse_mc_questions)["n_tokens"] ]
question = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_mc_questions, token_count, token_count = count_total_tokens(messages)
["questions"], question = make_openai_call(GPT_4_O, messages, token_count, ["questions"],
GEN_QUESTION_TEMPERATURE) GEN_QUESTION_TEMPERATURE)
return { return {
"id": str(uuid.uuid4()), "id": str(uuid.uuid4()),
"prompt": "Select the appropriate option.", "prompt": "Select the appropriate option.",
@@ -508,23 +554,34 @@ def gen_multiple_choice_exercise(text: str, quantity: int, start_id, difficulty)
def gen_summary_fill_blanks_exercise(text: str, quantity: int, start_id, difficulty): def gen_summary_fill_blanks_exercise(text: str, quantity: int, start_id, difficulty):
gen_summary_for_text = "Summarize this text: " + text messages = [
token_count = count_tokens(gen_summary_for_text)["n_tokens"] {
text_summary = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_summary_for_text, token_count, "role": "system",
None, "content": (
GEN_QUESTION_TEMPERATURE) 'You are a helpful assistant designed to output JSON on this format: '
'{ "summary": "summary", "words": ["word_1", "word_2"] }')
},
{
"role": "user",
"content": ('Summarize this text: "'+ text + '"')
gen_words_to_replace = "Select " + str( },
quantity) + " " + difficulty + " difficulty words, it must be words and not expressions, from the summary and respond in this " \ {
"JSON format: { \"words\": [\"word_1\", \"word_2\"] }. The summary is: " + text_summary "role": "user",
token_count = count_tokens(gen_words_to_replace)["n_tokens"] "content": ('Select ' + str(quantity) + ' ' + difficulty + ' difficulty words, it must be words and not '
words_to_replace = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_words_to_replace, token_count, 'expressions, from the summary.')
["words"],
GEN_QUESTION_TEMPERATURE)["words"]
replaced_summary = replace_first_occurrences_with_placeholders(text_summary, words_to_replace, start_id) }
options_words = add_random_words_and_shuffle(words_to_replace, 5) ]
solutions = fillblanks_build_solutions_array(words_to_replace, start_id) token_count = count_total_tokens(messages)
response = make_openai_call(GPT_4_O, messages, token_count,
["summary"],
GEN_QUESTION_TEMPERATURE)
replaced_summary = replace_first_occurrences_with_placeholders(response["summary"], response["words"], start_id)
options_words = add_random_words_and_shuffle(response["words"], 5)
solutions = fillblanks_build_solutions_array(response["words"], start_id)
return { return {
"allowRepetition": True, "allowRepetition": True,
@@ -540,20 +597,30 @@ def gen_summary_fill_blanks_exercise(text: str, quantity: int, start_id, difficu
def gen_true_false_not_given_exercise(text: str, quantity: int, start_id, difficulty): def gen_true_false_not_given_exercise(text: str, quantity: int, start_id, difficulty):
gen_true_false_not_given = "Generate " + str( messages = [
quantity) + " " + difficulty + " difficulty statements in JSON format (True, False, or Not Given) " \ {
"based on the provided text. Ensure that your statements " \ "role": "system",
"accurately represent information or inferences from the " \ "content": (
"text, and provide a variety of responses, including, at least one of each True, " \ 'You are a helpful assistant designed to output JSON on this format: '
"False, and Not Given, as appropriate, in the JSON structure " \ '{"prompts":[{"prompt": "statement_1", "solution": "true/false/not_given"}, '
"{\"prompts\":[{\"prompt\": \"statement_1\", \"solution\": " \ '{"prompt": "statement_2", "solution": "true/false/not_given"}]}')
"\"true/false/not_given\"}, {\"prompt\": \"statement_2\", " \ },
"\"solution\": \"true/false/not_given\"}]}. Reference text: " + text {
"role": "user",
"content": (
'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty statements based on the provided text. '
'Ensure that your statements accurately represent '
'information or inferences from the text, and '
'provide a variety of responses, including, at '
'least one of each True, False, and Not Given, '
'as appropriate.\n\nReference text:\n\n ' + text)
token_count = count_tokens(gen_true_false_not_given)["n_tokens"] }
questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_true_false_not_given, token_count, ]
["prompts"], token_count = count_total_tokens(messages)
GEN_QUESTION_TEMPERATURE)["prompts"]
questions = make_openai_call(GPT_4_O, messages, token_count,["prompts"],
GEN_QUESTION_TEMPERATURE)["prompts"]
if len(questions) > quantity: if len(questions) > quantity:
questions = remove_excess_questions(questions, len(questions) - quantity) questions = remove_excess_questions(questions, len(questions) - quantity)
@@ -569,16 +636,25 @@ def gen_true_false_not_given_exercise(text: str, quantity: int, start_id, diffic
def gen_write_blanks_exercise(text: str, quantity: int, start_id, difficulty): def gen_write_blanks_exercise(text: str, quantity: int, start_id, difficulty):
gen_short_answer_questions = "Generate " + str( messages = [
quantity) + " " + difficulty + " difficulty short answer questions, and the possible answers, " \ {
"must have maximum 3 words per answer, about this text: '" + text + "'. " \ "role": "system",
"Provide your answer in this JSON format: {\"questions\": [{\"question\": question, " \ "content": (
"\"possible_answers\": [\"answer_1\", \"answer_2\"]}]}" 'You are a helpful assistant designed to output JSON on this format: '
'{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}')
},
{
"role": "user",
"content": (
'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty short answer questions, and the '
'possible answers, must have maximum 3 words '
'per answer, about this text:\n"' + text + '"')
token_count = count_tokens(gen_short_answer_questions)["n_tokens"] }
questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_short_answer_questions, token_count, ]
["questions"], token_count = count_total_tokens(messages)
GEN_QUESTION_TEMPERATURE)["questions"][:quantity] questions = make_openai_call(GPT_4_O, messages, token_count,["questions"],
GEN_QUESTION_TEMPERATURE)["questions"][:quantity]
return { return {
"id": str(uuid.uuid4()), "id": str(uuid.uuid4()),
@@ -592,15 +668,24 @@ def gen_write_blanks_exercise(text: str, quantity: int, start_id, difficulty):
def gen_paragraph_match_exercise(text: str, quantity: int, start_id): def gen_paragraph_match_exercise(text: str, quantity: int, start_id):
paragraphs = assign_letters_to_paragraphs(text) paragraphs = assign_letters_to_paragraphs(text)
heading_prompt = ( messages = [
'For every paragraph of the list generate a minimum 5 word heading for it. Provide your answer in this JSON format: ' {
'{"headings": [ {"heading": "first paragraph heading"}, {"heading": "second paragraph heading"}]}\n' "role": "system",
'The paragraphs are these: ' + str(paragraphs)) "content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"headings": [ {"heading": "first paragraph heading"}, {"heading": "second paragraph heading"}]}')
},
{
"role": "user",
"content": (
'For every paragraph of the list generate a minimum 5 word heading for it. The paragraphs are these: ' + str(paragraphs))
token_count = count_tokens(heading_prompt)["n_tokens"] }
headings = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, heading_prompt, token_count, ]
["headings"], token_count = count_total_tokens(messages)
GEN_QUESTION_TEMPERATURE)["headings"]
headings = make_openai_call(GPT_4_O, messages, token_count,["headings"],
GEN_QUESTION_TEMPERATURE)["headings"]
options = [] options = []
for i, paragraph in enumerate(paragraphs, start=0): for i, paragraph in enumerate(paragraphs, start=0):
@@ -615,7 +700,7 @@ def gen_paragraph_match_exercise(text: str, quantity: int, start_id):
for i, paragraph in enumerate(paragraphs, start=start_id): for i, paragraph in enumerate(paragraphs, start=start_id):
sentences.append({ sentences.append({
"id": i, "id": i,
"sentence": paragraph["heading"]["heading"], "sentence": paragraph["heading"],
"solution": paragraph["letter"] "solution": paragraph["letter"]
}) })
@@ -632,28 +717,34 @@ def gen_paragraph_match_exercise(text: str, quantity: int, start_id):
def assign_letters_to_paragraphs(paragraphs): def assign_letters_to_paragraphs(paragraphs):
result = [] result = []
letters = iter(string.ascii_uppercase) letters = iter(string.ascii_uppercase)
for paragraph in paragraphs.split("\n"): for paragraph in paragraphs.split("\n\n"):
result.append({'paragraph': paragraph.strip(), 'letter': next(letters)}) if has_x_words(paragraph, 10):
result.append({'paragraph': paragraph.strip(), 'letter': next(letters)})
return result return result
def gen_multiple_choice_exercise_listening_conversation(text: str, quantity: int, start_id, difficulty): def gen_multiple_choice_exercise_listening_conversation(text: str, quantity: int, start_id, difficulty):
gen_multiple_choice_for_text = "Generate " + str( messages = [
quantity) + " " + difficulty + " difficulty multiple choice questions of 4 options of for this conversation: " \ {
"'" + text + "'" "role": "system",
token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"] "content": (
mc_questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_multiple_choice_for_text, token_count, 'You are a helpful assistant designed to output JSON on this format: '
None, '{"questions": [{"id": "9", "options": [{"id": "A", "text": "Economic benefits"}, {"id": "B", "text": '
GEN_QUESTION_TEMPERATURE) '"Government regulations"}, {"id": "C", "text": "Concerns about climate change"}, {"id": "D", "text": '
parse_mc_questions = "Parse the questions into this json format: {\"questions\": [{\"id\": \"9\", \"options\": [{\"id\": \"A\", \"text\": " \ '"Technological advancement"}], "prompt": "What is the main reason for the shift towards renewable '
"\"Economic benefits\"}, {\"id\": \"B\", \"text\": \"Government regulations\"}, {\"id\": \"C\", \"text\": " \ 'energy sources?", "solution": "C", "variant": "text"}]}')
"\"Concerns about climate change\"}, {\"id\": \"D\", \"text\": \"Technological advancement\"}], " \ },
"\"prompt\": \"What is the main reason for the shift towards renewable energy sources?\", " \ {
"\"solution\": \"C\", \"variant\": \"text\"}]}. \nThe questions: '" + mc_questions + "'" "role": "user",
token_count = count_tokens(parse_mc_questions)["n_tokens"] "content": (
question = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_mc_questions, token_count, 'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty multiple choice questions of 4 options '
["questions"], 'of for this conversation:\n"' + text + '"')
GEN_QUESTION_TEMPERATURE)
}
]
token_count = count_total_tokens(messages)
question = make_openai_call(GPT_4_O, messages, token_count,["questions"], GEN_QUESTION_TEMPERATURE)
return { return {
"id": str(uuid.uuid4()), "id": str(uuid.uuid4()),
"prompt": "Select the appropriate option.", "prompt": "Select the appropriate option.",
@@ -663,22 +754,28 @@ def gen_multiple_choice_exercise_listening_conversation(text: str, quantity: int
def gen_multiple_choice_exercise_listening_monologue(text: str, quantity: int, start_id, difficulty): def gen_multiple_choice_exercise_listening_monologue(text: str, quantity: int, start_id, difficulty):
gen_multiple_choice_for_text = "Generate " + str( messages = [
quantity) + " " + difficulty + " difficulty multiple choice questions for this monologue: " \ {
"'" + text + "'" "role": "system",
token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"] "content": (
mc_questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_multiple_choice_for_text, token_count, 'You are a helpful assistant designed to output JSON on this format: '
None, '{"questions": [{"id": "9", "options": [{"id": "A", "text": "Economic benefits"}, {"id": "B", "text": '
GEN_QUESTION_TEMPERATURE) '"Government regulations"}, {"id": "C", "text": "Concerns about climate change"}, {"id": "D", "text": '
parse_mc_questions = "Parse the questions into this json format: {\"questions\": [{\"id\": \"9\", \"options\": [{\"id\": \"A\", \"text\": " \ '"Technological advancement"}], "prompt": "What is the main reason for the shift towards renewable '
"\"Economic benefits\"}, {\"id\": \"B\", \"text\": \"Government regulations\"}, {\"id\": \"C\", \"text\": " \ 'energy sources?", "solution": "C", "variant": "text"}]}')
"\"Concerns about climate change\"}, {\"id\": \"D\", \"text\": \"Technological advancement\"}], " \ },
"\"prompt\": \"What is the main reason for the shift towards renewable energy sources?\", " \ {
"\"solution\": \"C\", \"variant\": \"text\"}]}. \nThe questions: '" + mc_questions + "'" "role": "user",
token_count = count_tokens(parse_mc_questions)["n_tokens"] "content": (
question = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_mc_questions, token_count, 'Generate ' + str(
["questions"], quantity) + ' ' + difficulty + ' difficulty multiple choice questions of 4 options '
GEN_QUESTION_TEMPERATURE) 'of for this monologue:\n"' + text + '"')
}
]
token_count = count_total_tokens(messages)
question = make_openai_call(GPT_4_O, messages, token_count,["questions"], GEN_QUESTION_TEMPERATURE)
return { return {
"id": str(uuid.uuid4()), "id": str(uuid.uuid4()),
"prompt": "Select the appropriate option.", "prompt": "Select the appropriate option.",
@@ -688,17 +785,26 @@ def gen_multiple_choice_exercise_listening_monologue(text: str, quantity: int, s
def gen_write_blanks_questions_exercise_listening_conversation(text: str, quantity: int, start_id, difficulty): def gen_write_blanks_questions_exercise_listening_conversation(text: str, quantity: int, start_id, difficulty):
gen_write_blanks_questions = "Generate " + str( messages = [
quantity) + " " + difficulty + " difficulty short answer questions, and the possible answers " \ {
"(max 3 words per answer), about a monologue and" \ "role": "system",
"respond in this JSON format: {\"questions\": [{\"question\": question, " \ "content": (
"\"possible_answers\": [\"answer_1\", \"answer_2\"]}]}." \ 'You are a helpful assistant designed to output JSON on this format: '
"The monologue is this: '" + text + "'" '{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}')
},
{
"role": "user",
"content": (
'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty short answer questions, and the '
'possible answers (max 3 words per answer), '
'about this conversation:\n"' + text + '"')
token_count = count_tokens(gen_write_blanks_questions)["n_tokens"] }
questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_write_blanks_questions, token_count, ]
["questions"], token_count = count_total_tokens(messages)
GEN_QUESTION_TEMPERATURE)["questions"][:quantity]
questions = make_openai_call(GPT_4_O, messages, token_count,["questions"],
GEN_QUESTION_TEMPERATURE)["questions"][:quantity]
return { return {
"id": str(uuid.uuid4()), "id": str(uuid.uuid4()),
@@ -711,17 +817,26 @@ def gen_write_blanks_questions_exercise_listening_conversation(text: str, quanti
def gen_write_blanks_questions_exercise_listening_monologue(text: str, quantity: int, start_id, difficulty): def gen_write_blanks_questions_exercise_listening_monologue(text: str, quantity: int, start_id, difficulty):
gen_write_blanks_questions = "Generate " + str( messages = [
quantity) + " " + difficulty + " difficulty short answer questions, and the possible answers " \ {
"(max 3 words per answer), about a monologue and" \ "role": "system",
"respond in this JSON format: {\"questions\": [{\"question\": question, " \ "content": (
"\"possible_answers\": [\"answer_1\", \"answer_2\"]}]}." \ 'You are a helpful assistant designed to output JSON on this format: '
"The monologue is this: '" + text + "'" '{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}')
},
{
"role": "user",
"content": (
'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty short answer questions, and the '
'possible answers (max 3 words per answer), '
'about this monologue:\n"' + text + '"')
token_count = count_tokens(gen_write_blanks_questions)["n_tokens"] }
questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_write_blanks_questions, token_count, ]
["questions"], token_count = count_total_tokens(messages)
GEN_QUESTION_TEMPERATURE)["questions"][:quantity]
questions = make_openai_call(GPT_4_O, messages, token_count, ["questions"],
GEN_QUESTION_TEMPERATURE)["questions"][:quantity]
return { return {
"id": str(uuid.uuid4()), "id": str(uuid.uuid4()),
@@ -734,20 +849,43 @@ def gen_write_blanks_questions_exercise_listening_monologue(text: str, quantity:
def gen_write_blanks_notes_exercise_listening_conversation(text: str, quantity: int, start_id, difficulty): def gen_write_blanks_notes_exercise_listening_conversation(text: str, quantity: int, start_id, difficulty):
gen_write_blanks_notes = "Generate " + str( messages = [
quantity) + " " + difficulty + " difficulty notes taken from the conversation and and respond in this " \ {
"JSON format: { \"notes\": [\"note_1\", \"note_2\"] }. The monologue is this: '" + text + "'" "role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"notes": ["note_1", "note_2"]}')
},
{
"role": "user",
"content": (
'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty notes taken from this '
'conversation:\n"' + text + '"')
}
]
token_count = count_total_tokens(messages)
questions = make_openai_call(GPT_4_O, messages, token_count, ["notes"],
GEN_QUESTION_TEMPERATURE)["notes"][:quantity]
token_count = count_tokens(gen_write_blanks_notes)["n_tokens"]
questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_write_blanks_notes, token_count,
["notes"],
GEN_QUESTION_TEMPERATURE)["notes"][:quantity]
formatted_phrases = "\n".join([f"{i + 1}. {phrase}" for i, phrase in enumerate(questions)]) formatted_phrases = "\n".join([f"{i + 1}. {phrase}" for i, phrase in enumerate(questions)])
gen_words_to_replace = "Select 1 word from each phrase in the list and respond in this " \
"JSON format: { \"words\": [\"word_1\", \"word_2\"] }. The phrases are: " + formatted_phrases word_messages = [
words = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_words_to_replace, token_count, {
["words"], "role": "system",
GEN_QUESTION_TEMPERATURE)["words"][:quantity] "content": (
'You are a helpful assistant designed to output JSON on this format: {"words": ["word_1", "word_2"] }')
},
{
"role": "user",
"content": ('Select 1 word from each phrase in this list:\n"' + formatted_phrases + '"')
}
]
words = make_openai_call(GPT_4_O, word_messages, token_count,["words"],
GEN_QUESTION_TEMPERATURE)["words"][:quantity]
replaced_notes = replace_first_occurrences_with_placeholders_notes(questions, words, start_id) replaced_notes = replace_first_occurrences_with_placeholders_notes(questions, words, start_id)
return { return {
"id": str(uuid.uuid4()), "id": str(uuid.uuid4()),
@@ -760,20 +898,42 @@ def gen_write_blanks_notes_exercise_listening_conversation(text: str, quantity:
def gen_write_blanks_notes_exercise_listening_monologue(text: str, quantity: int, start_id, difficulty): def gen_write_blanks_notes_exercise_listening_monologue(text: str, quantity: int, start_id, difficulty):
gen_write_blanks_notes = "Generate " + str( messages = [
quantity) + " " + difficulty + " difficulty notes taken from the monologue and respond in this " \ {
"JSON format: { \"notes\": [\"note_1\", \"note_2\"] }. The monologue is this: '" + text + "'" "role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"notes": ["note_1", "note_2"]}')
},
{
"role": "user",
"content": (
'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty notes taken from this '
'monologue:\n"' + text + '"')
}
]
token_count = count_total_tokens(messages)
questions = make_openai_call(GPT_4_O, messages, token_count, ["notes"],
GEN_QUESTION_TEMPERATURE)["notes"][:quantity]
token_count = count_tokens(gen_write_blanks_notes)["n_tokens"]
questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_write_blanks_notes, token_count,
["notes"],
GEN_QUESTION_TEMPERATURE)["notes"][:quantity]
formatted_phrases = "\n".join([f"{i + 1}. {phrase}" for i, phrase in enumerate(questions)]) formatted_phrases = "\n".join([f"{i + 1}. {phrase}" for i, phrase in enumerate(questions)])
gen_words_to_replace = "Select 1 word from each phrase in the list and respond in this " \
"JSON format: { \"words\": [\"word_1\", \"word_2\"] }. The phrases are: " + formatted_phrases word_messages = [
words = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_words_to_replace, token_count, {
["words"], "role": "system",
GEN_QUESTION_TEMPERATURE)["words"][:quantity] "content": (
'You are a helpful assistant designed to output JSON on this format: {"words": ["word_1", "word_2"] }')
},
{
"role": "user",
"content": ('Select 1 word from each phrase in this list:\n"' + formatted_phrases + '"')
}
]
words = make_openai_call(GPT_4_O, word_messages, token_count, ["words"],
GEN_QUESTION_TEMPERATURE)["words"][:quantity]
replaced_notes = replace_first_occurrences_with_placeholders_notes(questions, words, start_id) replaced_notes = replace_first_occurrences_with_placeholders_notes(questions, words, start_id)
return { return {
"id": str(uuid.uuid4()), "id": str(uuid.uuid4()),
@@ -786,18 +946,25 @@ def gen_write_blanks_notes_exercise_listening_monologue(text: str, quantity: int
def gen_write_blanks_form_exercise_listening_conversation(text: str, quantity: int, start_id, difficulty): def gen_write_blanks_form_exercise_listening_conversation(text: str, quantity: int, start_id, difficulty):
gen_write_blanks_form = "Generate a form with " + str( messages = [
quantity) + " " + difficulty + " difficulty key-value pairs about the conversation. " \ {
"The conversation is this: '" + text + "'" "role": "system",
token_count = count_tokens(gen_write_blanks_form)["n_tokens"] "content": (
form = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_write_blanks_form, token_count, 'You are a helpful assistant designed to output JSON on this format: '
None, '{"form": ["key: value", "key2: value"]}')
GEN_QUESTION_TEMPERATURE) },
parse_form = "Parse the form to this JSON format: { \"form\": [\"string\", \"string\"] }. The form is this: '" + form + "'" {
token_count = count_tokens(parse_form)["n_tokens"] "role": "user",
parsed_form = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_form, token_count, "content": (
["form"], 'Generate a form with ' + str(
GEN_QUESTION_TEMPERATURE)["form"][:quantity] quantity) + ' ' + difficulty + ' difficulty key-value pairs about this conversation:\n"' + text + '"')
}
]
token_count = count_total_tokens(messages)
parsed_form = make_openai_call(GPT_4_O, messages, token_count, ["form"],
GEN_QUESTION_TEMPERATURE)["form"][:quantity]
replaced_form, words = build_write_blanks_text_form(parsed_form, start_id) replaced_form, words = build_write_blanks_text_form(parsed_form, start_id)
return { return {
"id": str(uuid.uuid4()), "id": str(uuid.uuid4()),
@@ -810,18 +977,25 @@ def gen_write_blanks_form_exercise_listening_conversation(text: str, quantity: i
def gen_write_blanks_form_exercise_listening_monologue(text: str, quantity: int, start_id, difficulty): def gen_write_blanks_form_exercise_listening_monologue(text: str, quantity: int, start_id, difficulty):
gen_write_blanks_form = "Generate a form with " + str( messages = [
quantity) + " " + difficulty + " difficulty key-value pairs about the monologue. " \ {
"The monologue is this: '" + text + "'" "role": "system",
token_count = count_tokens(gen_write_blanks_form)["n_tokens"] "content": (
form = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_write_blanks_form, token_count, 'You are a helpful assistant designed to output JSON on this format: '
None, '{"form": ["key: value", "key2: value"]}')
GEN_QUESTION_TEMPERATURE) },
parse_form = "Parse the form to this JSON format: { \"form\": [\"string\", \"string\"] }. The form is this: '" + form + "'" {
token_count = count_tokens(parse_form)["n_tokens"] "role": "user",
parsed_form = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_form, token_count, "content": (
["form"], 'Generate a form with ' + str(
GEN_QUESTION_TEMPERATURE)["form"][:quantity] quantity) + ' ' + difficulty + ' difficulty key-value pairs about this monologue:\n"' + text + '"')
}
]
token_count = count_total_tokens(messages)
parsed_form = make_openai_call(GPT_4_O, messages, token_count, ["form"],
GEN_QUESTION_TEMPERATURE)["form"][:quantity]
replaced_form, words = build_write_blanks_text_form(parsed_form, start_id) replaced_form, words = build_write_blanks_text_form(parsed_form, start_id)
return { return {
"id": str(uuid.uuid4()), "id": str(uuid.uuid4()),
@@ -840,46 +1014,31 @@ def gen_multiple_choice_level(quantity: int, start_id=1):
"verb tense, subject-verb agreement, pronoun usage, sentence structure, and punctuation. Make sure " \ "verb tense, subject-verb agreement, pronoun usage, sentence structure, and punctuation. Make sure " \
"every question only has 1 correct answer." "every question only has 1 correct answer."
messages = [{ messages = [
"role": "user", {
"content": gen_multiple_choice_for_text "role": "system",
}] "content": (
'You are a helpful assistant designed to output JSON on this format: {"questions": [{"id": "9", "options": '
'[{"id": "A", "text": '
'"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
'"Happy"}, {"id": "D", "text": "Jump"}], '
'"prompt": "Which of the following is a conjunction?", '
'"solution": "A", "variant": "text"}]}')
},
{
"role": "user",
"content": gen_multiple_choice_for_text
}
]
token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"] - 300 token_count = count_total_tokens(messages)
mc_questions = make_openai_call(GPT_4_PREVIEW, messages, token_count, question = make_openai_call(GPT_4_O, messages, token_count,
None, ["questions"],
GEN_QUESTION_TEMPERATURE) GEN_QUESTION_TEMPERATURE)
if not '25' in mc_questions:
if len(question["questions"]) != 25:
return gen_multiple_choice_level(quantity, start_id) return gen_multiple_choice_level(quantity, start_id)
else: else:
split_mc_questions = mc_questions.split('13')
parse_mc_questions = ('Parse the questions into this json format: \n\'{"questions": [{"id": "9", "options": '
'[{"id": "A", "text": '
'"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
'"Happy"}, {"id": "D", "text": "Jump"}], '
'"prompt": "Which of the following is a conjunction?", '
'"solution": "A", "variant": "text"}]}\'\n '
'\nThe questions: "' + split_mc_questions[0] + '"')
token_count = count_tokens(parse_mc_questions, model_name=GPT_3_5_TURBO_INSTRUCT)["n_tokens"]
question = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_mc_questions, token_count,
["questions"],
GEN_QUESTION_TEMPERATURE)
print(question)
parse_mc_questions = ('Parse the questions into this json format: \n\'{"questions": [{"id": "9", "options": '
'[{"id": "A", "text": '
'"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
'"Happy"}, {"id": "D", "text": "Jump"}], '
'"prompt": "Which of the following is a conjunction?", '
'"solution": "A", "variant": "text"}]}\'\n '
'\nThe questions: "' + '13' + split_mc_questions[1] + '"')
token_count = count_tokens(parse_mc_questions, model_name=GPT_3_5_TURBO_INSTRUCT)["n_tokens"]
question_2 = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_mc_questions, token_count,
["questions"],
GEN_QUESTION_TEMPERATURE)
print(question_2)
question["questions"].extend(question_2["questions"])
all_exams = get_all("level") all_exams = get_all("level")
seen_keys = set() seen_keys = set()
for i in range(len(question["questions"])): for i in range(len(question["questions"])):
@@ -916,23 +1075,37 @@ def replace_exercise_if_exists(all_exams, current_exercise, current_exam, seen_k
def generate_single_mc_level_question(): def generate_single_mc_level_question():
gen_multiple_choice_for_text = "Generate 1 multiple choice question of 4 options for an english level exam, it can " \ messages = [
"be easy, intermediate or advanced." {
token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"] - 300 "role": "system",
mc_question = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_multiple_choice_for_text, token_count, "content": (
None, 'You are a helpful assistant designed to output JSON on this format: '
GEN_QUESTION_TEMPERATURE) '{"id": "9", "options": [{"id": "A", "text": "And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
'"Happy"}, {"id": "D", "text": "Jump"}], "prompt": "Which of the following is a conjunction?", '
'"solution": "A", "variant": "text"}')
},
{
"role": "user",
"content": ('Generate 1 multiple choice question of 4 options for an english level exam, it can be easy, '
'intermediate or advanced.')
parse_mc_question = ('Parse the question into this json format: {"id": "9", "options": ' }
'[{"id": "A", "text": ' ]
'"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": ' token_count = count_total_tokens(messages)
'"Happy"}, {"id": "D", "text": "Jump"}], '
'"prompt": "Which of the following is a conjunction?", ' question = make_openai_call(GPT_4_O, messages, token_count,["options"],
'"solution": "A", "variant": "text"}. ' GEN_QUESTION_TEMPERATURE)
'\nThe questions: "' + mc_question + '"')
token_count = count_tokens(parse_mc_question, model_name=GPT_3_5_TURBO_INSTRUCT)["n_tokens"]
question = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_mc_question, token_count,
["options"],
GEN_QUESTION_TEMPERATURE)
return question return question
def parse_conversation(conversation_data):
    """Render a structured conversation as plain ``"Name: text"`` lines.

    Args:
        conversation_data: Mapping with a ``conversation`` key holding a list
            of message mappings. Each message may carry ``name`` and ``text``;
            a missing ``name`` becomes ``"Unknown"`` and a missing ``text``
            becomes an empty string. Falsy input (``None``, ``""``, ``{}``)
            is accepted and treated as "no messages".

    Returns:
        One ``"<name>: <text>"`` line per message, joined with newlines.
        An empty string when there are no messages.
    """
    # NOTE(review): make_openai_call returns "" once its retry limit is hit;
    # presumably that value can reach this function -- confirm against the
    # caller. Guarding here avoids an AttributeError on ``"".get``.
    if not conversation_data:
        return ""

    # ``or []`` also covers an explicit ``"conversation": None`` payload.
    messages = conversation_data.get('conversation') or []
    return "\n".join(
        f"{message.get('name', 'Unknown')}: {message.get('text', '')}"
        for message in messages
    )

View File

@@ -1,5 +1,6 @@
import json import json
import os import os
import re
from openai import OpenAI from openai import OpenAI
from dotenv import load_dotenv from dotenv import load_dotenv
@@ -63,10 +64,15 @@ def make_openai_call(model, messages, token_count, fields_to_check, temperature)
response_format={"type": "json_object"} response_format={"type": "json_object"}
) )
result = result.choices[0].message.content result = result.choices[0].message.content
if has_blacklisted_words(result) and try_count < TRY_LIMIT:
found_blacklisted_word = get_found_blacklisted_words(result)
if found_blacklisted_word is not None and try_count < TRY_LIMIT:
from app import app
app.logger.warning("Result contains blacklisted words: " + str(found_blacklisted_word))
try_count = try_count + 1 try_count = try_count + 1
return make_openai_call(model, messages, token_count, fields_to_check, temperature) return make_openai_call(model, messages, token_count, fields_to_check, temperature)
elif has_blacklisted_words(result) and try_count >= TRY_LIMIT: elif found_blacklisted_word is not None and try_count >= TRY_LIMIT:
return "" return ""
if fields_to_check is None: if fields_to_check is None:
@@ -83,11 +89,6 @@ def make_openai_call(model, messages, token_count, fields_to_check, temperature)
return json.loads(result) return json.loads(result)
def make_openai_instruct_call(model, message: str, token_count, fields_to_check, temperature):
global try_count
return ""
# GRADING SUMMARY # GRADING SUMMARY
def calculate_grading_summary(body): def calculate_grading_summary(body):
extracted_sections = extract_existing_sections_from_body(body, section_keys) extracted_sections = extract_existing_sections_from_body(body, section_keys)
@@ -210,6 +211,12 @@ def has_blacklisted_words(text: str):
text_lower = text.lower() text_lower = text.lower()
return any(word in text_lower for word in BLACKLISTED_WORDS) return any(word in text_lower for word in BLACKLISTED_WORDS)
def get_found_blacklisted_words(text: str):
    """Return the first blacklisted word found in *text* as a whole word.

    The text is lowercased before matching. Each entry of
    ``BLACKLISTED_WORDS`` is regex-escaped and wrapped in ``\\b`` word
    boundaries, so an entry only matches when it is not embedded inside a
    longer word. Entries are tried in the iteration order of
    ``BLACKLISTED_WORDS``.

    Returns:
        The first matching entry, or ``None`` when no entry matches.
    """
    haystack = text.lower()
    # Lazy generator: stops at the first hit, just like an early return.
    hits = (
        candidate
        for candidate in BLACKLISTED_WORDS
        if re.search(r'\b' + re.escape(candidate) + r'\b', haystack)
    )
    return next(hits, None)
def remove_special_characters_from_beginning(string): def remove_special_characters_from_beginning(string):
cleaned_string = string.lstrip('\n') cleaned_string = string.lstrip('\n')