Filter topics and words on exercises.

This commit is contained in:
Cristiano Ferreira
2024-02-08 23:42:02 +00:00
parent 9149e4b197
commit d532f7deb4
4 changed files with 59 additions and 21 deletions

10
app.py
View File

@@ -243,7 +243,8 @@ def get_writing_task_1_general_question():
try: try:
gen_wt1_question = "Craft a prompt for an IELTS Writing Task 1 General Training exercise that instructs the " \ gen_wt1_question = "Craft a prompt for an IELTS Writing Task 1 General Training exercise that instructs the " \
"student to compose a letter. The prompt should present a specific scenario or situation, " \ "student to compose a letter. The prompt should present a specific scenario or situation, " \
"requiring the student to provide information, advice, or instructions within the letter." "requiring the student to provide information, advice, or instructions within the letter. " \
"Make sure that the generated prompt does not contain forbidden subjects in muslim countries."
token_count = count_tokens(gen_wt1_question)["n_tokens"] token_count = count_tokens(gen_wt1_question)["n_tokens"]
response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_wt1_question, token_count, None, response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_wt1_question, token_count, None,
GEN_QUESTION_TEMPERATURE) GEN_QUESTION_TEMPERATURE)
@@ -391,7 +392,7 @@ def get_speaking_task_1_question():
gen_sp1_question = "Craft a thought-provoking question for IELTS Speaking Part 1 that encourages candidates to delve deeply " \ gen_sp1_question = "Craft a thought-provoking question for IELTS Speaking Part 1 that encourages candidates to delve deeply " \
"into personal experiences, preferences, or insights on diverse topics. Instruct the candidate to offer " \ "into personal experiences, preferences, or insights on diverse topics. Instruct the candidate to offer " \
"not only detailed descriptions but also provide nuanced explanations, examples, or anecdotes to enrich " \ "not only detailed descriptions but also provide nuanced explanations, examples, or anecdotes to enrich " \
"their response." \ "their response. Make sure that the generated question does not contain forbidden subjects in muslim countries." \
"Provide your response in this json format: {'topic': 'topic','question': 'question'}" "Provide your response in this json format: {'topic': 'topic','question': 'question'}"
token_count = count_tokens(gen_sp1_question)["n_tokens"] token_count = count_tokens(gen_sp1_question)["n_tokens"]
response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_sp1_question, token_count, GEN_FIELDS, response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_sp1_question, token_count, GEN_FIELDS,
@@ -462,7 +463,7 @@ def get_speaking_task_2_question():
gen_sp2_question = "Create a question for IELTS Speaking Part 2 that encourages candidates to narrate a personal experience " \ gen_sp2_question = "Create a question for IELTS Speaking Part 2 that encourages candidates to narrate a personal experience " \
"or story related to a randomly selected topic. Include 3 prompts that guide the candidate to describe " \ "or story related to a randomly selected topic. Include 3 prompts that guide the candidate to describe " \
"specific aspects of the experience, such as details about the situation, their actions, and the " \ "specific aspects of the experience, such as details about the situation, their actions, and the " \
"reasons it left a lasting impression." \ "reasons it left a lasting impression. Make sure that the generated question does not contain forbidden subjects in muslim countries." \
"Provide your response in this json format: {'topic': 'topic','question': 'question', " \ "Provide your response in this json format: {'topic': 'topic','question': 'question', " \
"'prompts': ['prompt_1', 'prompt_2', 'prompt_3']}" "'prompts': ['prompt_1', 'prompt_2', 'prompt_3']}"
token_count = count_tokens(gen_sp2_question)["n_tokens"] token_count = count_tokens(gen_sp2_question)["n_tokens"]
@@ -480,7 +481,8 @@ def get_speaking_task_3_question():
try: try:
gen_sp3_question = "Formulate a set of 3 questions for IELTS Speaking Part 3 that encourage candidates to engage in a " \ gen_sp3_question = "Formulate a set of 3 questions for IELTS Speaking Part 3 that encourage candidates to engage in a " \
"meaningful discussion on a particular topic. Provide inquiries, ensuring " \ "meaningful discussion on a particular topic. Provide inquiries, ensuring " \
"they explore various aspects, perspectives, and implications related to the topic." \ "they explore various aspects, perspectives, and implications related to the topic. " \
"Make sure that the generated question does not contain forbidden subjects in muslim countries." \
"Provide your response in this json format: {'topic': 'topic','questions': ['question', " \ "Provide your response in this json format: {'topic': 'topic','questions': ['question', " \
"'question', 'question']}" "'question', 'question']}"
token_count = count_tokens(gen_sp3_question)["n_tokens"] token_count = count_tokens(gen_sp3_question)["n_tokens"]

View File

@@ -31,6 +31,11 @@ LISTENING_MIN_TIMER_DEFAULT = 30
WRITING_MIN_TIMER_DEFAULT = 60 WRITING_MIN_TIMER_DEFAULT = 60
SPEAKING_MIN_TIMER_DEFAULT = 14 SPEAKING_MIN_TIMER_DEFAULT = 14
# Lower-case terms that the blacklist check searches for in generated text.
BLACKLISTED_WORDS = [
    "jesus",
    "sex",
    "gay",
    "lesbian",
    "homosexual",
    "god",
    "angel",
    "pornography",
    "beer",
    "wine",
    "cocaine",
    "drugs",
    "alcohol",
    "nudity",
    "lgbt",
    "casino",
    "gambling",
    "gaming",
    "catholicism",
    "discrimination",
    "politics",
    "politic",
    "christianity",
    "islam",
    "christian",
    "christians",
    "jews",
    "jew",
]
EN_US_VOICES = [ EN_US_VOICES = [
{'Gender': 'Female', 'Id': 'Salli', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Name': 'Salli', {'Gender': 'Female', 'Id': 'Salli', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Name': 'Salli',
'SupportedEngines': ['neural', 'standard']}, 'SupportedEngines': ['neural', 'standard']},
@@ -113,7 +118,6 @@ topics = [
"Renewable Energy", "Renewable Energy",
"Cultural Diversity", "Cultural Diversity",
"Modern Technology Trends", "Modern Technology Trends",
"Women's Rights",
"Sustainable Agriculture", "Sustainable Agriculture",
"Globalization", "Globalization",
"Natural Disasters", "Natural Disasters",
@@ -124,7 +128,6 @@ topics = [
"Literature and Classics", "Literature and Classics",
"World Geography", "World Geography",
"Music and Its Influence", "Music and Its Influence",
"Human Rights",
"Social Media Impact", "Social Media Impact",
"Food Sustainability", "Food Sustainability",
"Economics and Markets", "Economics and Markets",
@@ -139,7 +142,6 @@ topics = [
"Future of Work", "Future of Work",
"Film and Cinema", "Film and Cinema",
"Genetic Engineering", "Genetic Engineering",
"Ancient Mythology",
"Climate Policy", "Climate Policy",
"Space Travel", "Space Travel",
"Renewable Energy Sources", "Renewable Energy Sources",
@@ -155,7 +157,6 @@ topics = [
"Yoga and Meditation", "Yoga and Meditation",
"Literary Genres", "Literary Genres",
"World Oceans", "World Oceans",
"Gender Equality",
"Social Networking", "Social Networking",
"Sustainable Fashion", "Sustainable Fashion",
"International Trade", "International Trade",
@@ -201,7 +202,27 @@ topics = [
"History of Mathematics", "History of Mathematics",
"Human-Computer Interaction", "Human-Computer Interaction",
"Global Health", "Global Health",
"Cultural Appropriation" "Cultural Appropriation",
"Traditional cuisine and culinary arts",
"Local music and dance traditions",
"Cultural festivals and celebrations",
"History of the region and historical landmarks",
"Traditional crafts and artisanal skills",
"Wildlife and conservation efforts",
"Local sports and athletic competitions",
"Fashion trends and clothing styles",
"Literature and poetry from the region",
"Education systems and advancements",
"Healthcare services and medical innovations",
"Family values and social dynamics",
"Travel destinations and tourist attractions",
"Environmental sustainability projects",
"Technological developments and innovations",
"Entrepreneurship and business ventures",
"Youth empowerment initiatives",
"Art exhibitions and cultural events",
"Philanthropy and community development projects",
"Political developments and civic engagement efforts"
] ]
two_people_scenarios = [ two_people_scenarios = [

View File

@@ -170,10 +170,11 @@ def build_write_blanks_solutions_listening(words: [], start_id):
def generate_reading_passage(type: QuestionType, topic: str): def generate_reading_passage(type: QuestionType, topic: str):
gen_reading_passage_1 = "Generate an extensive text for IELTS " + type.value + ", of at least 1500 words, on the topic " \ gen_reading_passage_1 = "Generate an extensive text for IELTS " + type.value + ", of at least 1500 words, on the topic " \
"of " + topic + ". The passage should offer a substantial amount of " \ "of '" + topic + "'. The passage should offer a substantial amount of " \
"information, analysis, or narrative " \ "information, analysis, or narrative " \
"relevant to the chosen subject matter. This text passage aims to serve as the primary reading " \ "relevant to the chosen subject matter. This text passage aims to serve as the primary reading " \
"section of an IELTS test, providing an in-depth and comprehensive exploration of the topic." \ "section of an IELTS test, providing an in-depth and comprehensive exploration of the topic. " \
"Make sure that the generated text does not contain forbidden subjects in muslim countries." \
"Provide your response in this json format: {\"title\": \"title of the text\", \"text\": \"generated text\"}" "Provide your response in this json format: {\"title\": \"title of the text\", \"text\": \"generated text\"}"
token_count = count_tokens(gen_reading_passage_1)["n_tokens"] token_count = count_tokens(gen_reading_passage_1)["n_tokens"]
return make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_reading_passage_1, token_count, GEN_TEXT_FIELDS, return make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_reading_passage_1, token_count, GEN_TEXT_FIELDS,
@@ -183,7 +184,8 @@ def generate_reading_passage(type: QuestionType, topic: str):
def generate_listening_1_conversation(topic: str): def generate_listening_1_conversation(topic: str):
gen_listening_1_conversation_2_people = "Compose an authentic conversation between two individuals in the everyday " \ gen_listening_1_conversation_2_people = "Compose an authentic conversation between two individuals in the everyday " \
"social context of '" + topic + "'. Please include random names and genders " \ "social context of '" + topic + "'. Please include random names and genders " \
"for the characters in your dialogue." "for the characters in your dialogue. " \
"Make sure that the generated conversation does not contain forbidden subjects in muslim countries."
token_count = count_tokens(gen_listening_1_conversation_2_people)["n_tokens"] token_count = count_tokens(gen_listening_1_conversation_2_people)["n_tokens"]
response = make_openai_instruct_call( response = make_openai_instruct_call(
GPT_3_5_TURBO_INSTRUCT, GPT_3_5_TURBO_INSTRUCT,
@@ -223,7 +225,7 @@ def generate_listening_1_conversation(topic: str):
def generate_listening_2_monologue(topic: str): def generate_listening_2_monologue(topic: str):
gen_listening_2_monologue_social = "Generate a comprehensive monologue set in the social context of: '" + topic + "'" gen_listening_2_monologue_social = "Generate a comprehensive monologue set in the social context of: '" + topic + "'. Make sure that the generated monologue does not contain forbidden subjects in muslim countries."
token_count = count_tokens(gen_listening_2_monologue_social)["n_tokens"] token_count = count_tokens(gen_listening_2_monologue_social)["n_tokens"]
response = make_openai_instruct_call( response = make_openai_instruct_call(
GPT_3_5_TURBO_INSTRUCT, GPT_3_5_TURBO_INSTRUCT,
@@ -238,7 +240,8 @@ def generate_listening_2_monologue(topic: str):
def generate_listening_3_conversation(topic: str): def generate_listening_3_conversation(topic: str):
gen_listening_3_conversation_4_people = "Compose an authentic and elaborate conversation between up to four individuals " \ gen_listening_3_conversation_4_people = "Compose an authentic and elaborate conversation between up to four individuals " \
"in the everyday social context of '" + topic + \ "in the everyday social context of '" + topic + \
"'. Please include random names and genders for the characters in your dialogue." "'. Please include random names and genders for the characters in your dialogue. " \
"Make sure that the generated conversation does not contain forbidden subjects in muslim countries."
token_count = count_tokens(gen_listening_3_conversation_4_people)["n_tokens"] token_count = count_tokens(gen_listening_3_conversation_4_people)["n_tokens"]
response = make_openai_instruct_call( response = make_openai_instruct_call(
GPT_3_5_TURBO_INSTRUCT, GPT_3_5_TURBO_INSTRUCT,
@@ -277,7 +280,7 @@ def generate_listening_3_conversation(topic: str):
def generate_listening_4_monologue(topic: str): def generate_listening_4_monologue(topic: str):
gen_listening_4_monologue_academic = "Generate a comprehensive monologue an academic subject of: '" + topic + "'" gen_listening_4_monologue_academic = "Generate a comprehensive monologue an academic subject of: '" + topic + "'. Make sure that the generated monologue does not contain forbidden subjects in muslim countries."
token_count = count_tokens(gen_listening_4_monologue_academic)["n_tokens"] token_count = count_tokens(gen_listening_4_monologue_academic)["n_tokens"]
response = make_openai_instruct_call( response = make_openai_instruct_call(
GPT_3_5_TURBO_INSTRUCT, GPT_3_5_TURBO_INSTRUCT,
@@ -712,7 +715,8 @@ def gen_multiple_choice_level(quantity: int, start_id=1):
all_exams = get_all("level") all_exams = get_all("level")
seen_keys = set() seen_keys = set()
for i in range(len(question["questions"])): for i in range(len(question["questions"])):
question["questions"][i], seen_keys = replace_exercise_if_exists(all_exams, question["questions"][i], question, seen_keys) question["questions"][i], seen_keys = replace_exercise_if_exists(all_exams, question["questions"][i], question,
seen_keys)
return { return {
"id": str(uuid.uuid4()), "id": str(uuid.uuid4()),
"prompt": "Select the appropriate option.", "prompt": "Select the appropriate option.",

View File

@@ -5,7 +5,7 @@ import re
from dotenv import load_dotenv from dotenv import load_dotenv
from helper.constants import GPT_3_5_TURBO_INSTRUCT from helper.constants import GPT_3_5_TURBO_INSTRUCT, BLACKLISTED_WORDS
from helper.token_counter import count_tokens from helper.token_counter import count_tokens
load_dotenv() load_dotenv()
@@ -15,7 +15,7 @@ MAX_TOKENS = 4097
TOP_P = 0.9 TOP_P = 0.9
FREQUENCY_PENALTY = 0.5 FREQUENCY_PENALTY = 0.5
TRY_LIMIT = 1 TRY_LIMIT = 2
try_count = 0 try_count = 0
@@ -167,6 +167,12 @@ def make_openai_instruct_call(model, message: str, token_count, fields_to_check,
temperature=0.7 temperature=0.7
)["choices"][0]["text"] )["choices"][0]["text"]
if has_blacklisted_words(response) and try_count < TRY_LIMIT:
try_count = try_count + 1
return make_openai_instruct_call(model, message, token_count, fields_to_check, temperature)
elif has_blacklisted_words(response) and try_count >= TRY_LIMIT:
return ""
if fields_to_check is None: if fields_to_check is None:
return response.replace("\n\n", " ").strip() return response.replace("\n\n", " ").strip()
@@ -264,3 +270,8 @@ def get_speaking_corrections(text):
token_count = count_tokens(message)["n_tokens"] token_count = count_tokens(message)["n_tokens"]
response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count, ["fixed_text"], 0.2) response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count, ["fixed_text"], 0.2)
return response["fixed_text"] return response["fixed_text"]
def has_blacklisted_words(text: str, words=None):
    """Return True if ``text`` contains any blacklisted term as a whole word.

    Matching is case-insensitive and anchored on word boundaries, so a term
    only matches when it is not embedded in a longer word ("wine" does not
    match "twine", "jew" does not match "jewelry"). Inflected forms that
    should be rejected must therefore be listed explicitly, as the blacklist
    already does for e.g. "jew"/"jews".

    :param text: text to inspect; ``None`` or an empty string yields False.
    :param words: optional iterable of terms to look for; defaults to
        ``BLACKLISTED_WORDS`` when omitted.
    :return: True when at least one term occurs in ``text``, else False.
    """
    import re  # local import so the function does not depend on file-level imports

    if not text:
        return False
    if words is None:
        words = BLACKLISTED_WORDS

    # Escape every term so punctuation inside a term is matched literally.
    alternatives = "|".join(re.escape(word.lower()) for word in words if word)
    if not alternatives:
        return False

    # Lookarounds instead of \b so terms that start or end with a
    # non-word character are still bounded correctly.
    pattern = r"(?<!\w)(?:" + alternatives + r")(?!\w)"
    return re.search(pattern, text.lower()) is not None