From d532f7deb4b7173c912ba09f26057eb0c6f15f59 Mon Sep 17 00:00:00 2001 From: Cristiano Ferreira Date: Thu, 8 Feb 2024 23:42:02 +0000 Subject: [PATCH] Filter topics and words on exercises. --- app.py | 10 ++++++---- helper/constants.py | 31 ++++++++++++++++++++++++++----- helper/exercises.py | 24 ++++++++++++++---------- helper/openai_interface.py | 15 +++++++++++++-- 4 files changed, 59 insertions(+), 21 deletions(-) diff --git a/app.py b/app.py index d1a36a9..c4ea63f 100644 --- a/app.py +++ b/app.py @@ -243,7 +243,8 @@ def get_writing_task_1_general_question(): try: gen_wt1_question = "Craft a prompt for an IELTS Writing Task 1 General Training exercise that instructs the " \ "student to compose a letter. The prompt should present a specific scenario or situation, " \ - "requiring the student to provide information, advice, or instructions within the letter." + "requiring the student to provide information, advice, or instructions within the letter. " \ + "Make sure that the generated prompt does not contain forbidden subjects in muslim countries." token_count = count_tokens(gen_wt1_question)["n_tokens"] response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_wt1_question, token_count, None, GEN_QUESTION_TEMPERATURE) @@ -391,7 +392,7 @@ def get_speaking_task_1_question(): gen_sp1_question = "Craft a thought-provoking question for IELTS Speaking Part 1 that encourages candidates to delve deeply " \ "into personal experiences, preferences, or insights on diverse topics. Instruct the candidate to offer " \ "not only detailed descriptions but also provide nuanced explanations, examples, or anecdotes to enrich " \ - "their response." \ + "their response. Make sure that the generated question does not contain forbidden subjects in muslim countries." \ "Provide your response in this json format: {'topic': 'topic','question': 'question'}" token_count = count_tokens(gen_sp1_question)["n_tokens"] response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_sp1_question, token_count, GEN_FIELDS, @@ -462,7 +463,7 @@ def get_speaking_task_2_question(): gen_sp2_question = "Create a question for IELTS Speaking Part 2 that encourages candidates to narrate a personal experience " \ "or story related to a randomly selected topic. Include 3 prompts that guide the candidate to describe " \ "specific aspects of the experience, such as details about the situation, their actions, and the " \ - "reasons it left a lasting impression." \ + "reasons it left a lasting impression. Make sure that the generated question does not contain forbidden subjects in muslim countries." \ "Provide your response in this json format: {'topic': 'topic','question': 'question', " \ "'prompts': ['prompt_1', 'prompt_2', 'prompt_3']}" token_count = count_tokens(gen_sp2_question)["n_tokens"] @@ -480,7 +481,8 @@ def get_speaking_task_3_question(): try: gen_sp3_question = "Formulate a set of 3 questions for IELTS Speaking Part 3 that encourage candidates to engage in a " \ "meaningful discussion on a particular topic. Provide inquiries, ensuring " \ - "they explore various aspects, perspectives, and implications related to the topic." \ + "they explore various aspects, perspectives, and implications related to the topic. " \ + "Make sure that the generated question does not contain forbidden subjects in muslim countries." \ "Provide your response in this json format: {'topic': 'topic','questions': ['question', " \ "'question', 'question']}" token_count = count_tokens(gen_sp3_question)["n_tokens"] diff --git a/helper/constants.py b/helper/constants.py index 40f0e50..d743b56 100644 --- a/helper/constants.py +++ b/helper/constants.py @@ -31,6 +31,11 @@ LISTENING_MIN_TIMER_DEFAULT = 30 WRITING_MIN_TIMER_DEFAULT = 60 SPEAKING_MIN_TIMER_DEFAULT = 14 +BLACKLISTED_WORDS = ["jesus", "sex", "gay", "lesbian", "homosexual", "god", "angel", "pornography", "beer", "wine", + "cocaine", "drugs", "alcohol", "nudity", "lgbt", "casino", "gambling", "gaming", "catholicism", + "discrimination", "politics", "politic", "christianity", "islam", "christian", "christians", + "jews", "jew"] + EN_US_VOICES = [ {'Gender': 'Female', 'Id': 'Salli', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Name': 'Salli', 'SupportedEngines': ['neural', 'standard']}, @@ -113,7 +118,6 @@ topics = [ "Renewable Energy", "Cultural Diversity", "Modern Technology Trends", - "Women's Rights", "Sustainable Agriculture", "Globalization", "Natural Disasters", @@ -124,7 +128,6 @@ topics = [ "Literature and Classics", "World Geography", "Music and Its Influence", - "Human Rights", "Social Media Impact", "Food Sustainability", "Economics and Markets", @@ -139,7 +142,6 @@ topics = [ "Future of Work", "Film and Cinema", "Genetic Engineering", - "Ancient Mythology", "Climate Policy", "Space Travel", "Renewable Energy Sources", @@ -155,7 +157,6 @@ topics = [ "Yoga and Meditation", "Literary Genres", "World Oceans", - "Gender Equality", "Social Networking", "Sustainable Fashion", "International Trade", @@ -201,7 +202,27 @@ topics = [ "History of Mathematics", "Human-Computer Interaction", "Global Health", - "Cultural Appropriation" + "Cultural Appropriation", + "Traditional cuisine and culinary arts", + "Local music and dance traditions", + "Cultural festivals and celebrations", + "History of the region and historical landmarks", + "Traditional crafts and artisanal skills", + "Wildlife and conservation efforts", + "Local sports and athletic competitions", + "Fashion trends and clothing styles", + "Literature and poetry from the region", + "Education systems and advancements", + "Healthcare services and medical innovations", + "Family values and social dynamics", + "Travel destinations and tourist attractions", + "Environmental sustainability projects", + "Technological developments and innovations", + "Entrepreneurship and business ventures", + "Youth empowerment initiatives", + "Art exhibitions and cultural events", + "Philanthropy and community development projects", + "Political developments and civic engagement efforts" ] two_people_scenarios = [ diff --git a/helper/exercises.py b/helper/exercises.py index 99cdff7..8112950 100644 --- a/helper/exercises.py +++ b/helper/exercises.py @@ -170,11 +170,12 @@ def build_write_blanks_solutions_listening(words: [], start_id): def generate_reading_passage(type: QuestionType, topic: str): gen_reading_passage_1 = "Generate an extensive text for IELTS " + type.value + ", of at least 1500 words, on the topic " \ - "of " + topic + ". The passage should offer a substantial amount of " \ - "information, analysis, or narrative " \ - "relevant to the chosen subject matter. This text passage aims to serve as the primary reading " \ - "section of an IELTS test, providing an in-depth and comprehensive exploration of the topic." \ - "Provide your response in this json format: {\"title\": \"title of the text\", \"text\": \"generated text\"}" + "of '" + topic + "'. The passage should offer a substantial amount of " \ + "information, analysis, or narrative " \ + "relevant to the chosen subject matter. This text passage aims to serve as the primary reading " \ + "section of an IELTS test, providing an in-depth and comprehensive exploration of the topic. " \ + "Make sure that the generated text does not contain forbidden subjects in muslim countries." \ + "Provide your response in this json format: {\"title\": \"title of the text\", \"text\": \"generated text\"}" token_count = count_tokens(gen_reading_passage_1)["n_tokens"] return make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_reading_passage_1, token_count, GEN_TEXT_FIELDS, GEN_QUESTION_TEMPERATURE) @@ -183,7 +184,8 @@ def generate_reading_passage(type: QuestionType, topic: str): def generate_listening_1_conversation(topic: str): gen_listening_1_conversation_2_people = "Compose an authentic conversation between two individuals in the everyday " \ "social context of '" + topic + "'. Please include random names and genders " \ - "for the characters in your dialogue." + "for the characters in your dialogue. " \ + "Make sure that the generated conversation does not contain forbidden subjects in muslim countries." token_count = count_tokens(gen_listening_1_conversation_2_people)["n_tokens"] response = make_openai_instruct_call( GPT_3_5_TURBO_INSTRUCT, @@ -223,7 +225,7 @@ def generate_listening_1_conversation(topic: str): def generate_listening_2_monologue(topic: str): - gen_listening_2_monologue_social = "Generate a comprehensive monologue set in the social context of: '" + topic + "'" + gen_listening_2_monologue_social = "Generate a comprehensive monologue set in the social context of: '" + topic + "'. Make sure that the generated monologue does not contain forbidden subjects in muslim countries." token_count = count_tokens(gen_listening_2_monologue_social)["n_tokens"] response = make_openai_instruct_call( GPT_3_5_TURBO_INSTRUCT, @@ -238,7 +240,8 @@ def generate_listening_2_monologue(topic: str): def generate_listening_3_conversation(topic: str): gen_listening_3_conversation_4_people = "Compose an authentic and elaborate conversation between up to four individuals " \ "in the everyday social context of '" + topic + \ - "'. Please include random names and genders for the characters in your dialogue." + "'. Please include random names and genders for the characters in your dialogue. " \ + "Make sure that the generated conversation does not contain forbidden subjects in muslim countries." token_count = count_tokens(gen_listening_3_conversation_4_people)["n_tokens"] response = make_openai_instruct_call( GPT_3_5_TURBO_INSTRUCT, @@ -277,7 +280,7 @@ def generate_listening_3_conversation(topic: str): def generate_listening_4_monologue(topic: str): - gen_listening_4_monologue_academic = "Generate a comprehensive monologue an academic subject of: '" + topic + "'" + gen_listening_4_monologue_academic = "Generate a comprehensive monologue an academic subject of: '" + topic + "'. Make sure that the generated monologue does not contain forbidden subjects in muslim countries." token_count = count_tokens(gen_listening_4_monologue_academic)["n_tokens"] response = make_openai_instruct_call( GPT_3_5_TURBO_INSTRUCT, @@ -712,7 +715,8 @@ def gen_multiple_choice_level(quantity: int, start_id=1): all_exams = get_all("level") seen_keys = set() for i in range(len(question["questions"])): - question["questions"][i], seen_keys = replace_exercise_if_exists(all_exams, question["questions"][i], question, seen_keys) + question["questions"][i], seen_keys = replace_exercise_if_exists(all_exams, question["questions"][i], question, + seen_keys) return { "id": str(uuid.uuid4()), "prompt": "Select the appropriate option.", diff --git a/helper/openai_interface.py b/helper/openai_interface.py index 1058ea3..3bd86af 100644 --- a/helper/openai_interface.py +++ b/helper/openai_interface.py @@ -5,7 +5,7 @@ import re from dotenv import load_dotenv -from helper.constants import GPT_3_5_TURBO_INSTRUCT +from helper.constants import GPT_3_5_TURBO_INSTRUCT, BLACKLISTED_WORDS from helper.token_counter import count_tokens load_dotenv() @@ -15,7 +15,7 @@ MAX_TOKENS = 4097 TOP_P = 0.9 FREQUENCY_PENALTY = 0.5 -TRY_LIMIT = 1 +TRY_LIMIT = 2 try_count = 0 @@ -167,6 +167,12 @@ def make_openai_instruct_call(model, message: str, token_count, fields_to_check, temperature=0.7 )["choices"][0]["text"] + if has_blacklisted_words(response) and try_count < TRY_LIMIT: + try_count = try_count + 1 + return make_openai_instruct_call(model, message, token_count, fields_to_check, temperature) + elif has_blacklisted_words(response) and try_count >= TRY_LIMIT: + return "" + if fields_to_check is None: return response.replace("\n\n", " ").strip() @@ -264,3 +270,8 @@ def get_speaking_corrections(text): token_count = count_tokens(message)["n_tokens"] response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count, ["fixed_text"], 0.2) return response["fixed_text"] + + +def has_blacklisted_words(text: str): + text_lower = text.lower() + return any(word in text_lower for word in BLACKLISTED_WORDS)