Filter topics and words on exercises.

2024-02-08 23:42:02 +00:00
parent 9149e4b197
commit d532f7deb4
4 changed files with 59 additions and 21 deletions
--- a/app.py
+++ b/app.py
@@ -243,7 +243,8 @@ def get_writing_task_1_general_question():
    try:
        gen_wt1_question = "Craft a prompt for an IELTS Writing Task 1 General Training exercise that instructs the " \
                           "student to compose a letter. The prompt should present a specific scenario or situation, " \
-                           "requiring the student to provide information, advice, or instructions within the letter."
+                           "requiring the student to provide information, advice, or instructions within the letter. " \
                           "Make sure that the generated prompt does not contain forbidden subjects in muslim countries."
        token_count = count_tokens(gen_wt1_question)["n_tokens"]
        response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_wt1_question, token_count, None,
                                             GEN_QUESTION_TEMPERATURE)
@@ -391,7 +392,7 @@ def get_speaking_task_1_question():
        gen_sp1_question = "Craft a thought-provoking question for IELTS Speaking Part 1 that encourages candidates to delve deeply " \
                           "into personal experiences, preferences, or insights on diverse topics. Instruct the candidate to offer " \
                           "not only detailed descriptions but also provide nuanced explanations, examples, or anecdotes to enrich " \
-                           "their response." \
+                           "their response. Make sure that the generated question does not contain forbidden subjects in muslim countries." \
                           "Provide your response in this json format: {'topic': 'topic','question': 'question'}"
        token_count = count_tokens(gen_sp1_question)["n_tokens"]
        response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_sp1_question, token_count, GEN_FIELDS,
@@ -462,7 +463,7 @@ def get_speaking_task_2_question():
        gen_sp2_question = "Create a question for IELTS Speaking Part 2 that encourages candidates to narrate a personal experience " \
                           "or story related to a randomly selected topic. Include 3 prompts that guide the candidate to describe " \
                           "specific aspects of the experience, such as details about the situation, their actions, and the " \
-                           "reasons it left a lasting impression." \
+                           "reasons it left a lasting impression. Make sure that the generated question does not contain forbidden subjects in muslim countries." \
                           "Provide your response in this json format: {'topic': 'topic','question': 'question', " \
                           "'prompts': ['prompt_1', 'prompt_2', 'prompt_3']}"
        token_count = count_tokens(gen_sp2_question)["n_tokens"]
@@ -480,7 +481,8 @@ def get_speaking_task_3_question():
    try:
        gen_sp3_question = "Formulate a set of 3 questions for IELTS Speaking Part 3 that encourage candidates to engage in a " \
                           "meaningful discussion on a particular topic. Provide inquiries, ensuring " \
-                           "they explore various aspects, perspectives, and implications related to the topic." \
+                           "they explore various aspects, perspectives, and implications related to the topic. " \
                           "Make sure that the generated question does not contain forbidden subjects in muslim countries." \
                           "Provide your response in this json format: {'topic': 'topic','questions': ['question', " \
                           "'question', 'question']}"
        token_count = count_tokens(gen_sp3_question)["n_tokens"]
--- a/helper/constants.py
+++ b/helper/constants.py
@@ -31,6 +31,11 @@ LISTENING_MIN_TIMER_DEFAULT = 30
 WRITING_MIN_TIMER_DEFAULT = 60
 SPEAKING_MIN_TIMER_DEFAULT = 14
 # Terms that generated text is screened for. helper/openai_interface.py imports
 # this list and has_blacklisted_words() lowercases the text before comparing,
 # so every entry here must be written in lowercase.
 BLACKLISTED_WORDS = ["jesus", "sex", "gay", "lesbian", "homosexual", "god", "angel", "pornography", "beer", "wine",
                     "cocaine", "drugs", "alcohol", "nudity", "lgbt", "casino", "gambling", "gaming", "catholicism",
                     "discrimination", "politics", "politic", "christianity", "islam", "christian", "christians",
                     "jews", "jew"]
 EN_US_VOICES = [
    {'Gender': 'Female', 'Id': 'Salli', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Name': 'Salli',
     'SupportedEngines': ['neural', 'standard']},
@@ -113,7 +118,6 @@ topics = [
    "Renewable Energy",
    "Cultural Diversity",
    "Modern Technology Trends",
    "Women's Rights",
    "Sustainable Agriculture",
    "Globalization",
    "Natural Disasters",
@@ -124,7 +128,6 @@ topics = [
    "Literature and Classics",
    "World Geography",
    "Music and Its Influence",
    "Human Rights",
    "Social Media Impact",
    "Food Sustainability",
    "Economics and Markets",
@@ -139,7 +142,6 @@ topics = [
    "Future of Work",
    "Film and Cinema",
    "Genetic Engineering",
    "Ancient Mythology",
    "Climate Policy",
    "Space Travel",
    "Renewable Energy Sources",
@@ -155,7 +157,6 @@ topics = [
    "Yoga and Meditation",
    "Literary Genres",
    "World Oceans",
    "Gender Equality",
    "Social Networking",
    "Sustainable Fashion",
    "International Trade",
@@ -201,7 +202,27 @@ topics = [
    "History of Mathematics",
    "Human-Computer Interaction",
    "Global Health",
-    "Cultural Appropriation"
+    "Cultural Appropriation",
    "Traditional cuisine and culinary arts",
    "Local music and dance traditions",
    "Cultural festivals and celebrations",
    "History of the region and historical landmarks",
    "Traditional crafts and artisanal skills",
    "Wildlife and conservation efforts",
    "Local sports and athletic competitions",
    "Fashion trends and clothing styles",
    "Literature and poetry from the region",
    "Education systems and advancements",
    "Healthcare services and medical innovations",
    "Family values and social dynamics",
    "Travel destinations and tourist attractions",
    "Environmental sustainability projects",
    "Technological developments and innovations",
    "Entrepreneurship and business ventures",
    "Youth empowerment initiatives",
    "Art exhibitions and cultural events",
    "Philanthropy and community development projects",
    "Political developments and civic engagement efforts"
 ]
 two_people_scenarios = [
--- a/helper/exercises.py
+++ b/helper/exercises.py
@@ -170,10 +170,11 @@ def build_write_blanks_solutions_listening(words: [], start_id):
 def generate_reading_passage(type: QuestionType, topic: str):
    gen_reading_passage_1 = "Generate an extensive text for IELTS " + type.value + ", of at least 1500 words, on the topic " \
-                                                                                   "of " + topic + ". The passage should offer a substantial amount of " \
+                                                                                   "of '" + topic + "'. The passage should offer a substantial amount of " \
                                                                                                    "information, analysis, or narrative " \
                                                                                                    "relevant to the chosen subject matter. This text passage aims to serve as the primary reading " \
-                                                                                                   "section of an IELTS test, providing an in-depth and comprehensive exploration of the topic." \
+                                                                                                    "section of an IELTS test, providing an in-depth and comprehensive exploration of the topic. " \
                                                                                                    "Make sure that the generated text does not contain forbidden subjects in muslim countries." \
                                                                                                    "Provide your response in this json format: {\"title\": \"title of the text\", \"text\": \"generated text\"}"
    token_count = count_tokens(gen_reading_passage_1)["n_tokens"]
    return make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_reading_passage_1, token_count, GEN_TEXT_FIELDS,
@@ -183,7 +184,8 @@ def generate_reading_passage(type: QuestionType, topic: str):
 def generate_listening_1_conversation(topic: str):
    gen_listening_1_conversation_2_people = "Compose an authentic conversation between two individuals in the everyday " \
                                            "social context of '" + topic + "'. Please include random names and genders " \
-                                                                            "for the characters in your dialogue."
+                                                                            "for the characters in your dialogue. " \
                                                                            "Make sure that the generated conversation does not contain forbidden subjects in muslim countries."
    token_count = count_tokens(gen_listening_1_conversation_2_people)["n_tokens"]
    response = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT,
@@ -223,7 +225,7 @@ def generate_listening_1_conversation(topic: str):
 def generate_listening_2_monologue(topic: str):
-    gen_listening_2_monologue_social = "Generate a comprehensive monologue set in the social context of: '" + topic + "'"
+    gen_listening_2_monologue_social = "Generate a comprehensive monologue set in the social context of: '" + topic + "'. Make sure that the generated monologue does not contain forbidden subjects in muslim countries."
    token_count = count_tokens(gen_listening_2_monologue_social)["n_tokens"]
    response = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT,
@@ -238,7 +240,8 @@ def generate_listening_2_monologue(topic: str):
 def generate_listening_3_conversation(topic: str):
    gen_listening_3_conversation_4_people = "Compose an authentic and elaborate conversation between up to four individuals " \
                                            "in the everyday social context of '" + topic + \
-                                            "'. Please include random names and genders for the characters in your dialogue."
+                                            "'. Please include random names and genders for the characters in your dialogue. " \
                                            "Make sure that the generated conversation does not contain forbidden subjects in muslim countries."
    token_count = count_tokens(gen_listening_3_conversation_4_people)["n_tokens"]
    response = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT,
@@ -277,7 +280,7 @@ def generate_listening_3_conversation(topic: str):
 def generate_listening_4_monologue(topic: str):
-    gen_listening_4_monologue_academic = "Generate a comprehensive monologue an academic subject of: '" + topic + "'"
+    gen_listening_4_monologue_academic = "Generate a comprehensive monologue an academic subject of: '" + topic + "'. Make sure that the generated monologue does not contain forbidden subjects in muslim countries."
    token_count = count_tokens(gen_listening_4_monologue_academic)["n_tokens"]
    response = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT,
@@ -712,7 +715,8 @@ def gen_multiple_choice_level(quantity: int, start_id=1):
    all_exams = get_all("level")
    seen_keys = set()
    for i in range(len(question["questions"])):
-        question["questions"][i], seen_keys = replace_exercise_if_exists(all_exams, question["questions"][i], question, seen_keys)
+        question["questions"][i], seen_keys = replace_exercise_if_exists(all_exams, question["questions"][i], question,
                                                                         seen_keys)
    return {
        "id": str(uuid.uuid4()),
        "prompt": "Select the appropriate option.",
--- a/helper/openai_interface.py
+++ b/helper/openai_interface.py
@@ -5,7 +5,7 @@ import re
 from dotenv import load_dotenv
-from helper.constants import GPT_3_5_TURBO_INSTRUCT
+from helper.constants import GPT_3_5_TURBO_INSTRUCT, BLACKLISTED_WORDS
 from helper.token_counter import count_tokens
 load_dotenv()
@@ -15,7 +15,7 @@ MAX_TOKENS = 4097
 TOP_P = 0.9
 FREQUENCY_PENALTY = 0.5
-TRY_LIMIT = 1
+TRY_LIMIT = 2
 try_count = 0
@@ -167,6 +167,12 @@ def make_openai_instruct_call(model, message: str, token_count, fields_to_check,
        temperature=0.7
    )["choices"][0]["text"]
    if has_blacklisted_words(response) and try_count < TRY_LIMIT:
        try_count = try_count + 1
        return make_openai_instruct_call(model, message, token_count, fields_to_check, temperature)
    elif has_blacklisted_words(response) and try_count >= TRY_LIMIT:
        return ""
    if fields_to_check is None:
        return response.replace("\n\n", " ").strip()
@@ -264,3 +270,8 @@ def get_speaking_corrections(text):
    token_count = count_tokens(message)["n_tokens"]
    response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count, ["fixed_text"], 0.2)
    return response["fixed_text"]
 def has_blacklisted_words(text: str, words=None) -> bool:
     """Report whether ``text`` contains a blacklisted term as a whole word.

     Matching is case-insensitive and anchored on word boundaries, so a term
     only matches when it is not embedded in a longer word: "wine" matches
     "a glass of wine." but not "swine" or "twine". The previous plain
     substring test rejected such harmless text.

     Args:
         text: Text to screen. ``None`` or an empty string is treated as clean.
         words: Optional iterable of lowercase terms to look for. Defaults to
             the module-level ``BLACKLISTED_WORDS``.

     Returns:
         True if at least one term occurs as a standalone word, else False.
     """
     import re  # local import keeps this helper self-contained

     if not text:
         return False

     candidates = BLACKLISTED_WORDS if words is None else words
     # Escape each term so punctuation inside it is matched literally.
     escaped_terms = [re.escape(term.lower()) for term in candidates if term]
     if not escaped_terms:
         return False

     # Look-arounds instead of \b so terms that start or end with a non-word
     # character are still anchored correctly.
     pattern = r"(?<!\w)(?:" + "|".join(escaped_terms) + r")(?!\w)"
     return re.search(pattern, text.lower()) is not None