From d532f7deb4b7173c912ba09f26057eb0c6f15f59 Mon Sep 17 00:00:00 2001
From: Cristiano Ferreira <cristiano.ferreira@flowinn.biz>
Date: Thu, 8 Feb 2024 23:42:02 +0000
Subject: [PATCH] Filter topics and words on exercises.

---
 app.py                     | 10 ++++++----
 helper/constants.py        | 31 ++++++++++++++++++++++++++-----
 helper/exercises.py        | 24 ++++++++++++++----------
 helper/openai_interface.py | 15 +++++++++++++--
 4 files changed, 59 insertions(+), 21 deletions(-)

diff --git a/app.py b/app.py
index d1a36a9..c4ea63f 100644
--- a/app.py
+++ b/app.py
@@ -243,7 +243,8 @@ def get_writing_task_1_general_question():
     try:
         gen_wt1_question = "Craft a prompt for an IELTS Writing Task 1 General Training exercise that instructs the " \
                            "student to compose a letter. The prompt should present a specific scenario or situation, " \
-                           "requiring the student to provide information, advice, or instructions within the letter."
+                           "requiring the student to provide information, advice, or instructions within the letter. " \
+                           "Make sure that the generated prompt does not contain forbidden subjects in muslim countries."
         token_count = count_tokens(gen_wt1_question)["n_tokens"]
         response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_wt1_question, token_count, None,
                                              GEN_QUESTION_TEMPERATURE)
@@ -391,7 +392,7 @@ def get_speaking_task_1_question():
         gen_sp1_question = "Craft a thought-provoking question for IELTS Speaking Part 1 that encourages candidates to delve deeply " \
                            "into personal experiences, preferences, or insights on diverse topics. Instruct the candidate to offer " \
                            "not only detailed descriptions but also provide nuanced explanations, examples, or anecdotes to enrich " \
-                           "their response." \
+                           "their response. Make sure that the generated question does not contain forbidden subjects in muslim countries. " \
                            "Provide your response in this json format: {'topic': 'topic','question': 'question'}"
         token_count = count_tokens(gen_sp1_question)["n_tokens"]
         response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_sp1_question, token_count, GEN_FIELDS,
@@ -462,7 +463,7 @@ def get_speaking_task_2_question():
         gen_sp2_question = "Create a question for IELTS Speaking Part 2 that encourages candidates to narrate a personal experience " \
                            "or story related to a randomly selected topic. Include 3 prompts that guide the candidate to describe " \
                            "specific aspects of the experience, such as details about the situation, their actions, and the " \
-                           "reasons it left a lasting impression." \
+                           "reasons it left a lasting impression. Make sure that the generated question does not contain forbidden subjects in muslim countries. " \
                            "Provide your response in this json format: {'topic': 'topic','question': 'question', " \
                            "'prompts': ['prompt_1', 'prompt_2', 'prompt_3']}"
         token_count = count_tokens(gen_sp2_question)["n_tokens"]
@@ -480,7 +481,8 @@ def get_speaking_task_3_question():
     try:
         gen_sp3_question = "Formulate a set of 3 questions for IELTS Speaking Part 3 that encourage candidates to engage in a " \
                            "meaningful discussion on a particular topic. Provide inquiries, ensuring " \
-                           "they explore various aspects, perspectives, and implications related to the topic." \
+                           "they explore various aspects, perspectives, and implications related to the topic. " \
+                           "Make sure that the generated question does not contain forbidden subjects in muslim countries. " \
                            "Provide your response in this json format: {'topic': 'topic','questions': ['question', " \
                            "'question', 'question']}"
         token_count = count_tokens(gen_sp3_question)["n_tokens"]
diff --git a/helper/constants.py b/helper/constants.py
index 40f0e50..d743b56 100644
--- a/helper/constants.py
+++ b/helper/constants.py
@@ -31,6 +31,11 @@ LISTENING_MIN_TIMER_DEFAULT = 30
 WRITING_MIN_TIMER_DEFAULT = 60
 SPEAKING_MIN_TIMER_DEFAULT = 14
 
+BLACKLISTED_WORDS = ["jesus", "sex", "gay", "lesbian", "homosexual", "god", "angel", "pornography", "beer", "wine",
+                     "cocaine", "drugs", "alcohol", "nudity", "lgbt", "casino", "gambling", "gaming", "catholicism",
+                     "discrimination", "politics", "politic", "christianity", "islam", "christian", "christians",
+                     "jews", "jew"]  # screened for in OpenAI responses by has_blacklisted_words (openai_interface.py)
+
 EN_US_VOICES = [
     {'Gender': 'Female', 'Id': 'Salli', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Name': 'Salli',
      'SupportedEngines': ['neural', 'standard']},
@@ -113,7 +118,6 @@ topics = [
     "Renewable Energy",
     "Cultural Diversity",
     "Modern Technology Trends",
-    "Women's Rights",
     "Sustainable Agriculture",
     "Globalization",
     "Natural Disasters",
@@ -124,7 +128,6 @@ topics = [
     "Literature and Classics",
     "World Geography",
     "Music and Its Influence",
-    "Human Rights",
     "Social Media Impact",
     "Food Sustainability",
     "Economics and Markets",
@@ -139,7 +142,6 @@ topics = [
     "Future of Work",
     "Film and Cinema",
     "Genetic Engineering",
-    "Ancient Mythology",
     "Climate Policy",
     "Space Travel",
     "Renewable Energy Sources",
@@ -155,7 +157,6 @@ topics = [
     "Yoga and Meditation",
     "Literary Genres",
     "World Oceans",
-    "Gender Equality",
     "Social Networking",
     "Sustainable Fashion",
     "International Trade",
@@ -201,7 +202,27 @@ topics = [
     "History of Mathematics",
     "Human-Computer Interaction",
     "Global Health",
-    "Cultural Appropriation"
+    "Cultural Appropriation",
+    "Traditional cuisine and culinary arts",
+    "Local music and dance traditions",
+    "Cultural festivals and celebrations",
+    "History of the region and historical landmarks",
+    "Traditional crafts and artisanal skills",
+    "Wildlife and conservation efforts",
+    "Local sports and athletic competitions",
+    "Fashion trends and clothing styles",
+    "Literature and poetry from the region",
+    "Education systems and advancements",
+    "Healthcare services and medical innovations",
+    "Family values and social dynamics",
+    "Travel destinations and tourist attractions",
+    "Environmental sustainability projects",
+    "Technological developments and innovations",
+    "Entrepreneurship and business ventures",
+    "Youth empowerment initiatives",
+    "Art exhibitions and cultural events",
+    "Philanthropy and community development projects",
+    "Political developments and civic engagement efforts"
 ]
 
 two_people_scenarios = [
diff --git a/helper/exercises.py b/helper/exercises.py
index 99cdff7..8112950 100644
--- a/helper/exercises.py
+++ b/helper/exercises.py
@@ -170,11 +170,12 @@ def build_write_blanks_solutions_listening(words: [], start_id):
 
 def generate_reading_passage(type: QuestionType, topic: str):
     gen_reading_passage_1 = "Generate an extensive text for IELTS " + type.value + ", of at least 1500 words, on the topic " \
-                                                                                   "of " + topic + ". The passage should offer a substantial amount of " \
-                                                                                                   "information, analysis, or narrative " \
-                                                                                                   "relevant to the chosen subject matter. This text passage aims to serve as the primary reading " \
-                                                                                                   "section of an IELTS test, providing an in-depth and comprehensive exploration of the topic." \
-                                                                                                   "Provide your response in this json format: {\"title\": \"title of the text\", \"text\": \"generated text\"}"
+                                                                                   "of '" + topic + "'. The passage should offer a substantial amount of " \
+                                                                                                    "information, analysis, or narrative " \
+                                                                                                    "relevant to the chosen subject matter. This text passage aims to serve as the primary reading " \
+                                                                                                    "section of an IELTS test, providing an in-depth and comprehensive exploration of the topic. " \
+                                                                                                    "Make sure that the generated text does not contain forbidden subjects in muslim countries. " \
+                                                                                                    "Provide your response in this json format: {\"title\": \"title of the text\", \"text\": \"generated text\"}"
     token_count = count_tokens(gen_reading_passage_1)["n_tokens"]
     return make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_reading_passage_1, token_count, GEN_TEXT_FIELDS,
                                      GEN_QUESTION_TEMPERATURE)
@@ -183,7 +184,8 @@ def generate_reading_passage(type: QuestionType, topic: str):
 def generate_listening_1_conversation(topic: str):
     gen_listening_1_conversation_2_people = "Compose an authentic conversation between two individuals in the everyday " \
                                             "social context of '" + topic + "'. Please include random names and genders " \
-                                                                            "for the characters in your dialogue."
+                                                                            "for the characters in your dialogue. " \
+                                                                            "Make sure that the generated conversation does not contain forbidden subjects in muslim countries."
     token_count = count_tokens(gen_listening_1_conversation_2_people)["n_tokens"]
     response = make_openai_instruct_call(
         GPT_3_5_TURBO_INSTRUCT,
@@ -223,7 +225,7 @@ def generate_listening_1_conversation(topic: str):
 
 
 def generate_listening_2_monologue(topic: str):
-    gen_listening_2_monologue_social = "Generate a comprehensive monologue set in the social context of: '" + topic + "'"
+    gen_listening_2_monologue_social = "Generate a comprehensive monologue set in the social context of: '" + topic + "'. Make sure that the generated monologue does not contain forbidden subjects in muslim countries."
     token_count = count_tokens(gen_listening_2_monologue_social)["n_tokens"]
     response = make_openai_instruct_call(
         GPT_3_5_TURBO_INSTRUCT,
@@ -238,7 +240,8 @@ def generate_listening_2_monologue(topic: str):
 def generate_listening_3_conversation(topic: str):
     gen_listening_3_conversation_4_people = "Compose an authentic and elaborate conversation between up to four individuals " \
                                             "in the everyday social context of '" + topic + \
-                                            "'. Please include random names and genders for the characters in your dialogue."
+                                            "'. Please include random names and genders for the characters in your dialogue. " \
+                                            "Make sure that the generated conversation does not contain forbidden subjects in muslim countries."
     token_count = count_tokens(gen_listening_3_conversation_4_people)["n_tokens"]
     response = make_openai_instruct_call(
         GPT_3_5_TURBO_INSTRUCT,
@@ -277,7 +280,7 @@ def generate_listening_3_conversation(topic: str):
 
 
 def generate_listening_4_monologue(topic: str):
-    gen_listening_4_monologue_academic = "Generate a comprehensive monologue an academic subject of: '" + topic + "'"
+    gen_listening_4_monologue_academic = "Generate a comprehensive monologue an academic subject of: '" + topic + "'. Make sure that the generated monologue does not contain forbidden subjects in muslim countries."
     token_count = count_tokens(gen_listening_4_monologue_academic)["n_tokens"]
     response = make_openai_instruct_call(
         GPT_3_5_TURBO_INSTRUCT,
@@ -712,7 +715,8 @@ def gen_multiple_choice_level(quantity: int, start_id=1):
     all_exams = get_all("level")
     seen_keys = set()
     for i in range(len(question["questions"])):
-        question["questions"][i], seen_keys = replace_exercise_if_exists(all_exams, question["questions"][i], question, seen_keys)
+        question["questions"][i], seen_keys = replace_exercise_if_exists(all_exams, question["questions"][i], question,
+                                                                         seen_keys)
     return {
         "id": str(uuid.uuid4()),
         "prompt": "Select the appropriate option.",
diff --git a/helper/openai_interface.py b/helper/openai_interface.py
index 1058ea3..3bd86af 100644
--- a/helper/openai_interface.py
+++ b/helper/openai_interface.py
@@ -5,7 +5,7 @@ import re
 
 from dotenv import load_dotenv
 
-from helper.constants import GPT_3_5_TURBO_INSTRUCT
+from helper.constants import GPT_3_5_TURBO_INSTRUCT, BLACKLISTED_WORDS
 from helper.token_counter import count_tokens
 
 load_dotenv()
@@ -15,7 +15,7 @@ MAX_TOKENS = 4097
 TOP_P = 0.9
 FREQUENCY_PENALTY = 0.5
 
-TRY_LIMIT = 1
+TRY_LIMIT = 2
 
 try_count = 0
 
@@ -167,6 +167,12 @@ def make_openai_instruct_call(model, message: str, token_count, fields_to_check,
         temperature=0.7
     )["choices"][0]["text"]
 
+    if has_blacklisted_words(response):  # scan the completion once instead of once per branch
+        if try_count >= TRY_LIMIT:
+            return ""  # NOTE(review): callers that pass fields_to_check index the result; "" breaks them - confirm
+        try_count = try_count + 1
+        return make_openai_instruct_call(model, message, token_count, fields_to_check, temperature)
+
     if fields_to_check is None:
         return response.replace("\n\n", " ").strip()
 
@@ -264,3 +270,8 @@ def get_speaking_corrections(text):
     token_count = count_tokens(message)["n_tokens"]
     response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count, ["fixed_text"], 0.2)
     return response["fixed_text"]
+
+
+def has_blacklisted_words(text: str) -> bool:
+    """Return True if text contains any BLACKLISTED_WORDS entry as a whole word, ignoring case."""
+    return any(re.search(r"\b" + re.escape(word) + r"\b", text, re.IGNORECASE) for word in BLACKLISTED_WORDS)