Filter topics and words on exercises.

2024-02-08 23:42:02 +00:00
parent 9149e4b197
commit d532f7deb4
4 changed files with 59 additions and 21 deletions
--- a/helper/constants.py
+++ b/helper/constants.py
@@ -31,6 +31,11 @@ LISTENING_MIN_TIMER_DEFAULT = 30
 WRITING_MIN_TIMER_DEFAULT = 60
 SPEAKING_MIN_TIMER_DEFAULT = 14

+BLACKLISTED_WORDS = ["jesus", "sex", "gay", "lesbian", "homosexual", "god", "angel", "pornography", "beer", "wine",
+                     "cocaine", "drugs", "alcohol", "nudity", "lgbt", "casino", "gambling", "gaming", "catholicism",
+                     "discrimination", "politics", "politic", "christianity", "islam", "christian", "christians",
+                     "jews", "jew"]  # keep entries lower-case: has_blacklisted_words() tests them against lower-cased text
+
 EN_US_VOICES = [
    {'Gender': 'Female', 'Id': 'Salli', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Name': 'Salli',
     'SupportedEngines': ['neural', 'standard']},
@@ -113,7 +118,6 @@ topics = [
    "Renewable Energy",
    "Cultural Diversity",
    "Modern Technology Trends",
-    "Women's Rights",
    "Sustainable Agriculture",
    "Globalization",
    "Natural Disasters",
@@ -124,7 +128,6 @@ topics = [
    "Literature and Classics",
    "World Geography",
    "Music and Its Influence",
-    "Human Rights",
    "Social Media Impact",
    "Food Sustainability",
    "Economics and Markets",
@@ -139,7 +142,6 @@ topics = [
    "Future of Work",
    "Film and Cinema",
    "Genetic Engineering",
-    "Ancient Mythology",
    "Climate Policy",
    "Space Travel",
    "Renewable Energy Sources",
@@ -155,7 +157,6 @@ topics = [
    "Yoga and Meditation",
    "Literary Genres",
    "World Oceans",
-    "Gender Equality",
    "Social Networking",
    "Sustainable Fashion",
    "International Trade",
@@ -201,7 +202,27 @@ topics = [
    "History of Mathematics",
    "Human-Computer Interaction",
    "Global Health",
-    "Cultural Appropriation"
+    "Cultural Appropriation",
+    "Traditional cuisine and culinary arts",
+    "Local music and dance traditions",
+    "Cultural festivals and celebrations",
+    "History of the region and historical landmarks",
+    "Traditional crafts and artisanal skills",
+    "Wildlife and conservation efforts",
+    "Local sports and athletic competitions",
+    "Fashion trends and clothing styles",
+    "Literature and poetry from the region",
+    "Education systems and advancements",
+    "Healthcare services and medical innovations",
+    "Family values and social dynamics",
+    "Travel destinations and tourist attractions",
+    "Environmental sustainability projects",
+    "Technological developments and innovations",
+    "Entrepreneurship and business ventures",
+    "Youth empowerment initiatives",
+    "Art exhibitions and cultural events",
+    "Philanthropy and community development projects",
+    "Political developments and civic engagement efforts"
 ]

 two_people_scenarios = [
--- a/helper/exercises.py
+++ b/helper/exercises.py
@@ -170,11 +170,12 @@ def build_write_blanks_solutions_listening(words: [], start_id):

 def generate_reading_passage(type: QuestionType, topic: str):
    gen_reading_passage_1 = "Generate an extensive text for IELTS " + type.value + ", of at least 1500 words, on the topic " \
-                                                                                   "of " + topic + ". The passage should offer a substantial amount of " \
-                                                                                                   "information, analysis, or narrative " \
-                                                                                                   "relevant to the chosen subject matter. This text passage aims to serve as the primary reading " \
-                                                                                                   "section of an IELTS test, providing an in-depth and comprehensive exploration of the topic." \
-                                                                                                   "Provide your response in this json format: {\"title\": \"title of the text\", \"text\": \"generated text\"}"
+                                                                                   "of '" + topic + "'. The passage should offer a substantial amount of " \
+                                                                                                    "information, analysis, or narrative " \
+                                                                                                    "relevant to the chosen subject matter. This text passage aims to serve as the primary reading " \
+                                                                                                    "section of an IELTS test, providing an in-depth and comprehensive exploration of the topic. " \
+                                                                                                    "Make sure that the generated text does not contain forbidden subjects in muslim countries. " \
+                                                                                                    "Provide your response in this json format: {\"title\": \"title of the text\", \"text\": \"generated text\"}"
    token_count = count_tokens(gen_reading_passage_1)["n_tokens"]
    return make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_reading_passage_1, token_count, GEN_TEXT_FIELDS,
                                     GEN_QUESTION_TEMPERATURE)
@@ -183,7 +184,8 @@ def generate_reading_passage(type: QuestionType, topic: str):
 def generate_listening_1_conversation(topic: str):
    gen_listening_1_conversation_2_people = "Compose an authentic conversation between two individuals in the everyday " \
                                            "social context of '" + topic + "'. Please include random names and genders " \
-                                                                            "for the characters in your dialogue."
+                                                                            "for the characters in your dialogue. " \
+                                                                            "Make sure that the generated conversation does not contain forbidden subjects in muslim countries."
    token_count = count_tokens(gen_listening_1_conversation_2_people)["n_tokens"]
    response = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT,
@@ -223,7 +225,7 @@ def generate_listening_1_conversation(topic: str):


 def generate_listening_2_monologue(topic: str):
-    gen_listening_2_monologue_social = "Generate a comprehensive monologue set in the social context of: '" + topic + "'"
+    gen_listening_2_monologue_social = "Generate a comprehensive monologue set in the social context of: '" + topic + "'. Make sure that the generated monologue does not contain forbidden subjects in muslim countries."
    token_count = count_tokens(gen_listening_2_monologue_social)["n_tokens"]
    response = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT,
@@ -238,7 +240,8 @@ def generate_listening_2_monologue(topic: str):
 def generate_listening_3_conversation(topic: str):
    gen_listening_3_conversation_4_people = "Compose an authentic and elaborate conversation between up to four individuals " \
                                            "in the everyday social context of '" + topic + \
-                                            "'. Please include random names and genders for the characters in your dialogue."
+                                            "'. Please include random names and genders for the characters in your dialogue. " \
+                                            "Make sure that the generated conversation does not contain forbidden subjects in muslim countries."
    token_count = count_tokens(gen_listening_3_conversation_4_people)["n_tokens"]
    response = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT,
@@ -277,7 +280,7 @@ def generate_listening_3_conversation(topic: str):


 def generate_listening_4_monologue(topic: str):
-    gen_listening_4_monologue_academic = "Generate a comprehensive monologue an academic subject of: '" + topic + "'"
+    gen_listening_4_monologue_academic = "Generate a comprehensive monologue an academic subject of: '" + topic + "'. Make sure that the generated monologue does not contain forbidden subjects in muslim countries."
    token_count = count_tokens(gen_listening_4_monologue_academic)["n_tokens"]
    response = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT,
@@ -712,7 +715,8 @@ def gen_multiple_choice_level(quantity: int, start_id=1):
    all_exams = get_all("level")
    seen_keys = set()
    for i in range(len(question["questions"])):
-        question["questions"][i], seen_keys = replace_exercise_if_exists(all_exams, question["questions"][i], question, seen_keys)
+        question["questions"][i], seen_keys = replace_exercise_if_exists(all_exams, question["questions"][i], question,
+                                                                         seen_keys)
    return {
        "id": str(uuid.uuid4()),
        "prompt": "Select the appropriate option.",
--- a/helper/openai_interface.py
+++ b/helper/openai_interface.py
@@ -5,7 +5,7 @@ import re

 from dotenv import load_dotenv

-from helper.constants import GPT_3_5_TURBO_INSTRUCT
+from helper.constants import GPT_3_5_TURBO_INSTRUCT, BLACKLISTED_WORDS
 from helper.token_counter import count_tokens

 load_dotenv()
@@ -15,7 +15,7 @@ MAX_TOKENS = 4097
 TOP_P = 0.9
 FREQUENCY_PENALTY = 0.5

-TRY_LIMIT = 1
+TRY_LIMIT = 2  # bound that make_openai_instruct_call compares try_count against before re-requesting a blacklisted response

 try_count = 0

@@ -167,6 +167,12 @@ def make_openai_instruct_call(model, message: str, token_count, fields_to_check,
        temperature=0.7
    )["choices"][0]["text"]

+    if has_blacklisted_words(response) and try_count < TRY_LIMIT:
+        try_count = try_count + 1
+        return make_openai_instruct_call(model, message, token_count, fields_to_check, temperature)
+    elif has_blacklisted_words(response) and try_count >= TRY_LIMIT:
+        return ""
+
    if fields_to_check is None:
        return response.replace("\n\n", " ").strip()

@@ -264,3 +270,8 @@ def get_speaking_corrections(text):
    token_count = count_tokens(message)["n_tokens"]
    response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count, ["fixed_text"], 0.2)
    return response["fixed_text"]
+
+
+def has_blacklisted_words(text: str) -> bool:  # True when text contains any BLACKLISTED_WORDS entry as a whole word (case-insensitive)
+    text_lower = text.lower()
+    return any(re.search(r"\b" + re.escape(word) + r"\b", text_lower) for word in BLACKLISTED_WORDS)  # \b: "jew" must not fire on "jewelry"