Filter topics and words on exercises.

This commit is contained in:
Cristiano Ferreira
2024-02-08 23:42:02 +00:00
parent 9149e4b197
commit d532f7deb4
4 changed files with 59 additions and 21 deletions

View File

@@ -31,6 +31,11 @@ LISTENING_MIN_TIMER_DEFAULT = 30
WRITING_MIN_TIMER_DEFAULT = 60
SPEAKING_MIN_TIMER_DEFAULT = 14
# Terms that mark generated text as unsuitable; entries are kept lowercase.
BLACKLISTED_WORDS = [
    "jesus",
    "sex",
    "gay",
    "lesbian",
    "homosexual",
    "god",
    "angel",
    "pornography",
    "beer",
    "wine",
    "cocaine",
    "drugs",
    "alcohol",
    "nudity",
    "lgbt",
    "casino",
    "gambling",
    "gaming",
    "catholicism",
    "discrimination",
    "politics",
    "politic",
    "christianity",
    "islam",
    "christian",
    "christians",
    "jews",
    "jew",
]
EN_US_VOICES = [
{'Gender': 'Female', 'Id': 'Salli', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Name': 'Salli',
'SupportedEngines': ['neural', 'standard']},
@@ -113,7 +118,6 @@ topics = [
"Renewable Energy",
"Cultural Diversity",
"Modern Technology Trends",
"Women's Rights",
"Sustainable Agriculture",
"Globalization",
"Natural Disasters",
@@ -124,7 +128,6 @@ topics = [
"Literature and Classics",
"World Geography",
"Music and Its Influence",
"Human Rights",
"Social Media Impact",
"Food Sustainability",
"Economics and Markets",
@@ -139,7 +142,6 @@ topics = [
"Future of Work",
"Film and Cinema",
"Genetic Engineering",
"Ancient Mythology",
"Climate Policy",
"Space Travel",
"Renewable Energy Sources",
@@ -155,7 +157,6 @@ topics = [
"Yoga and Meditation",
"Literary Genres",
"World Oceans",
"Gender Equality",
"Social Networking",
"Sustainable Fashion",
"International Trade",
@@ -201,7 +202,27 @@ topics = [
"History of Mathematics",
"Human-Computer Interaction",
"Global Health",
"Cultural Appropriation"
"Cultural Appropriation",
"Traditional cuisine and culinary arts",
"Local music and dance traditions",
"Cultural festivals and celebrations",
"History of the region and historical landmarks",
"Traditional crafts and artisanal skills",
"Wildlife and conservation efforts",
"Local sports and athletic competitions",
"Fashion trends and clothing styles",
"Literature and poetry from the region",
"Education systems and advancements",
"Healthcare services and medical innovations",
"Family values and social dynamics",
"Travel destinations and tourist attractions",
"Environmental sustainability projects",
"Technological developments and innovations",
"Entrepreneurship and business ventures",
"Youth empowerment initiatives",
"Art exhibitions and cultural events",
"Philanthropy and community development projects",
"Political developments and civic engagement efforts"
]
two_people_scenarios = [

View File

@@ -170,11 +170,12 @@ def build_write_blanks_solutions_listening(words: [], start_id):
def generate_reading_passage(type: QuestionType, topic: str):
    """Ask the instruct model for an IELTS reading passage on ``topic``.

    Args:
        type: Question type; its ``value`` is interpolated into the prompt
            right after "IELTS ". (The name shadows the builtin but is kept
            because callers may pass it by keyword.)
        topic: Subject of the passage; quoted inside the prompt.

    Returns:
        Whatever ``make_openai_instruct_call`` returns for this prompt when
        asked to check ``GEN_TEXT_FIELDS``.
    """
    prompt = (
        "Generate an extensive text for IELTS " + type.value + ", of at least 1500 words, on the topic "
        "of '" + topic + "'. The passage should offer a substantial amount of "
        "information, analysis, or narrative "
        "relevant to the chosen subject matter. This text passage aims to serve as the primary reading "
        "section of an IELTS test, providing an in-depth and comprehensive exploration of the topic. "
        # Trailing space below keeps this sentence from running into the
        # JSON-format instruction that follows it.
        "Make sure that the generated text does not contain forbidden subjects in muslim countries. "
        "Provide your response in this json format: {\"title\": \"title of the text\", \"text\": \"generated text\"}"
    )
    token_count = count_tokens(prompt)["n_tokens"]
    return make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT,
        prompt,
        token_count,
        GEN_TEXT_FIELDS,
        GEN_QUESTION_TEMPERATURE,
    )
@@ -183,7 +184,8 @@ def generate_reading_passage(type: QuestionType, topic: str):
def generate_listening_1_conversation(topic: str):
gen_listening_1_conversation_2_people = "Compose an authentic conversation between two individuals in the everyday " \
"social context of '" + topic + "'. Please include random names and genders " \
"for the characters in your dialogue."
"for the characters in your dialogue. " \
"Make sure that the generated conversation does not contain forbidden subjects in muslim countries."
token_count = count_tokens(gen_listening_1_conversation_2_people)["n_tokens"]
response = make_openai_instruct_call(
GPT_3_5_TURBO_INSTRUCT,
@@ -223,7 +225,7 @@ def generate_listening_1_conversation(topic: str):
def generate_listening_2_monologue(topic: str):
gen_listening_2_monologue_social = "Generate a comprehensive monologue set in the social context of: '" + topic + "'"
gen_listening_2_monologue_social = "Generate a comprehensive monologue set in the social context of: '" + topic + "'. Make sure that the generated monologue does not contain forbidden subjects in muslim countries."
token_count = count_tokens(gen_listening_2_monologue_social)["n_tokens"]
response = make_openai_instruct_call(
GPT_3_5_TURBO_INSTRUCT,
@@ -238,7 +240,8 @@ def generate_listening_2_monologue(topic: str):
def generate_listening_3_conversation(topic: str):
gen_listening_3_conversation_4_people = "Compose an authentic and elaborate conversation between up to four individuals " \
"in the everyday social context of '" + topic + \
"'. Please include random names and genders for the characters in your dialogue."
"'. Please include random names and genders for the characters in your dialogue. " \
"Make sure that the generated conversation does not contain forbidden subjects in muslim countries."
token_count = count_tokens(gen_listening_3_conversation_4_people)["n_tokens"]
response = make_openai_instruct_call(
GPT_3_5_TURBO_INSTRUCT,
@@ -277,7 +280,7 @@ def generate_listening_3_conversation(topic: str):
def generate_listening_4_monologue(topic: str):
gen_listening_4_monologue_academic = "Generate a comprehensive monologue an academic subject of: '" + topic + "'"
gen_listening_4_monologue_academic = "Generate a comprehensive monologue an academic subject of: '" + topic + "'. Make sure that the generated monologue does not contain forbidden subjects in muslim countries."
token_count = count_tokens(gen_listening_4_monologue_academic)["n_tokens"]
response = make_openai_instruct_call(
GPT_3_5_TURBO_INSTRUCT,
@@ -712,7 +715,8 @@ def gen_multiple_choice_level(quantity: int, start_id=1):
all_exams = get_all("level")
seen_keys = set()
for i in range(len(question["questions"])):
question["questions"][i], seen_keys = replace_exercise_if_exists(all_exams, question["questions"][i], question, seen_keys)
question["questions"][i], seen_keys = replace_exercise_if_exists(all_exams, question["questions"][i], question,
seen_keys)
return {
"id": str(uuid.uuid4()),
"prompt": "Select the appropriate option.",

View File

@@ -5,7 +5,7 @@ import re
from dotenv import load_dotenv
from helper.constants import GPT_3_5_TURBO_INSTRUCT
from helper.constants import GPT_3_5_TURBO_INSTRUCT, BLACKLISTED_WORDS
from helper.token_counter import count_tokens
load_dotenv()
@@ -15,7 +15,7 @@ MAX_TOKENS = 4097
TOP_P = 0.9
FREQUENCY_PENALTY = 0.5
TRY_LIMIT = 1
TRY_LIMIT = 2
try_count = 0
@@ -167,6 +167,12 @@ def make_openai_instruct_call(model, message: str, token_count, fields_to_check,
temperature=0.7
)["choices"][0]["text"]
if has_blacklisted_words(response) and try_count < TRY_LIMIT:
try_count = try_count + 1
return make_openai_instruct_call(model, message, token_count, fields_to_check, temperature)
elif has_blacklisted_words(response) and try_count >= TRY_LIMIT:
return ""
if fields_to_check is None:
return response.replace("\n\n", " ").strip()
@@ -264,3 +270,8 @@ def get_speaking_corrections(text):
token_count = count_tokens(message)["n_tokens"]
response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count, ["fixed_text"], 0.2)
return response["fixed_text"]
def has_blacklisted_words(text: str):
    """Tell whether ``text`` contains any ``BLACKLISTED_WORDS`` entry as a whole word.

    Matching is case-insensitive and anchored on word boundaries, so an entry
    such as "jew" flags "Jew" or "jew," but not "jewellery", and "wine" does
    not flag "twine". Plain substring matching produced those false positives.

    Args:
        text: Text to inspect. ``None`` or an empty string is treated as clean.

    Returns:
        True if at least one blacklisted entry occurs in ``text``, else False.
    """
    # An empty alternation would match at every word boundary, so bail out
    # early when there is nothing to look for (or nothing to look in).
    if not text or not BLACKLISTED_WORDS:
        return False
    # re.escape keeps entries literal even if one ever contains regex syntax.
    alternatives = "|".join(re.escape(word) for word in BLACKLISTED_WORDS)
    pattern = r"\b(?:" + alternatives + r")\b"
    return re.search(pattern, text, flags=re.IGNORECASE) is not None