Reading-generation endpoint is partially working.

2023-10-19 23:39:45 +01:00
parent c3957403f6
commit 274252bf92
7 changed files with 1162 additions and 85 deletions
--- a/helper/api_messages.py
+++ b/helper/api_messages.py
@@ -12,6 +12,12 @@ class QuestionType(Enum):
    WRITING_TASK_2 = "Writing Task 2"
    SPEAKING_1 = "Speaking Task Part 1"
    SPEAKING_2 = "Speaking Task Part 2"
+    READING_PASSAGE_1 = "Reading Passage 1"
+    READING_PASSAGE_2 = "Reading Passage 2"
+    READING_PASSAGE_3 = "Reading Passage 3"
+
+class ExerciseType(Enum):  # enumerates exercise kinds; multiple choice is the only one defined here
+    MULTIPLE_CHOICE = "multiple choice"


 def get_grading_messages(question_type: QuestionType, question: str, answer: str, context: str = None):
--- a/helper/constants.py
+++ b/helper/constants.py
@@ -0,0 +1,120 @@
+GRADING_TEMPERATURE = 0.1  # presumably the sampling temperature for grading calls — confirm against callers
+TIPS_TEMPERATURE = 0.2  # presumably the sampling temperature for tips calls — confirm against callers
+GEN_QUESTION_TEMPERATURE = 0.7  # passed as the temperature argument by the generators in helper/exercises.py
+GPT_3_5_TURBO = "gpt-3.5-turbo"
+GPT_3_5_TURBO_16K = "gpt-3.5-turbo-16k"
+GPT_3_5_TURBO_INSTRUCT = "gpt-3.5-turbo-instruct"  # model the exercise generators pass to make_openai_instruct_call
+GRADING_FIELDS = ['comment', 'overall', 'task_response']
+GEN_FIELDS = ['topic']
+GEN_TEXT_FIELDS = ['title']  # fields_to_check for reading passages; 'text' is requested by the prompt but not checked
+LISTENING_GEN_FIELDS = ['transcript', 'exercise']
+
+FIREBASE_BUCKET = 'mti-ielts.appspot.com'  # NOTE(review): looks like a Firebase storage bucket name — confirm
+AUDIO_FILES_PATH = 'download-audio/'
+FIREBASE_LISTENING_AUDIO_FILES_PATH = 'listening_recordings/'
+
+VIDEO_FILES_PATH = 'download-video/'
+FIREBASE_SPEAKING_VIDEO_FILES_PATH = 'speaking_videos/'
+
+topics = [  # subject areas; NOTE(review): presumably sampled when generating content — confirm against callers
+    "Art and Creativity",
+    "History of Ancient Civilizations",
+    "Environmental Conservation",
+    "Space Exploration",
+    "Artificial Intelligence",
+    "Climate Change",
+    "World Religions",
+    "The Human Brain",
+    "Renewable Energy",
+    "Cultural Diversity",
+    "Modern Technology Trends",
+    "Women's Rights",
+    "Sustainable Agriculture",
+    "Globalization",
+    "Natural Disasters",
+    "Cybersecurity",
+    "Philosophy of Ethics",
+    "Robotics",
+    "Health and Wellness",
+    "Literature and Classics",
+    "World Geography",
+    "Music and Its Influence",
+    "Human Rights",
+    "Social Media Impact",
+    "Food Sustainability",
+    "Economics and Markets",
+    "Human Evolution",
+    "Political Systems",
+    "Mental Health Awareness",
+    "Quantum Physics",
+    "Biodiversity",
+    "Education Reform",
+    "Animal Rights",
+    "The Industrial Revolution",
+    "Future of Work",
+    "Film and Cinema",
+    "Genetic Engineering",
+    "Ancient Mythology",
+    "Climate Policy",
+    "Space Travel",
+    "Renewable Energy Sources",
+    "Cultural Heritage Preservation",
+    "Modern Art Movements",
+    "Immigration Issues",
+    "Sustainable Transportation",
+    "The History of Medicine",
+    "Artificial Neural Networks",
+    "Climate Adaptation",
+    "Philosophy of Existence",
+    "Augmented Reality",
+    "Yoga and Meditation",
+    "Literary Genres",
+    "World Oceans",
+    "Gender Equality",
+    "Social Networking",
+    "Sustainable Fashion",
+    "International Trade",
+    "Prehistoric Era",
+    "Democracy and Governance",
+    "Postcolonial Literature",
+    "Geopolitics",
+    "Psychology and Behavior",
+    "Nanotechnology",
+    "Endangered Species",
+    "Education Technology",
+    "Renaissance Art",
+    "Renewable Energy Policy",
+    "Cultural Festivals",
+    "Modern Architecture",
+    "Climate Resilience",
+    "Artificial Life",
+    "Fitness and Nutrition",
+    "Classic Literature Adaptations",
+    "World History Wars",
+    "Ethical Dilemmas",
+    "Internet of Things (IoT)",
+    "Meditation Practices",
+    "Literary Symbolism",
+    "Marine Conservation",
+    "Social Justice Movements",
+    "Sustainable Tourism",
+    "International Finance",
+    "Ancient Philosophy",
+    "Cold War Era",
+    "Behavioral Economics",
+    "Space Colonization",
+    "Clean Energy Initiatives",
+    "Cultural Exchange",
+    "Modern Sculpture",
+    "Climate Mitigation",
+    "Artificial Intelligence Ethics",
+    "Mindfulness",
+    "Literary Criticism",
+    "Wildlife Conservation",
+    "Political Activism",
+    "Renewable Energy Innovations",
+    "History of Mathematics",
+    "Human-Computer Interaction",
+    "Global Health",
+    "Cultural Appropriation"
+]
--- a/helper/exercises.py
+++ b/helper/exercises.py
@@ -0,0 +1,72 @@
+import queue
+
+from helper.api_messages import QuestionType
+from helper.openai_interface import make_openai_instruct_call
+from helper.token_counter import count_tokens
+from helper.constants import *
+
+def divide_number_into_parts(number, parts):
+    """Split `number` into `parts` near-equal integer sizes, returned in a FIFO queue.
+
+    The first `number % parts` sizes are one larger than the others, so the sizes
+    sum to `number`. Returns None when `number` is smaller than `parts`.
+    """
+    if number < parts:
+        return None
+
+    base, extra = divmod(number, parts)
+    sizes = queue.Queue()
+    for position in range(parts):
+        # Leading positions absorb the remainder, one unit each.
+        sizes.put(base + 1 if position < extra else base)
+
+    return sizes
+
+def fix_exercise_ids(exercises):
+    """Renumber every question across all exercises with consecutive string ids.
+
+    Numbering starts at "1" and carries on across exercise boundaries, following
+    the order of `exercises` and of each exercise's "questions" list. The question
+    dicts are updated in place and the same `exercises` object is returned.
+    """
+    all_questions = (
+        question for exercise in exercises for question in exercise["questions"]
+    )
+    for next_id, question in enumerate(all_questions, start=1):
+        question["id"] = str(next_id)
+
+    return exercises
+
+
+def generate_reading_passage(type: QuestionType, topic: str):
+    """Request an IELTS reading passage for the passage kind `type` on `topic` from the instruct model.
+
+    Returns the result of make_openai_instruct_call, which is asked to check for the GEN_TEXT_FIELDS keys."""
+    # type.value labels the requested passage; the previous type.READING_PASSAGE_1.value always said "Reading Passage 1".
+    prompt = f"Generate an extensive text for IELTS {type.value}, of at least 1500 words, on the topic of {topic}. " \
+             "The passage should offer a substantial amount of information, analysis, or narrative relevant to the chosen subject matter. " \
+             "This text passage aims to serve as the primary reading section of an IELTS test, providing an in-depth and comprehensive exploration of the topic. " \
+             "Provide your response in this json format: {'title': 'title of the text', 'text': 'generated text'}"
+    return make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, prompt, count_tokens(prompt)["n_tokens"], GEN_TEXT_FIELDS, GEN_QUESTION_TEMPERATURE)
+
+def gen_multiple_choice_exercise(text: str, quantity: int):
+    """Generate `quantity` multiple choice questions about `text` via two instruct-model calls.
+
+    The first call drafts the questions (no field check, so sanitised text comes back); the second
+    asks the model to parse that draft into the 'questions' json format, and its result is returned.
+    """
+    # The same example payload illustrates the expected format in both prompts.
+    questions_format = "'questions': [{'id': '9', 'options': [{'id': 'A', 'text': " \
+                       "'Economic benefits'}, {'id': 'B', 'text': 'Government regulations'}, {'id': 'C', 'text': " \
+                       "'Concerns about climate change'}, {'id': 'D', 'text': 'Technological advancement'}], " \
+                       "'prompt': 'What is the main reason for the shift towards renewable energy sources?', " \
+                       "'solution': 'C', 'variant': 'text'}]"
+    # Spaces around the count are required; the old prompt rendered as e.g. "Generate5multiple choice".
+    draft_prompt = "Generate " + str(quantity) + " multiple choice questions for this text: " \
+                   "'" + text + "'\n" + "Use this format: " + questions_format
+    draft = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, draft_prompt, count_tokens(draft_prompt)["n_tokens"],
+                                      None, GEN_QUESTION_TEMPERATURE)
+    # Second pass: have the model re-emit the draft in the json format so the 'questions' field can be checked.
+    parse_prompt = "Parse this '" + draft + "' into this json format: " + questions_format
+    return make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_prompt, count_tokens(parse_prompt)["n_tokens"],
+                                     ["questions"], GEN_QUESTION_TEMPERATURE)
--- a/helper/openai_interface.py
+++ b/helper/openai_interface.py
@@ -8,7 +8,6 @@ from dotenv import load_dotenv
 load_dotenv()
 openai.api_key = os.getenv("OPENAI_API_KEY")

-
 MAX_TOKENS = 4097
 TOP_P = 0.9
 FREQUENCY_PENALTY = 0.5
@@ -16,6 +15,7 @@ FREQUENCY_PENALTY = 0.5
 TRY_LIMIT = 1

 try_count = 0
+
 def process_response(input_string, quotation_check_field):
    if '{' in input_string:
        try:
@@ -24,25 +24,42 @@ def process_response(input_string, quotation_check_field):
            # Extract everything after the first '{' (inclusive)
            result = input_string[index:]
            if re.search(r"'" + quotation_check_field + "':\s*'(.*?)'", result, re.DOTALL | re.MULTILINE):
-                parsed_string = result.replace("\"", "\\\"")
-                pattern = r"(?<!\w)'|'(?!\w)"
-                parsed_string = re.sub(pattern, '"', parsed_string)
-                parsed_string = parsed_string.replace("\\\"", "'")
-                parsed_string = parsed_string.replace("\n\n", " ")
-
-                json_obj = json.loads(parsed_string)
+                json_obj = json.loads(parse_string(result))
                return json_obj
            else:
-                json_obj = json.loads(result)
+                parsed_string = result.replace("\n\n", " ")
+                json_obj = json.loads(parsed_string)
                return json_obj
        except Exception as e:
            print(f"Invalid JSON string! Exception: {e}")
    else:
        return input_string

+def parse_string(to_parse: str):
+    """Turn single-quoted pseudo-JSON text into double-quoted text for json.loads (heuristic)."""
+    # Hide real double quotes first, so the ones introduced by the substitution stay distinguishable.
+    hidden = to_parse.replace("\"", "\\\"")
+    # Any apostrophe that is not between two word characters is taken to be a string delimiter.
+    swapped = re.sub(r"(?<!\w)'|'(?!\w)", '"', hidden)
+    return swapped.replace("\\\"", "'").replace("\n\n", " ")
+
+
+def remove_special_chars_and_escapes(input_string):
+    """Reduce `input_string` to ASCII letters, digits and whitespace.
+
+    Escaped double quotes become apostrophes and blank-line breaks become a single space
+    before the filter runs; literal backslash-n/r/t sequences are dropped as a unit.
+    """
+    normalised = input_string.replace("\\\"", "'").replace("\n\n", " ")
+    # Alternation order matters: the two-character escape must be tried before the lone backslash
+    # falls into the "anything else" class, otherwise the trailing n/r/t would survive.
+    return re.sub(r'(\\[nrt])|[^a-zA-Z0-9\s]', '', normalised)
+
+
 def check_fields(obj, fields):
    return all(field in obj for field in fields)

+
 def make_openai_call(model, messages, token_count, fields_to_check, temperature):
    global try_count
    result = openai.ChatCompletion.create(
@@ -69,4 +86,26 @@ def make_openai_call(model, messages, token_count, fields_to_check, temperature)
        try_count = 0
        return processed_response

+def make_openai_instruct_call(model, message: str, token_count, fields_to_check, temperature):
+    """Send `message` to the completion endpoint and return its first choice, parsed when possible.
+    With `fields_to_check` None the sanitised text is returned; otherwise the parsed response is returned once it
+    holds every listed field, retrying up to TRY_LIMIT times before falling back to the last reply's sanitised text."""
+    global try_count
+    response = openai.Completion.create(
+        model=model, prompt=message, temperature=temperature, max_tokens=int(MAX_TOKENS - token_count - 300)
+    )["choices"][0]["text"]  # honours the caller's temperature (it was hard-coded to 0.7 before)

+    if fields_to_check is None:
+        return remove_special_chars_and_escapes(response)
+
+    processed_response = process_response(response, fields_to_check[0])
+    # Test for success first: a good reply obtained on a retry used to be discarded because try_count had
+    # already reached TRY_LIMIT. process_response yields None on invalid JSON, which cannot be field-checked.
+    if processed_response is not None and check_fields(processed_response, fields_to_check):
+        try_count = 0
+        return processed_response
+    if try_count < TRY_LIMIT:
+        try_count += 1
+        return make_openai_instruct_call(model, message, token_count, fields_to_check, temperature)
+    try_count = 0
+    return remove_special_chars_and_escapes(response)