The "generate reading" endpoint is partially working.
This commit is contained in:
@@ -12,6 +12,12 @@ class QuestionType(Enum):
|
||||
WRITING_TASK_2 = "Writing Task 2"
|
||||
SPEAKING_1 = "Speaking Task Part 1"
|
||||
SPEAKING_2 = "Speaking Task Part 2"
|
||||
READING_PASSAGE_1 = "Reading Passage 1"
|
||||
READING_PASSAGE_2 = "Reading Passage 2"
|
||||
READING_PASSAGE_3 = "Reading Passage 3"
|
||||
|
||||
class ExerciseType(Enum):
    """Kinds of generated exercises.

    Values are the human-readable labels — presumably interpolated into
    generation prompts; TODO confirm against the prompt builders.
    Currently only multiple choice is supported.
    """
    MULTIPLE_CHOICE = "multiple choice"
|
||||
|
||||
def get_grading_messages(question_type: QuestionType, question: str, answer: str, context: str = None):
|
||||
|
||||
120
helper/constants.py
Normal file
120
helper/constants.py
Normal file
@@ -0,0 +1,120 @@
|
||||
# --- OpenAI sampling temperatures ---------------------------------------
# Low temperature for grading (presumably for reproducible scores), a bit
# higher for tips, high for creative question/passage generation — the
# generation value is what helper/exercises.py passes to the instruct calls.
GRADING_TEMPERATURE = 0.1
TIPS_TEMPERATURE = 0.2
GEN_QUESTION_TEMPERATURE = 0.7

# --- OpenAI model identifiers -------------------------------------------
GPT_3_5_TURBO = "gpt-3.5-turbo"
GPT_3_5_TURBO_16K = "gpt-3.5-turbo-16k"
GPT_3_5_TURBO_INSTRUCT = "gpt-3.5-turbo-instruct"

# --- Required keys in model JSON replies --------------------------------
# Passed as `fields_to_check` to the make_openai_*_call helpers, which
# retry when a reply is missing any of these keys.
GRADING_FIELDS = ['comment', 'overall', 'task_response']
GEN_FIELDS = ['topic']
GEN_TEXT_FIELDS = ['title']
LISTENING_GEN_FIELDS = ['transcript', 'exercise']

# --- Firebase bucket and file paths -------------------------------------
FIREBASE_BUCKET = 'mti-ielts.appspot.com'
AUDIO_FILES_PATH = 'download-audio/'  # assumed local staging dir for audio — TODO confirm
FIREBASE_LISTENING_AUDIO_FILES_PATH = 'listening_recordings/'  # bucket prefix for listening audio

VIDEO_FILES_PATH = 'download-video/'  # assumed local staging dir for video — TODO confirm
FIREBASE_SPEAKING_VIDEO_FILES_PATH = 'speaking_videos/'  # bucket prefix for speaking videos
||||
# Pool of subject-matter topics for generated IELTS content — presumably
# sampled by the generation endpoints when no topic is supplied; TODO
# confirm against the callers.
topics = [
    "Art and Creativity",
    "History of Ancient Civilizations",
    "Environmental Conservation",
    "Space Exploration",
    "Artificial Intelligence",
    "Climate Change",
    "World Religions",
    "The Human Brain",
    "Renewable Energy",
    "Cultural Diversity",
    "Modern Technology Trends",
    "Women's Rights",
    "Sustainable Agriculture",
    "Globalization",
    "Natural Disasters",
    "Cybersecurity",
    "Philosophy of Ethics",
    "Robotics",
    "Health and Wellness",
    "Literature and Classics",
    "World Geography",
    "Music and Its Influence",
    "Human Rights",
    "Social Media Impact",
    "Food Sustainability",
    "Economics and Markets",
    "Human Evolution",
    "Political Systems",
    "Mental Health Awareness",
    "Quantum Physics",
    "Biodiversity",
    "Education Reform",
    "Animal Rights",
    "The Industrial Revolution",
    "Future of Work",
    "Film and Cinema",
    "Genetic Engineering",
    "Ancient Mythology",
    "Climate Policy",
    "Space Travel",
    "Renewable Energy Sources",
    "Cultural Heritage Preservation",
    "Modern Art Movements",
    "Immigration Issues",
    "Sustainable Transportation",
    "The History of Medicine",
    "Artificial Neural Networks",
    "Climate Adaptation",
    "Philosophy of Existence",
    "Augmented Reality",
    "Yoga and Meditation",
    "Literary Genres",
    "World Oceans",
    "Gender Equality",
    "Social Networking",
    "Sustainable Fashion",
    "International Trade",
    "Prehistoric Era",
    "Democracy and Governance",
    "Postcolonial Literature",
    "Geopolitics",
    "Psychology and Behavior",
    "Nanotechnology",
    "Endangered Species",
    "Education Technology",
    "Renaissance Art",
    "Renewable Energy Policy",
    "Cultural Festivals",
    "Modern Architecture",
    "Climate Resilience",
    "Artificial Life",
    "Fitness and Nutrition",
    "Classic Literature Adaptations",
    "World History Wars",
    "Ethical Dilemmas",
    "Internet of Things (IoT)",
    "Meditation Practices",
    "Literary Symbolism",
    "Marine Conservation",
    "Social Justice Movements",
    "Sustainable Tourism",
    "International Finance",
    "Ancient Philosophy",
    "Cold War Era",
    "Behavioral Economics",
    "Space Colonization",
    "Clean Energy Initiatives",
    "Cultural Exchange",
    "Modern Sculpture",
    "Climate Mitigation",
    "Artificial Intelligence Ethics",
    "Mindfulness",
    "Literary Criticism",
    "Wildlife Conservation",
    "Political Activism",
    "Renewable Energy Innovations",
    "History of Mathematics",
    "Human-Computer Interaction",
    "Global Health",
    "Cultural Appropriation"
]
||||
72
helper/exercises.py
Normal file
72
helper/exercises.py
Normal file
@@ -0,0 +1,72 @@
|
||||
import queue
|
||||
|
||||
from helper.api_messages import QuestionType
|
||||
from helper.openai_interface import make_openai_instruct_call
|
||||
from helper.token_counter import count_tokens
|
||||
from helper.constants import *
|
||||
|
||||
def divide_number_into_parts(number, parts):
    """Split *number* into *parts* integer shares that differ by at most 1.

    The shares are returned largest-first in a FIFO ``queue.Queue``, e.g.
    10 split into 3 parts yields 4, 3, 3.

    Returns None when the split is impossible: *parts* is not positive, or
    *number* is smaller than *parts* (some share would be empty).
    """
    # Guard parts <= 0 as well: the original only checked number < parts,
    # so parts == 0 raised ZeroDivisionError below.
    if parts <= 0 or number < parts:
        return None

    part_size, remaining = divmod(number, parts)

    q = queue.Queue()
    # The first `remaining` shares each absorb one unit of the leftover.
    for i in range(parts):
        q.put(part_size + 1 if i < remaining else part_size)

    return q
||||
|
||||
def fix_exercise_ids(exercises):
    """Renumber question ids sequentially ("1", "2", ...) across exercises.

    Questions are numbered in exercise order, in place; the (mutated) input
    list is returned for convenience.
    """
    # Flatten every question in order, then number them starting from 1.
    all_questions = [
        question
        for exercise in exercises
        for question in exercise["questions"]
    ]
    for position, question in enumerate(all_questions, start=1):
        question["id"] = str(position)

    return exercises
||||
|
||||
|
||||
def generate_reading_passage(type: QuestionType, topic: str):
    """Ask the instruct model to write an IELTS reading passage on *topic*.

    *type* selects which reading passage (1/2/3) the prompt asks for.
    Returns whatever make_openai_instruct_call produces — expected to be a
    dict in the form {'title': ..., 'text': ...} (validated against
    GEN_TEXT_FIELDS).
    """
    # Bug fix: the prompt previously interpolated type.READING_PASSAGE_1.value,
    # so every call produced "Reading Passage 1" text regardless of the
    # requested passage type (member-via-member access is also removed in
    # recent Python Enum). Use the passed member's own value.
    prompt = (
        "Generate an extensive text for IELTS " + type.value
        + ", of at least 1500 words, on the topic of " + topic
        + ". The passage should offer a substantial amount of information, "
        "analysis, or narrative relevant to the chosen subject matter. This "
        "text passage aims to serve as the primary reading section of an "
        "IELTS test, providing an in-depth and comprehensive exploration of "
        "the topic. Provide your response in this json format: "
        "{'title': 'title of the text', 'text': 'generated text'}"
    )
    token_count = count_tokens(prompt)["n_tokens"]
    return make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, prompt, token_count,
                                     GEN_TEXT_FIELDS, GEN_QUESTION_TEMPERATURE)
||||
|
||||
def gen_multiple_choice_exercise(text: str, quantity: int):
    """Generate *quantity* multiple-choice questions for *text*.

    Two model calls: one free-form generation, then a second call that
    parses the first output into the expected json structure (validated to
    contain a 'questions' key).
    """
    # Shared example of the expected output structure; previously this
    # literal was duplicated in both prompts.
    example_format = (
        "'questions': [{'id': '9', 'options': [{'id': 'A', 'text': "
        "'Economic benefits'}, {'id': 'B', 'text': 'Government regulations'}, {'id': 'C', 'text': "
        "'Concerns about climate change'}, {'id': 'D', 'text': 'Technological advancement'}], "
        "'prompt': 'What is the main reason for the shift towards renewable energy sources?', "
        "'solution': 'C', 'variant': 'text'}]"
    )

    # Bug fix: spaces were missing around str(quantity), producing prompts
    # like "Generate3multiple choice questions ...".
    gen_multiple_choice_for_text = (
        "Generate " + str(quantity) + " multiple choice questions for this text: "
        "'" + text + "'\n"
        "Use this format: " + example_format
    )
    token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"]
    mc_questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_multiple_choice_for_text,
                                             token_count, None, GEN_QUESTION_TEMPERATURE)

    # Second pass: coerce the free-form output into strict json.
    parse_mc_questions = (
        "Parse this '" + mc_questions + "' into this json format: " + example_format
    )
    token_count = count_tokens(parse_mc_questions)["n_tokens"]
    return make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_mc_questions, token_count,
                                     ["questions"], GEN_QUESTION_TEMPERATURE)
||||
@@ -8,7 +8,6 @@ from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
openai.api_key = os.getenv("OPENAI_API_KEY")
|
||||
|
||||
|
||||
# Context window size (tokens) of the gpt-3.5-turbo model family.
MAX_TOKENS = 4097
# Nucleus-sampling cutoff — presumably passed to the chat completion call;
# TODO confirm (the call body is not fully visible here).
TOP_P = 0.9
# Repetition penalty — presumably passed to the chat completion call.
FREQUENCY_PENALTY = 0.5

# Maximum number of automatic retries when a reply misses required fields.
TRY_LIMIT = 1

# Module-level retry counter shared by the make_openai_*_call helpers.
try_count = 0
||||
|
||||
def process_response(input_string, quotation_check_field):
    """Best-effort parse of a model reply into a JSON object.

    When the reply contains a '{', everything from the first '{' onward is
    treated as (pseudo-)JSON. If the *quotation_check_field* key appears
    single-quoted, the text is normalized with parse_string() first.

    Returns the parsed object; the raw string when no '{' is present; or
    None when parsing raises (the exception is printed).

    NOTE(review): the source diff interleaved old and new lines of this
    function; this body is the reconstructed post-image — confirm against
    the actual file.
    """
    if '{' not in input_string:
        return input_string
    try:
        # Extract everything after the first '{' (inclusive).
        result = input_string[input_string.index('{'):]
        # Raw string for the regex (the original's "\s" in a plain string
        # is a deprecated invalid escape).
        if re.search(r"'" + quotation_check_field + r"':\s*'(.*?)'", result,
                     re.DOTALL | re.MULTILINE):
            # Single-quoted pseudo-JSON: convert quoting before parsing.
            return json.loads(parse_string(result))
        # Already double-quoted: just collapse paragraph breaks.
        return json.loads(result.replace("\n\n", " "))
    except Exception as e:
        print(f"Invalid JSON string! Exception: {e}")
|
||||
|
||||
def parse_string(to_parse: str):
    """Convert single-quoted pseudo-JSON into double-quoted form.

    Single quotes acting as string delimiters become double quotes, while
    apostrophes inside words (e.g. "it's") are preserved; pre-existing
    double quotes end up as single quotes, and double newlines collapse to
    a single space.
    """
    # Temporarily escape existing double quotes so they survive the swap.
    normalized = to_parse.replace('"', '\\"')
    # A single quote NOT flanked by word characters on at least one side is
    # a delimiter; turn those into double quotes.
    normalized = re.sub(r"(?<!\w)'|'(?!\w)", '"', normalized)
    # Restore the escaped originals as single quotes, then flatten
    # paragraph breaks.
    return normalized.replace('\\"', "'").replace("\n\n", " ")
|
||||
|
||||
|
||||
def remove_special_chars_and_escapes(input_string):
    """Strip punctuation and literal escape sequences from *input_string*.

    Escaped double quotes (backslash-quote) first become apostrophes and
    double newlines a single space; the final pass keeps only letters,
    digits and whitespace, also dropping literal \\n/\\r/\\t pairs.
    """
    # Pre-normalize: escaped quote -> apostrophe, paragraph break -> space.
    text = input_string.replace('\\"', "'").replace("\n\n", " ")
    # One regex pass removes literal escape pairs and every remaining
    # non-alphanumeric, non-whitespace character.
    return re.sub(r'(\\[nrt])|[^a-zA-Z0-9\s]', '', text)
|
||||
|
||||
|
||||
def check_fields(obj, fields):
    """Return True when every name in *fields* is present in *obj*."""
    for field in fields:
        if field not in obj:
            return False
    return True
|
||||
|
||||
|
||||
def make_openai_call(model, messages, token_count, fields_to_check, temperature):
|
||||
global try_count
|
||||
result = openai.ChatCompletion.create(
|
||||
@@ -69,4 +86,26 @@ def make_openai_call(model, messages, token_count, fields_to_check, temperature)
|
||||
try_count = 0
|
||||
return processed_response
|
||||
|
||||
def make_openai_instruct_call(model, message: str, token_count, fields_to_check, temperature):
    """Send *message* to an instruct model and return a validated reply.

    When *fields_to_check* is None the sanitized raw text is returned.
    Otherwise the reply is parsed with process_response() and, if any of
    the required fields is missing, the call is retried up to TRY_LIMIT
    times; after that the sanitized raw text is returned as a fallback.
    """
    global try_count
    response = openai.Completion.create(
        model=model,
        prompt=message,
        # Leave ~300 tokens of headroom under the 4097-token context window
        # (NOTE(review): same value as MAX_TOKENS above — consider reusing it).
        max_tokens=int(4097 - token_count - 300),
        # Bug fix: temperature was hardcoded to 0.7, silently ignoring the
        # caller-supplied argument.
        temperature=temperature
    )["choices"][0]["text"]

    # No field validation requested: just sanitize and return the raw text.
    if fields_to_check is None:
        return remove_special_chars_and_escapes(response)

    # NOTE(review): process_response may return None on a parse failure,
    # which would make check_fields raise — same as the original behavior.
    processed_response = process_response(response, fields_to_check[0])

    if not check_fields(processed_response, fields_to_check) and try_count < TRY_LIMIT:
        # Required keys missing: retry with the same prompt.
        try_count += 1
        return make_openai_instruct_call(model, message, token_count, fields_to_check, temperature)
    elif try_count >= TRY_LIMIT:
        # Retries exhausted: fall back to the sanitized raw text.
        try_count = 0
        return remove_special_chars_and_escapes(response)
    else:
        try_count = 0
        return processed_response
|
||||
|
||||
Reference in New Issue
Block a user