Endpoint to generate reading passages is kinda working.

This commit is contained in:
Cristiano Ferreira
2023-10-19 23:39:45 +01:00
parent c3957403f6
commit 274252bf92
7 changed files with 1162 additions and 85 deletions

View File

@@ -12,6 +12,12 @@ class QuestionType(Enum):
WRITING_TASK_2 = "Writing Task 2"
SPEAKING_1 = "Speaking Task Part 1"
SPEAKING_2 = "Speaking Task Part 2"
READING_PASSAGE_1 = "Reading Passage 1"
READING_PASSAGE_2 = "Reading Passage 2"
READING_PASSAGE_3 = "Reading Passage 3"
class ExerciseType(Enum):
    """Kinds of exercise, keyed by their human-readable label."""

    MULTIPLE_CHOICE = "multiple choice"
def get_grading_messages(question_type: QuestionType, question: str, answer: str, context: str = None):

120
helper/constants.py Normal file
View File

@@ -0,0 +1,120 @@
# Sampling temperatures, one per kind of request.
# NOTE(review): presumably lower values are used where more repeatable output
# is wanted (grading, tips) and the higher one for generation - confirm.
GRADING_TEMPERATURE = 0.1
TIPS_TEMPERATURE = 0.2
GEN_QUESTION_TEMPERATURE = 0.7
# OpenAI model identifiers.
GPT_3_5_TURBO = "gpt-3.5-turbo"
GPT_3_5_TURBO_16K = "gpt-3.5-turbo-16k"
GPT_3_5_TURBO_INSTRUCT = "gpt-3.5-turbo-instruct"
# Field names per kind of response.
# NOTE(review): presumably the keys a parsed model response must contain to be
# accepted - confirm against the call sites.
GRADING_FIELDS = ['comment', 'overall', 'task_response']
GEN_FIELDS = ['topic']
GEN_TEXT_FIELDS = ['title']
LISTENING_GEN_FIELDS = ['transcript', 'exercise']
# Firebase storage bucket name and path prefixes for audio/video files.
# NOTE(review): the 'download-*' paths look like local scratch directories and
# the FIREBASE_* ones like bucket folders - confirm.
FIREBASE_BUCKET = 'mti-ielts.appspot.com'
AUDIO_FILES_PATH = 'download-audio/'
FIREBASE_LISTENING_AUDIO_FILES_PATH = 'listening_recordings/'
VIDEO_FILES_PATH = 'download-video/'
FIREBASE_SPEAKING_VIDEO_FILES_PATH = 'speaking_videos/'
# Pool of subject areas.
# NOTE(review): presumably sampled to pick the topic of a generated passage -
# confirm against the callers; no use of this list is visible in this excerpt.
topics = [
"Art and Creativity",
"History of Ancient Civilizations",
"Environmental Conservation",
"Space Exploration",
"Artificial Intelligence",
"Climate Change",
"World Religions",
"The Human Brain",
"Renewable Energy",
"Cultural Diversity",
"Modern Technology Trends",
"Women's Rights",
"Sustainable Agriculture",
"Globalization",
"Natural Disasters",
"Cybersecurity",
"Philosophy of Ethics",
"Robotics",
"Health and Wellness",
"Literature and Classics",
"World Geography",
"Music and Its Influence",
"Human Rights",
"Social Media Impact",
"Food Sustainability",
"Economics and Markets",
"Human Evolution",
"Political Systems",
"Mental Health Awareness",
"Quantum Physics",
"Biodiversity",
"Education Reform",
"Animal Rights",
"The Industrial Revolution",
"Future of Work",
"Film and Cinema",
"Genetic Engineering",
"Ancient Mythology",
"Climate Policy",
"Space Travel",
"Renewable Energy Sources",
"Cultural Heritage Preservation",
"Modern Art Movements",
"Immigration Issues",
"Sustainable Transportation",
"The History of Medicine",
"Artificial Neural Networks",
"Climate Adaptation",
"Philosophy of Existence",
"Augmented Reality",
"Yoga and Meditation",
"Literary Genres",
"World Oceans",
"Gender Equality",
"Social Networking",
"Sustainable Fashion",
"International Trade",
"Prehistoric Era",
"Democracy and Governance",
"Postcolonial Literature",
"Geopolitics",
"Psychology and Behavior",
"Nanotechnology",
"Endangered Species",
"Education Technology",
"Renaissance Art",
"Renewable Energy Policy",
"Cultural Festivals",
"Modern Architecture",
"Climate Resilience",
"Artificial Life",
"Fitness and Nutrition",
"Classic Literature Adaptations",
"World History Wars",
"Ethical Dilemmas",
"Internet of Things (IoT)",
"Meditation Practices",
"Literary Symbolism",
"Marine Conservation",
"Social Justice Movements",
"Sustainable Tourism",
"International Finance",
"Ancient Philosophy",
"Cold War Era",
"Behavioral Economics",
"Space Colonization",
"Clean Energy Initiatives",
"Cultural Exchange",
"Modern Sculpture",
"Climate Mitigation",
"Artificial Intelligence Ethics",
"Mindfulness",
"Literary Criticism",
"Wildlife Conservation",
"Political Activism",
"Renewable Energy Innovations",
"History of Mathematics",
"Human-Computer Interaction",
"Global Health",
"Cultural Appropriation"
]

72
helper/exercises.py Normal file
View File

@@ -0,0 +1,72 @@
import queue
from helper.api_messages import QuestionType
from helper.openai_interface import make_openai_instruct_call
from helper.token_counter import count_tokens
from helper.constants import *
def divide_number_into_parts(number, parts):
    """Split ``number`` into ``parts`` integer shares that differ by at most one.

    The shares are queued largest-first: the first ``number % parts`` entries
    are one greater than the remaining ones, so the entries sum to ``number``.

    Args:
        number: Total amount to distribute.
        parts: How many shares to produce.

    Returns:
        A ``queue.Queue`` holding ``parts`` integers, or ``None`` when the
        split is not possible: ``parts`` is not positive, or ``number`` is
        smaller than ``parts`` (some share would have to be zero).
    """
    # Guard parts <= 0 explicitly: zero would raise ZeroDivisionError below,
    # and a negative count would silently produce an empty queue.
    if parts <= 0 or number < parts:
        return None

    part_size, remaining = divmod(number, parts)
    shares = queue.Queue()
    for i in range(parts):
        # The first `remaining` shares absorb the leftover, one unit each.
        shares.put(part_size + 1 if i < remaining else part_size)
    return shares
def fix_exercise_ids(exercises):
    """Renumber every question across all exercises with consecutive ids.

    Questions are visited in order, exercise by exercise, and each one's
    "id" entry is overwritten in place with "1", "2", "3", ... (as strings).

    Returns the same ``exercises`` object that was passed in.
    """
    # Flatten lazily so the numbering runs across exercise boundaries.
    all_questions = (
        question
        for exercise in exercises
        for question in exercise["questions"]
    )
    for number, question in enumerate(all_questions, start=1):
        question["id"] = str(number)
    return exercises
def generate_reading_passage(type: QuestionType, topic: str):
    """Ask the instruct model for an IELTS reading passage on ``topic``.

    Args:
        type: The reading passage variant; its ``value`` (for example
            "Reading Passage 2") is named in the prompt.
        topic: Subject the passage should cover.

    Returns:
        Whatever ``make_openai_instruct_call`` returns for this prompt when
        checked against ``GEN_TEXT_FIELDS``.
    """
    # Use the value of the member that was actually passed in. The previous
    # code read `type.READING_PASSAGE_1.value`, so every request was worded as
    # "Reading Passage 1" regardless of `type`.
    prompt = (
        f"Generate an extensive text for IELTS {type.value}, of at least 1500 words, "
        f"on the topic of {topic}. The passage should offer a substantial amount of "
        "information, analysis, or narrative relevant to the chosen subject matter. "
        "This text passage aims to serve as the primary reading section of an IELTS "
        "test, providing an in-depth and comprehensive exploration of the topic. "
        "Provide your response in this json format: "
        "{'title': 'title of the text', 'text': 'generated text'}"
    )
    token_count = count_tokens(prompt)["n_tokens"]
    return make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT,
        prompt,
        token_count,
        GEN_TEXT_FIELDS,
        GEN_QUESTION_TEMPERATURE,
    )
def gen_multiple_choice_exercise(text: str, quantity: int):
    """Generate multiple choice questions about ``text`` in two model calls.

    The first call asks for ``quantity`` questions and gets back cleaned plain
    text (no required fields are passed). The second call asks the model to
    reformat that text into the JSON-like questions structure, and its result
    is checked for a "questions" field.

    Args:
        text: Passage the questions should be about.
        quantity: Number of questions to request.

    Returns:
        Whatever ``make_openai_instruct_call`` returns for the parsing prompt.
    """
    # Single copy of the example structure so both prompts stay in agreement.
    questions_format = (
        "'questions': [{'id': '9', 'options': [{'id': 'A', 'text': "
        "'Economic benefits'}, {'id': 'B', 'text': 'Government regulations'}, {'id': 'C', 'text': "
        "'Concerns about climate change'}, {'id': 'D', 'text': 'Technological advancement'}], "
        "'prompt': 'What is the main reason for the shift towards renewable energy sources?', "
        "'solution': 'C', 'variant': 'text'}]"
    )

    # Spaces around the number matter: the old concatenation produced
    # "Generate5multiple choice questions ...".
    generation_prompt = (
        f"Generate {quantity} multiple choice questions for this text: "
        f"'{text}'\n"
        f"Use this format: {questions_format}"
    )
    token_count = count_tokens(generation_prompt)["n_tokens"]
    mc_questions = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT,
        generation_prompt,
        token_count,
        None,
        GEN_QUESTION_TEMPERATURE,
    )

    parsing_prompt = f"Parse this '{mc_questions}' into this json format: {questions_format}"
    token_count = count_tokens(parsing_prompt)["n_tokens"]
    return make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT,
        parsing_prompt,
        token_count,
        ["questions"],
        GEN_QUESTION_TEMPERATURE,
    )

View File

@@ -8,7 +8,6 @@ from dotenv import load_dotenv
# Load variables from a .env file into the environment before the key is read.
load_dotenv()
# API key for the OpenAI client, taken from the OPENAI_API_KEY variable.
openai.api_key = os.getenv("OPENAI_API_KEY")
# NOTE(review): presumably the model's combined prompt + completion token
# limit - confirm.
MAX_TOKENS = 4097
# NOTE(review): presumably sampling parameters forwarded to the OpenAI API;
# their use is not visible in this excerpt - confirm.
TOP_P = 0.9
FREQUENCY_PENALTY = 0.5
@@ -16,6 +15,7 @@ FREQUENCY_PENALTY = 0.5
# Maximum number of retries when a response lacks the required fields.
TRY_LIMIT = 1
# Retries used so far; module-level state that the call helpers declare as
# `global`, increment on a retry and reset to 0 before returning.
try_count = 0
def process_response(input_string, quotation_check_field):
if '{' in input_string:
try:
@@ -24,25 +24,42 @@ def process_response(input_string, quotation_check_field):
# Extract everything after the first '{' (inclusive)
result = input_string[index:]
if re.search(r"'" + quotation_check_field + "':\s*'(.*?)'", result, re.DOTALL | re.MULTILINE):
parsed_string = result.replace("\"", "\\\"")
pattern = r"(?<!\w)'|'(?!\w)"
parsed_string = re.sub(pattern, '"', parsed_string)
parsed_string = parsed_string.replace("\\\"", "'")
parsed_string = parsed_string.replace("\n\n", " ")
json_obj = json.loads(parsed_string)
json_obj = json.loads(parse_string(result))
return json_obj
else:
json_obj = json.loads(result)
parsed_string = result.replace("\n\n", " ")
json_obj = json.loads(parsed_string)
return json_obj
except Exception as e:
print(f"Invalid JSON string! Exception: {e}")
else:
return input_string
def parse_string(to_parse: str):
    """Rewrite single-quoted, JSON-like text so its delimiters are double quotes.

    Double quotes already in the text end up as single quotes, single quotes
    that act as delimiters become double quotes, and a single quote with a
    word character on both sides (as in "it's") is left alone. Every pair of
    consecutive newlines is collapsed into one space.
    """
    # Shield existing double quotes behind a backslash so they can be told
    # apart from the delimiters created in the next step.
    shielded = to_parse.replace("\"", "\\\"")
    delimiter_quote = re.compile(r"(?<!\w)'|'(?!\w)")
    requoted = delimiter_quote.sub('"', shielded)
    # Shielded quotes are turned into single quotes rather than restored.
    return requoted.replace("\\\"", "'").replace("\n\n", " ")
def remove_special_chars_and_escapes(input_string):
    """Reduce text to ASCII letters, digits and whitespace.

    Pairs of consecutive newlines are first collapsed into a single space.
    Then literal backslash-n, backslash-r and backslash-t sequences are
    dropped, along with every other character that is not an ASCII letter,
    a digit or whitespace.
    """
    normalised = input_string.replace("\\\"", "'").replace("\n\n", " ")
    # The escape alternative comes first so that "\n" written as two characters
    # is removed whole instead of leaving the letter behind.
    unwanted = re.compile(r'(\\[nrt])|[^a-zA-Z0-9\s]')
    return unwanted.sub('', normalised)
def check_fields(obj, fields):
    """Return True when every name in ``fields`` is present in ``obj``.

    Args:
        obj: Parsed response to inspect, normally a dict. ``None`` and plain
            strings never satisfy the check.
        fields: Iterable of required field names.

    Returns:
        True if ``obj`` contains all of ``fields``, otherwise False.
    """
    # None would raise TypeError on `in`, and a plain string would be matched
    # by substring (the word "title" anywhere in the text would count as a
    # field), so both are treated as "fields missing".
    if obj is None or isinstance(obj, str):
        return False
    return all(field in obj for field in fields)
def make_openai_call(model, messages, token_count, fields_to_check, temperature):
global try_count
result = openai.ChatCompletion.create(
@@ -69,4 +86,26 @@ def make_openai_call(model, messages, token_count, fields_to_check, temperature)
try_count = 0
return processed_response
def make_openai_instruct_call(model, message: str, token_count, fields_to_check, temperature):
    """Send a prompt to the OpenAI completions endpoint and post-process the reply.

    Args:
        model: Completion model name passed to ``openai.Completion.create``.
        message: Prompt text.
        token_count: Number of tokens in ``message``; subtracted from the
            budget to size ``max_tokens``.
        fields_to_check: Field names the parsed reply must contain. When it is
            ``None`` or empty, the reply is returned as cleaned plain text
            without any JSON parsing.
        temperature: Sampling temperature for the request.

    Returns:
        The parsed reply from ``process_response`` when it contains every
        required field. Otherwise (no fields requested, or every attempt
        failed the check) the last reply's text cleaned by
        ``remove_special_chars_and_escapes``.
    """
    # NOTE(review): 4097 is presumably the model's total token limit and 300 a
    # safety margin - confirm; the values are kept exactly as before.
    completion_budget = int(4097 - token_count - 300)

    # One initial attempt plus up to TRY_LIMIT retries. The counter is local,
    # so an exception part-way through cannot leave stale retry state behind.
    for _ in range(max(TRY_LIMIT, 0) + 1):
        response = openai.Completion.create(
            model=model,
            prompt=message,
            max_tokens=completion_budget,
            # Honour the caller's temperature; it used to be hard-coded to 0.7.
            temperature=temperature,
        )["choices"][0]["text"]

        if not fields_to_check:
            return remove_special_chars_and_escapes(response)

        processed_response = process_response(response, fields_to_check[0])
        # process_response yields None for unparseable JSON. Success is tested
        # before the retry limit so that a good reply on the last attempt is
        # returned instead of being replaced by the cleaned raw text.
        if processed_response is not None and check_fields(processed_response, fields_to_check):
            return processed_response

    # Every attempt failed validation: fall back to the cleaned raw text.
    return remove_special_chars_and_escapes(response)