Compare commits
58 Commits
refactor-t
...
new-custom
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9df4889517 | ||
|
|
cf7a966141 | ||
|
|
d68617f33b | ||
|
|
eeaa04f856 | ||
|
|
beccf8b501 | ||
|
|
470f4cc83b | ||
|
|
3ad411ed71 | ||
|
|
7144a3f3ca | ||
|
|
b795a3fb79 | ||
|
|
034be25e8e | ||
|
|
a931f06c47 | ||
|
|
8e56a3228b | ||
|
|
14c5914420 | ||
|
|
6878e0a276 | ||
|
|
1f29ac6ee5 | ||
|
|
a1ee7e47da | ||
|
|
adfc027458 | ||
|
|
3a7bb7764f | ||
|
|
19f204d74d | ||
|
|
88ba9ab561 | ||
|
|
34afb5d1e8 | ||
|
|
eb904f836a | ||
|
|
ca12ad1161 | ||
|
|
8b8460517c | ||
|
|
9be9bfce0e | ||
|
|
4776f24229 | ||
|
|
bf9251eebb | ||
|
|
1ecda04c6b | ||
|
|
d5621c1793 | ||
|
|
4c41942dfe | ||
|
|
bef606fe14 | ||
|
|
358f240d16 | ||
|
|
e7d84b9704 | ||
|
|
b4dc6be927 | ||
|
|
afca610c09 | ||
|
|
495502bc93 | ||
|
|
565874ad41 | ||
|
|
e693f5ee2a | ||
|
|
a8b46160d4 | ||
|
|
640039d372 | ||
|
|
a3cd1cdf59 | ||
|
|
9a696bbeb5 | ||
|
|
2adb7d1847 | ||
|
|
b93ead3a7b | ||
|
|
ad3a32ce45 | ||
|
|
ee5f23b3d7 | ||
|
|
545aee1a19 | ||
|
|
3f749f1ff5 | ||
|
|
32ac2149f5 | ||
|
|
64cc207fe8 | ||
|
|
a4caecdb4f | ||
|
|
20dfd5be78 | ||
|
|
1d110d5fa9 | ||
|
|
7633822916 | ||
|
|
9bc06d8340 | ||
|
|
4ff3b02a1d | ||
|
|
7637322239 | ||
|
|
3676d7ad39 |
1
.env
1
.env
@@ -3,3 +3,4 @@ JWT_SECRET_KEY=6e9c124ba92e8814719dcb0f21200c8aa4d0f119a994ac5e06eb90a366c83ab2
|
|||||||
JWT_TEST_TOKEN=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0In0.Emrs2D3BmMP4b3zMjw0fJTPeyMwWEBDbxx2vvaWguO0
|
JWT_TEST_TOKEN=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0In0.Emrs2D3BmMP4b3zMjw0fJTPeyMwWEBDbxx2vvaWguO0
|
||||||
GOOGLE_APPLICATION_CREDENTIALS=firebase-configs/storied-phalanx-349916.json
|
GOOGLE_APPLICATION_CREDENTIALS=firebase-configs/storied-phalanx-349916.json
|
||||||
HEY_GEN_TOKEN=MjY4MDE0MjdjZmNhNDFmYTlhZGRkNmI3MGFlMzYwZDItMTY5NTExNzY3MA==
|
HEY_GEN_TOKEN=MjY4MDE0MjdjZmNhNDFmYTlhZGRkNmI3MGFlMzYwZDItMTY5NTExNzY3MA==
|
||||||
|
GPT_ZERO_API_KEY=0195b9bb24c5439899f71230809c74af
|
||||||
|
|||||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -2,3 +2,4 @@ __pycache__
|
|||||||
.idea
|
.idea
|
||||||
.env
|
.env
|
||||||
.DS_Store
|
.DS_Store
|
||||||
|
/firebase-configs/test_firebase.json
|
||||||
|
|||||||
8
.idea/.gitignore
generated
vendored
8
.idea/.gitignore
generated
vendored
@@ -1,8 +0,0 @@
|
|||||||
# Default ignored files
|
|
||||||
/shelf/
|
|
||||||
/workspace.xml
|
|
||||||
# Editor-based HTTP Client requests
|
|
||||||
/httpRequests/
|
|
||||||
# Datasource local storage ignored files
|
|
||||||
/dataSources/
|
|
||||||
/dataSources.local.xml
|
|
||||||
20
.idea/ielts-be.iml
generated
20
.idea/ielts-be.iml
generated
@@ -1,24 +1,14 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<module type="PYTHON_MODULE" version="4">
|
<module type="PYTHON_MODULE" version="4">
|
||||||
<component name="Flask">
|
|
||||||
<option name="enabled" value="true" />
|
|
||||||
</component>
|
|
||||||
<component name="NewModuleRootManager">
|
<component name="NewModuleRootManager">
|
||||||
<content url="file://$MODULE_DIR$">
|
<content url="file://$MODULE_DIR$">
|
||||||
<excludeFolder url="file://$MODULE_DIR$/venv" />
|
<excludeFolder url="file://$MODULE_DIR$/.venv" />
|
||||||
</content>
|
</content>
|
||||||
<orderEntry type="jdk" jdkName="Python 3.9" jdkType="Python SDK" />
|
<orderEntry type="jdk" jdkName="Python 3.11 (ielts-be)" jdkType="Python SDK" />
|
||||||
<orderEntry type="sourceFolder" forTests="false" />
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
</component>
|
</component>
|
||||||
<component name="PackageRequirementsSettings">
|
<component name="PyDocumentationSettings">
|
||||||
<option name="versionSpecifier" value="Don't specify version" />
|
<option name="format" value="GOOGLE" />
|
||||||
</component>
|
<option name="myDocStringFormat" value="Google" />
|
||||||
<component name="TemplatesService">
|
|
||||||
<option name="TEMPLATE_CONFIGURATION" value="Jinja2" />
|
|
||||||
<option name="TEMPLATE_FOLDERS">
|
|
||||||
<list>
|
|
||||||
<option value="$MODULE_DIR$/../flaskProject\templates" />
|
|
||||||
</list>
|
|
||||||
</option>
|
|
||||||
</component>
|
</component>
|
||||||
</module>
|
</module>
|
||||||
8
.idea/misc.xml
generated
8
.idea/misc.xml
generated
@@ -1,4 +1,10 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<project version="4">
|
<project version="4">
|
||||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9" project-jdk-type="Python SDK" />
|
<component name="Black">
|
||||||
|
<option name="sdkName" value="Python 3.11 (ielts-be)" />
|
||||||
|
</component>
|
||||||
|
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.11 (ielts-be)" project-jdk-type="Python SDK" />
|
||||||
|
<component name="PyCharmProfessionalAdvertiser">
|
||||||
|
<option name="shown" value="true" />
|
||||||
|
</component>
|
||||||
</project>
|
</project>
|
||||||
2
.idea/vcs.xml
generated
2
.idea/vcs.xml
generated
@@ -1,6 +1,6 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<project version="4">
|
<project version="4">
|
||||||
<component name="VcsDirectoryMappings">
|
<component name="VcsDirectoryMappings">
|
||||||
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
<mapping directory="" vcs="Git" />
|
||||||
</component>
|
</component>
|
||||||
</project>
|
</project>
|
||||||
BIN
faiss/ct_focus_tips_index.faiss
Normal file
BIN
faiss/ct_focus_tips_index.faiss
Normal file
Binary file not shown.
BIN
faiss/language_for_writing_tips_index.faiss
Normal file
BIN
faiss/language_for_writing_tips_index.faiss
Normal file
Binary file not shown.
BIN
faiss/reading_skill_tips_index.faiss
Normal file
BIN
faiss/reading_skill_tips_index.faiss
Normal file
Binary file not shown.
BIN
faiss/strategy_tips_index.faiss
Normal file
BIN
faiss/strategy_tips_index.faiss
Normal file
Binary file not shown.
BIN
faiss/tips_metadata.pkl
Normal file
BIN
faiss/tips_metadata.pkl
Normal file
Binary file not shown.
BIN
faiss/word_link_tips_index.faiss
Normal file
BIN
faiss/word_link_tips_index.faiss
Normal file
Binary file not shown.
BIN
faiss/word_partners_tips_index.faiss
Normal file
BIN
faiss/word_partners_tips_index.faiss
Normal file
Binary file not shown.
BIN
faiss/writing_skill_tips_index.faiss
Normal file
BIN
faiss/writing_skill_tips_index.faiss
Normal file
Binary file not shown.
@@ -18,7 +18,13 @@ GEN_FIELDS = ['topic']
|
|||||||
GEN_TEXT_FIELDS = ['title']
|
GEN_TEXT_FIELDS = ['title']
|
||||||
LISTENING_GEN_FIELDS = ['transcript', 'exercise']
|
LISTENING_GEN_FIELDS = ['transcript', 'exercise']
|
||||||
READING_EXERCISE_TYPES = ['fillBlanks', 'writeBlanks', 'trueFalse', 'paragraphMatch']
|
READING_EXERCISE_TYPES = ['fillBlanks', 'writeBlanks', 'trueFalse', 'paragraphMatch']
|
||||||
|
READING_3_EXERCISE_TYPES = ['fillBlanks', 'writeBlanks', 'trueFalse', 'paragraphMatch', 'ideaMatch']
|
||||||
LISTENING_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions', 'writeBlanksFill', 'writeBlanksForm']
|
LISTENING_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions', 'writeBlanksFill', 'writeBlanksForm']
|
||||||
|
LISTENING_1_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions', 'writeBlanksFill', 'writeBlanksFill',
|
||||||
|
'writeBlanksForm', 'writeBlanksForm', 'writeBlanksForm', 'writeBlanksForm']
|
||||||
|
LISTENING_2_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions']
|
||||||
|
LISTENING_3_EXERCISE_TYPES = ['multipleChoice3Options', 'writeBlanksQuestions']
|
||||||
|
LISTENING_4_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions', 'writeBlanksFill', 'writeBlanksForm']
|
||||||
|
|
||||||
TOTAL_READING_PASSAGE_1_EXERCISES = 13
|
TOTAL_READING_PASSAGE_1_EXERCISES = 13
|
||||||
TOTAL_READING_PASSAGE_2_EXERCISES = 13
|
TOTAL_READING_PASSAGE_2_EXERCISES = 13
|
||||||
@@ -35,7 +41,7 @@ SPEAKING_MIN_TIMER_DEFAULT = 14
|
|||||||
|
|
||||||
BLACKLISTED_WORDS = ["jesus", "sex", "gay", "lesbian", "homosexual", "god", "angel", "pornography", "beer", "wine",
|
BLACKLISTED_WORDS = ["jesus", "sex", "gay", "lesbian", "homosexual", "god", "angel", "pornography", "beer", "wine",
|
||||||
"cocaine", "alcohol", "nudity", "lgbt", "casino", "gambling", "catholicism",
|
"cocaine", "alcohol", "nudity", "lgbt", "casino", "gambling", "catholicism",
|
||||||
"discrimination", "politics", "politic", "christianity", "islam", "christian", "christians",
|
"discrimination", "politic", "christianity", "islam", "christian", "christians",
|
||||||
"jews", "jew", "discrimination", "discriminatory"]
|
"jews", "jew", "discrimination", "discriminatory"]
|
||||||
|
|
||||||
EN_US_VOICES = [
|
EN_US_VOICES = [
|
||||||
@@ -141,7 +147,6 @@ mti_topics = [
|
|||||||
"Poverty Alleviation",
|
"Poverty Alleviation",
|
||||||
"Cybersecurity and Privacy",
|
"Cybersecurity and Privacy",
|
||||||
"Human Rights",
|
"Human Rights",
|
||||||
"Social Justice",
|
|
||||||
"Food and Agriculture",
|
"Food and Agriculture",
|
||||||
"Cyberbullying and Online Safety",
|
"Cyberbullying and Online Safety",
|
||||||
"Linguistic Diversity",
|
"Linguistic Diversity",
|
||||||
@@ -169,7 +174,6 @@ topics = [
|
|||||||
"Space Exploration",
|
"Space Exploration",
|
||||||
"Artificial Intelligence",
|
"Artificial Intelligence",
|
||||||
"Climate Change",
|
"Climate Change",
|
||||||
"World Religions",
|
|
||||||
"The Human Brain",
|
"The Human Brain",
|
||||||
"Renewable Energy",
|
"Renewable Energy",
|
||||||
"Cultural Diversity",
|
"Cultural Diversity",
|
||||||
@@ -232,7 +236,6 @@ topics = [
|
|||||||
"Meditation Practices",
|
"Meditation Practices",
|
||||||
"Literary Symbolism",
|
"Literary Symbolism",
|
||||||
"Marine Conservation",
|
"Marine Conservation",
|
||||||
"Social Justice Movements",
|
|
||||||
"Sustainable Tourism",
|
"Sustainable Tourism",
|
||||||
"Ancient Philosophy",
|
"Ancient Philosophy",
|
||||||
"Cold War Era",
|
"Cold War Era",
|
||||||
@@ -656,3 +659,19 @@ academic_subjects = [
|
|||||||
"Ecology",
|
"Ecology",
|
||||||
"International Business"
|
"International Business"
|
||||||
]
|
]
|
||||||
|
|
||||||
|
grammar_types = [
|
||||||
|
"parts of speech",
|
||||||
|
"parts of speech - Nouns",
|
||||||
|
"parts of speech - Pronouns",
|
||||||
|
"parts of speech - Verbs",
|
||||||
|
"parts of speech - Adverbs",
|
||||||
|
"parts of speech - Adjectives",
|
||||||
|
"parts of speech - Conjunctions",
|
||||||
|
"parts of speech - Prepositions",
|
||||||
|
"parts of speech - Interjections",
|
||||||
|
"sentence structure",
|
||||||
|
"types of sentences",
|
||||||
|
"tenses",
|
||||||
|
"active voice and passive voice"
|
||||||
|
]
|
||||||
|
|||||||
1383
helper/exercises.py
1383
helper/exercises.py
File diff suppressed because it is too large
Load Diff
50
helper/gpt_zero.py
Normal file
50
helper/gpt_zero.py
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
from logging import getLogger
|
||||||
|
from typing import Dict, Optional
|
||||||
|
import requests
|
||||||
|
|
||||||
|
|
||||||
|
class GPTZero:
|
||||||
|
_GPT_ZERO_ENDPOINT = 'https://api.gptzero.me/v2/predict/text'
|
||||||
|
|
||||||
|
def __init__(self, gpt_zero_key: str):
|
||||||
|
self._logger = getLogger(__name__)
|
||||||
|
if gpt_zero_key is None:
|
||||||
|
self._logger.warning('GPT Zero key was not included! Skipping ai detection when grading.')
|
||||||
|
self._gpt_zero_key = gpt_zero_key
|
||||||
|
self._header = {
|
||||||
|
'x-api-key': gpt_zero_key
|
||||||
|
}
|
||||||
|
|
||||||
|
def run_detection(self, text: str):
|
||||||
|
if self._gpt_zero_key is None:
|
||||||
|
return None
|
||||||
|
data = {
|
||||||
|
'document': text,
|
||||||
|
'version': '',
|
||||||
|
'multilingual': False
|
||||||
|
}
|
||||||
|
response = requests.post(self._GPT_ZERO_ENDPOINT, headers=self._header, json=data)
|
||||||
|
if response.status_code != 200:
|
||||||
|
self._logger.error(f'GPT\'s Zero Endpoint returned with {response.status_code}: {response.json()}')
|
||||||
|
return None
|
||||||
|
return self._parse_detection(response.json())
|
||||||
|
|
||||||
|
def _parse_detection(self, response: Dict) -> Optional[Dict]:
|
||||||
|
try:
|
||||||
|
text_scan = response["documents"][0]
|
||||||
|
filtered_sentences = [
|
||||||
|
{
|
||||||
|
"sentence": item["sentence"],
|
||||||
|
"highlight_sentence_for_ai": item["highlight_sentence_for_ai"]
|
||||||
|
}
|
||||||
|
for item in text_scan["sentences"]
|
||||||
|
]
|
||||||
|
return {
|
||||||
|
"class_probabilities": text_scan["class_probabilities"],
|
||||||
|
"confidence_category": text_scan["confidence_category"],
|
||||||
|
"predicted_class": text_scan["predicted_class"],
|
||||||
|
"sentences": filtered_sentences
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
self._logger.error(f'Failed to parse GPT\'s Zero response: {str(e)}')
|
||||||
|
return None
|
||||||
@@ -1,17 +1,19 @@
|
|||||||
import os
|
import os
|
||||||
import random
|
import random
|
||||||
import time
|
import time
|
||||||
|
from logging import getLogger
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
import app
|
|
||||||
from helper.constants import *
|
from helper.constants import *
|
||||||
from helper.firebase_helper import upload_file_firebase_get_url, save_to_db_with_id
|
from helper.firebase_helper import upload_file_firebase_get_url, save_to_db_with_id
|
||||||
from heygen.AvatarEnum import AvatarEnum
|
from heygen.AvatarEnum import AvatarEnum
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
|
logger = getLogger(__name__)
|
||||||
|
|
||||||
# Get HeyGen token
|
# Get HeyGen token
|
||||||
TOKEN = os.getenv("HEY_GEN_TOKEN")
|
TOKEN = os.getenv("HEY_GEN_TOKEN")
|
||||||
FIREBASE_BUCKET = os.getenv('FIREBASE_BUCKET')
|
FIREBASE_BUCKET = os.getenv('FIREBASE_BUCKET')
|
||||||
@@ -29,26 +31,32 @@ GET_HEADER = {
|
|||||||
|
|
||||||
|
|
||||||
def create_videos_and_save_to_db(exercises, template, id):
|
def create_videos_and_save_to_db(exercises, template, id):
|
||||||
|
avatar = random.choice(list(AvatarEnum))
|
||||||
# Speaking 1
|
# Speaking 1
|
||||||
# Using list comprehension to find the element with the desired value in the 'type' field
|
# Using list comprehension to find the element with the desired value in the 'type' field
|
||||||
found_exercises_1 = [element for element in exercises if element.get('type') == 1]
|
found_exercises_1 = [element for element in exercises if element.get('type') == 1]
|
||||||
# Check if any elements were found
|
# Check if any elements were found
|
||||||
if found_exercises_1:
|
if found_exercises_1:
|
||||||
exercise_1 = found_exercises_1[0]
|
exercise_1 = found_exercises_1[0]
|
||||||
app.app.logger.info('Creating video for speaking part 1')
|
sp1_questions = []
|
||||||
sp1_result = create_video(exercise_1["question"], random.choice(list(AvatarEnum)))
|
logger.info('Creating video for speaking part 1')
|
||||||
|
for question in exercise_1["questions"]:
|
||||||
|
sp1_result = create_video(question, avatar)
|
||||||
if sp1_result is not None:
|
if sp1_result is not None:
|
||||||
sound_file_path = VIDEO_FILES_PATH + sp1_result
|
sound_file_path = VIDEO_FILES_PATH + sp1_result
|
||||||
firebase_file_path = FIREBASE_SPEAKING_VIDEO_FILES_PATH + sp1_result
|
firebase_file_path = FIREBASE_SPEAKING_VIDEO_FILES_PATH + sp1_result
|
||||||
url = upload_file_firebase_get_url(FIREBASE_BUCKET, firebase_file_path, sound_file_path)
|
url = upload_file_firebase_get_url(FIREBASE_BUCKET, firebase_file_path, sound_file_path)
|
||||||
sp1_video_path = firebase_file_path
|
video = {
|
||||||
sp1_video_url = url
|
"text": question,
|
||||||
template["exercises"][0]["text"] = exercise_1["question"]
|
"video_path": firebase_file_path,
|
||||||
template["exercises"][0]["title"] = exercise_1["topic"]
|
"video_url": url
|
||||||
template["exercises"][0]["video_url"] = sp1_video_url
|
}
|
||||||
template["exercises"][0]["video_path"] = sp1_video_path
|
sp1_questions.append(video)
|
||||||
else:
|
else:
|
||||||
app.app.logger.error("Failed to create video for part 1 question: " + exercise_1["question"])
|
logger.error("Failed to create video for part 1 question: " + exercise_1["question"])
|
||||||
|
template["exercises"][0]["prompts"] = sp1_questions
|
||||||
|
template["exercises"][0]["first_title"] = exercise_1["first_topic"]
|
||||||
|
template["exercises"][0]["second_title"] = exercise_1["second_topic"]
|
||||||
|
|
||||||
# Speaking 2
|
# Speaking 2
|
||||||
# Using list comprehension to find the element with the desired value in the 'type' field
|
# Using list comprehension to find the element with the desired value in the 'type' field
|
||||||
@@ -56,8 +64,8 @@ def create_videos_and_save_to_db(exercises, template, id):
|
|||||||
# Check if any elements were found
|
# Check if any elements were found
|
||||||
if found_exercises_2:
|
if found_exercises_2:
|
||||||
exercise_2 = found_exercises_2[0]
|
exercise_2 = found_exercises_2[0]
|
||||||
app.app.logger.info('Creating video for speaking part 2')
|
logger.info('Creating video for speaking part 2')
|
||||||
sp2_result = create_video(exercise_2["question"], random.choice(list(AvatarEnum)))
|
sp2_result = create_video(exercise_2["question"], avatar)
|
||||||
if sp2_result is not None:
|
if sp2_result is not None:
|
||||||
sound_file_path = VIDEO_FILES_PATH + sp2_result
|
sound_file_path = VIDEO_FILES_PATH + sp2_result
|
||||||
firebase_file_path = FIREBASE_SPEAKING_VIDEO_FILES_PATH + sp2_result
|
firebase_file_path = FIREBASE_SPEAKING_VIDEO_FILES_PATH + sp2_result
|
||||||
@@ -70,7 +78,7 @@ def create_videos_and_save_to_db(exercises, template, id):
|
|||||||
template["exercises"][1]["video_url"] = sp2_video_url
|
template["exercises"][1]["video_url"] = sp2_video_url
|
||||||
template["exercises"][1]["video_path"] = sp2_video_path
|
template["exercises"][1]["video_path"] = sp2_video_path
|
||||||
else:
|
else:
|
||||||
app.app.logger.error("Failed to create video for part 2 question: " + exercise_2["question"])
|
logger.error("Failed to create video for part 2 question: " + exercise_2["question"])
|
||||||
|
|
||||||
# Speaking 3
|
# Speaking 3
|
||||||
# Using list comprehension to find the element with the desired value in the 'type' field
|
# Using list comprehension to find the element with the desired value in the 'type' field
|
||||||
@@ -79,8 +87,7 @@ def create_videos_and_save_to_db(exercises, template, id):
|
|||||||
if found_exercises_3:
|
if found_exercises_3:
|
||||||
exercise_3 = found_exercises_3[0]
|
exercise_3 = found_exercises_3[0]
|
||||||
sp3_questions = []
|
sp3_questions = []
|
||||||
avatar = random.choice(list(AvatarEnum))
|
logger.info('Creating videos for speaking part 3')
|
||||||
app.app.logger.info('Creating videos for speaking part 3')
|
|
||||||
for question in exercise_3["questions"]:
|
for question in exercise_3["questions"]:
|
||||||
result = create_video(question, avatar)
|
result = create_video(question, avatar)
|
||||||
if result is not None:
|
if result is not None:
|
||||||
@@ -94,7 +101,7 @@ def create_videos_and_save_to_db(exercises, template, id):
|
|||||||
}
|
}
|
||||||
sp3_questions.append(video)
|
sp3_questions.append(video)
|
||||||
else:
|
else:
|
||||||
app.app.logger.error("Failed to create video for part 3 question: " + question)
|
logger.error("Failed to create video for part 3 question: " + question)
|
||||||
template["exercises"][2]["prompts"] = sp3_questions
|
template["exercises"][2]["prompts"] = sp3_questions
|
||||||
template["exercises"][2]["title"] = exercise_3["topic"]
|
template["exercises"][2]["title"] = exercise_3["topic"]
|
||||||
|
|
||||||
@@ -106,7 +113,7 @@ def create_videos_and_save_to_db(exercises, template, id):
|
|||||||
template["exercises"].pop(0)
|
template["exercises"].pop(0)
|
||||||
|
|
||||||
save_to_db_with_id("speaking", template, id)
|
save_to_db_with_id("speaking", template, id)
|
||||||
app.app.logger.info('Saved speaking to DB with id ' + id + " : " + str(template))
|
logger.info('Saved speaking to DB with id ' + id + " : " + str(template))
|
||||||
|
|
||||||
|
|
||||||
def create_video(text, avatar):
|
def create_video(text, avatar):
|
||||||
@@ -127,8 +134,8 @@ def create_video(text, avatar):
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
response = requests.post(create_video_url, headers=POST_HEADER, json=data)
|
response = requests.post(create_video_url, headers=POST_HEADER, json=data)
|
||||||
app.app.logger.info(response.status_code)
|
logger.info(response.status_code)
|
||||||
app.app.logger.info(response.json())
|
logger.info(response.json())
|
||||||
|
|
||||||
# GET TO CHECK STATUS AND GET VIDEO WHEN READY
|
# GET TO CHECK STATUS AND GET VIDEO WHEN READY
|
||||||
video_id = response.json()["data"]["video_id"]
|
video_id = response.json()["data"]["video_id"]
|
||||||
@@ -147,11 +154,11 @@ def create_video(text, avatar):
|
|||||||
error = response_data["data"]["error"]
|
error = response_data["data"]["error"]
|
||||||
|
|
||||||
if status != "completed" and error is None:
|
if status != "completed" and error is None:
|
||||||
app.app.logger.info(f"Status: {status}")
|
logger.info(f"Status: {status}")
|
||||||
time.sleep(10) # Wait for 10 second before the next request
|
time.sleep(10) # Wait for 10 second before the next request
|
||||||
|
|
||||||
app.app.logger.info(response.status_code)
|
logger.info(response.status_code)
|
||||||
app.app.logger.info(response.json())
|
logger.info(response.json())
|
||||||
|
|
||||||
# DOWNLOAD VIDEO
|
# DOWNLOAD VIDEO
|
||||||
download_url = response.json()['data']['video_url']
|
download_url = response.json()['data']['video_url']
|
||||||
@@ -165,8 +172,8 @@ def create_video(text, avatar):
|
|||||||
output_path = os.path.join(output_directory, output_filename)
|
output_path = os.path.join(output_directory, output_filename)
|
||||||
with open(output_path, 'wb') as f:
|
with open(output_path, 'wb') as f:
|
||||||
f.write(response.content)
|
f.write(response.content)
|
||||||
app.app.logger.info(f"File '{output_filename}' downloaded successfully.")
|
logger.info(f"File '{output_filename}' downloaded successfully.")
|
||||||
return output_filename
|
return output_filename
|
||||||
else:
|
else:
|
||||||
app.app.logger.error(f"Failed to download file. Status code: {response.status_code}")
|
logger.error(f"Failed to download file. Status code: {response.status_code}")
|
||||||
return None
|
return None
|
||||||
|
|||||||
@@ -2,8 +2,8 @@ import json
|
|||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from openai import OpenAI
|
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
from helper.constants import BLACKLISTED_WORDS, GPT_3_5_TURBO
|
from helper.constants import BLACKLISTED_WORDS, GPT_3_5_TURBO
|
||||||
from helper.token_counter import count_tokens
|
from helper.token_counter import count_tokens
|
||||||
@@ -54,7 +54,7 @@ def check_fields(obj, fields):
|
|||||||
return all(field in obj for field in fields)
|
return all(field in obj for field in fields)
|
||||||
|
|
||||||
|
|
||||||
def make_openai_call(model, messages, token_count, fields_to_check, temperature):
|
def make_openai_call(model, messages, token_count, fields_to_check, temperature, check_blacklisted=True):
|
||||||
global try_count
|
global try_count
|
||||||
result = client.chat.completions.create(
|
result = client.chat.completions.create(
|
||||||
model=model,
|
model=model,
|
||||||
@@ -65,6 +65,7 @@ def make_openai_call(model, messages, token_count, fields_to_check, temperature)
|
|||||||
)
|
)
|
||||||
result = result.choices[0].message.content
|
result = result.choices[0].message.content
|
||||||
|
|
||||||
|
if check_blacklisted:
|
||||||
found_blacklisted_word = get_found_blacklisted_words(result)
|
found_blacklisted_word = get_found_blacklisted_words(result)
|
||||||
|
|
||||||
if found_blacklisted_word is not None and try_count < TRY_LIMIT:
|
if found_blacklisted_word is not None and try_count < TRY_LIMIT:
|
||||||
@@ -188,7 +189,7 @@ def get_fixed_text(text):
|
|||||||
}
|
}
|
||||||
]
|
]
|
||||||
token_count = count_total_tokens(messages)
|
token_count = count_total_tokens(messages)
|
||||||
response = make_openai_call(GPT_3_5_TURBO, messages, token_count, ["fixed_text"], 0.2)
|
response = make_openai_call(GPT_3_5_TURBO, messages, token_count, ["fixed_text"], 0.2, False)
|
||||||
return response["fixed_text"]
|
return response["fixed_text"]
|
||||||
|
|
||||||
|
|
||||||
@@ -203,7 +204,7 @@ def get_speaking_corrections(text):
|
|||||||
}
|
}
|
||||||
]
|
]
|
||||||
token_count = count_total_tokens(messages)
|
token_count = count_total_tokens(messages)
|
||||||
response = make_openai_call(GPT_3_5_TURBO, messages, token_count, ["fixed_text"], 0.2)
|
response = make_openai_call(GPT_3_5_TURBO, messages, token_count, ["fixed_text"], 0.2, False)
|
||||||
return response["fixed_text"]
|
return response["fixed_text"]
|
||||||
|
|
||||||
|
|
||||||
@@ -211,6 +212,7 @@ def has_blacklisted_words(text: str):
|
|||||||
text_lower = text.lower()
|
text_lower = text.lower()
|
||||||
return any(word in text_lower for word in BLACKLISTED_WORDS)
|
return any(word in text_lower for word in BLACKLISTED_WORDS)
|
||||||
|
|
||||||
|
|
||||||
def get_found_blacklisted_words(text: str):
|
def get_found_blacklisted_words(text: str):
|
||||||
text_lower = text.lower()
|
text_lower = text.lower()
|
||||||
for word in BLACKLISTED_WORDS:
|
for word in BLACKLISTED_WORDS:
|
||||||
@@ -218,6 +220,7 @@ def get_found_blacklisted_words(text: str):
|
|||||||
return word
|
return word
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def remove_special_characters_from_beginning(string):
|
def remove_special_characters_from_beginning(string):
|
||||||
cleaned_string = string.lstrip('\n')
|
cleaned_string = string.lstrip('\n')
|
||||||
if string.startswith("'") or string.startswith('"'):
|
if string.startswith("'") or string.startswith('"'):
|
||||||
@@ -239,6 +242,7 @@ def replace_expression_in_object(obj, expression, replacement):
|
|||||||
obj[key] = replace_expression_in_object(obj[key], expression, replacement)
|
obj[key] = replace_expression_in_object(obj[key], expression, replacement)
|
||||||
return obj
|
return obj
|
||||||
|
|
||||||
|
|
||||||
def count_total_tokens(messages):
|
def count_total_tokens(messages):
|
||||||
total_tokens = 0
|
total_tokens = 0
|
||||||
for message in messages:
|
for message in messages:
|
||||||
|
|||||||
@@ -1136,12 +1136,11 @@ def getSpeakingTemplate():
|
|||||||
"exercises": [
|
"exercises": [
|
||||||
{
|
{
|
||||||
"id": str(uuid.uuid4()),
|
"id": str(uuid.uuid4()),
|
||||||
"prompts": [],
|
"prompts": ["questions"],
|
||||||
"text": "text",
|
"text": "Listen carefully and respond.",
|
||||||
"title": "topic",
|
"first_title": "first_topic",
|
||||||
"video_url": "sp1_video_url",
|
"second_title": "second_topic",
|
||||||
"video_path": "sp1_video_path",
|
"type": "interactiveSpeaking"
|
||||||
"type": "speaking"
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": str(uuid.uuid4()),
|
"id": str(uuid.uuid4()),
|
||||||
|
|||||||
@@ -95,17 +95,26 @@ def conversation_text_to_speech(conversation: list, file_name: str):
|
|||||||
|
|
||||||
|
|
||||||
def has_words(text: str):
|
def has_words(text: str):
|
||||||
|
if not has_common_words(text):
|
||||||
|
return False
|
||||||
english_words = set(words.words())
|
english_words = set(words.words())
|
||||||
words_in_input = text.split()
|
words_in_input = text.split()
|
||||||
return any(word.lower() in english_words for word in words_in_input)
|
return any(word.lower() in english_words for word in words_in_input)
|
||||||
|
|
||||||
|
|
||||||
def has_x_words(text: str, quantity):
|
def has_x_words(text: str, quantity):
|
||||||
|
if not has_common_words(text):
|
||||||
|
return False
|
||||||
english_words = set(words.words())
|
english_words = set(words.words())
|
||||||
words_in_input = text.split()
|
words_in_input = text.split()
|
||||||
english_word_count = sum(1 for word in words_in_input if word.lower() in english_words)
|
english_word_count = sum(1 for word in words_in_input if word.lower() in english_words)
|
||||||
return english_word_count >= quantity
|
return english_word_count >= quantity
|
||||||
|
|
||||||
|
def has_common_words(text: str):
|
||||||
|
english_words = {"the", "be", "to", "of", "and", "a", "in", "that", "have", "i"}
|
||||||
|
words_in_input = text.split()
|
||||||
|
english_word_count = sum(1 for word in words_in_input if word.lower() in english_words)
|
||||||
|
return english_word_count >= 10
|
||||||
|
|
||||||
def divide_text(text, max_length=3000):
|
def divide_text(text, max_length=3000):
|
||||||
if len(text) <= max_length:
|
if len(text) <= max_length:
|
||||||
|
|||||||
BIN
requirements.txt
BIN
requirements.txt
Binary file not shown.
9
training_content/__init__.py
Normal file
9
training_content/__init__.py
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
from .kb import TrainingContentKnowledgeBase
|
||||||
|
from .service import TrainingContentService
|
||||||
|
from .gpt import GPT
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"TrainingContentService",
|
||||||
|
"TrainingContentKnowledgeBase",
|
||||||
|
"GPT"
|
||||||
|
]
|
||||||
29
training_content/dtos.py
Normal file
29
training_content/dtos.py
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
from pydantic import BaseModel
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
|
||||||
|
class QueryDTO(BaseModel):
|
||||||
|
category: str
|
||||||
|
text: str
|
||||||
|
|
||||||
|
|
||||||
|
class DetailsDTO(BaseModel):
|
||||||
|
exam_id: str
|
||||||
|
date: int
|
||||||
|
performance_comment: str
|
||||||
|
detailed_summary: str
|
||||||
|
|
||||||
|
|
||||||
|
class WeakAreaDTO(BaseModel):
|
||||||
|
area: str
|
||||||
|
comment: str
|
||||||
|
|
||||||
|
|
||||||
|
class TrainingContentDTO(BaseModel):
|
||||||
|
details: List[DetailsDTO]
|
||||||
|
weak_areas: List[WeakAreaDTO]
|
||||||
|
queries: List[QueryDTO]
|
||||||
|
|
||||||
|
|
||||||
|
class TipsDTO(BaseModel):
|
||||||
|
tip_ids: List[str]
|
||||||
64
training_content/gpt.py
Normal file
64
training_content/gpt.py
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
import json
|
||||||
|
from logging import getLogger
|
||||||
|
|
||||||
|
from typing import List, Optional, Callable
|
||||||
|
|
||||||
|
from openai.types.chat import ChatCompletionMessageParam
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
|
||||||
|
class GPT:
|
||||||
|
|
||||||
|
def __init__(self, openai_client):
|
||||||
|
self._client = openai_client
|
||||||
|
self._default_model = "gpt-4o"
|
||||||
|
self._logger = getLogger(__name__)
|
||||||
|
|
||||||
|
def prediction(
|
||||||
|
self,
|
||||||
|
messages: List[ChatCompletionMessageParam],
|
||||||
|
map_to_model: Callable,
|
||||||
|
json_scheme: str,
|
||||||
|
*,
|
||||||
|
model: Optional[str] = None,
|
||||||
|
temperature: Optional[float] = None,
|
||||||
|
max_retries: int = 3
|
||||||
|
) -> List[BaseModel] | BaseModel | str | None:
|
||||||
|
params = {
|
||||||
|
"messages": messages,
|
||||||
|
"response_format": {"type": "json_object"},
|
||||||
|
"model": model if model else self._default_model
|
||||||
|
}
|
||||||
|
|
||||||
|
if temperature:
|
||||||
|
params["temperature"] = temperature
|
||||||
|
|
||||||
|
attempt = 0
|
||||||
|
while attempt < max_retries:
|
||||||
|
result = self._client.chat.completions.create(**params)
|
||||||
|
result_content = result.choices[0].message.content
|
||||||
|
try:
|
||||||
|
result_json = json.loads(result_content)
|
||||||
|
return map_to_model(result_json)
|
||||||
|
except Exception as e:
|
||||||
|
attempt += 1
|
||||||
|
self._logger.info(f"GPT returned malformed response: {result_content}\n {str(e)}")
|
||||||
|
params["messages"] = [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": (
|
||||||
|
"Your previous response wasn't in the json format I've explicitly told you to output. "
|
||||||
|
f"In your next response, you will fix it and return me just the json I've asked."
|
||||||
|
)
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": (
|
||||||
|
f"Previous response: {result_content}\n"
|
||||||
|
f"JSON format: {json_scheme}"
|
||||||
|
)
|
||||||
|
}
|
||||||
|
]
|
||||||
|
if attempt >= max_retries:
|
||||||
|
self._logger.error(f"Max retries exceeded!")
|
||||||
|
return None
|
||||||
85
training_content/kb.py
Normal file
85
training_content/kb.py
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
import json
|
||||||
|
import os
|
||||||
|
from logging import getLogger
|
||||||
|
from typing import Dict, List
|
||||||
|
|
||||||
|
import faiss
|
||||||
|
import pickle
|
||||||
|
|
||||||
|
|
||||||
|
class TrainingContentKnowledgeBase:
    """Per-category FAISS indices over training tips plus their metadata.

    ``create_embeddings_and_save_them`` builds the indices from the tips JSON
    file and writes them to ``./faiss``; ``load_indices_and_metadata`` reads
    them back; ``query_knowledge_base`` searches one category.
    """

    def __init__(self, embeddings, path: str = 'pathways_2_rw_with_ids.json'):
        """Store the embedding model and remember where the tips file lives.

        :param embeddings: object exposing ``encode(list)``.
        :param path: tips JSON file; it is only read when one of the helpers
            that need the raw tips is called.
        """
        self._embedding_model = embeddings
        # The tips file is not needed for querying, so it is loaded lazily
        self._path = path
        self._tips = None
        self._category_metadata = None
        self._indices = None
        self._logger = getLogger(__name__)

    @staticmethod
    def _read_json(path: str) -> Dict[str, any]:
        """Return the decoded content of the UTF-8 JSON file at ``path``."""
        with open(path, 'r', encoding="utf-8") as json_file:
            return json.load(json_file)

    def _iter_tips(self):
        """Yield every tip of every page of every unit, loading the file once."""
        if self._tips is None:
            self._tips = self._read_json(self._path)
        for unit in self._tips['units']:
            for page in unit['pages']:
                yield from page['tips']

    @staticmethod
    def _category_key(tip) -> str:
        """Return the tip's category lower-cased with spaces as underscores."""
        return tip['category'].lower().replace(" ", "_")

    def print_category_count(self):
        """Print a dict mapping each normalised category to its number of tips."""
        category_tips = {}
        for tip in self._iter_tips():
            category = self._category_key(tip)
            # Start from 0 so that the first tip of a category counts as 1
            category_tips[category] = category_tips.get(category, 0) + 1
        print(category_tips)

    def create_embeddings_and_save_them(self) -> None:
        """Build one L2 index per category and persist indices and metadata.

        Each index is written to ``./faiss/<category>_tips_index.faiss`` and
        the ``{category: [{"id", "text"}, ...]}`` metadata is pickled to
        ``./faiss/tips_metadata.pkl``. Metadata entries are appended in the
        same order as the vectors, which ``query_knowledge_base`` relies on.
        """
        category_embeddings = {}
        category_metadata = {}

        for tip in self._iter_tips():
            category = self._category_key(tip)
            category_embeddings.setdefault(category, []).append(tip['embedding'])
            category_metadata.setdefault(category, []).append({"id": tip['id'], "text": tip['text']})

        # The output directory may not exist on a fresh checkout
        os.makedirs("./faiss", exist_ok=True)

        for category, embeddings in category_embeddings.items():
            # NOTE(review): the tips' 'embedding' field is passed through
            # encode() as-is - confirm it holds text rather than vectors.
            embeddings_array = self._embedding_model.encode(embeddings)
            index = faiss.IndexFlatL2(embeddings_array.shape[1])
            index.add(embeddings_array)
            faiss.write_index(index, f"./faiss/{category}_tips_index.faiss")

        with open("./faiss/tips_metadata.pkl", "wb") as f:
            pickle.dump(category_metadata, f)

    def load_indices_and_metadata(
        self,
        directory: str = './faiss',
        suffix: str = '_tips_index.faiss',
        metadata_path: str = './faiss/tips_metadata.pkl'
    ):
        """Load every ``*<suffix>`` index in ``directory`` and the metadata.

        The category name of an index is its file name without ``suffix``.

        :param directory: folder scanned for index files.
        :param suffix: file-name suffix identifying index files.
        :param metadata_path: pickle file written by
            ``create_embeddings_and_save_them``.
        """
        self._indices = {}
        for file in os.listdir(directory):
            if not file.endswith(suffix):
                continue
            category = file[:-len(suffix)]
            self._indices[category] = faiss.read_index(f'{directory}/{file}')
            self._logger.info(f'Loaded embeddings for {category} category.')

        # SECURITY: unpickling executes arbitrary code; only point
        # metadata_path at files produced by this application.
        with open(metadata_path, 'rb') as f:
            self._category_metadata = pickle.load(f)
        self._logger.info("Loaded tips metadata")

    def query_knowledge_base(self, query: str, category: str, top_k: int = 5) -> List[Dict[str, str]]:
        """Return the metadata of the tips closest to ``query`` in ``category``.

        :param query: text to embed and search for.
        :param category: key of a loaded index.
        :param top_k: maximum number of tips to return.
        :return: up to ``top_k`` metadata dicts in the order returned by the
            index search.
        :raises KeyError: if ``category`` has no loaded index or metadata.
        """
        query_embedding = self._embedding_model.encode([query])
        index = self._indices[category]
        _distances, neighbours = index.search(query_embedding, top_k)
        metadata = self._category_metadata[category]
        # FAISS pads missing neighbours with -1 when the index holds fewer
        # than top_k vectors; -1 would otherwise pick the *last* tip.
        return [metadata[int(i)] for i in neighbours[0] if i >= 0]
|
||||||
341
training_content/service.py
Normal file
341
training_content/service.py
Normal file
@@ -0,0 +1,341 @@
|
|||||||
|
import json
|
||||||
|
from datetime import datetime
|
||||||
|
from logging import getLogger
|
||||||
|
|
||||||
|
from typing import Dict, List
|
||||||
|
|
||||||
|
from training_content.dtos import TrainingContentDTO, WeakAreaDTO, QueryDTO, DetailsDTO, TipsDTO
|
||||||
|
|
||||||
|
|
||||||
|
class TrainingContentService:
    """Builds a training document from a student's exam stats.

    The stats are grouped per exam session, the LLM is asked for per-exam
    comments, weak areas and knowledge-base queries, tips are looked up for
    those queries, the LLM selects the useful ones, and the result is stored
    in the ``training`` collection.

    DTO annotations are written as strings so they are not evaluated when the
    class body is executed.
    """

    # Categories offered to the LLM when it writes knowledge-base queries.
    TOOLS = [
        'critical_thinking',
        'language_for_writing',
        'reading_skills',
        'strategy',
        'words',
        'writing_skills'
    ]
    # Knowledge-base categories these are mapped to in _query_kb:
    # strategy word_link ct_focus reading_skill word_partners writing_skill language_for_writing

    def __init__(self, kb, openai, firestore):
        """Store the collaborators.

        :param kb: object exposing ``query_knowledge_base(text, category)``.
        :param openai: object exposing ``prediction(messages, mapper, schema)``.
        :param firestore: database client exposing ``collection(name)``.
        """
        self._training_content_module = kb
        self._db = firestore
        self._logger = getLogger(__name__)
        self._llm = openai

    def get_tips(self, stats):
        """Create and store the training document for ``stats``.

        :param stats: stat dicts as accepted by ``_sort_out_solutions``.
        :return: ``{"id": <id of the stored training document>}``.

        NOTE(review): if ``prediction`` can return ``None`` (e.g. after
        exhausting its retries) the attribute accesses below fail with
        ``AttributeError`` - confirm against the LLM wrapper.
        """
        exam_data, exam_map = self._sort_out_solutions(stats)
        training_content = self._get_exam_details_and_tips(exam_data)
        tips = self._query_kb(training_content.queries)
        usefull_tips = self._get_usefull_tips(exam_data, tips)
        exam_map = self._merge_exam_map_with_details(exam_map, training_content.details)

        weak_areas = {"weak_areas": [area.dict() for area in training_content.weak_areas]}

        training_doc = {
            # Creation time in milliseconds since the epoch
            'created_at': int(datetime.now().timestamp() * 1000),
            **exam_map,
            **usefull_tips.dict(),
            **weak_areas
        }
        doc_ref = self._db.collection('training').add(training_doc)

        # add() returns a pair; the id is read from its second element
        return {
            "id": doc_ref[1].id
        }

    @staticmethod
    def _merge_exam_map_with_details(exam_map: Dict[str, any], details: "List[DetailsDTO]"):
        """Combine the LLM's per-exam details with the computed exam map.

        :param exam_map: ``{exam_id: {...}}`` as built by ``_sort_out_solutions``.
        :param details: objects with ``exam_id``, ``date``,
            ``performance_comment`` and ``detailed_summary`` attributes.
        :return: ``{"exams": [...]}`` with one merged dict per detail.
        :raises KeyError: if a detail names an exam id missing from ``exam_map``.
        """
        return {
            "exams": [
                {
                    "id": detail.exam_id,
                    "date": detail.date,
                    "performance_comment": detail.performance_comment,
                    "detailed_summary": detail.detailed_summary,
                    **exam_map[detail.exam_id]
                }
                for detail in details
            ]
        }

    def _query_kb(self, queries: "List[QueryDTO]"):
        """Run every query against the knowledge base and collect the tips.

        ``words`` queries are sent to both the ``word_link`` and
        ``word_partners`` categories; unknown categories are logged and skipped.

        :param queries: objects with ``text`` and ``category`` attributes.
        :return: ``{"tips": [...]}`` with the results in query order.
        """
        map_categories = {
            "words": ("word_link", "word_partners"),
            "critical_thinking": ("ct_focus",),
            "language_for_writing": ("language_for_writing",),
            "reading_skills": ("reading_skill",),
            "strategy": ("strategy",),
            "writing_skills": ("writing_skill",)
        }

        tips = {"tips": []}
        for query in queries:
            kb_categories = map_categories.get(query.category)
            if kb_categories is None:
                self._logger.info(f"GTP tried to query knowledge base for {query.category} and it doesn't exist.")
                continue
            for kb_category in kb_categories:
                tips["tips"].extend(
                    self._training_content_module.query_knowledge_base(query.text, kb_category)
                )
        return tips

    def _get_exam_details_and_tips(self, exam_data: Dict[str, any]) -> "TrainingContentDTO":
        """Ask the LLM for exam details, weak areas and knowledge-base queries.

        :param exam_data: ``{"exams": ...}`` structure from ``_sort_out_solutions``.
        :return: the value produced by ``prediction`` with
            ``_map_gpt_response`` as mapper.
        """
        json_schema = (
            '{ "details": [{"exam_id": "", "date": 0, "performance_comment": "", "detailed_summary": ""}],'
            ' "weak_areas": [{"area": "", "comment": ""}], "queries": [{"text": "", "category": ""}] }'
        )
        messages = [
            {
                "role": "user",
                "content": (
                    f"I'm going to provide you with exam data, you will take the exam data and fill this json "
                    f'schema : {json_schema}. "performance_comment" is a short sentence that describes the '
                    'students\'s performance and main mistakes in a single exam, "detailed_summary" is a detailed '
                    'summary of the student\'s performance, "weak_areas" are identified areas'
                    ' across all exams which need to be improved upon, for example, area "Grammar and Syntax" comment "Issues'
                    ' with sentence structure and punctuation.", the "queries" field is where you will write queries '
                    'for tips that will be displayed to the student, the category attribute is a collection of '
                    'embeddings and the text will be the text used to query the knowledge base. The categories are '
                    f'the following [{", ".join(self.TOOLS)}]. The exam data will be a json where the key of the field '
                    '"exams" is the exam id, an exam can be composed of multiple modules or single modules.'
                )
            },
            {
                "role": "user",
                "content": f'Exam Data: {str(exam_data)}'
            }
        ]
        return self._llm.prediction(messages, self._map_gpt_response, json_schema)

    def _get_usefull_tips(self, exam_data: Dict[str, any], tips: Dict[str, any]) -> "TipsDTO":
        """Ask the LLM which of ``tips`` are useful for the given exam data.

        :param exam_data: ``{"exams": ...}`` structure from ``_sort_out_solutions``.
        :param tips: ``{"tips": [...]}`` structure from ``_query_kb``.
        :return: the value produced by ``prediction`` with a ``TipsDTO`` mapper.
        """
        json_schema = (
            '{ "tip_ids": [] }'
        )
        messages = [
            {
                "role": "user",
                "content": (
                    f"I'm going to provide you with tips and I want you to return to me the tips that "
                    f"can be usefull for the student that made the exam that I'm going to send you, return "
                    f"me the tip ids in this json format {json_schema}."
                )
            },
            {
                "role": "user",
                "content": f'Exam Data: {str(exam_data)}'
            },
            {
                "role": "user",
                "content": f'Tips: {str(tips)}'
            }
        ]
        return self._llm.prediction(messages, lambda response: TipsDTO(**response), json_schema)

    @staticmethod
    def _map_gpt_response(response: Dict[str, any]) -> "TrainingContentDTO":
        """Convert the decoded LLM JSON into a ``TrainingContentDTO``.

        :raises KeyError: if ``details``, ``weak_areas`` or ``queries`` is missing.
        """
        parsed_response = {
            "details": [DetailsDTO(**detail) for detail in response["details"]],
            "weak_areas": [WeakAreaDTO(**area) for area in response["weak_areas"]],
            "queries": [QueryDTO(**query) for query in response["queries"]]
        }
        return TrainingContentDTO(**parsed_response)

    def _sort_out_solutions(self, stats):
        """Group stats per exam session and collect the exercises they refer to.

        Stats are grouped by a ``"<date>-<user>"`` session key and then by
        module. The exam document of each session/module pair is fetched once
        and every stat is matched against it by the module's extractor.

        :param stats: iterable of stat dicts with ``date``, ``user``,
            ``module``, ``exam``, ``id`` and ``score`` (``total``/``correct``).
        :return: ``({"exams": exercises}, exam_map)`` where ``exercises`` is
            ``{session_key: {module: {exam_id: {"date", "exercises"}}}}`` and
            ``exam_map`` is ``{session_key: {"stat_ids", "score", "module"}}``.
            ``score`` (a rounded percentage) and ``module`` reflect the last
            module processed for the session.
        """
        grouped_stats = {}
        for stat in stats:
            session_key = f'{str(stat["date"])}-{stat["user"]}'
            session_modules = grouped_stats.setdefault(session_key, {})
            module_group = session_modules.setdefault(
                stat["module"], {"stats": [], "exam_id": stat["exam"]}
            )
            module_group["stats"].append(stat)

        # "level" exams share the structure of listening exams
        extractors = {
            "listening": self._get_listening_solutions,
            "reading": self._get_reading_solutions,
            "writing": self._get_writing_prompts_and_answers,
            "speaking": self._get_speaking_solutions,
            "level": self._get_listening_solutions
        }

        exercises = {}
        exam_map = {}
        for session_key, modules in grouped_stats.items():
            exercises[session_key] = {}
            for module, module_stats in modules.items():
                exam_id = module_stats["exam_id"]
                exam_entry = {"date": None, "exercises": []}
                exercises[session_key][module] = {exam_id: exam_entry}

                # The document is the same for every stat of this module, so
                # it is fetched once instead of once per stat.
                exam = self._get_doc_by_id(module, exam_id)
                if exam is None:
                    self._logger.warning(f"Exam {exam_id} not found in {module}; skipping its exercises.")
                extractor = extractors.get(module)

                exam_total_questions = 0
                exam_total_correct = 0
                for stat in module_stats["stats"]:
                    exam_total_questions += stat["score"]["total"]
                    exam_total_correct += stat["score"]["correct"]
                    exam_entry["date"] = stat["date"]

                    session_summary = exam_map.setdefault(session_key, {"stat_ids": [], "score": 0})
                    session_summary["stat_ids"].append(stat["id"])

                    if exam is not None and extractor is not None:
                        exam_entry["exercises"].extend(extractor(stat, exam))

                # Guard against stats that report zero questions
                if exam_total_questions:
                    score = round((exam_total_correct / exam_total_questions) * 100)
                else:
                    score = 0
                exam_map[session_key]["score"] = score
                exam_map[session_key]["module"] = module

        return {"exams": exercises}, exam_map

    def _get_writing_prompts_and_answers(self, stat, exam):
        """Pair each written answer in ``stat`` with its exam prompt.

        :return: list of ``{"exercise": prompt, "answer": answer}`` dicts;
            empty when the stat or exam is malformed.
        """
        result = []
        try:
            # Read all answers first so a malformed solution yields no result
            answers = [(solution['solution'], solution['id']) for solution in stat['solutions']]
            for answer, exercise_id in answers:
                for exam_exercise in exam["exercises"]:
                    if exam_exercise["id"] == exercise_id:
                        result.append({
                            "exercise": exam_exercise["prompt"],
                            "answer": answer
                        })
        except (KeyError, TypeError, IndexError) as e:
            self._logger.warning(f"Malformed stat object: {str(e)}")

        return result

    def _get_listening_solutions(self, stat, exam):
        """Collect the exam exercise matching ``stat`` for part-based exams.

        Only ``writeBlanks`` and ``multipleChoice`` stats produce an entry.

        :return: list of exercise dicts; whatever was collected before a
            malformed stat or exam was detected.
        """
        result = []
        try:
            for part in exam["parts"]:
                for exercise in part["exercises"]:
                    if exercise["id"] != stat["exercise"]:
                        continue
                    if stat["type"] == "writeBlanks":
                        result.append({
                            "question": exercise["prompt"],
                            "template": exercise["text"],
                            "solution": exercise["solutions"],
                            "answer": stat["solutions"]
                        })
                    elif stat["type"] == "multipleChoice":
                        result.append({
                            "question": exercise["prompt"],
                            "exercise": exercise["questions"],
                            "answer": stat["solutions"]
                        })
        except (KeyError, TypeError, IndexError) as e:
            self._logger.warning(f"Malformed stat object: {str(e)}")
        return result

    def _get_speaking_solutions(self, stat, exam):
        """Collect evaluation comments and question/transcript pairs.

        :return: single-element list holding ``{"comments", "exercises"}``, or
            ``[{}]`` when the stat has no usable evaluation.
        """
        result = {}
        try:
            # An empty solutions list raises IndexError, handled below
            evaluation = stat['solutions'][0]['evaluation']
            result = {
                "comments": {
                    key: value['comment'] for key, value in evaluation['task_response'].items()
                },
                "exercises": {}
            }

            for exercise in exam["exercises"]:
                if exercise["id"] != stat["exercise"]:
                    continue
                if stat["type"] == "interactiveSpeaking":
                    # Transcripts are stored as transcript_1, transcript_2, ...
                    for number, prompt in enumerate(exercise["prompts"], start=1):
                        result["exercises"][f"exercise_{number}"] = {
                            "question": prompt["text"],
                            "answer": evaluation.get(f'transcript_{number}', '')
                        }
                elif stat["type"] == "speaking":
                    result["exercises"]["exercise_1"] = {
                        "question": exercise["text"],
                        "answer": evaluation.get('transcript', '')
                    }
        except (KeyError, TypeError, IndexError) as e:
            self._logger.warning(f"Malformed stat object: {str(e)}")
        return [result]

    def _get_reading_solutions(self, stat, exam):
        """Collect the reading exercise matching ``stat`` with its passage.

        Handles ``fillBlanks``, ``writeBlanks``, ``trueFalse`` and
        ``matchSentences`` stats.

        :return: list of exercise dicts; whatever was collected before a
            malformed stat or exam was detected.
        """
        result = []
        try:
            for part in exam["parts"]:
                text = part["text"]
                for exercise in part["exercises"]:
                    if exercise["id"] != stat["exercise"]:
                        continue
                    if stat["type"] == "fillBlanks":
                        result.append({
                            "text": text,
                            "question": exercise["prompt"],
                            "template": exercise["text"],
                            "words": exercise["words"],
                            "solutions": exercise["solutions"],
                            "answer": stat["solutions"]
                        })
                    elif stat["type"] == "writeBlanks":
                        result.append({
                            "text": text,
                            "question": exercise["prompt"],
                            "template": exercise["text"],
                            "solutions": exercise["solutions"],
                            "answer": stat["solutions"]
                        })
                    elif stat["type"] == "trueFalse":
                        result.append({
                            "text": text,
                            "questions": exercise["questions"],
                            "answer": stat["solutions"]
                        })
                    elif stat["type"] == "matchSentences":
                        result.append({
                            "text": text,
                            "question": exercise["prompt"],
                            "sentences": exercise["sentences"],
                            "options": exercise["options"],
                            "answer": stat["solutions"]
                        })
        except (KeyError, TypeError, IndexError) as e:
            self._logger.warning(f"Malformed stat object: {str(e)}")
        return result

    def _get_doc_by_id(self, collection: str, doc_id: str):
        """Return the document ``doc_id`` of ``collection`` as a dict.

        :return: the document's data, or ``None`` when it does not exist.
        """
        doc = self._db.collection(collection).document(doc_id).get()
        return doc.to_dict() if doc.exists else None
|
||||||
Reference in New Issue
Block a user