ENCOA-94: Added user to training content docs, added support for shuffles, tweaked training prompt
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
import json
|
||||
from datetime import datetime
|
||||
from logging import getLogger
|
||||
|
||||
@@ -24,7 +25,8 @@ class TrainingContentService:
|
||||
self._logger = getLogger(__name__)
|
||||
self._llm = openai
|
||||
|
||||
def get_tips(self, stats):
|
||||
def get_tips(self, training_content):
|
||||
user, stats = training_content["userID"], training_content["stats"]
|
||||
exam_data, exam_map = self._sort_out_solutions(stats)
|
||||
training_content = self._get_exam_details_and_tips(exam_data)
|
||||
tips = self._query_kb(training_content.queries)
|
||||
@@ -39,7 +41,8 @@ class TrainingContentService:
|
||||
'created_at': int(datetime.now().timestamp() * 1000),
|
||||
**exam_map,
|
||||
**usefull_tips.dict(),
|
||||
**weak_areas
|
||||
**weak_areas,
|
||||
"user": user
|
||||
}
|
||||
doc_ref = self._db.collection('training').add(training_doc)
|
||||
return {
|
||||
@@ -70,7 +73,6 @@ class TrainingContentService:
|
||||
|
||||
tips = {"tips": []}
|
||||
for query in queries:
|
||||
print(f"{query.category} {query.text}")
|
||||
if query.category == "words":
|
||||
tips["tips"].extend(
|
||||
self._training_content_module.query_knowledge_base(query.text, "word_link")
|
||||
@@ -104,7 +106,16 @@ class TrainingContentService:
|
||||
' with sentence structure and punctuation.", the "queries" field is where you will write queries '
|
||||
'for tips that will be displayed to the student, the category attribute is a collection of '
|
||||
'embeddings and the text will be the text used to query the knowledge base. The categories are '
|
||||
f'the following [{", ".join(self.TOOLS)}].'
|
||||
f'the following [{", ".join(self.TOOLS)}]. The exam data will be a json where the key of the field '
|
||||
'"exams" is the exam id, an exam can be composed of multiple modules or single modules. The student'
|
||||
' will see your response so refrain from using phrasing like "The student" did x, y and z. If the '
|
||||
'field "answer" in a question is an empty array "[]", then the student didn\'t answer any question '
|
||||
'and you must address that in your response. Also questions aren\'t modules, the only modules are: '
|
||||
'level, speaking, writing, reading and listening. The details array needs to be tailored to the '
|
||||
'exam attempt, even if you receive the same exam you must treat as different exams by their id.'
|
||||
'Don\'t make references to an exam by it\'s id, the GUI will handle that so the student knows '
|
||||
'which is the exam your comments and summary are referencing too. Even if the student hasn\'t '
|
||||
'submitted no answers for an exam, you must still fill the details structure addressing that fact.'
|
||||
)
|
||||
},
|
||||
{
|
||||
@@ -150,42 +161,68 @@ class TrainingContentService:
|
||||
def _sort_out_solutions(self, stats):
|
||||
grouped_stats = {}
|
||||
for stat in stats:
|
||||
exam_id = stat["exam"]
|
||||
session_key = f'{str(stat["date"])}-{stat["user"]}'
|
||||
module = stat["module"]
|
||||
if module not in grouped_stats:
|
||||
grouped_stats[module] = {}
|
||||
if exam_id not in grouped_stats[module]:
|
||||
grouped_stats[module][exam_id] = []
|
||||
grouped_stats[module][exam_id].append(stat)
|
||||
exam_id = stat["exam"]
|
||||
|
||||
if session_key not in grouped_stats:
|
||||
grouped_stats[session_key] = {}
|
||||
if module not in grouped_stats[session_key]:
|
||||
grouped_stats[session_key][module] = {
|
||||
"stats": [],
|
||||
"exam_id": exam_id
|
||||
}
|
||||
grouped_stats[session_key][module]["stats"].append(stat)
|
||||
|
||||
exercises = {}
|
||||
exam_map = {}
|
||||
for module, exams in grouped_stats.items():
|
||||
exercises[module] = {}
|
||||
for exam_id, stat_group in exams.items():
|
||||
exam = self._get_doc_by_id(module, exam_id)
|
||||
exercises[module][exam_id] = {"date": None, "exercises": [], "score": None}
|
||||
for session_key, modules in grouped_stats.items():
|
||||
exercises[session_key] = {}
|
||||
for module, module_stats in modules.items():
|
||||
exercises[session_key][module] = {}
|
||||
|
||||
exam_id = module_stats["exam_id"]
|
||||
if exam_id not in exercises[session_key][module]:
|
||||
exercises[session_key][module][exam_id] = {"date": None, "exercises": []}
|
||||
|
||||
exam_total_questions = 0
|
||||
exam_total_correct = 0
|
||||
for stat in stat_group:
|
||||
|
||||
for stat in module_stats["stats"]:
|
||||
exam_total_questions += stat["score"]["total"]
|
||||
exam_total_correct += stat["score"]["correct"]
|
||||
exercises[module][exam_id]["date"] = stat["date"]
|
||||
exercises[session_key][module][exam_id]["date"] = stat["date"]
|
||||
|
||||
if exam_id not in exam_map:
|
||||
exam_map[exam_id] = {"stat_ids": [], "score": 0}
|
||||
exam_map[exam_id]["stat_ids"].append(stat["id"])
|
||||
if session_key not in exam_map:
|
||||
exam_map[session_key] = {"stat_ids": [], "score": 0}
|
||||
exam_map[session_key]["stat_ids"].append(stat["id"])
|
||||
|
||||
exam = self._get_doc_by_id(module, exam_id)
|
||||
if module == "listening":
|
||||
exercises[module][exam_id]["exercises"].extend(self._get_listening_solutions(stat, exam))
|
||||
if module == "reading":
|
||||
exercises[module][exam_id]["exercises"].extend(self._get_reading_solutions(stat, exam))
|
||||
if module == "writing":
|
||||
exercises[module][exam_id]["exercises"].extend(self._get_writing_prompts_and_answers(stat, exam))
|
||||
exercises[session_key][module][exam_id]["exercises"].extend(
|
||||
self._get_listening_solutions(stat, exam))
|
||||
elif module == "reading":
|
||||
exercises[session_key][module][exam_id]["exercises"].extend(
|
||||
self._get_reading_solutions(stat, exam))
|
||||
elif module == "writing":
|
||||
exercises[session_key][module][exam_id]["exercises"].extend(
|
||||
self._get_writing_prompts_and_answers(stat, exam)
|
||||
)
|
||||
elif module == "speaking":
|
||||
exercises[session_key][module][exam_id]["exercises"].extend(
|
||||
self._get_speaking_solutions(stat, exam)
|
||||
)
|
||||
elif module == "level":
|
||||
exercises[session_key][module][exam_id]["exercises"].extend(
|
||||
self._get_level_solutions(stat, exam)
|
||||
)
|
||||
|
||||
exam_map[exam_id]["score"] = round((exam_total_correct / exam_total_questions) * 100)
|
||||
exam_map[exam_id]["module"] = module
|
||||
return exercises, exam_map
|
||||
exam_map[session_key]["score"] = round((exam_total_correct / exam_total_questions) * 100)
|
||||
exam_map[session_key]["module"] = module
|
||||
with open('exam_result.json', 'w') as file:
|
||||
json.dump({"exams": exercises}, file, indent=4)
|
||||
|
||||
return {"exams": exercises}, exam_map
|
||||
|
||||
def _get_writing_prompts_and_answers(self, stat, exam):
|
||||
result = []
|
||||
@@ -211,6 +248,54 @@ class TrainingContentService:
|
||||
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
def _get_mc_question(exercise, stat):
|
||||
shuffle_maps = stat.get("shuffleMaps", [])
|
||||
answer = stat["solutions"] if len(shuffle_maps) == 0 else []
|
||||
if len(shuffle_maps) != 0:
|
||||
for solution in stat["solutions"]:
|
||||
shuffle_map = [
|
||||
item["map"] for item in shuffle_maps
|
||||
if item["questionID"] == solution["question"]
|
||||
]
|
||||
answer.append({
|
||||
"question": solution["question"],
|
||||
"option": shuffle_map[solution["option"]]
|
||||
})
|
||||
return {
|
||||
"question": exercise["prompt"],
|
||||
"exercise": exercise["questions"],
|
||||
"answer": stat["solutions"]
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _swap_key_name(d, original_key, new_key):
|
||||
d[new_key] = d.pop(original_key)
|
||||
return d
|
||||
|
||||
def _get_level_solutions(self, stat, exam):
|
||||
result = []
|
||||
try:
|
||||
for part in exam["parts"]:
|
||||
for exercise in part["exercises"]:
|
||||
if exercise["id"] == stat["exercise"]:
|
||||
if stat["type"] == "fillBlanks":
|
||||
result.append({
|
||||
"prompt": exercise["prompt"],
|
||||
"template": exercise["text"],
|
||||
"words": exercise["words"],
|
||||
"solutions": exercise["solutions"],
|
||||
"answer": [
|
||||
self._swap_key_name(item, 'solution', 'option')
|
||||
for item in stat["solutions"]
|
||||
]
|
||||
})
|
||||
elif stat["type"] == "multipleChoice":
|
||||
result.append(self._get_mc_question(exercise, stat))
|
||||
except KeyError as e:
|
||||
self._logger.warning(f"Malformed stat object: {str(e)}")
|
||||
return result
|
||||
|
||||
def _get_listening_solutions(self, stat, exam):
|
||||
result = []
|
||||
try:
|
||||
@@ -224,16 +309,54 @@ class TrainingContentService:
|
||||
"solution": exercise["solutions"],
|
||||
"answer": stat["solutions"]
|
||||
})
|
||||
if stat["type"] == "multipleChoice":
|
||||
elif stat["type"] == "fillBlanks":
|
||||
result.append({
|
||||
"question": exercise["prompt"],
|
||||
"exercise": exercise["questions"],
|
||||
"template": exercise["text"],
|
||||
"words": exercise["words"],
|
||||
"solutions": exercise["solutions"],
|
||||
"answer": stat["solutions"]
|
||||
})
|
||||
elif stat["type"] == "multipleChoice":
|
||||
result.append(self._get_mc_question(exercise, stat))
|
||||
|
||||
except KeyError as e:
|
||||
self._logger.warning(f"Malformed stat object: {str(e)}")
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
def _find_shuffle_map(shuffle_maps, question_id):
|
||||
return next((item["map"] for item in shuffle_maps if item["questionID"] == question_id), None)
|
||||
|
||||
def _get_speaking_solutions(self, stat, exam):
|
||||
result = {}
|
||||
try:
|
||||
result = {
|
||||
"comments": {
|
||||
key: value['comment'] for key, value in stat['solutions'][0]['evaluation']['task_response'].items()}
|
||||
,
|
||||
"exercises": {}
|
||||
}
|
||||
|
||||
for exercise in exam["exercises"]:
|
||||
if exercise["id"] == stat["exercise"]:
|
||||
if stat["type"] == "interactiveSpeaking":
|
||||
for i in range(len(exercise["prompts"])):
|
||||
result["exercises"][f"exercise_{i+1}"] = {
|
||||
"question": exercise["prompts"][i]["text"]
|
||||
}
|
||||
for i in range(len(exercise["prompts"])):
|
||||
answer = stat['solutions'][0]["evaluation"].get(f'transcript_{i+1}', '')
|
||||
result["exercises"][f"exercise_{i+1}"]["answer"] = answer
|
||||
elif stat["type"] == "speaking":
|
||||
result["exercises"]["exercise_1"] = {
|
||||
"question": exercise["text"],
|
||||
"answer": stat['solutions'][0]["evaluation"].get(f'transcript', '')
|
||||
}
|
||||
except KeyError as e:
|
||||
self._logger.warning(f"Malformed stat object: {str(e)}")
|
||||
return [result]
|
||||
|
||||
def _get_reading_solutions(self, stat, exam):
|
||||
result = []
|
||||
try:
|
||||
@@ -258,8 +381,13 @@ class TrainingContentService:
|
||||
"solutions": exercise["solutions"],
|
||||
"answer": stat["solutions"]
|
||||
})
|
||||
else:
|
||||
# match_sentences
|
||||
elif stat["type"] == "trueFalse":
|
||||
result.append({
|
||||
"text": text,
|
||||
"questions": exercise["questions"],
|
||||
"answer": stat["solutions"]
|
||||
})
|
||||
elif stat["type"] == "matchSentences":
|
||||
result.append({
|
||||
"text": text,
|
||||
"question": exercise["prompt"],
|
||||
|
||||
@@ -36,7 +36,7 @@ class UploadLevelService:
|
||||
FileHelper.remove_directory(f'./tmp/{path_id}')
|
||||
|
||||
if response:
|
||||
return response.dict(exclude_none=True)
|
||||
return self.fix_ids(response.dict(exclude_none=True))
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
@@ -378,3 +378,18 @@ class UploadLevelService:
|
||||
)
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def fix_ids(response):
|
||||
counter = 1
|
||||
for part in response["parts"]:
|
||||
for exercise in part["exercises"]:
|
||||
if exercise["type"] == "multipleChoice":
|
||||
for question in exercise["questions"]:
|
||||
question["id"] = counter
|
||||
counter += 1
|
||||
if exercise["type"] == "fillBlanks":
|
||||
for i in range(len(exercise["words"])):
|
||||
exercise["words"][i]["id"] = counter
|
||||
exercise["solutions"][i]["id"] = counter
|
||||
counter += 1
|
||||
return response
|
||||
Reference in New Issue
Block a user