ENCOA-94: Added user to training content docs, added support for shuffles, tweaked training prompt

This commit is contained in:
Carlos Mesquita
2024-08-26 18:14:57 +01:00
6 changed files with 751 additions and 332 deletions

View File

@@ -1,3 +1,4 @@
import json
from datetime import datetime
from logging import getLogger
@@ -24,7 +25,8 @@ class TrainingContentService:
self._logger = getLogger(__name__)
self._llm = openai
def get_tips(self, stats):
def get_tips(self, training_content):
user, stats = training_content["userID"], training_content["stats"]
exam_data, exam_map = self._sort_out_solutions(stats)
training_content = self._get_exam_details_and_tips(exam_data)
tips = self._query_kb(training_content.queries)
@@ -39,7 +41,8 @@ class TrainingContentService:
'created_at': int(datetime.now().timestamp() * 1000),
**exam_map,
**usefull_tips.dict(),
**weak_areas
**weak_areas,
"user": user
}
doc_ref = self._db.collection('training').add(training_doc)
return {
@@ -70,7 +73,6 @@ class TrainingContentService:
tips = {"tips": []}
for query in queries:
print(f"{query.category} {query.text}")
if query.category == "words":
tips["tips"].extend(
self._training_content_module.query_knowledge_base(query.text, "word_link")
@@ -104,7 +106,16 @@ class TrainingContentService:
' with sentence structure and punctuation.", the "queries" field is where you will write queries '
'for tips that will be displayed to the student, the category attribute is a collection of '
'embeddings and the text will be the text used to query the knowledge base. The categories are '
f'the following [{", ".join(self.TOOLS)}].'
f'the following [{", ".join(self.TOOLS)}]. The exam data will be a json where the key of the field '
'"exams" is the exam id, an exam can be composed of multiple modules or single modules. The student'
' will see your response so refrain from using phrasing like "The student" did x, y and z. If the '
'field "answer" in a question is an empty array "[]", then the student didn\'t answer any question '
'and you must address that in your response. Also questions aren\'t modules, the only modules are: '
'level, speaking, writing, reading and listening. The details array needs to be tailored to the '
'exam attempt, even if you receive the same exam you must treat as different exams by their id.'
'Don\'t make references to an exam by it\'s id, the GUI will handle that so the student knows '
'which is the exam your comments and summary are referencing too. Even if the student hasn\'t '
'submitted no answers for an exam, you must still fill the details structure addressing that fact.'
)
},
{
@@ -150,42 +161,68 @@ class TrainingContentService:
def _sort_out_solutions(self, stats):
grouped_stats = {}
for stat in stats:
exam_id = stat["exam"]
session_key = f'{str(stat["date"])}-{stat["user"]}'
module = stat["module"]
if module not in grouped_stats:
grouped_stats[module] = {}
if exam_id not in grouped_stats[module]:
grouped_stats[module][exam_id] = []
grouped_stats[module][exam_id].append(stat)
exam_id = stat["exam"]
if session_key not in grouped_stats:
grouped_stats[session_key] = {}
if module not in grouped_stats[session_key]:
grouped_stats[session_key][module] = {
"stats": [],
"exam_id": exam_id
}
grouped_stats[session_key][module]["stats"].append(stat)
exercises = {}
exam_map = {}
for module, exams in grouped_stats.items():
exercises[module] = {}
for exam_id, stat_group in exams.items():
exam = self._get_doc_by_id(module, exam_id)
exercises[module][exam_id] = {"date": None, "exercises": [], "score": None}
for session_key, modules in grouped_stats.items():
exercises[session_key] = {}
for module, module_stats in modules.items():
exercises[session_key][module] = {}
exam_id = module_stats["exam_id"]
if exam_id not in exercises[session_key][module]:
exercises[session_key][module][exam_id] = {"date": None, "exercises": []}
exam_total_questions = 0
exam_total_correct = 0
for stat in stat_group:
for stat in module_stats["stats"]:
exam_total_questions += stat["score"]["total"]
exam_total_correct += stat["score"]["correct"]
exercises[module][exam_id]["date"] = stat["date"]
exercises[session_key][module][exam_id]["date"] = stat["date"]
if exam_id not in exam_map:
exam_map[exam_id] = {"stat_ids": [], "score": 0}
exam_map[exam_id]["stat_ids"].append(stat["id"])
if session_key not in exam_map:
exam_map[session_key] = {"stat_ids": [], "score": 0}
exam_map[session_key]["stat_ids"].append(stat["id"])
exam = self._get_doc_by_id(module, exam_id)
if module == "listening":
exercises[module][exam_id]["exercises"].extend(self._get_listening_solutions(stat, exam))
if module == "reading":
exercises[module][exam_id]["exercises"].extend(self._get_reading_solutions(stat, exam))
if module == "writing":
exercises[module][exam_id]["exercises"].extend(self._get_writing_prompts_and_answers(stat, exam))
exercises[session_key][module][exam_id]["exercises"].extend(
self._get_listening_solutions(stat, exam))
elif module == "reading":
exercises[session_key][module][exam_id]["exercises"].extend(
self._get_reading_solutions(stat, exam))
elif module == "writing":
exercises[session_key][module][exam_id]["exercises"].extend(
self._get_writing_prompts_and_answers(stat, exam)
)
elif module == "speaking":
exercises[session_key][module][exam_id]["exercises"].extend(
self._get_speaking_solutions(stat, exam)
)
elif module == "level":
exercises[session_key][module][exam_id]["exercises"].extend(
self._get_level_solutions(stat, exam)
)
exam_map[exam_id]["score"] = round((exam_total_correct / exam_total_questions) * 100)
exam_map[exam_id]["module"] = module
return exercises, exam_map
exam_map[session_key]["score"] = round((exam_total_correct / exam_total_questions) * 100)
exam_map[session_key]["module"] = module
with open('exam_result.json', 'w') as file:
json.dump({"exams": exercises}, file, indent=4)
return {"exams": exercises}, exam_map
def _get_writing_prompts_and_answers(self, stat, exam):
result = []
@@ -211,6 +248,54 @@ class TrainingContentService:
return result
@staticmethod
def _get_mc_question(exercise, stat):
shuffle_maps = stat.get("shuffleMaps", [])
answer = stat["solutions"] if len(shuffle_maps) == 0 else []
if len(shuffle_maps) != 0:
for solution in stat["solutions"]:
shuffle_map = [
item["map"] for item in shuffle_maps
if item["questionID"] == solution["question"]
]
answer.append({
"question": solution["question"],
"option": shuffle_map[solution["option"]]
})
return {
"question": exercise["prompt"],
"exercise": exercise["questions"],
"answer": stat["solutions"]
}
@staticmethod
def _swap_key_name(d, original_key, new_key):
d[new_key] = d.pop(original_key)
return d
def _get_level_solutions(self, stat, exam):
result = []
try:
for part in exam["parts"]:
for exercise in part["exercises"]:
if exercise["id"] == stat["exercise"]:
if stat["type"] == "fillBlanks":
result.append({
"prompt": exercise["prompt"],
"template": exercise["text"],
"words": exercise["words"],
"solutions": exercise["solutions"],
"answer": [
self._swap_key_name(item, 'solution', 'option')
for item in stat["solutions"]
]
})
elif stat["type"] == "multipleChoice":
result.append(self._get_mc_question(exercise, stat))
except KeyError as e:
self._logger.warning(f"Malformed stat object: {str(e)}")
return result
def _get_listening_solutions(self, stat, exam):
result = []
try:
@@ -224,16 +309,54 @@ class TrainingContentService:
"solution": exercise["solutions"],
"answer": stat["solutions"]
})
if stat["type"] == "multipleChoice":
elif stat["type"] == "fillBlanks":
result.append({
"question": exercise["prompt"],
"exercise": exercise["questions"],
"template": exercise["text"],
"words": exercise["words"],
"solutions": exercise["solutions"],
"answer": stat["solutions"]
})
elif stat["type"] == "multipleChoice":
result.append(self._get_mc_question(exercise, stat))
except KeyError as e:
self._logger.warning(f"Malformed stat object: {str(e)}")
return result
@staticmethod
def _find_shuffle_map(shuffle_maps, question_id):
return next((item["map"] for item in shuffle_maps if item["questionID"] == question_id), None)
def _get_speaking_solutions(self, stat, exam):
result = {}
try:
result = {
"comments": {
key: value['comment'] for key, value in stat['solutions'][0]['evaluation']['task_response'].items()}
,
"exercises": {}
}
for exercise in exam["exercises"]:
if exercise["id"] == stat["exercise"]:
if stat["type"] == "interactiveSpeaking":
for i in range(len(exercise["prompts"])):
result["exercises"][f"exercise_{i+1}"] = {
"question": exercise["prompts"][i]["text"]
}
for i in range(len(exercise["prompts"])):
answer = stat['solutions'][0]["evaluation"].get(f'transcript_{i+1}', '')
result["exercises"][f"exercise_{i+1}"]["answer"] = answer
elif stat["type"] == "speaking":
result["exercises"]["exercise_1"] = {
"question": exercise["text"],
"answer": stat['solutions'][0]["evaluation"].get(f'transcript', '')
}
except KeyError as e:
self._logger.warning(f"Malformed stat object: {str(e)}")
return [result]
def _get_reading_solutions(self, stat, exam):
result = []
try:
@@ -258,8 +381,13 @@ class TrainingContentService:
"solutions": exercise["solutions"],
"answer": stat["solutions"]
})
else:
# match_sentences
elif stat["type"] == "trueFalse":
result.append({
"text": text,
"questions": exercise["questions"],
"answer": stat["solutions"]
})
elif stat["type"] == "matchSentences":
result.append({
"text": text,
"question": exercise["prompt"],