ENCOA-94: Added user to training content docs, added support for shuffles, tweaked training prompt

2024-08-26 18:14:57 +01:00
parent 03f5b7d72c cf7a966141
commit efff0b904e
6 changed files with 751 additions and 332 deletions
--- a/modules/training_content/service.py
+++ b/modules/training_content/service.py
@@ -1,3 +1,4 @@
+import json
 from datetime import datetime
 from logging import getLogger

@@ -24,7 +25,8 @@ class TrainingContentService:
        self._logger = getLogger(__name__)
        self._llm = openai

-    def get_tips(self, stats):
+    def get_tips(self, training_content):
+        user, stats = training_content["userID"], training_content["stats"]
        exam_data, exam_map = self._sort_out_solutions(stats)
        training_content = self._get_exam_details_and_tips(exam_data)
        tips = self._query_kb(training_content.queries)
@@ -39,7 +41,8 @@ class TrainingContentService:
            'created_at': int(datetime.now().timestamp() * 1000),
            **exam_map,
            **usefull_tips.dict(),
-            **weak_areas
+            **weak_areas,
+            "user": user
        }
        doc_ref = self._db.collection('training').add(training_doc)
        return {
@@ -70,7 +73,6 @@ class TrainingContentService:

        tips = {"tips": []}
        for query in queries:
-            print(f"{query.category} {query.text}")
            if query.category == "words":
                tips["tips"].extend(
                    self._training_content_module.query_knowledge_base(query.text, "word_link")
@@ -104,7 +106,16 @@ class TrainingContentService:
                    ' with sentence structure and punctuation.", the "queries" field is where you will write queries '
                    'for tips that will be displayed to the student, the category attribute is a collection of '
                    'embeddings and the text will be the text used to query the knowledge base. The categories are '
-                    f'the following [{", ".join(self.TOOLS)}].'
+                    f'the following [{", ".join(self.TOOLS)}]. The exam data will be a json where the key of the field '
+                    '"exams" is the exam id, an exam can be composed of multiple modules or single modules. The student'
+                    ' will see your response so refrain from using phrasing like "The student" did x, y and z. If the '
+                    'field "answer" in a question is an empty array "[]", then the student didn\'t answer any question '
+                    'and you must address that in your response. Also questions aren\'t modules, the only modules are: '
+                    'level, speaking, writing, reading and listening. The details array needs to be tailored to the '
+                    'exam attempt, even if you receive the same exam you must treat as different exams by their id.'
+                    'Don\'t make references to an exam by it\'s id, the GUI will handle that so the student knows '
+                    'which is the exam your comments and summary are referencing too. Even if the student hasn\'t '
+                    'submitted no answers for an exam, you must still fill the details structure addressing that fact.'
                )
            },
            {
@@ -150,42 +161,68 @@ class TrainingContentService:
    def _sort_out_solutions(self, stats):
+        """Group stats by exam session and module and attach the matching exam solutions.
+
+        A session is identified by the stat's ``date`` and ``user`` fields.
+
+        Returns a tuple ``({"exams": exercises}, exam_map)``. ``exercises`` is nested as
+        session key -> module -> exam id -> ``{"date", "exercises"}``; ``exam_map`` maps
+        every session key to its ``stat_ids``, ``score`` (percentage) and ``module``.
+        """
        grouped_stats = {}
        for stat in stats:
-            exam_id = stat["exam"]
+            session_key = f'{str(stat["date"])}-{stat["user"]}'
            module = stat["module"]
-            if module not in grouped_stats:
-                grouped_stats[module] = {}
-            if exam_id not in grouped_stats[module]:
-                grouped_stats[module][exam_id] = []
-            grouped_stats[module][exam_id].append(stat)
+            exam_id = stat["exam"]
+
+            if session_key not in grouped_stats:
+                grouped_stats[session_key] = {}
+            if module not in grouped_stats[session_key]:
+                # The exam id of the first stat seen for this session/module is used for the whole group.
+                grouped_stats[session_key][module] = {
+                    "stats": [],
+                    "exam_id": exam_id
+                }
+            grouped_stats[session_key][module]["stats"].append(stat)

        exercises = {}
        exam_map = {}
-        for module, exams in grouped_stats.items():
-            exercises[module] = {}
-            for exam_id, stat_group in exams.items():
-                exam = self._get_doc_by_id(module, exam_id)
-                exercises[module][exam_id] = {"date": None, "exercises": [], "score": None}
+        for session_key, modules in grouped_stats.items():
+            exercises[session_key] = {}
+            for module, module_stats in modules.items():
+                exercises[session_key][module] = {}
+
+                exam_id = module_stats["exam_id"]
+                # Loop-invariant for every stat of this module, so fetch the exam document once.
+                exam = self._get_doc_by_id(module, exam_id)
+                if exam_id not in exercises[session_key][module]:
+                    exercises[session_key][module][exam_id] = {"date": None, "exercises": []}
+
                exam_total_questions = 0
                exam_total_correct = 0
-                for stat in stat_group:
+
+                for stat in module_stats["stats"]:
                    exam_total_questions += stat["score"]["total"]
                    exam_total_correct += stat["score"]["correct"]
-                    exercises[module][exam_id]["date"] = stat["date"]
+                    exercises[session_key][module][exam_id]["date"] = stat["date"]

-                    if exam_id not in exam_map:
-                        exam_map[exam_id] = {"stat_ids": [], "score": 0}
-                    exam_map[exam_id]["stat_ids"].append(stat["id"])
+                    if session_key not in exam_map:
+                        exam_map[session_key] = {"stat_ids": [], "score": 0}
+                    exam_map[session_key]["stat_ids"].append(stat["id"])

                    if module == "listening":
-                        exercises[module][exam_id]["exercises"].extend(self._get_listening_solutions(stat, exam))
-                    if module == "reading":
-                        exercises[module][exam_id]["exercises"].extend(self._get_reading_solutions(stat, exam))
-                    if module == "writing":
-                        exercises[module][exam_id]["exercises"].extend(self._get_writing_prompts_and_answers(stat, exam))
+                        exercises[session_key][module][exam_id]["exercises"].extend(
+                            self._get_listening_solutions(stat, exam))
+                    elif module == "reading":
+                        exercises[session_key][module][exam_id]["exercises"].extend(
+                            self._get_reading_solutions(stat, exam))
+                    elif module == "writing":
+                        exercises[session_key][module][exam_id]["exercises"].extend(
+                            self._get_writing_prompts_and_answers(stat, exam)
+                        )
+                    elif module == "speaking":
+                        exercises[session_key][module][exam_id]["exercises"].extend(
+                            self._get_speaking_solutions(stat, exam)
+                        )
+                    elif module == "level":
+                        exercises[session_key][module][exam_id]["exercises"].extend(
+                            self._get_level_solutions(stat, exam)
+                        )

-                exam_map[exam_id]["score"] = round((exam_total_correct / exam_total_questions) * 100)
-                exam_map[exam_id]["module"] = module
-        return exercises, exam_map
+                # NOTE(review): score and module are stored per session, so for a session with
+                # several modules only the last module processed is kept - confirm this is intended.
+                # A zero question total yields a score of 0 instead of a ZeroDivisionError.
+                exam_map[session_key]["score"] = (
+                    round((exam_total_correct / exam_total_questions) * 100) if exam_total_questions else 0
+                )
+                exam_map[session_key]["module"] = module
+
+        return {"exams": exercises}, exam_map

    def _get_writing_prompts_and_answers(self, stat, exam):
        result = []
@@ -211,6 +248,54 @@ class TrainingContentService:

        return result

+    @staticmethod
+    def _get_mc_question(exercise, stat):
+        """Build the multiple-choice entry for ``exercise`` together with the student's answers.
+
+        When the stat carries ``shuffleMaps``, each selected option is translated through
+        the map whose ``questionID`` matches the answered question, and the translated
+        answers are returned. Without shuffle maps ``stat["solutions"]`` is returned as is.
+
+        Returns a dict with the keys ``question``, ``exercise`` and ``answer``.
+        Raises KeyError when a required field is missing from ``exercise`` or ``stat``.
+        """
+        solutions = stat["solutions"]
+        shuffle_maps = stat.get("shuffleMaps") or []
+        if not shuffle_maps:
+            answer = solutions
+        else:
+            answer = []
+            for solution in solutions:
+                question_id = solution["question"]
+                option = solution["option"]
+                shuffle_map = next(
+                    (item["map"] for item in shuffle_maps if item["questionID"] == question_id),
+                    None,
+                )
+                # NOTE(review): assumes "map" is indexable by the selected option
+                # (shuffled option -> original option) - confirm against the client payload.
+                # A question without a usable map entry keeps the option as submitted.
+                try:
+                    option = shuffle_map[option]
+                except (KeyError, IndexError, TypeError):
+                    pass
+                answer.append({"question": question_id, "option": option})
+        return {
+            "question": exercise["prompt"],
+            "exercise": exercise["questions"],
+            "answer": answer
+        }
+
+    @staticmethod
+    def _swap_key_name(d, original_key, new_key):
+        """Rename ``original_key`` to ``new_key`` inside ``d`` and return ``d``.
+
+        The dict is modified in place; KeyError is raised when ``original_key`` is absent.
+        """
+        moved_value = d.pop(original_key)
+        d[new_key] = moved_value
+        return d
+
+    def _get_level_solutions(self, stat, exam):
+        """Collect the level-exam exercises referenced by ``stat`` with the student's answers.
+
+        Every exercise in ``exam["parts"]`` whose ``id`` equals ``stat["exercise"]`` yields
+        one entry, shaped according to ``stat["type"]`` (``fillBlanks`` or ``multipleChoice``).
+        For ``fillBlanks`` the ``solution`` key of each item in ``stat["solutions"]`` is
+        renamed to ``option`` in place. A KeyError is logged as a warning and whatever was
+        collected up to that point is returned.
+        """
+        collected = []
+        try:
+            for part in exam["parts"]:
+                for exercise in part["exercises"]:
+                    if exercise["id"] != stat["exercise"]:
+                        continue
+                    kind = stat["type"]
+                    if kind == "multipleChoice":
+                        collected.append(self._get_mc_question(exercise, stat))
+                    elif kind == "fillBlanks":
+                        entry = {
+                            "prompt": exercise["prompt"],
+                            "template": exercise["text"],
+                            "words": exercise["words"],
+                            "solutions": exercise["solutions"],
+                            "answer": [
+                                self._swap_key_name(given, 'solution', 'option')
+                                for given in stat["solutions"]
+                            ]
+                        }
+                        collected.append(entry)
+        except KeyError as e:
+            self._logger.warning(f"Malformed stat object: {str(e)}")
+        return collected
+
    def _get_listening_solutions(self, stat, exam):
        result = []
        try:
@@ -224,16 +309,54 @@ class TrainingContentService:
                                "solution": exercise["solutions"],
                                "answer": stat["solutions"]
                            })
-                        if stat["type"] == "multipleChoice":
+                        elif stat["type"] == "fillBlanks":
                            result.append({
                                "question": exercise["prompt"],
-                                "exercise": exercise["questions"],
+                                "template": exercise["text"],
+                                "words": exercise["words"],
+                                "solutions": exercise["solutions"],
                                "answer": stat["solutions"]
                            })
+                        elif stat["type"] == "multipleChoice":
+                            result.append(self._get_mc_question(exercise, stat))
+
        except KeyError as e:
            self._logger.warning(f"Malformed stat object: {str(e)}")
        return result

+    @staticmethod
+    def _find_shuffle_map(shuffle_maps, question_id):
+        """Return the ``"map"`` of the first entry whose ``"questionID"`` equals ``question_id``.
+
+        Returns None when no entry matches.
+        """
+        for entry in shuffle_maps:
+            if entry["questionID"] == question_id:
+                return entry["map"]
+        return None
+
+    def _get_speaking_solutions(self, stat, exam):
+        """Pair the speaking prompts of the exercise referenced by ``stat`` with the transcripts.
+
+        Reads the evaluation from ``stat['solutions'][0]``: the ``comment`` of every
+        ``task_response`` entry goes under ``"comments"``, and each prompt of the matching
+        exercise in ``exam["exercises"]`` goes under ``"exercises"`` as ``exercise_<n>`` with
+        its ``question`` and ``answer`` (the transcript, or ``''`` when there is none).
+
+        Returns a one-element list so callers can ``extend`` with it. A stat with no
+        solutions or with missing keys is logged as a warning and whatever was built up to
+        that point is returned (``[{}]`` when nothing could be read).
+        """
+        result = {}
+        try:
+            # IndexError here means the student submitted nothing for this exercise.
+            evaluation = stat['solutions'][0]['evaluation']
+            result = {
+                "comments": {
+                    key: value['comment'] for key, value in evaluation['task_response'].items()
+                },
+                "exercises": {}
+            }
+
+            for exercise in exam["exercises"]:
+                if exercise["id"] != stat["exercise"]:
+                    continue
+                if stat["type"] == "interactiveSpeaking":
+                    # Transcripts are stored as transcript_1, transcript_2, ... matching prompt order.
+                    for number, prompt in enumerate(exercise["prompts"], start=1):
+                        result["exercises"][f"exercise_{number}"] = {
+                            "question": prompt["text"],
+                            "answer": evaluation.get(f'transcript_{number}', '')
+                        }
+                elif stat["type"] == "speaking":
+                    result["exercises"]["exercise_1"] = {
+                        "question": exercise["text"],
+                        "answer": evaluation.get('transcript', '')
+                    }
+        except (KeyError, IndexError) as e:
+            self._logger.warning(f"Malformed stat object: {str(e)}")
+        return [result]
+
    def _get_reading_solutions(self, stat, exam):
        result = []
        try:
@@ -258,8 +381,13 @@ class TrainingContentService:
                                "solutions": exercise["solutions"],
                                "answer": stat["solutions"]
                            })
-                        else:
-                            # match_sentences
+                        elif stat["type"] == "trueFalse":
+                            result.append({
+                                "text": text,
+                                "questions": exercise["questions"],
+                                "answer": stat["solutions"]
+                            })
+                        elif stat["type"] == "matchSentences":
                            result.append({
                                "text": text,
                                "question": exercise["prompt"],