from logging import getLogger
from typing import Any, Dict, List

from training_content.dtos import TrainingContentDTO, WeakAreaDTO, QueryDTO, DetailsDTO, TipsDTO


class TrainingContentService:
    """Turn a student's exam statistics into a stored training document.

    The flow implemented by :meth:`get_tips` is: group the stats per module
    and exam, ask the LLM for per-exam details, weak areas and knowledge-base
    queries, run those queries, ask the LLM which of the returned tips are
    useful, and persist the combined result in the ``training`` collection.
    """

    # Tool categories offered to the LLM when it writes knowledge-base queries.
    TOOLS = [
        'critical_thinking',
        'language_for_writing',
        'reading_skills',
        'strategy',
        'words',
        'writing_skills'
    ]
    # Knowledge-base category names the tools above are translated to:
    # strategy word_link ct_focus reading_skill word_partners writing_skill language_for_writing

    # Tool category -> knowledge-base categories to query. "words" fans out
    # to two knowledge-base categories; every other tool maps to one.
    _KB_CATEGORIES = {
        "critical_thinking": ("ct_focus",),
        "language_for_writing": ("language_for_writing",),
        "reading_skills": ("reading_skill",),
        "strategy": ("strategy",),
        "words": ("word_link", "word_partners"),
        "writing_skills": ("writing_skill",),
    }

    def __init__(self, kb, openai, firestore):
        """Store the collaborators used by the service.

        Args:
            kb: Object exposing ``query_knowledge_base(text, category)``.
            openai: Object exposing ``prediction(messages, mapper, json_schema)``.
            firestore: Database client exposing ``collection(name)``
                (presumably a Firestore client -- confirm against the caller).
        """
        self._training_content_module = kb
        self._db = firestore
        self._logger = getLogger(__name__)
        self._llm = openai

    def get_tips(self, stats):
        """Generate training content for ``stats`` and persist it.

        Args:
            stats: Iterable of stat dicts; see :meth:`_sort_out_solutions` for
                the keys that are read.

        Returns:
            ``{"id": <id of the document added to the 'training' collection>}``.
        """
        exam_data, exam_map = self._sort_out_solutions(stats)
        training_content = self._get_exam_details_and_tips(exam_data)
        tips = self._query_kb(training_content.queries)
        usefull_tips = self._get_usefull_tips(exam_data, tips)
        exam_map = self._merge_exam_map_with_details(exam_map, training_content.details)

        weak_areas = {"weak_areas": [area.dict() for area in training_content.weak_areas]}

        training_doc = {
            **exam_map,
            **usefull_tips.dict(),
            **weak_areas
        }
        # ``add`` returns a pair; the document reference is its second element.
        doc_ref = self._db.collection('training').add(training_doc)
        return {
            "id": doc_ref[1].id
        }

    @staticmethod
    def _merge_exam_map_with_details(exam_map: Dict[str, Any], details: List[DetailsDTO]):
        """Combine the LLM-produced exam details with the locally computed exam map.

        Args:
            exam_map: Mapping of exam id to ``{"stat_ids": [...], "score": int}``.
            details: Per-exam details returned by the LLM.

        Returns:
            ``{"exams": [...]}`` with one entry per detail, holding the detail
            fields plus the matching ``exam_map`` entry.

        Raises:
            KeyError: If a detail carries an ``exam_id`` missing from ``exam_map``.
        """
        # NOTE(review): the exam ids come from the LLM response, so an id that
        # is not in ``exam_map`` aborts the whole merge with KeyError.
        return {
            "exams": [
                {
                    "id": detail.exam_id,
                    "date": detail.date,
                    "performance_comment": detail.performance_comment,
                    "detailed_summary": detail.detailed_summary,
                    **exam_map[detail.exam_id]
                }
                for detail in details
            ]
        }

    def _query_kb(self, queries: List[QueryDTO]):
        """Run every query against the knowledge base and collect the tips.

        Queries whose category is not a known tool are logged and skipped.

        Args:
            queries: Queries (text + tool category) written by the LLM.

        Returns:
            ``{"tips": [...]}`` with the concatenated knowledge-base results.
        """
        tips = {"tips": []}
        for query in queries:
            # Debug trace of each query (was a bare print to stdout).
            self._logger.debug("%s %s", query.category, query.text)

            kb_categories = self._KB_CATEGORIES.get(query.category)
            if kb_categories is None:
                self._logger.info(f"GTP tried to query knowledge base for {query.category} and it doesn't exist.")
                continue

            for kb_category in kb_categories:
                tips["tips"].extend(
                    self._training_content_module.query_knowledge_base(query.text, kb_category)
                )
        return tips

    def _get_exam_details_and_tips(self, exam_data: Dict[str, Any]) -> TrainingContentDTO:
        """Ask the LLM for exam details, weak areas and knowledge-base queries.

        Args:
            exam_data: Grouped exam data produced by :meth:`_sort_out_solutions`.

        Returns:
            Whatever ``self._llm.prediction`` returns for this prompt; the
            response mapper passed to it builds a ``TrainingContentDTO``.
        """
        json_schema = (
            '{ "details": [{"exam_id": "", "date": 0, "performance_comment": "", "detailed_summary": ""}],'
            ' "weak_areas": [{"area": "", "comment": ""}], "queries": [{"text": "", "category": ""}] }'
        )
        messages = [
            {
                "role": "user",
                "content": (
                    f"I'm going to provide you with exam data, you will take the exam data and fill this json "
                    f'schema : {json_schema}. "performance_comment" is a short sentence that describes the '
                    'students\'s performance and main mistakes in a single exam, "detailed_summary" is a detailed '
                    'summary of the student\'s performance, "weak_areas" are identified areas'
                    ' across all exams which need to be improved upon, for example, area "Grammar and Syntax" comment "Issues'
                    ' with sentence structure and punctuation.", the "queries" field is where you will write queries '
                    'for tips that will be displayed to the student, the category attribute is a collection of '
                    'embeddings and the text will be the text used to query the knowledge base. The categories are '
                    f'the following [{", ".join(self.TOOLS)}].'
                )
            },
            {
                "role": "user",
                "content": f'Exam Data: {str(exam_data)}'
            }
        ]
        return self._llm.prediction(messages, self._map_gpt_response, json_schema)

    def _get_usefull_tips(self, exam_data: Dict[str, Any], tips: Dict[str, Any]) -> TipsDTO:
        """Ask the LLM which of the retrieved tips are useful for this student.

        Args:
            exam_data: Grouped exam data produced by :meth:`_sort_out_solutions`.
            tips: ``{"tips": [...]}`` as returned by :meth:`_query_kb`.

        Returns:
            Whatever ``self._llm.prediction`` returns for this prompt; the
            response mapper passed to it builds a ``TipsDTO``.
        """
        json_schema = (
            '{ "tip_ids": [] }'
        )
        messages = [
            {
                "role": "user",
                "content": (
                    f"I'm going to provide you with tips and I want you to return to me the tips that "
                    f"can be usefull for the student that made the exam that I'm going to send you, return "
                    f"me the tip ids in this json format {json_schema}."
                )
            },
            {
                "role": "user",
                "content": f'Exam Data: {str(exam_data)}'
            },
            {
                "role": "user",
                "content": f'Tips: {str(tips)}'
            }
        ]
        return self._llm.prediction(messages, lambda response: TipsDTO(**response), json_schema)

    @staticmethod
    def _map_gpt_response(response: Dict[str, Any]) -> TrainingContentDTO:
        """Convert the raw LLM response dict into a ``TrainingContentDTO``.

        Raises:
            KeyError: If ``details``, ``weak_areas`` or ``queries`` is missing.
        """
        parsed_response = {
            "details": [DetailsDTO(**detail) for detail in response["details"]],
            "weak_areas": [WeakAreaDTO(**area) for area in response["weak_areas"]],
            "queries": [QueryDTO(**query) for query in response["queries"]]
        }
        return TrainingContentDTO(**parsed_response)

    def _sort_out_solutions(self, stats):
        """Group stats per module and exam and pair them with the exam content.

        Each stat is read for ``exam``, ``module``, ``date``, ``id`` and
        ``score`` (``total`` / ``correct``); the solution extractors read
        further keys. The exam document is loaded from the collection named
        after the module.

        Args:
            stats: Iterable of stat dicts.

        Returns:
            A pair ``(exercises, exam_map)``:

            * ``exercises[module][exam_id]`` is
              ``{"date": ..., "exercises": [...], "score": None}``.
            * ``exam_map[exam_id]`` is ``{"stat_ids": [...], "score": int}``
              where ``score`` is the rounded percentage of correct answers
              (0 when the exam has no questions).
        """
        grouped_stats = {}
        for stat in stats:
            exam_id = stat["exam"]
            module = stat["module"]
            grouped_stats.setdefault(module, {}).setdefault(exam_id, []).append(stat)

        # Modules without an entry here contribute to the score only.
        extractors = {
            "listening": self._get_listening_solutions,
            "reading": self._get_reading_solutions,
            "writing": self._get_writing_prompts_and_answers,
        }

        exercises = {}
        exam_map = {}
        for module, exams in grouped_stats.items():
            exercises[module] = {}
            extract = extractors.get(module)
            for exam_id, stat_group in exams.items():
                exam = self._get_doc_by_id(module, exam_id)
                if exam is None:
                    # Without the exam document there is nothing to pair the
                    # answers with; subscripting None would raise TypeError.
                    self._logger.warning(
                        f"Exam {exam_id} not found in collection {module}; skipping its exercises."
                    )

                # NOTE(review): the "score" placeholder below is never filled
                # in by this method; only exam_map carries the computed score.
                exam_entry = {"date": None, "exercises": [], "score": None}
                exercises[module][exam_id] = exam_entry

                exam_total_questions = 0
                exam_total_correct = 0
                for stat in stat_group:
                    exam_total_questions += stat["score"]["total"]
                    exam_total_correct += stat["score"]["correct"]
                    # The last stat of the group determines the reported date.
                    exam_entry["date"] = stat["date"]

                    exam_map.setdefault(exam_id, {"stat_ids": [], "score": 0})
                    exam_map[exam_id]["stat_ids"].append(stat["id"])

                    if extract is not None and exam is not None:
                        exam_entry["exercises"].extend(extract(stat, exam))

                # Guard against exams whose stats report zero questions.
                if exam_total_questions:
                    score = round((exam_total_correct / exam_total_questions) * 100)
                else:
                    score = 0
                exam_map[exam_id]["score"] = score

        return exercises, exam_map

    def _get_writing_prompts_and_answers(self, stat, exam):
        """Pair each writing answer in ``stat`` with the prompt of its exercise.

        Args:
            stat: Stat dict with a ``solutions`` list of ``{"id", "solution"}``.
            exam: Exam dict with an ``exercises`` list of ``{"id", "prompt"}``.

        Returns:
            List of ``{"exercise": prompt, "answer": answer}``; whatever was
            collected before a missing key was hit (the problem is logged).
        """
        result = []
        try:
            # Read all answers first so a malformed solution yields no output.
            answers = [
                {"exercise_id": solution['id'], "answer": solution['solution']}
                for solution in stat['solutions']
            ]
            for answer in answers:
                for exam_exercise in exam["exercises"]:
                    if exam_exercise["id"] == answer["exercise_id"]:
                        result.append({
                            "exercise": exam_exercise["prompt"],
                            "answer": answer["answer"]
                        })
        except KeyError as e:
            self._logger.warning(f"Malformed stat object: {str(e)}")
        return result

    def _get_listening_solutions(self, stat, exam):
        """Collect the listening exercise matching ``stat`` with the given answer.

        Only ``writeBlanks`` and ``multipleChoice`` stats produce output.

        Args:
            stat: Stat dict with ``exercise``, ``type`` and ``solutions``.
            exam: Exam dict with ``parts``, each holding ``exercises``.

        Returns:
            List of exercise/answer dicts; whatever was collected before a
            missing key was hit (the problem is logged).
        """
        result = []
        try:
            for part in exam["parts"]:
                for exercise in part["exercises"]:
                    if exercise["id"] != stat["exercise"]:
                        continue
                    if stat["type"] == "writeBlanks":
                        result.append({
                            "question": exercise["prompt"],
                            "template": exercise["text"],
                            "solution": exercise["solutions"],
                            "answer": stat["solutions"]
                        })
                    if stat["type"] == "multipleChoice":
                        result.append({
                            "question": exercise["prompt"],
                            "exercise": exercise["questions"],
                            "answer": stat["solutions"]
                        })
        except KeyError as e:
            self._logger.warning(f"Malformed stat object: {str(e)}")
        return result

    def _get_reading_solutions(self, stat, exam):
        """Collect the reading exercise matching ``stat`` with the given answer.

        ``fillBlanks`` and ``writeBlanks`` have dedicated shapes; every other
        type is treated as a match-sentences exercise.

        Args:
            stat: Stat dict with ``exercise``, ``type`` and ``solutions``.
            exam: Exam dict with ``parts``, each holding ``text`` and ``exercises``.

        Returns:
            List of exercise/answer dicts; whatever was collected before a
            missing key was hit (the problem is logged).
        """
        result = []
        try:
            for part in exam["parts"]:
                text = part["text"]
                for exercise in part["exercises"]:
                    if exercise["id"] != stat["exercise"]:
                        continue
                    if stat["type"] == "fillBlanks":
                        result.append({
                            "text": text,
                            "question": exercise["prompt"],
                            "template": exercise["text"],
                            "words": exercise["words"],
                            "solutions": exercise["solutions"],
                            "answer": stat["solutions"]
                        })
                    elif stat["type"] == "writeBlanks":
                        result.append({
                            "text": text,
                            "question": exercise["prompt"],
                            "template": exercise["text"],
                            "solutions": exercise["solutions"],
                            "answer": stat["solutions"]
                        })
                    else:
                        # match_sentences
                        result.append({
                            "text": text,
                            "question": exercise["prompt"],
                            "sentences": exercise["sentences"],
                            "options": exercise["options"],
                            "answer": stat["solutions"]
                        })
        except KeyError as e:
            self._logger.warning(f"Malformed stat object: {str(e)}")
        return result

    def _get_doc_by_id(self, collection: str, doc_id: str):
        """Fetch one document as a dict.

        Args:
            collection: Name of the collection to read from.
            doc_id: Id of the document inside that collection.

        Returns:
            The document contents as a dict, or ``None`` when it does not exist.
        """
        doc = self._db.collection(collection).document(doc_id).get()
        if doc.exists:
            return doc.to_dict()
        return None