Brushed up the backend; added Writing Task 1 Academic prompt generation and grading (ENCOA-274)

This commit is contained in:
Carlos-Mesquita
2024-12-10 22:24:40 +00:00
parent 68cab80851
commit 6982068864
167 changed files with 1411 additions and 1229 deletions

View File

@@ -0,0 +1,80 @@
from typing import List, Dict, Optional
from fastapi import UploadFile
from ielts_be.repositories import IFileStorage
from ielts_be.services import IWritingService, ILLMService, IAIDetectorService
from ielts_be.configs.constants import GPTModels, TemperatureSettings
from .academic import get_writing_args_academic
from .general import get_writing_args_general
from .grade import GradeWriting
class WritingService(IWritingService):
    """Generates IELTS writing prompts and delegates answer grading.

    Prompt generation is done through the injected LLM service; grading is
    delegated to a `GradeWriting` collaborator built in the constructor.
    """

    # System message shared by both question-generation endpoints; instructs
    # the model to reply as JSON with a single "prompt" key.
    _SYSTEM_MESSAGE = {
        "role": "system",
        "content": (
            'You are a helpful assistant designed to output JSON on this format: {"prompt": "prompt content"}'
        )
    }

    def __init__(self, llm: ILLMService, ai_detector: IAIDetectorService, file_storage: IFileStorage):
        self._llm = llm
        self._grade = GradeWriting(llm, file_storage, ai_detector)

    async def _generate_question(self, task: int, user_messages: List[Dict]) -> str:
        """Run the prompt-generation LLM call shared by both question endpoints.

        Task 1 uses the cheaper GPT-3.5 model and gets hyphenated bullet
        lists reflowed onto separate lines; task 2 uses GPT-4o.
        """
        messages = [dict(self._SYSTEM_MESSAGE), *user_messages]
        llm_model = GPTModels.GPT_3_5_TURBO if task == 1 else GPTModels.GPT_4_O
        response = await self._llm.prediction(
            llm_model,
            messages,
            ["prompt"],
            TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )
        question = response["prompt"].strip()
        return self._add_newline_before_hyphen(question) if task == 1 else question

    async def get_writing_task_general_question(self, task: int, topic: str, difficulty: str):
        """Generate a General Training writing prompt for `topic`/`difficulty`."""
        question = await self._generate_question(
            task, get_writing_args_general(task, topic, difficulty)
        )
        return {
            "question": question,
            "difficulty": difficulty,
            "topic": topic
        }

    async def get_writing_task_academic_question(self, task: int, file: UploadFile, difficulty: str):
        """Generate an Academic writing prompt from an uploaded image.

        NOTE(review): `difficulty` is echoed back in the result but is not
        forwarded to the academic prompt builder — confirm that is intended.
        """
        question = await self._generate_question(
            task, await get_writing_args_academic(task, file)
        )
        return {
            "question": question,
            "difficulty": difficulty,
        }

    async def grade_writing_task(self, task: int, question: str, answer: str, attachment: Optional[str] = None):
        """Delegate grading to the `GradeWriting` collaborator."""
        return await self._grade.grade_writing_task(task, question, answer, attachment)

    @staticmethod
    def _add_newline_before_hyphen(s: str) -> str:
        # Turns inline " - bullet" fragments into one bullet per line.
        return s.replace(" -", "\n-")

View File

@@ -0,0 +1,48 @@
from base64 import b64encode
from typing import List, Dict
from fastapi.datastructures import UploadFile
async def get_writing_args_academic(task: int, attachment: UploadFile) -> List[Dict]:
    """Build the LLM messages for generating an Academic Writing Task prompt.

    Args:
        task: IELTS writing task number; only task 1 is supported.
        attachment: uploaded chart/graph image the generated prompt must describe.

    Returns:
        A list of message dicts: two user text messages plus, for task 1,
        an image payload built from the uploaded file.

    Raises:
        NotImplementedError: for task 2 (still served by the General prompts).
    """
    if task == 2:
        # BUG FIX: previously raised the `NotImplemented` singleton, which is
        # not an exception class — raising it triggers
        # "TypeError: exceptions must derive from BaseException".
        raise NotImplementedError(
            "Task 2 academic isn't implemented yet, current implementation still uses General Task 2 prompts."
        )
    writing_args = {
        "1": {
            "prompt": (
                'Analyze the uploaded image and create a detailed IELTS Writing Task 1 Academic prompt.\n'
                'Based on the visual data presented, craft a prompt that accurately reflects the image\'s '
                'content, complexity, and academic nature.\n'
            ),
            "instructions": (
                'The generated prompt must:\n'
                '1. Clearly describe the type of visual representation in the image\n'
                '2. Provide a concise context for the data shown\n'
                '3. End with the standard IELTS Task 1 Academic instruction:\n'
                '"Summarise the information by selecting and reporting the main features, and make comparisons where relevant."'
            )
        },
    }
    messages = [
        {
            "role": "user",
            "content": writing_args[str(task)]["prompt"]
        },
        {
            "role": "user",
            "content": writing_args[str(task)]["instructions"]
        }
    ]
    if task == 1:
        attachment_bytes = await attachment.read()
        # Image subtype is inferred from the filename extension.
        # NOTE(review): this entry has no "role" and mixes a content-part
        # shape into the top-level message list — confirm the LLM service
        # expects this format.
        messages.append({
            "type": "image_url",
            "image_url": {
                "url": f"data:image/{attachment.filename.split('.')[-1]};base64,{b64encode(attachment_bytes).decode('utf-8')}"
            }
        })
    return messages

View File

@@ -0,0 +1,44 @@
from typing import List, Dict
def get_writing_args_general(task: int, topic: str, difficulty: str) -> List[Dict]:
    """Build the two user messages that ask the LLM to generate a General
    Training writing prompt for the given task, topic and difficulty.

    Returns a list of two messages: the scenario prompt followed by the
    formatting instructions for that task.
    """
    task_one = {
        "prompt": (
            'Craft a prompt for an IELTS Writing Task 1 General Training exercise that instructs the '
            'student to compose a letter. The prompt should present a specific scenario or situation, '
            f'based on the topic of "{topic}", requiring the student to provide information, '
            'advice, or instructions within the letter. Make sure that the generated prompt is '
            f'of {difficulty} difficulty and does not contain forbidden subjects in muslim countries.'
        ),
        "instructions": (
            'The prompt should end with "In the letter you should" followed by 3 bullet points of what '
            'the answer should include.'
        )
    }
    # TODO: Should the muslim disclaimer be added to the task-2 prompt as well?
    task_two = {
        "prompt": (
            f'Craft a comprehensive question of {difficulty} difficulty like the ones for IELTS '
            'Writing Task 2 General Training that directs the candidate to delve into an in-depth '
            f'analysis of contrasting perspectives on the topic of "{topic}".'
        ),
        "instructions": (
            'The question should lead to an answer with either "theories", "complicated information" or '
            'be "very descriptive" on the topic.'
        )
    }
    selected = {"1": task_one, "2": task_two}[str(task)]
    return [
        {"role": "user", "content": selected["prompt"]},
        {"role": "user", "content": selected["instructions"]},
    ]

View File

@@ -0,0 +1,207 @@
import asyncio
from typing import Dict, Optional
from uuid import uuid4
from ielts_be.configs.constants import GPTModels, TemperatureSettings
from ielts_be.helpers import TextHelper, ExercisesHelper, FileHelper
from ielts_be.repositories import IFileStorage
from ielts_be.services import ILLMService, IAIDetectorService
class GradeWriting:
    """Grades an IELTS writing answer using an LLM plus auxiliary services.

    A grade combines four concurrently-run pieces: the rubric evaluation,
    a model-written "perfect answer", a spelling-corrected copy of the
    student's answer, and an AI-content detection result.
    """

    def __init__(self, llm: ILLMService, file_storage: IFileStorage, ai_detector: IAIDetectorService):
        # Injected collaborators used by grade_writing_task and its helpers.
        self._llm = llm
        self._file_storage = file_storage
        self._ai_detector = ai_detector

    async def grade_writing_task(self, task: int, question: str, answer: str, attachment: Optional[str] = None):
        """Grade `answer` against `question` for the given writing task.

        Args:
            task: IELTS writing task number (1 or 2); selects word minimums,
                model and temperature.
            question: the prompt the student answered.
            answer: the student's answer text.
            attachment: optional storage path of a task-1 image; when set the
                image is downloaded and attached to the evaluation request.

        Returns:
            A dict shaped like `_get_writing_template()` extended with
            "perfect_answer", "fixed_text" and (when detection ran)
            "ai_detection"; or a `_zero_rating` dict for unusable answers.
        """
        # Answers below these word counts are rejected without an LLM call.
        bare_minimum = 100 if task == 1 else 180
        if not TextHelper.has_words(answer):
            return self._zero_rating("The answer does not contain enough english words.")
        elif not TextHelper.has_x_words(answer, bare_minimum):
            return self._zero_rating("The answer is insufficient and too small to be graded.")
        else:
            # The template doubles as the JSON schema shown to the model.
            template = self._get_writing_template()
            messages = [
                {
                    "role": "system",
                    "content": (
                        f'You are a helpful assistant designed to output JSON on this format: {template}'
                    )
                },
                {
                    "role": "user",
                    "content": (
                        f'Evaluate the given Writing Task {task} response based on the IELTS grading system, '
                        'ensuring a strict assessment that penalizes errors. Deduct points for deviations '
                        'from the task, and assign a score of 0 if the response fails to address the question. '
                        'Additionally, provide a detailed commentary highlighting both strengths and '
                        'weaknesses in the response. '
                        f'\n Question: "{question}" \n Answer: "{answer}"')
                }
            ]
            if task == 1:
                if attachment is None:
                    # General Training letter: no image, so name the expected
                    # letter sections for the grader to reference.
                    messages.append({
                        "role": "user",
                        "content": (
                            'Refer to the parts of the letter as: "Greeting Opener", "bullet 1", "bullet 2", '
                            '"bullet 3", "closer (restate the purpose of the letter)", "closing greeting"'
                        )
                    })
                else:
                    # Academic task 1: download the referenced image and embed
                    # it base64-encoded so the model can see the chart/graph.
                    uuid = str(uuid4())
                    name = attachment.split('/')[-1]
                    out_path = f'./tmp/{uuid}/{name}'
                    path = await self._file_storage.download_firebase_file(attachment, out_path)
                    # NOTE(review): the downloaded file under ./tmp is never
                    # deleted — consider cleaning it up after encoding.
                    messages.append({
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/{name.split('.')[-1]};base64,{FileHelper.encode_image(path)}"
                        }
                    })
            llm_model = GPTModels.GPT_3_5_TURBO if task == 1 else GPTModels.GPT_4_O
            # NOTE(review): task-2 grading reuses the question-generation
            # temperature instead of GRADING_TEMPERATURE — confirm intended.
            temperature = (
                TemperatureSettings.GRADING_TEMPERATURE
                if task == 1 else
                TemperatureSettings.GEN_QUESTION_TEMPERATURE
            )
            evaluation_promise = self._llm.prediction(
                llm_model,
                messages,
                ["comment"],
                temperature
            )
            # Target length for the model-written reference answer.
            perfect_answer_minimum = 150 if task == 1 else 250
            perfect_answer_promise = self._get_perfect_answer(question, perfect_answer_minimum)
            fixed_text_promise = self._get_fixed_text(answer)
            ai_detection_promise = self._ai_detector.run_detection(answer)
            # Run all four LLM/detector calls concurrently.
            prediction_result, perfect_answer_result, fixed_text_result, ai_detection_result = await asyncio.gather(
                evaluation_promise,
                perfect_answer_promise,
                fixed_text_promise,
                ai_detection_promise
            )
            response = prediction_result
            response["perfect_answer"] = perfect_answer_result["perfect_answer"]
            # Reconcile the overall band with the per-criterion grades.
            response["overall"] = ExercisesHelper.fix_writing_overall(
                response["overall"],
                response["task_response"]
            )
            response['fixed_text'] = fixed_text_result
            if ai_detection_result is not None:
                response['ai_detection'] = ai_detection_result
            return response

    async def _get_fixed_text(self, text: str):
        """Return a spelling-corrected copy of `text` produced by the LLM."""
        messages = [
            {
                "role": "system",
                "content": (
                    'You are a helpful assistant designed to output JSON on this format: '
                    '{"fixed_text": "fixed test with no misspelling errors"}'
                )
            },
            {
                "role": "user",
                "content": (
                    'Fix the errors in the given text and put it in a JSON. '
                    f'Do not complete the answer, only replace what is wrong. \n The text: "{text}"'
                )
            }
        ]
        # NOTE(review): the trailing False is a positional flag to
        # ILLMService.prediction not passed by the other call sites —
        # confirm its meaning against the service's signature.
        response = await self._llm.prediction(
            GPTModels.GPT_3_5_TURBO,
            messages,
            ["fixed_text"],
            0.2,
            False
        )
        return response["fixed_text"]

    async def _get_perfect_answer(self, question: str, size: int) -> Dict:
        """Ask GPT-4o for a model answer of at least `size` words."""
        messages = [
            {
                "role": "system",
                "content": (
                    'You are a helpful assistant designed to output JSON on this format: '
                    '{"perfect_answer": "perfect answer for the question"}'
                )
            },
            {
                "role": "user",
                "content": f'Write a perfect answer for this writing exercise of a IELTS exam. Question: {question}'
            },
            {
                "role": "user",
                "content": f'The answer must have at least {size} words'
            }
        ]
        return await self._llm.prediction(
            GPTModels.GPT_4_O,
            messages,
            ["perfect_answer"],
            TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )

    @staticmethod
    def _zero_rating(comment: str):
        """Build an all-zero grade dict carrying `comment` as the reason."""
        return {
            'comment': comment,
            'overall': 0,
            'task_response': {
                'Task Achievement': {
                    "grade": 0.0,
                    "comment": ""
                },
                'Coherence and Cohesion': {
                    "grade": 0.0,
                    "comment": ""
                },
                'Lexical Resource': {
                    "grade": 0.0,
                    "comment": ""
                },
                'Grammatical Range and Accuracy': {
                    "grade": 0.0,
                    "comment": ""
                }
            }
        }

    @staticmethod
    def _get_writing_template():
        """Return the JSON response schema shown to the grading model."""
        return {
            "comment": "comment about student's response quality",
            "overall": 0.0,
            "task_response": {
                "Task Achievement": {
                    "grade": 0.0,
                    "comment": "comment about Task Achievement of the student's response"
                },
                "Coherence and Cohesion": {
                    "grade": 0.0,
                    "comment": "comment about Coherence and Cohesion of the student's response"
                },
                "Lexical Resource": {
                    "grade": 0.0,
                    "comment": "comment about Lexical Resource of the student's response"
                },
                "Grammatical Range and Accuracy": {
                    "grade": 0.0,
                    "comment": "comment about Grammatical Range and Accuracy of the student's response"
                }
            }
        }