From 196f9e9c3e8f1661cd1da96585d9b327789dda37 Mon Sep 17 00:00:00 2001 From: Carlos-Mesquita Date: Wed, 11 Dec 2024 15:23:00 +0000 Subject: [PATCH] ENCOA-274 and patch to the Dockerfile, in some merge the firebase tools were left out --- Dockerfile | 5 ++++ ielts_be/configs/dependency_injection.py | 2 +- ielts_be/controllers/impl/exam/writing.py | 1 - ielts_be/controllers/impl/user.py | 1 + ielts_be/dtos/writing.py | 1 - .../services/impl/exam/writing/__init__.py | 4 +-- .../services/impl/exam/writing/academic.py | 25 +++++++++++-------- ielts_be/services/impl/exam/writing/grade.py | 15 ++++++----- .../services/impl/third_parties/openai.py | 5 +++- 9 files changed, 36 insertions(+), 23 deletions(-) diff --git a/Dockerfile b/Dockerfile index 97e2f7c..0a9e536 100644 --- a/Dockerfile +++ b/Dockerfile @@ -29,6 +29,11 @@ RUN apt update && apt install -y \ librsvg2-bin \ && rm -rf /var/lib/apt/lists/* +RUN curl -sL https://deb.nodesource.com/setup_20.x | bash - \ + && apt-get install -y nodejs + +RUN npm install -g firebase-tools + RUN pip install --no-cache-dir -r /app/requirements.txt EXPOSE 8000 diff --git a/ielts_be/configs/dependency_injection.py b/ielts_be/configs/dependency_injection.py index 084ffe5..f7af3aa 100644 --- a/ielts_be/configs/dependency_injection.py +++ b/ielts_be/configs/dependency_injection.py @@ -94,7 +94,7 @@ class DependencyInjector: ) self._container.writing_service = providers.Factory( - WritingService, llm=self._container.llm, ai_detector=self._container.ai_detector + WritingService, llm=self._container.llm, ai_detector=self._container.ai_detector, file_storage=self._container.firebase_instance ) with open('ielts_be/services/impl/exam/level/mc_variants.json', 'r') as file: diff --git a/ielts_be/controllers/impl/exam/writing.py b/ielts_be/controllers/impl/exam/writing.py index 75bcc17..5fdf19b 100644 --- a/ielts_be/controllers/impl/exam/writing.py +++ b/ielts_be/controllers/impl/exam/writing.py @@ -15,5 +15,4 @@ class WritingController(IWritingController): async def get_writing_task_academic_question(self, task: int, attachment: UploadFile, difficulty: str): if attachment.content_type not in ['image/jpeg', 'image/png']: raise HTTPException(status_code=400, detail="Invalid file type. Only JPEG and PNG allowed.") - return await self._service.get_writing_task_academic_question(task, attachment, difficulty) diff --git a/ielts_be/controllers/impl/user.py b/ielts_be/controllers/impl/user.py index 9802e5d..52ecb93 100644 --- a/ielts_be/controllers/impl/user.py +++ b/ielts_be/controllers/impl/user.py @@ -7,6 +7,7 @@ class UserController(IUserController): def __init__(self, user_service: IUserService): self._service = user_service + print(self._service) async def batch_import(self, batch: BatchUsersDTO): return await self._service.batch_users(batch) diff --git a/ielts_be/dtos/writing.py b/ielts_be/dtos/writing.py index ba33aa1..4dc9b4d 100644 --- a/ielts_be/dtos/writing.py +++ b/ielts_be/dtos/writing.py @@ -9,5 +9,4 @@ class WritingGradeTaskDTO(BaseModel): exerciseId: str question: str answer: str - type: str attachment: Optional[str] diff --git a/ielts_be/services/impl/exam/writing/__init__.py b/ielts_be/services/impl/exam/writing/__init__.py index d18d68d..9c9691c 100644 --- a/ielts_be/services/impl/exam/writing/__init__.py +++ b/ielts_be/services/impl/exam/writing/__init__.py @@ -55,10 +55,8 @@ class WritingService(IWritingService): *(await get_writing_args_academic(task, file)) ] - llm_model = GPTModels.GPT_3_5_TURBO if task == 1 else GPTModels.GPT_4_O - response = await self._llm.prediction( - llm_model, + GPTModels.GPT_4_O, messages, ["prompt"], TemperatureSettings.GEN_QUESTION_TEMPERATURE diff --git a/ielts_be/services/impl/exam/writing/academic.py b/ielts_be/services/impl/exam/writing/academic.py index 59868fc..4e8081f 100644 --- a/ielts_be/services/impl/exam/writing/academic.py +++ b/ielts_be/services/impl/exam/writing/academic.py @@ -25,6 +25,9 @@ async def get_writing_args_academic(task: int, attachment: UploadFile) -> List[D if task == 2: raise NotImplemented("Task 2 academic isn't implemented yet, current implementation still uses General Task 2 prompts.") + attachment_bytes = await attachment.read() + + messages = [ { "role": "user", @@ -32,17 +35,19 @@ async def get_writing_args_academic(task: int, attachment: UploadFile) -> List[D }, { "role": "user", - "content": writing_args[str(task)]["instructions"] + "content": [ + { + "type": "text", + "text": writing_args[str(task)]["instructions"], + }, + { + "type": "image_url", + "image_url": { + "url": f"data:image/{attachment.filename.split('.')[-1]};base64,{b64encode(attachment_bytes).decode('utf-8')}" + } + } + ] } ] - if task == 1: - attachment_bytes = await attachment.read() - messages.append({ - "type": "image_url", - "image_url": { - "url": f"data:image/{attachment.filename.split('.')[-1]};base64,{b64encode(attachment_bytes).decode('utf-8')}" - } - }) - return messages diff --git a/ielts_be/services/impl/exam/writing/grade.py b/ielts_be/services/impl/exam/writing/grade.py index d725ec3..1b22a30 100644 --- a/ielts_be/services/impl/exam/writing/grade.py +++ b/ielts_be/services/impl/exam/writing/grade.py @@ -57,14 +57,17 @@ class GradeWriting: name = attachment.split('/')[-1] out_path = f'./tmp/{uuid}/{name}' path = await self._file_storage.download_firebase_file(attachment, out_path) - messages.append({ - "type": "image_url", - "image_url": { - "url": f"data:image/{name.split('.')[-1]};base64,{FileHelper.encode_image(path)}" + messages.append( + { + "role": "user", + "content": { + "type": "image_url", + "image_url": { + "url": f"data:image/{name.split('.')[-1]};base64,{FileHelper.encode_image(path)}" + } } }) - llm_model = GPTModels.GPT_3_5_TURBO if task == 1 else GPTModels.GPT_4_O temperature = ( TemperatureSettings.GRADING_TEMPERATURE if task == 1 else @@ -72,7 +75,7 @@ class GradeWriting: ) evaluation_promise = self._llm.prediction( - llm_model, + GPTModels.GPT_4_O, messages, ["comment"], temperature diff --git a/ielts_be/services/impl/third_parties/openai.py b/ielts_be/services/impl/third_parties/openai.py index 036903a..95aaf1f 100644 --- a/ielts_be/services/impl/third_parties/openai.py +++ b/ielts_be/services/impl/third_parties/openai.py @@ -93,7 +93,10 @@ class OpenAI(ILLMService): def _count_total_tokens(messages): total_tokens = 0 for message in messages: - total_tokens += count_tokens(message["content"])["n_tokens"] + # Skip when content isn't text + message_content = message.get("content", None) + if message_content is not None and isinstance(message_content, str): + total_tokens += count_tokens(message["content"])["n_tokens"] return total_tokens @staticmethod