From 4ff3b02a1d760b70ecdd4ae79b2fc39bb7ab6a0c Mon Sep 17 00:00:00 2001
From: Cristiano Ferreira
Date: Tue, 11 Jun 2024 21:49:27 +0100
Subject: [PATCH] Double check for english words in writing grading.

---
Review notes (this section is not part of the commit message):
The line breaks of this patch were restored; the blank context lines are
inferred from the hunk's line counts. has_common_words now accepts an
optional `minimum` (default 10, the previously hard-coded threshold), is
documented, and is separated from its neighbours by two blank lines.
Because the added lines were revised, the post-image blob id on the
`index` line below no longer matches the resulting file.

 helper/speech_to_text_helper.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/helper/speech_to_text_helper.py b/helper/speech_to_text_helper.py
index 8e13dcc..001bd11 100644
--- a/helper/speech_to_text_helper.py
+++ b/helper/speech_to_text_helper.py
@@ -95,17 +95,34 @@ def conversation_text_to_speech(conversation: list, file_name: str):
 
 
 def has_words(text: str):
+    if not has_common_words(text):
+        return False
     english_words = set(words.words())
     words_in_input = text.split()
     return any(word.lower() in english_words for word in words_in_input)
 
 
 def has_x_words(text: str, quantity):
+    if not has_common_words(text):
+        return False
     english_words = set(words.words())
     words_in_input = text.split()
     english_word_count = sum(1 for word in words_in_input if word.lower() in english_words)
     return english_word_count >= quantity
 
+
+def has_common_words(text: str, minimum: int = 10):
+    """Return True when *text* contains enough very common English words.
+
+    Counts every whitespace-separated token (case-insensitive, repeats
+    included) that is one of ten high-frequency English words and compares
+    the total against *minimum*. Tokens are not stripped of punctuation,
+    so a token such as "the," does not count.
+    """
+    common_words = {"the", "be", "to", "of", "and", "a", "in", "that", "have", "i"}
+    matches = sum(1 for token in text.split() if token.lower() in common_words)
+    return matches >= minimum
+
 
 def divide_text(text, max_length=3000):
     if len(text) <= max_length: