From b32b0b2e09381924346e5b32cf0956bc668c7c27 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Omar=20S=C3=A1nchez=20Pizarro?=
Date: Mon, 17 Nov 2025 21:29:03 +0100
Subject: [PATCH] Implement WorkerConditions class for article filtering and refactor Worker to utilize it

Move the article filtering helpers out of Worker into WorkerConditions.

WorkerConditions creates its own logger, because the moved
meets_item_conditions() reports excluded articles through self.logger.
_title_first_word_is_excluded() now returns False for an empty or
whitespace-only title instead of raising IndexError.
---
Note: the blob id on the worker_conditions.py "index" line predates the
review changes below; regenerate the patch with git format-patch to
refresh it.

 managers/worker.py            | 48 +++--------------------------------
 managers/worker_conditions.py | 56 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+), 45 deletions(-)
 create mode 100644 managers/worker_conditions.py

diff --git a/managers/worker.py b/managers/worker.py
index d942562..9c81f61 100644
--- a/managers/worker.py
+++ b/managers/worker.py
@@ -2,6 +2,7 @@ import time
 import logging
 import traceback
 from platforms.platform_factory import PlatformFactory
+from managers.worker_conditions import WorkerConditions
 
 ERROR_SLEEP_TIME = 60
 
@@ -11,7 +12,7 @@ class Worker:
         self._item_monitoring = item_to_monitor
         self._general_args = general_args
         self._queue_manager = queue_manager
-
+        self._worker_conditions = WorkerConditions(item_to_monitor, general_args)
         # Initialize the platform based on item_to_monitor configuration
         platform_name = self._item_monitoring.get_platform()
         try:
@@ -25,51 +26,8 @@ class Worker:
         self._queue_manager.add_to_notified_articles(self._request_articles())
 
     def _request_articles(self):
-        """
-        Request articles from the configured platform
-        Platform-specific logic is delegated to the platform implementation
-        """
         return self._platform.fetch_articles()
 
-    def _has_words(self, text, word_list):
-        return any(word in text for word in word_list)
-
-    def _title_has_excluded_words(self, article_title):
-        return self._has_words(article_title, self._item_monitoring.get_title_exclude() + self._general_args.get_title_exclude())
-
-    def _description_has_excluded_words(self, article_description):
-        return self._has_words(article_description, self._item_monitoring.get_description_exclude() + self._general_args.get_description_exclude())
-
-    def _title_has_required_words(self, article_title):
-        return not self._item_monitoring.get_title_must_include() \
-            or self._has_words(article_title, self._item_monitoring.get_title_must_include() + self._general_args.get_title_must_include())
-
-    def _description_has_required_words(self, article_description):
-        return not self._item_monitoring.get_description_must_include() \
-            or self._has_words(article_description, self._item_monitoring.get_description_must_include() + self._general_args.get_description_must_include())
-
-    def _title_first_word_is_excluded(self, article_title):
-        first_word = article_title.split()[0]
-        for excluded_word in self._item_monitoring.get_title_first_word_exclude():
-            if first_word == excluded_word:
-                return True
-        return False
-
-    def _meets_item_conditions(self, article):
-        article_title = article.get_title().lower()
-        article_description = article.get_description().lower()
-        if (
-            self._title_has_required_words(article_title) and
-            self._description_has_required_words(article_description) and
-            not self._title_has_excluded_words(article_title) and
-            not self._description_has_excluded_words(article_description) and
-            not self._title_first_word_is_excluded(article_title)
-        ):
-            return True
-        else:
-            self.logger.debug(f"Excluded article: {article}")
-            return False
-
     def work(self):
 
         exec_times = []
@@ -77,7 +35,7 @@ class Worker:
             start_time = time.time()
             articles = self._request_articles()
             for article in articles:
-                if self._meets_item_conditions(article):
+                if self._worker_conditions.meets_item_conditions(article):
                     try:
                         self._queue_manager.add_to_queue(article, self._item_monitoring.get_name(), self._item_monitoring.get_thread_id())
                     except Exception as e:
diff --git a/managers/worker_conditions.py b/managers/worker_conditions.py
new file mode 100644
index 0000000..e6acb67
--- /dev/null
+++ b/managers/worker_conditions.py
@@ -0,0 +1,56 @@
+import logging
+
+
+class WorkerConditions:
+    """Decide whether an article passes the configured title/description filters."""
+
+    def __init__(self, item_monitoring, general_args):
+        self._item_monitoring = item_monitoring
+        self._general_args = general_args
+        # Worker logged exclusions via self.logger; this class needs its own logger.
+        self.logger = logging.getLogger(__name__)
+
+    def _has_words(self, text, word_list):
+        """Return True if any entry of word_list occurs as a substring of text."""
+        return any(word in text for word in word_list)
+
+    def _title_has_excluded_words(self, article_title):
+        return self._has_words(article_title, self._item_monitoring.get_title_exclude() + self._general_args.get_title_exclude())
+
+    def _description_has_excluded_words(self, article_description):
+        return self._has_words(article_description, self._item_monitoring.get_description_exclude() + self._general_args.get_description_exclude())
+
+    def _title_has_required_words(self, article_title):
+        # Only enforced when the item itself lists must-include words.
+        return not self._item_monitoring.get_title_must_include() \
+            or self._has_words(article_title, self._item_monitoring.get_title_must_include() + self._general_args.get_title_must_include())
+
+    def _description_has_required_words(self, article_description):
+        # Only enforced when the item itself lists must-include words.
+        return not self._item_monitoring.get_description_must_include() \
+            or self._has_words(article_description, self._item_monitoring.get_description_must_include() + self._general_args.get_description_must_include())
+
+    def _title_first_word_is_excluded(self, article_title):
+        """Return True if the title's first word is in the item's first-word exclusion list."""
+        words = article_title.split()
+        if not words:
+            # Empty or whitespace-only title: there is no first word to compare.
+            return False
+        first_word = words[0]
+        return any(first_word == excluded_word for excluded_word in self._item_monitoring.get_title_first_word_exclude())
+
+    def meets_item_conditions(self, article):
+        """Return True if the article passes every filter; otherwise log it and return False."""
+        article_title = article.get_title().lower()
+        article_description = article.get_description().lower()
+        if (
+            self._title_has_required_words(article_title) and
+            self._description_has_required_words(article_description) and
+            not self._title_has_excluded_words(article_title) and
+            not self._description_has_excluded_words(article_description) and
+            not self._title_first_word_is_excluded(article_title)
+        ):
+            return True
+        else:
+            self.logger.debug(f"Excluded article: {article}")
+            return False