Implement WorkerConditions class for article filtering and refactor Worker to utilize it

This commit is contained in:
Omar Sánchez Pizarro
2025-11-17 21:29:03 +01:00
parent 66fc4523d5
commit b32b0b2e09
2 changed files with 46 additions and 45 deletions

View File

@@ -2,6 +2,7 @@ import time
import logging import logging
import traceback import traceback
from platforms.platform_factory import PlatformFactory from platforms.platform_factory import PlatformFactory
from managers.worker_conditions import WorkerConditions
ERROR_SLEEP_TIME = 60 ERROR_SLEEP_TIME = 60
@@ -11,7 +12,7 @@ class Worker:
self._item_monitoring = item_to_monitor self._item_monitoring = item_to_monitor
self._general_args = general_args self._general_args = general_args
self._queue_manager = queue_manager self._queue_manager = queue_manager
self._worker_conditions = WorkerConditions(item_to_monitor, general_args)
# Initialize the platform based on item_to_monitor configuration # Initialize the platform based on item_to_monitor configuration
platform_name = self._item_monitoring.get_platform() platform_name = self._item_monitoring.get_platform()
try: try:
@@ -25,51 +26,8 @@ class Worker:
self._queue_manager.add_to_notified_articles(self._request_articles()) self._queue_manager.add_to_notified_articles(self._request_articles())
def _request_articles(self): def _request_articles(self):
"""
Request articles from the configured platform
Platform-specific logic is delegated to the platform implementation
"""
return self._platform.fetch_articles() return self._platform.fetch_articles()
def _has_words(self, text, word_list):
return any(word in text for word in word_list)
def _title_has_excluded_words(self, article_title):
return self._has_words(article_title, self._item_monitoring.get_title_exclude() + self._general_args.get_title_exclude())
def _description_has_excluded_words(self, article_description):
return self._has_words(article_description, self._item_monitoring.get_description_exclude() + self._general_args.get_description_exclude())
def _title_has_required_words(self, article_title):
return not self._item_monitoring.get_title_must_include() \
or self._has_words(article_title, self._item_monitoring.get_title_must_include() + self._general_args.get_title_must_include())
def _description_has_required_words(self, article_description):
return not self._item_monitoring.get_description_must_include() \
or self._has_words(article_description, self._item_monitoring.get_description_must_include() + self._general_args.get_description_must_include())
def _title_first_word_is_excluded(self, article_title):
first_word = article_title.split()[0]
for excluded_word in self._item_monitoring.get_title_first_word_exclude():
if first_word == excluded_word:
return True
return False
def _meets_item_conditions(self, article):
article_title = article.get_title().lower()
article_description = article.get_description().lower()
if (
self._title_has_required_words(article_title) and
self._description_has_required_words(article_description) and
not self._title_has_excluded_words(article_title) and
not self._description_has_excluded_words(article_description) and
not self._title_first_word_is_excluded(article_title)
):
return True
else:
self.logger.debug(f"Excluded article: {article}")
return False
def work(self): def work(self):
exec_times = [] exec_times = []
@@ -77,7 +35,7 @@ class Worker:
start_time = time.time() start_time = time.time()
articles = self._request_articles() articles = self._request_articles()
for article in articles: for article in articles:
if self._meets_item_conditions(article): if self._worker_conditions.meets_item_conditions(article):
try: try:
self._queue_manager.add_to_queue(article, self._item_monitoring.get_name(), self._item_monitoring.get_thread_id()) self._queue_manager.add_to_queue(article, self._item_monitoring.get_name(), self._item_monitoring.get_thread_id())
except Exception as e: except Exception as e:

View File

@@ -0,0 +1,43 @@
class WorkerConditions:
    """Decide whether an article passes the configured title/description filters.

    Word lists are read from two configuration objects: the per-item
    configuration (``item_monitoring``) and the global one (``general_args``).
    All text checks are plain substring checks performed on the lower-cased
    article title and description.
    """

    def __init__(self, item_monitoring, general_args):
        """Store both configuration sources and set up the debug logger.

        Args:
            item_monitoring: Per-item configuration; must expose
                ``get_title_exclude``, ``get_description_exclude``,
                ``get_title_must_include``, ``get_description_must_include``
                and ``get_title_first_word_exclude``.
            general_args: Global configuration; must expose the same
                exclude / must-include getters (not the first-word one).
        """
        # Imported locally so this class stays self-contained: the module has
        # no import section of its own.
        import logging

        self._item_monitoring = item_monitoring
        self._general_args = general_args
        # meets_item_conditions() logs rejected articles; without this
        # attribute every rejection raised AttributeError.
        self.logger = logging.getLogger(__name__)

    def _has_words(self, text, word_list):
        """Return True if any entry of ``word_list`` is a substring of ``text``."""
        return any(word in text for word in word_list)

    def _title_has_excluded_words(self, article_title):
        """Return True if the title contains an item-level or general excluded word."""
        excluded = (
            self._item_monitoring.get_title_exclude()
            + self._general_args.get_title_exclude()
        )
        return self._has_words(article_title, excluded)

    def _description_has_excluded_words(self, article_description):
        """Return True if the description contains an item-level or general excluded word."""
        excluded = (
            self._item_monitoring.get_description_exclude()
            + self._general_args.get_description_exclude()
        )
        return self._has_words(article_description, excluded)

    def _title_has_required_words(self, article_title):
        """Return True if the title satisfies the must-include filter.

        The filter only applies when the item defines its own must-include
        words; in that case any item-level or general word is enough.
        """
        # NOTE(review): general must-include words are ignored when the item
        # list is empty -- behaviour kept as-is, confirm it is intended.
        item_required = self._item_monitoring.get_title_must_include()
        if not item_required:
            return True
        required = item_required + self._general_args.get_title_must_include()
        return self._has_words(article_title, required)

    def _description_has_required_words(self, article_description):
        """Return True if the description satisfies the must-include filter.

        The filter only applies when the item defines its own must-include
        words; in that case any item-level or general word is enough.
        """
        # NOTE(review): same asymmetry as the title check -- general words are
        # only consulted when the item list is non-empty.
        item_required = self._item_monitoring.get_description_must_include()
        if not item_required:
            return True
        required = (
            item_required + self._general_args.get_description_must_include()
        )
        return self._has_words(article_description, required)

    def _title_first_word_is_excluded(self, article_title):
        """Return True if the first whitespace-separated word of the title is excluded.

        An empty or whitespace-only title has no first word and is therefore
        never excluded by this rule (previously it raised IndexError).
        """
        words = article_title.split()
        if not words:
            return False
        return words[0] in self._item_monitoring.get_title_first_word_exclude()

    def meets_item_conditions(self, article):
        """Return True if ``article`` passes every filter, False otherwise.

        Args:
            article: Object exposing ``get_title()`` and ``get_description()``,
                both returning strings.

        Rejected articles are reported on the debug log.
        """
        # Lower-case once so the substring checks ignore the article's casing.
        article_title = article.get_title().lower()
        article_description = article.get_description().lower()

        accepted = (
            self._title_has_required_words(article_title)
            and self._description_has_required_words(article_description)
            and not self._title_has_excluded_words(article_title)
            and not self._description_has_excluded_words(article_description)
            and not self._title_first_word_is_excluded(article_title)
        )
        if not accepted:
            self.logger.debug("Excluded article: %s", article)
        return accepted