Implement WorkerConditions class for article filtering and refactor Worker to utilize it
This commit is contained in:
@@ -2,6 +2,7 @@ import time
|
|||||||
import logging
|
import logging
|
||||||
import traceback
|
import traceback
|
||||||
from platforms.platform_factory import PlatformFactory
|
from platforms.platform_factory import PlatformFactory
|
||||||
|
from managers.worker_conditions import WorkerConditions
|
||||||
|
|
||||||
ERROR_SLEEP_TIME = 60
|
ERROR_SLEEP_TIME = 60
|
||||||
|
|
||||||
@@ -11,7 +12,7 @@ class Worker:
|
|||||||
self._item_monitoring = item_to_monitor
|
self._item_monitoring = item_to_monitor
|
||||||
self._general_args = general_args
|
self._general_args = general_args
|
||||||
self._queue_manager = queue_manager
|
self._queue_manager = queue_manager
|
||||||
|
self._worker_conditions = WorkerConditions(item_to_monitor, general_args)
|
||||||
# Initialize the platform based on item_to_monitor configuration
|
# Initialize the platform based on item_to_monitor configuration
|
||||||
platform_name = self._item_monitoring.get_platform()
|
platform_name = self._item_monitoring.get_platform()
|
||||||
try:
|
try:
|
||||||
@@ -25,51 +26,8 @@ class Worker:
|
|||||||
self._queue_manager.add_to_notified_articles(self._request_articles())
|
self._queue_manager.add_to_notified_articles(self._request_articles())
|
||||||
|
|
||||||
def _request_articles(self):
|
def _request_articles(self):
|
||||||
"""
|
|
||||||
Request articles from the configured platform
|
|
||||||
Platform-specific logic is delegated to the platform implementation
|
|
||||||
"""
|
|
||||||
return self._platform.fetch_articles()
|
return self._platform.fetch_articles()
|
||||||
|
|
||||||
def _has_words(self, text, word_list):
|
|
||||||
return any(word in text for word in word_list)
|
|
||||||
|
|
||||||
def _title_has_excluded_words(self, article_title):
|
|
||||||
return self._has_words(article_title, self._item_monitoring.get_title_exclude() + self._general_args.get_title_exclude())
|
|
||||||
|
|
||||||
def _description_has_excluded_words(self, article_description):
|
|
||||||
return self._has_words(article_description, self._item_monitoring.get_description_exclude() + self._general_args.get_description_exclude())
|
|
||||||
|
|
||||||
def _title_has_required_words(self, article_title):
|
|
||||||
return not self._item_monitoring.get_title_must_include() \
|
|
||||||
or self._has_words(article_title, self._item_monitoring.get_title_must_include() + self._general_args.get_title_must_include())
|
|
||||||
|
|
||||||
def _description_has_required_words(self, article_description):
|
|
||||||
return not self._item_monitoring.get_description_must_include() \
|
|
||||||
or self._has_words(article_description, self._item_monitoring.get_description_must_include() + self._general_args.get_description_must_include())
|
|
||||||
|
|
||||||
def _title_first_word_is_excluded(self, article_title):
|
|
||||||
first_word = article_title.split()[0]
|
|
||||||
for excluded_word in self._item_monitoring.get_title_first_word_exclude():
|
|
||||||
if first_word == excluded_word:
|
|
||||||
return True
|
|
||||||
return False
|
|
||||||
|
|
||||||
def _meets_item_conditions(self, article):
|
|
||||||
article_title = article.get_title().lower()
|
|
||||||
article_description = article.get_description().lower()
|
|
||||||
if (
|
|
||||||
self._title_has_required_words(article_title) and
|
|
||||||
self._description_has_required_words(article_description) and
|
|
||||||
not self._title_has_excluded_words(article_title) and
|
|
||||||
not self._description_has_excluded_words(article_description) and
|
|
||||||
not self._title_first_word_is_excluded(article_title)
|
|
||||||
):
|
|
||||||
return True
|
|
||||||
else:
|
|
||||||
self.logger.debug(f"Excluded article: {article}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
def work(self):
|
def work(self):
|
||||||
exec_times = []
|
exec_times = []
|
||||||
|
|
||||||
@@ -77,7 +35,7 @@ class Worker:
|
|||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
articles = self._request_articles()
|
articles = self._request_articles()
|
||||||
for article in articles:
|
for article in articles:
|
||||||
if self._meets_item_conditions(article):
|
if self._worker_conditions.meets_item_conditions(article):
|
||||||
try:
|
try:
|
||||||
self._queue_manager.add_to_queue(article, self._item_monitoring.get_name(), self._item_monitoring.get_thread_id())
|
self._queue_manager.add_to_queue(article, self._item_monitoring.get_name(), self._item_monitoring.get_thread_id())
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
43
managers/worker_conditions.py
Normal file
43
managers/worker_conditions.py
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
class WorkerConditions:
    """Decide whether an article passes the configured word filters.

    Filters are read from two configuration objects: the per-item settings
    (``item_monitoring``) and the global settings (``general_args``). Word
    lists from both are merged before matching. Matching is a plain
    substring test against the lower-cased title/description.
    """

    def __init__(self, item_monitoring, general_args):
        """Store the configuration objects and set up a logger.

        Args:
            item_monitoring: per-item configuration exposing the
                ``get_title_exclude`` / ``get_description_exclude`` /
                ``get_title_must_include`` / ``get_description_must_include`` /
                ``get_title_first_word_exclude`` getters.
            general_args: global configuration exposing the same exclude and
                must-include getters.
        """
        # Imported locally because this module has no import block of its own.
        import logging

        self._item_monitoring = item_monitoring
        self._general_args = general_args
        # meets_item_conditions() logs excluded articles; without this
        # attribute every excluded article raised AttributeError.
        self.logger = logging.getLogger(__name__)

    @staticmethod
    def _merge(item_words, general_words):
        """Return the item and general word lists concatenated (None -> [])."""
        return list(item_words or []) + list(general_words or [])

    def _has_words(self, text, word_list):
        """Return True if any word of ``word_list`` is a substring of ``text``."""
        return any(word in text for word in word_list)

    def _title_has_excluded_words(self, article_title):
        """Return True if the title contains an item- or globally-excluded word."""
        excluded = self._merge(
            self._item_monitoring.get_title_exclude(),
            self._general_args.get_title_exclude(),
        )
        return self._has_words(article_title, excluded)

    def _description_has_excluded_words(self, article_description):
        """Return True if the description contains an item- or globally-excluded word."""
        excluded = self._merge(
            self._item_monitoring.get_description_exclude(),
            self._general_args.get_description_exclude(),
        )
        return self._has_words(article_description, excluded)

    def _title_has_required_words(self, article_title):
        """Return True if no title words are required, or at least one is present.

        The item and global lists are merged first, so a global must-include
        list is still enforced when the per-item list is empty.
        """
        required = self._merge(
            self._item_monitoring.get_title_must_include(),
            self._general_args.get_title_must_include(),
        )
        return not required or self._has_words(article_title, required)

    def _description_has_required_words(self, article_description):
        """Return True if no description words are required, or at least one is present.

        The item and global lists are merged first, so a global must-include
        list is still enforced when the per-item list is empty.
        """
        required = self._merge(
            self._item_monitoring.get_description_must_include(),
            self._general_args.get_description_must_include(),
        )
        return not required or self._has_words(article_description, required)

    def _title_first_word_is_excluded(self, article_title):
        """Return True if the first word of the title equals an excluded word."""
        words = article_title.split()
        if not words:
            # Empty or whitespace-only title: there is no first word to reject.
            return False
        first_word = words[0]
        excluded_words = self._item_monitoring.get_title_first_word_exclude() or []
        return any(first_word == excluded_word for excluded_word in excluded_words)

    def meets_item_conditions(self, article):
        """Return True if ``article`` passes every filter, False otherwise.

        ``article`` must expose ``get_title()`` and ``get_description()``.
        Both values are lower-cased before matching. Rejected articles are
        logged at DEBUG level.
        """
        article_title = (article.get_title() or "").lower()
        article_description = (article.get_description() or "").lower()

        accepted = (
            self._title_has_required_words(article_title)
            and self._description_has_required_words(article_description)
            and not self._title_has_excluded_words(article_title)
            and not self._description_has_excluded_words(article_description)
            and not self._title_first_word_is_excluded(article_title)
        )
        if not accepted:
            self.logger.debug("Excluded article: %s", article)
        return accepted
|
||||||
Reference in New Issue
Block a user