103 lines
4.8 KiB
Python
103 lines
4.8 KiB
Python
import time
|
|
import logging
|
|
import traceback
|
|
from platforms.platform_factory import PlatformFactory
|
|
|
|
# Seconds Worker.run() sleeps after work() raises, before restarting the worker loop.
ERROR_SLEEP_TIME = 60
|
|
|
|
class Worker:
    """Poll the platform of one monitored item and queue matching articles.

    Each worker is bound to a single item configuration. It fetches articles
    through the platform object built for that item, filters them against the
    item-level and general include/exclude word lists, and passes the
    accepted ones to the queue manager.
    """

    def __init__(self, item_to_monitor, general_args, queue_manager):
        """Build the platform client and register the currently listed articles.

        Args:
            item_to_monitor: Item configuration. Must provide the ``get_*``
                accessors used by this class (platform, name, thread id,
                check interval and the word lists).
            general_args: General configuration whose word lists are
                concatenated with the item's own lists.
            queue_manager: Object exposing ``add_to_notified_articles`` and
                ``add_to_queue``.

        Raises:
            ValueError: Re-raised from ``PlatformFactory.create_platform``
                after being logged.
        """
        self.logger = logging.getLogger(__name__)
        self._item_monitoring = item_to_monitor
        self._general_args = general_args
        self._queue_manager = queue_manager

        # Initialize the platform based on item_to_monitor configuration
        platform_name = self._item_monitoring.get_platform()
        try:
            self._platform = PlatformFactory.create_platform(platform_name, item_to_monitor)
            self.logger.info(f"Initialized platform: {platform_name}")
        except ValueError as e:
            self.logger.error(f"Failed to initialize platform: {e}")
            raise

        # Initialize the queue with existing articles
        # NOTE(review): presumably this keeps already-listed articles from
        # being notified on startup - confirm against the queue manager.
        self._queue_manager.add_to_notified_articles(self._request_articles())

    def _request_articles(self):
        """
        Request articles from the configured platform
        Platform-specific logic is delegated to the platform implementation
        """
        return self._platform.fetch_articles()

    def _has_words(self, text, word_list):
        """Return True if any entry of ``word_list`` occurs in ``text``."""
        return any(word in text for word in word_list)

    def _title_has_excluded_words(self, article_title):
        """Return True if the title contains an item or general excluded word."""
        excluded = (
            self._item_monitoring.get_title_exclude()
            + self._general_args.get_title_exclude()
        )
        return self._has_words(article_title, excluded)

    def _description_has_excluded_words(self, article_description):
        """Return True if the description contains an item or general excluded word."""
        excluded = (
            self._item_monitoring.get_description_exclude()
            + self._general_args.get_description_exclude()
        )
        return self._has_words(article_description, excluded)

    def _title_has_required_words(self, article_title):
        """Return True if the title satisfies the combined must-include list.

        With no required words at all (item and general lists both empty)
        every title passes. Otherwise at least one required word must occur
        in the title.
        """
        # Check emptiness on the combined list: testing only the item list
        # would silently ignore the general required words whenever the item
        # defines none of its own.
        required = (
            self._item_monitoring.get_title_must_include()
            + self._general_args.get_title_must_include()
        )
        return not required or self._has_words(article_title, required)

    def _description_has_required_words(self, article_description):
        """Return True if the description satisfies the combined must-include list.

        With no required words at all (item and general lists both empty)
        every description passes. Otherwise at least one required word must
        occur in the description.
        """
        # Same reasoning as for titles: decide on the combined list.
        required = (
            self._item_monitoring.get_description_must_include()
            + self._general_args.get_description_must_include()
        )
        return not required or self._has_words(article_description, required)

    def _title_first_word_is_excluded(self, article_title):
        """Return True if the title's first word equals an excluded first word.

        An empty or whitespace-only title has no first word and is therefore
        never excluded by this rule.
        """
        words = article_title.split()
        if not words:
            # Guard against IndexError on blank titles.
            return False
        first_word = words[0]
        return any(
            first_word == excluded_word
            for excluded_word in self._item_monitoring.get_title_first_word_exclude()
        )

    def _meets_item_conditions(self, article):
        """Return True if the article passes every include/exclude filter.

        Title and description are lower-cased before matching. Rejected
        articles are logged at debug level.
        """
        article_title = article.get_title().lower()
        article_description = article.get_description().lower()
        accepted = (
            self._title_has_required_words(article_title)
            and self._description_has_required_words(article_description)
            and not self._title_has_excluded_words(article_title)
            and not self._description_has_excluded_words(article_description)
            and not self._title_first_word_is_excluded(article_title)
        )
        if not accepted:
            self.logger.debug(f"Excluded article: {article}")
        return accepted

    def work(self):
        """Poll the platform forever and queue every article passing the filters.

        Each pass fetches the articles, queues the accepted ones, sleeps for
        the item's check interval and then logs execution-time statistics
        (last, max, average) for the passes so far. A failure while queueing
        a single article is logged and does not stop the pass; any other
        exception propagates to the caller.
        """
        # Running aggregates instead of an ever-growing list of samples, so
        # memory stays constant however long the worker runs.
        passes = 0
        total_time = 0.0
        max_time = 0.0

        while True:
            # Monotonic clock: unaffected by system clock adjustments.
            start_time = time.monotonic()
            for article in self._request_articles():
                if not self._meets_item_conditions(article):
                    continue
                try:
                    self._queue_manager.add_to_queue(
                        article,
                        self._item_monitoring.get_name(),
                        self._item_monitoring.get_thread_id(),
                    )
                except Exception as e:
                    self.logger.error(f"{self._item_monitoring.get_name()} worker crashed: {e}")

            # Measure before sleeping so the statistic covers only the work.
            last_time = time.monotonic() - start_time
            time.sleep(self._item_monitoring.get_check_every())

            passes += 1
            total_time += last_time
            max_time = max(max_time, last_time)
            self.logger.info(
                f"Worker '{self._item_monitoring.get_name()}', "
                f"Execution time stats - Last: {last_time:.2f}s, Max: {max_time:.2f}s, "
                f"Average: {total_time / passes:.2f}s."
            )

    def run(self):
        """Run ``work()`` forever, restarting it after any exception.

        On failure the full traceback and a restart notice are logged, then
        the worker waits ``ERROR_SLEEP_TIME`` seconds before starting again.
        """
        while True:
            try:
                platform_name = self._platform.get_platform_name()
                self.logger.info(f"{platform_name.capitalize()} monitor worker started - {self._item_monitoring.get_name()}")
                self.work()
            except Exception as e:
                self.logger.error("".join(traceback.format_exception(None, e, e.__traceback__)))
                self.logger.error(f"{self._item_monitoring.get_name()} worker crashed. Restarting worker...")
                time.sleep(ERROR_SLEEP_TIME)
|