import time
import logging
import traceback
from platforms.platform_factory import PlatformFactory

# Seconds to wait before restarting the worker loop after an unhandled exception.
ERROR_SLEEP_TIME = 60


class Worker:
    """Poll a platform for one monitored item and queue the articles that match.

    The worker fetches articles through a platform object created by
    ``PlatformFactory``, filters them with the item's (and the general)
    include/exclude word lists, and hands the survivors to the queue manager.
    """

    def __init__(self, item_to_monitor, general_args, queue_manager):
        """Create the platform for *item_to_monitor* and seed the queue manager.

        Args:
            item_to_monitor: Per-item configuration (platform, filters, name,
                polling interval, thread id).
            general_args: Global filter configuration combined with the
                item-level filters.
            queue_manager: Object receiving already-seen and newly matched
                articles.

        Raises:
            ValueError: Re-raised when the platform cannot be created.
        """
        self.logger = logging.getLogger(__name__)
        self._item_monitoring = item_to_monitor
        self._general_args = general_args
        self._queue_manager = queue_manager

        # Initialize the platform based on item_to_monitor configuration
        platform_name = self._item_monitoring.get_platform()
        try:
            self._platform = PlatformFactory.create_platform(platform_name, item_to_monitor)
        except ValueError as e:
            self.logger.error(f"Failed to initialize platform: {e}")
            raise
        else:
            self.logger.info(f"Initialized platform: {platform_name}")

        # Register the articles that exist right now as already notified
        # (presumably so only articles appearing later are queued - confirm
        # against the queue manager implementation).
        self._queue_manager.add_to_notified_articles(self._request_articles())

    def _request_articles(self):
        """
        Request articles from the configured platform
        Platform-specific logic is delegated to the platform implementation
        """
        return self._platform.fetch_articles()

    def _has_words(self, text, word_list):
        """Return True if any entry of *word_list* occurs as a substring of *text*."""
        # NOTE(review): callers pass lower-cased text; words are compared as
        # configured, so this assumes the configured words are lower-case too.
        return any(word in text for word in word_list)

    def _title_has_excluded_words(self, article_title):
        """Return True if the title contains any item-level or general excluded word."""
        excluded = (
            self._item_monitoring.get_title_exclude()
            + self._general_args.get_title_exclude()
        )
        return self._has_words(article_title, excluded)

    def _description_has_excluded_words(self, article_description):
        """Return True if the description contains any item-level or general excluded word."""
        excluded = (
            self._item_monitoring.get_description_exclude()
            + self._general_args.get_description_exclude()
        )
        return self._has_words(article_description, excluded)

    def _title_has_required_words(self, article_title):
        """Return True if the title satisfies the combined must-include list.

        An empty combined list means there is no requirement. The item-level
        and general lists are merged *before* the emptiness check so that
        general required words still apply when the item defines none.
        """
        required = (
            list(self._item_monitoring.get_title_must_include() or ())
            + list(self._general_args.get_title_must_include() or ())
        )
        return not required or self._has_words(article_title, required)

    def _description_has_required_words(self, article_description):
        """Return True if the description satisfies the combined must-include list.

        An empty combined list means there is no requirement. The item-level
        and general lists are merged *before* the emptiness check so that
        general required words still apply when the item defines none.
        """
        required = (
            list(self._item_monitoring.get_description_must_include() or ())
            + list(self._general_args.get_description_must_include() or ())
        )
        return not required or self._has_words(article_description, required)

    def _title_first_word_is_excluded(self, article_title):
        """Return True if the first whitespace-separated word of the title is excluded.

        A title with no words (empty or whitespace only) has no first word and
        is therefore never excluded by this rule.
        """
        words = article_title.split()
        if not words:
            # Guard against IndexError on empty titles.
            return False
        first_word = words[0]
        return any(
            first_word == excluded_word
            for excluded_word in self._item_monitoring.get_title_first_word_exclude()
        )

    def _meets_item_conditions(self, article):
        """Return True if *article* passes every include/exclude filter.

        Title and description are lower-cased before matching. Rejected
        articles are logged at debug level.
        """
        article_title = article.get_title().lower()
        article_description = article.get_description().lower()
        accepted = (
            self._title_has_required_words(article_title)
            and self._description_has_required_words(article_description)
            and not self._title_has_excluded_words(article_title)
            and not self._description_has_excluded_words(article_description)
            and not self._title_first_word_is_excluded(article_title)
        )
        if not accepted:
            # Lazy %-formatting: the article is only rendered if debug is enabled.
            self.logger.debug("Excluded article: %s", article)
        return accepted

    def _enqueue_matching(self, articles):
        """Queue every article in *articles* that passes the item's filters."""
        for article in articles:
            if not self._meets_item_conditions(article):
                continue
            try:
                self._queue_manager.add_to_queue(
                    article,
                    self._item_monitoring.get_name(),
                    self._item_monitoring.get_thread_id(),
                )
            except Exception as e:
                # Best effort: a failure to queue one article must not stop
                # the remaining articles of this cycle from being processed.
                self.logger.error(f"{self._item_monitoring.get_name()} worker crashed: {e}")

    def work(self):
        """Run the polling loop forever: fetch, filter, queue, sleep, repeat.

        Does not return normally; exceptions raised while fetching or
        filtering propagate to the caller (see ``run``).
        """
        # Running aggregates instead of an ever-growing list of samples, so
        # memory use and per-cycle cost stay constant for a long-lived worker.
        cycles = 0
        total_time = 0.0
        max_time = 0.0

        while True:
            # Monotonic clock: unaffected by system clock adjustments.
            start_time = time.monotonic()
            self._enqueue_matching(self._request_articles())
            last_time = time.monotonic() - start_time

            cycles += 1
            total_time += last_time
            max_time = max(max_time, last_time)

            self.logger.info(
                f"Worker '{self._item_monitoring.get_name()}', "
                f"Execution time stats - Last: {last_time:.2f}s, Max: {max_time:.2f}s, "
                f"Average: {total_time / cycles:.2f}s."
            )

            time.sleep(self._item_monitoring.get_check_every())

    def run(self):
        """Run ``work`` forever, restarting it after any unhandled exception.

        On failure the full traceback is logged and the worker waits
        ``ERROR_SLEEP_TIME`` seconds before starting again.
        """
        while True:
            try:
                platform_name = self._platform.get_platform_name()
                self.logger.info(
                    f"{platform_name.capitalize()} monitor worker started - "
                    f"{self._item_monitoring.get_name()}"
                )
                self.work()
            except Exception as e:
                self.logger.error(f"{''.join(traceback.format_exception(None, e, e.__traceback__))}")
                self.logger.error(f"{self._item_monitoring.get_name()} worker crashed. Restarting worker...")
                time.sleep(ERROR_SLEEP_TIME)