- Updated .gitignore to include additional IDE and OS files, as well as log and web build directories. - Expanded config.sample.yaml to include cache configuration options for memory and Redis. - Modified wallamonitor.py to load cache configuration and initialize ArticleCache. - Refactored QueueManager to utilize ArticleCache for tracking notified articles. - Improved logging setup to dynamically determine log file path based on environment.
46 lines
2.2 KiB
Python
46 lines
2.2 KiB
Python
import logging
|
|
|
|
class WorkerConditions:
    """Decide whether an article passes the configured word filters.

    Filters come from two sources that are combined by list concatenation:
    the per-item monitoring configuration and the general (global) arguments.
    All matching is plain substring matching against the text handed in;
    ``meets_item_conditions`` lower-cases the article text before matching,
    while the configured words are used exactly as the getters return them.
    """

    def __init__(self, item_monitoring, general_args):
        """Store the filter sources and create a module-named logger.

        Args:
            item_monitoring: Per-item configuration. Must provide
                ``get_title_exclude``, ``get_description_exclude``,
                ``get_title_must_include``, ``get_description_must_include``
                and ``get_title_first_word_exclude``.
            general_args: Global configuration. Must provide
                ``get_title_exclude``, ``get_description_exclude``,
                ``get_title_must_include`` and
                ``get_description_must_include``.
        """
        self._item_monitoring = item_monitoring
        self._general_args = general_args
        self.logger = logging.getLogger(__name__)

    def _has_words(self, text, word_list):
        """Return True if any entry of ``word_list`` is a substring of ``text``."""
        return any(word in text for word in word_list)

    def _title_has_excluded_words(self, article_title):
        """Return True if the title contains any item-level or global excluded word."""
        excluded = (
            self._item_monitoring.get_title_exclude()
            + self._general_args.get_title_exclude()
        )
        return self._has_words(article_title, excluded)

    def _description_has_excluded_words(self, article_description):
        """Return True if the description contains any item-level or global excluded word."""
        excluded = (
            self._item_monitoring.get_description_exclude()
            + self._general_args.get_description_exclude()
        )
        return self._has_words(article_description, excluded)

    def _title_has_required_words(self, article_title):
        """Return True if the title satisfies the must-include filter.

        The filter passes when no must-include words are configured at all,
        or when the title contains at least one of the item-level or global
        must-include words.
        """
        required = (
            self._item_monitoring.get_title_must_include()
            + self._general_args.get_title_must_include()
        )
        # Guard on the combined list: checking only the item-level list would
        # silently drop the global must-include words whenever the item
        # itself configures none.
        return not required or self._has_words(article_title, required)

    def _description_has_required_words(self, article_description):
        """Return True if the description satisfies the must-include filter.

        The filter passes when no must-include words are configured at all,
        or when the description contains at least one of the item-level or
        global must-include words.
        """
        required = (
            self._item_monitoring.get_description_must_include()
            + self._general_args.get_description_must_include()
        )
        # Same combined-list guard as for the title filter.
        return not required or self._has_words(article_description, required)

    def _title_first_word_is_excluded(self, article_title):
        """Return True if the first whitespace-separated word of the title is excluded.

        An empty or whitespace-only title has no first word and is therefore
        never excluded by this filter.
        """
        words = article_title.split()
        if not words:
            # Nothing to compare; indexing words[0] here would raise IndexError.
            return False
        first_word = words[0]
        return any(
            first_word == excluded_word
            for excluded_word in self._item_monitoring.get_title_first_word_exclude()
        )

    def meets_item_conditions(self, article):
        """Return True if ``article`` passes every configured filter.

        Args:
            article: Object providing ``get_title()`` and ``get_description()``
                returning strings.

        Returns:
            True when the article is accepted. Otherwise the rejection is
            logged at DEBUG level and False is returned.
        """
        article_title = article.get_title().lower()
        article_description = article.get_description().lower()

        accepted = (
            self._title_has_required_words(article_title)
            and self._description_has_required_words(article_description)
            and not self._title_has_excluded_words(article_title)
            and not self._description_has_excluded_words(article_description)
            and not self._title_first_word_is_excluded(article_title)
        )
        if accepted:
            return True

        self.logger.debug(f"Excluded article: {article}")
        return False