Implement WorkerConditions class for article filtering and refactor Worker to utilize it
This commit is contained in:
43
managers/worker_conditions.py
Normal file
43
managers/worker_conditions.py
Normal file
@@ -0,0 +1,43 @@
|
||||
class WorkerConditions:
    """Decide whether an article passes the configured word filters.

    Filters come from two sources that are combined: the per-item settings
    (``item_monitoring``) and the global settings (``general_args``). Both
    objects must expose the ``get_*`` accessors used below; their results are
    concatenated with ``+``, so they are expected to be lists of words.
    """

    def __init__(self, item_monitoring, general_args):
        """Store the filter sources and set up the logger.

        Args:
            item_monitoring: per-item filter settings.
            general_args: global filter settings.
        """
        # Imported locally because this module has no import section; this
        # keeps the class self-contained.
        import logging

        self._item_monitoring = item_monitoring
        self._general_args = general_args
        # meets_item_conditions() logs through self.logger, so it must exist.
        self.logger = logging.getLogger(__name__)

    def _has_words(self, text, word_list):
        """Return True if any entry of ``word_list`` is a substring of ``text``."""
        return any(word in text for word in word_list)

    def _title_has_excluded_words(self, article_title):
        """Return True if the title contains any item-level or general excluded word."""
        excluded = (self._item_monitoring.get_title_exclude()
                    + self._general_args.get_title_exclude())
        return self._has_words(article_title, excluded)

    def _description_has_excluded_words(self, article_description):
        """Return True if the description contains any item-level or general excluded word."""
        excluded = (self._item_monitoring.get_description_exclude()
                    + self._general_args.get_description_exclude())
        return self._has_words(article_description, excluded)

    def _title_has_required_words(self, article_title):
        """Return True if no title words are required, or at least one is present.

        The "nothing required" shortcut looks at the combined item + general
        list, so globally required words still apply to items that define
        none of their own.
        """
        required = (self._item_monitoring.get_title_must_include()
                    + self._general_args.get_title_must_include())
        return not required or self._has_words(article_title, required)

    def _description_has_required_words(self, article_description):
        """Return True if no description words are required, or at least one is present.

        Uses the combined item + general list, like the title check.
        """
        required = (self._item_monitoring.get_description_must_include()
                    + self._general_args.get_description_must_include())
        return not required or self._has_words(article_description, required)

    def _title_first_word_is_excluded(self, article_title):
        """Return True if the first whitespace-separated word of the title is excluded.

        An empty or whitespace-only title has no first word and is therefore
        never excluded by this rule.
        """
        words = article_title.split()
        if not words:
            return False
        first_word = words[0]
        return any(
            first_word == excluded_word
            for excluded_word in self._item_monitoring.get_title_first_word_exclude()
        )

    def meets_item_conditions(self, article):
        """Return True if ``article`` passes every filter, False otherwise.

        Title and description are lower-cased before matching; a missing
        (``None``) title or description is treated as an empty string.
        Rejected articles are logged at DEBUG level.

        Args:
            article: object exposing ``get_title()`` and ``get_description()``.
        """
        article_title = (article.get_title() or "").lower()
        article_description = (article.get_description() or "").lower()

        accepted = (
            self._title_has_required_words(article_title)
            and self._description_has_required_words(article_description)
            and not self._title_has_excluded_words(article_title)
            and not self._description_has_excluded_words(article_description)
            and not self._title_first_word_is_excluded(article_title)
        )
        if not accepted:
            self.logger.debug("Excluded article: %s", article)
        return accepted
|
||||
Reference in New Issue
Block a user