feat: add default values, general item excludes, images, and a queueManager that manages multi-worker messaging to Telegram to prevent too many connections

Signed-off-by: Omar Sánchez Pizarro <omar.sanchez@pistacero.net>
This commit is contained in:
Omar Sánchez Pizarro
2025-10-10 00:03:44 +02:00
parent 08c1577b2a
commit 0245b603b2
9 changed files with 275 additions and 114 deletions

View File

@@ -2,21 +2,19 @@ import time
import requests
import logging
from datalayer.wallapop_article import WallapopArticle
from managers.telegram_manager import TelegramManager
import traceback
# NOTE(review): this text is a flattened commit diff, so removed and added
# lines appear together. REQUEST_RETRY_TIME and ERROR_SLEEP_TIME are each
# listed twice below (presumably old value first, new value second — confirm
# against the repository).
# Passed to time.sleep() in work(), so the value is in seconds.
REQUEST_SLEEP_TIME = 15
REQUEST_RETRY_TIME = 3
ERROR_SLEEP_TIME = 30
# Used in work() to truncate the worker's own notified-articles list.
NOTIFIED_ARTICLES_LIMIT = 300
REQUEST_RETRY_TIME = 5
# Passed to time.sleep() in run() after work() raises, so the value is in seconds.
ERROR_SLEEP_TIME = 60
# Browser-like User-Agent string; where it is used is not visible in this excerpt.
USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36'
class Worker:
# NOTE(review): two versions of the constructor are interleaved here because
# this text is a commit diff. The one-argument signature goes with the
# _notified_articles / TelegramManager lines; the three-argument signature
# goes with the _general_args / _queue_manager lines (presumably the newer
# version — confirm against the repository).
def __init__(self, item_to_monitor):
def __init__(self, item_to_monitor, general_args, queue_manager):
self.logger = logging.getLogger(__name__)
self._item_monitoring = item_to_monitor
# One-argument variant: the worker keeps its own list, seeded from an initial
# _request_articles() call, and creates its own TelegramManager.
self._notified_articles = self._request_articles()
self.telegram_manager = TelegramManager()
self._general_args = general_args
self._queue_manager = queue_manager
# Three-argument variant: the initial _request_articles() result is handed to
# the queue manager as already notified — presumably so listings that exist
# at startup are not announced; verify in the queue manager.
self._queue_manager.add_to_notified_articles(self._request_articles())
def _create_url(self):
url = (
@@ -24,15 +22,24 @@ class Worker:
f"?source=search_box"
f"&keywords={self._item_monitoring._search_query}"
f"&order_by=newest"
f"&latitude={self._item_monitoring._latitude}"
f"&longitude={self._item_monitoring._longitude}"
f"&min_sale_price={self._item_monitoring._min_price}"
f"&max_sale_price={self._item_monitoring._max_price}"
f"&language=es_ES"
)
if self._item_monitoring._max_distance != "0":
url += f"&distance={self._item_monitoring._max_distance}"
# Only include latitude and longitude if both are not 0
if self._item_monitoring._latitude != 0 and self._item_monitoring._longitude != 0:
url += (
f"&latitude={self._item_monitoring._latitude}"
f"&longitude={self._item_monitoring._longitude}"
)
if self._item_monitoring._min_price != 0:
url += f"&min_sale_price={self._item_monitoring._min_price}"
if self._item_monitoring._max_price != 0:
url += f"&max_sale_price={self._item_monitoring._max_price}"
if self._item_monitoring._max_distance != 0:
url += f"&distance_in_km={self._item_monitoring._max_distance}"
if self._item_monitoring.get_condition() != "all":
url += f"&condition={self._item_monitoring.get_condition()}" # new, as_good_as_new, good, fair, has_given_it_all
@@ -70,18 +77,18 @@ class Worker:
return any(word in text for word in word_list)
# Returns whatever self._has_words reports for the title against the
# title-exclude word list.
# NOTE(review): two return lines are shown because this is a diff. The first
# checks only the item's title excludes; the second also appends
# self._general_args.get_title_exclude(). The `+` assumes both getters return
# lists — confirm.
def _title_has_excluded_words(self, article_title):
return self._has_words(article_title, self._item_monitoring.get_title_exclude())
return self._has_words(article_title, self._item_monitoring.get_title_exclude() + self._general_args.get_title_exclude())
# Returns whatever self._has_words reports for the description against the
# description-exclude word list.
# NOTE(review): two return lines are shown because this is a diff. The first
# checks only the item's description excludes; the second also appends
# self._general_args.get_description_exclude(). The `+` assumes both getters
# return lists — confirm.
def _description_has_excluded_words(self, article_description):
return self._has_words(article_description, self._item_monitoring.get_description_exclude())
return self._has_words(article_description, self._item_monitoring.get_description_exclude() + self._general_args.get_description_exclude())
# Returns True when the item has no title must-include words configured, or
# when self._has_words reports a match for the title against the must-include
# list. Two alternative `or ...` continuation lines are shown because this is
# a diff; the second appends self._general_args.get_title_must_include().
# NOTE(review): in the variant that appends the general list, the leading
# `not ...get_title_must_include()` test still looks only at the item's list,
# so when that list is empty the general must-include words are never checked.
# Confirm whether that is intended.
def _title_has_required_words(self, article_title):
return not self._item_monitoring.get_title_must_include() \
or self._has_words(article_title, self._item_monitoring.get_title_must_include())
or self._has_words(article_title, self._item_monitoring.get_title_must_include() + self._general_args.get_title_must_include())
# Returns True when the item has no description must-include words configured,
# or when self._has_words reports a match for the description against the
# must-include list. Two alternative `or ...` continuation lines are shown
# because this is a diff; the second appends
# self._general_args.get_description_must_include().
# NOTE(review): in the variant that appends the general list, the leading
# `not ...get_description_must_include()` test still looks only at the item's
# list, so when that list is empty the general must-include words are never
# checked. Confirm whether that is intended.
def _description_has_required_words(self, article_description):
return not self._item_monitoring.get_description_must_include() \
or self._has_words(article_description, self._item_monitoring.get_description_must_include())
or self._has_words(article_description, self._item_monitoring.get_description_must_include() + self._general_args.get_description_must_include())
def _title_first_word_is_excluded(self, article_title):
first_word = article_title.split()[0]
@@ -91,9 +98,6 @@ class Worker:
return False
def _meets_item_conditions(self, article):
if article in self._notified_articles:
return False
article_title = article.get_title().lower()
article_description = article.get_description().lower()
if (
@@ -105,7 +109,7 @@ class Worker:
):
return True
else:
self.logger.info(f"Excluded article: {article}")
self.logger.debug(f"Excluded article: {article}")
return False
def work(self):
@@ -114,20 +118,16 @@ class Worker:
while True:
start_time = time.time()
articles = self._request_articles()
new_articles = 0
for article in articles:
if self._meets_item_conditions(article):
try:
self.telegram_manager.send_telegram_article(article)
new_articles += 1
self._queue_manager.add_to_queue(article, self._item_monitoring.get_name())
except Exception as e:
self.logger.error(f"{self._item_monitoring.get_search_query()} worker crashed: {e}")
self._notified_articles.insert(0, article)
self._notified_articles = self._notified_articles[:NOTIFIED_ARTICLES_LIMIT]
time.sleep(REQUEST_SLEEP_TIME)
exec_times.append(time.time() - start_time)
self.logger.error(f"{self._item_monitoring.get_name()} worker crashed: {e}")
time.sleep(self._item_monitoring.get_check_every())
exec_times.append(time.time() - start_time - self._item_monitoring.get_check_every())
self.logger.info(
f"Worker '{self._item_monitoring.get_search_query()}': {new_articles} new articles found. "
f"Worker '{self._item_monitoring.get_name()}', "
f"Execution time stats - Last: {exec_times[-1]:.2f}s, Max: {max(exec_times):.2f}s, "
f"Average: {sum(exec_times) / len(exec_times):.2f}s."
)
@@ -135,9 +135,9 @@ class Worker:
# Runs work() in an endless loop. If work() raises, the formatted traceback
# and a restart message are logged, the call sleeps ERROR_SLEEP_TIME seconds,
# and the loop starts work() again.
# NOTE(review): the paired logger lines are the before/after of this diff;
# they differ only in calling get_search_query() versus get_name() on the
# monitored item.
def run(self):
while True:
try:
self.logger.info(f"Wallapop monitor worker started - {self._item_monitoring.get_search_query()}")
self.logger.info(f"Wallapop monitor worker started - {self._item_monitoring.get_name()}")
self.work()
except Exception as e:
# Log the full traceback before the short restart notice.
self.logger.error(f"{''.join(traceback.format_exception(None, e, e.__traceback__))}")
self.logger.error(f"{self._item_monitoring.get_search_query()} worker crashed. Restarting worker...")
self.logger.error(f"{self._item_monitoring.get_name()} worker crashed. Restarting worker...")
time.sleep(ERROR_SLEEP_TIME)