diff --git a/article.py b/article.py index 70ee670..27f53a0 100644 --- a/article.py +++ b/article.py @@ -1,12 +1,14 @@ class Article: - def __init__(self, id, title, description, price, currency, url): + def __init__(self, id, title, description, price, currency, location, allows_shipping, url): self._id = id self._title = title self._description = description self._price = price self._currency = currency + self._location = location + self._allows_shipping = allows_shipping self._url = url @classmethod @@ -17,6 +19,8 @@ class Article: json_data['description'], json_data['price'], json_data['currency'], + json_data['location']['city'], + json_data['shipping']['user_allows_shipping'], json_data['web_slug'] ) @@ -35,8 +39,18 @@ class Article: def get_currency(self): return self._currency + def get_location(self): + return self._location + + def get_allows_shipping(self): + return self._allows_shipping + def get_url(self): return self._url def __eq__(self, article2): - return self.get_id() == article2.get_id() \ No newline at end of file + return self.get_id() == article2.get_id() + + def __str__(self): + return f"Article(id={self._id}, title='{self._title}', description='{self._description}', " \ + f"price={self._price} {self._currency}, url='{self._url}')" \ No newline at end of file diff --git a/item_monitor.py b/item_monitor.py index 6d93506..5ddb563 100644 --- a/item_monitor.py +++ b/item_monitor.py @@ -3,7 +3,8 @@ import string class ItemMonitor: def __init__(self, search_query, latitude, longitude, max_distance, condition, min_price, max_price, title_exclude, - description_exclude, title_must_include, description_must_include): + description_exclude, title_must_include, description_must_include, + title_first_word_exclude): self._search_query = search_query self._latitude = latitude self._longitude = longitude @@ -15,6 +16,7 @@ class ItemMonitor: self._description_exclude = description_exclude self._title_must_include = title_must_include self._description_must_include = description_must_include + self._title_first_word_exclude = title_first_word_exclude @classmethod def load_from_json(cls, json_data): @@ -29,7 +31,8 @@ class ItemMonitor: json_data['title_exclude'], json_data['description_exclude'], json_data['title_must_include'], - json_data['description_must_include'] + json_data['description_must_include'], + json_data['title_first_word_exclude'] ) def get_search_query(self): @@ -63,4 +66,7 @@ class ItemMonitor: return self._title_must_include def get_description_must_include(self): - return self._description_must_include \ No newline at end of file + return self._description_must_include + + def get_title_first_word_exclude(self): + return self._title_first_word_exclude \ No newline at end of file diff --git a/main.py b/main.py index 90bf9b5..24eeef0 100644 --- a/main.py +++ b/main.py @@ -1,12 +1,23 @@ import json import threading import logging +from logging.handlers import RotatingFileHandler from item_monitor import ItemMonitor from worker import Worker -logging.basicConfig(level=logging.INFO, - format='%(asctime)s - %(levelname)s - %(message)s', - handlers=[logging.FileHandler('main_log.txt'), logging.StreamHandler()]) +# Configure the console logger +console_handler = logging.StreamHandler() +console_handler.setLevel(logging.INFO) +console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')) + +# Configure the file logger +file_handler = RotatingFileHandler('monitor.log', maxBytes=10e6) +file_handler.setLevel(logging.DEBUG) +file_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')) + +# Configure the root logger with both handlers +logging.basicConfig(level=logging.NOTSET, + handlers=[console_handler, file_handler]) def parse_items_to_monitor(): with open("args.json") as f: diff --git a/telegram_handler.py b/telegram_handler.py index a64bbd7..5143a4a 100644 --- a/telegram_handler.py +++ b/telegram_handler.py @@ -6,11 +6,14 @@ import telegram import re -ITEM_TEXT = "*Artículo*: {}\n" \ - "*Descripción*: {}\n" \ - "*Precio*: {} {}\n" \ +ITEM_TEXT = "- *Artículo*: {}\n" \ + "- *Descripción*: {}\n" \ + "- *Localidad*: {}\n" \ + "- *Precio*: {} {}\n" \ + "- *Acepta envíos*: {}\n" \ "[Ir al anuncio](https://es.wallapop.com/item/{})" + class TelegramHandler: def __init__(self): token, channel = self.get_config() @@ -37,8 +40,9 @@ class TelegramHandler: self._loop.run_until_complete(self.send_telegram_article_async(article)) async def send_telegram_article_async(self, article): - message = ITEM_TEXT.format(article.get_title(), article.get_description(), - article.get_price(), article.get_currency(), + message = ITEM_TEXT.format(article.get_title(), self.escape_markdown(article.get_description()), + self.escape_markdown(article.get_location()), article.get_price(), + article.get_currency(), article.get_allows_shipping(), article.get_url()) escaped_message = self.escape_markdown(message) await self._bot.send_message(self._channel, text=escaped_message, parse_mode="MarkdownV2") \ No newline at end of file diff --git a/worker.py b/worker.py index 834259d..10014d1 100644 --- a/worker.py +++ b/worker.py @@ -6,13 +6,10 @@ from telegram_handler import TelegramHandler import traceback import asyncio -REQUEST_SLEEP_TIME = 5 +REQUEST_SLEEP_TIME = 10 REQUEST_RETRY_TIME = 3 ERROR_SLEEP_TIME = 10 - -worker_logger = logging.getLogger(__name__) -worker_logger.setLevel(logging.INFO) # Set the level as needed -worker_logger.addHandler(logging.StreamHandler()) +NOTIFIED_ARTICLES_LIMIT = 300 class Worker: def __init__(self, item_to_monitor): @@ -57,32 +54,44 @@ class Worker: def _has_words(self, text, word_list): return any(word in text for word in word_list) - def _title_has_excluded_words(self, article): - return self._has_words(article.get_title(), - self._item_monitoring.get_title_exclude()) + def _title_has_excluded_words(self, article_title): + return self._has_words(article_title, self._item_monitoring.get_title_exclude()) - def _description_has_excluded_words(self, article): - return self._has_words(article.get_description(), - self._item_monitoring.get_description_exclude()) + def _description_has_excluded_words(self, article_description): + return self._has_words(article_description, self._item_monitoring.get_description_exclude()) - def _title_has_required_words(self, article): + def _title_has_required_words(self, article_title): return not self._item_monitoring.get_title_must_include() \ - or self._has_words(article.get_title(), - self._item_monitoring.get_title_must_include()) + or self._has_words(article_title, self._item_monitoring.get_title_must_include()) - def _description_has_required_words(self, article): + def _description_has_required_words(self, article_description): return not self._item_monitoring.get_description_must_include() \ - or self._has_words(article.get_description(), - self._item_monitoring.get_description_must_include()) + or self._has_words(article_description, self._item_monitoring.get_description_must_include()) + + def _title_first_word_is_excluded(self, article_title): + first_word = article_title.split()[0] + for excluded_word in self._item_monitoring.get_title_first_word_exclude(): + if first_word == excluded_word: + return True + return False def _meets_item_conditions(self, article): - return ( - self._title_has_required_words(article) and - self._description_has_required_words(article) and - not self._title_has_excluded_words(article) and - not self._description_has_excluded_words(article) and - article not in self._notified_articles - ) + if article in self._notified_articles: + return False + + article_title = article.get_title().lower() + article_description = article.get_description().lower() + if ( + self._title_has_required_words(article_title) and + self._description_has_required_words(article_description) and + not self._title_has_excluded_words(article_title) and + not self._description_has_excluded_words(article_description) and + not self._title_first_word_is_excluded(article_title) + ): + return True + else: + self.logger.info(f"Excluded article: {article}") + return False def work(self): exec_times = [] @@ -94,12 +103,13 @@ class Worker: if self._meets_item_conditions(article): try: self._telegram_handler.send_telegram_article(article) - self._notified_articles.insert(0, article) except Exception as e: self.logger.error(f"{self._item_monitoring.get_search_query()} worker crashed: {e}") + self._notified_articles.insert(0, article) + self._notified_articles = self._notified_articles[:NOTIFIED_ARTICLES_LIMIT] time.sleep(REQUEST_SLEEP_TIME) exec_times.append(time.time() - start_time) - self.logger.info(f"\'{self._item_monitoring.get_search_query()}\' node-> last: {exec_times[-1]}" + self.logger.debug(f"\'{self._item_monitoring.get_search_query()}\' node-> last: {exec_times[-1]}" f" max: {max(exec_times)} avg: {sum(exec_times) / len(exec_times)}") def run(self):