Pending changes update

This commit is contained in:
Kifixo
2024-02-28 20:33:44 +01:00
parent 6032b33874
commit f2416ee35a
5 changed files with 84 additions and 39 deletions

View File

@@ -1,12 +1,14 @@
class Article: class Article:
def __init__(self, id, title, description, price, currency, url): def __init__(self, id, title, description, price, currency, location, allows_shipping, url):
self._id = id self._id = id
self._title = title self._title = title
self._description = description self._description = description
self._price = price self._price = price
self._currency = currency self._currency = currency
self._location = location
self._allows_shipping = allows_shipping
self._url = url self._url = url
@classmethod @classmethod
@@ -17,6 +19,8 @@ class Article:
json_data['description'], json_data['description'],
json_data['price'], json_data['price'],
json_data['currency'], json_data['currency'],
json_data['location']['city'],
json_data['shipping']['user_allows_shipping'],
json_data['web_slug'] json_data['web_slug']
) )
@@ -35,8 +39,18 @@ class Article:
def get_currency(self): def get_currency(self):
return self._currency return self._currency
def get_location(self):
    """Return the location value this article was constructed with."""
    # NOTE(review): the JSON-loading hunk in this file passes
    # json_data['location']['city'] for this field, so this is presumably a
    # city name -- confirm against the full loader.
    return self._location
def get_allows_shipping(self):
    """Return the shipping flag this article was constructed with."""
    # NOTE(review): the JSON-loading hunk in this file passes
    # json_data['shipping']['user_allows_shipping'] for this field; its exact
    # type (bool vs. something else) is not visible here -- confirm.
    return self._allows_shipping
def get_url(self): def get_url(self):
return self._url return self._url
def __eq__(self, article2): def __eq__(self, article2):
return self.get_id() == article2.get_id() return self.get_id() == article2.get_id()
def __str__(self):
return f"Article(id={self._id}, title='{self._title}', description='{self._description}', " \
f"price={self._price} {self._currency}, url='{self._url}')"

View File

@@ -3,7 +3,8 @@ import string
class ItemMonitor: class ItemMonitor:
def __init__(self, search_query, latitude, longitude, max_distance, def __init__(self, search_query, latitude, longitude, max_distance,
condition, min_price, max_price, title_exclude, condition, min_price, max_price, title_exclude,
description_exclude, title_must_include, description_must_include): description_exclude, title_must_include, description_must_include,
title_first_word_exclude):
self._search_query = search_query self._search_query = search_query
self._latitude = latitude self._latitude = latitude
self._longitude = longitude self._longitude = longitude
@@ -15,6 +16,7 @@ class ItemMonitor:
self._description_exclude = description_exclude self._description_exclude = description_exclude
self._title_must_include = title_must_include self._title_must_include = title_must_include
self._description_must_include = description_must_include self._description_must_include = description_must_include
self._title_first_word_exclude = title_first_word_exclude
@classmethod @classmethod
def load_from_json(cls, json_data): def load_from_json(cls, json_data):
@@ -29,7 +31,8 @@ class ItemMonitor:
json_data['title_exclude'], json_data['title_exclude'],
json_data['description_exclude'], json_data['description_exclude'],
json_data['title_must_include'], json_data['title_must_include'],
json_data['description_must_include'] json_data['description_must_include'],
json_data['title_first_word_exclude']
) )
def get_search_query(self): def get_search_query(self):
@@ -63,4 +66,7 @@ class ItemMonitor:
return self._title_must_include return self._title_must_include
def get_description_must_include(self): def get_description_must_include(self):
return self._description_must_include return self._description_must_include
def get_title_first_word_exclude(self):
    """Return the first-word exclusion value given to the constructor."""
    # NOTE(review): load_from_json reads this from the
    # 'title_first_word_exclude' key; it is presumably a list of words --
    # confirm against the args.json schema.
    return self._title_first_word_exclude

17
main.py
View File

@@ -1,12 +1,23 @@
import json import json
import threading import threading
import logging import logging
from logging.handlers import RotatingFileHandler
from item_monitor import ItemMonitor from item_monitor import ItemMonitor
from worker import Worker from worker import Worker
logging.basicConfig(level=logging.INFO, # Configure the console logger
format='%(asctime)s - %(levelname)s - %(message)s', console_handler = logging.StreamHandler()
handlers=[logging.FileHandler('main_log.txt'), logging.StreamHandler()]) console_handler.setLevel(logging.INFO)
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
# Configure the file logger
file_handler = RotatingFileHandler('monitor.log', maxBytes=10e6)
file_handler.setLevel(logging.DEBUG)
file_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
# Configure the root logger with both handlers
logging.basicConfig(level=logging.NOTSET,
handlers=[console_handler, file_handler])
def parse_items_to_monitor(): def parse_items_to_monitor():
with open("args.json") as f: with open("args.json") as f:

View File

@@ -6,11 +6,14 @@ import telegram
import re import re
ITEM_TEXT = "*Artículo*: {}\n" \ ITEM_TEXT = "- *Artículo*: {}\n" \
"*Descripción*: {}\n" \ "- *Descripción*: {}\n" \
"*Precio*: {} {}\n" \ "- *Localidad*: {}\n" \
"- *Precio*: {} {}\n" \
"- *Acepta envíos*: {}\n" \
"[Ir al anuncio](https://es.wallapop.com/item/{})" "[Ir al anuncio](https://es.wallapop.com/item/{})"
class TelegramHandler: class TelegramHandler:
def __init__(self): def __init__(self):
token, channel = self.get_config() token, channel = self.get_config()
@@ -37,8 +40,9 @@ class TelegramHandler:
self._loop.run_until_complete(self.send_telegram_article_async(article)) self._loop.run_until_complete(self.send_telegram_article_async(article))
async def send_telegram_article_async(self, article): async def send_telegram_article_async(self, article):
message = ITEM_TEXT.format(article.get_title(), article.get_description(), message = ITEM_TEXT.format(article.get_title(), self.escape_markdown(article.get_description()),
article.get_price(), article.get_currency(), self.escape_markdown(article.get_location()), article.get_price(),
article.get_currency(), article.get_allows_shipping(),
article.get_url()) article.get_url())
escaped_message = self.escape_markdown(message) escaped_message = self.escape_markdown(message)
await self._bot.send_message(self._channel, text=escaped_message, parse_mode="MarkdownV2") await self._bot.send_message(self._channel, text=escaped_message, parse_mode="MarkdownV2")

View File

@@ -6,13 +6,10 @@ from telegram_handler import TelegramHandler
import traceback import traceback
import asyncio import asyncio
REQUEST_SLEEP_TIME = 5 REQUEST_SLEEP_TIME = 10
REQUEST_RETRY_TIME = 3 REQUEST_RETRY_TIME = 3
ERROR_SLEEP_TIME = 10 ERROR_SLEEP_TIME = 10
NOTIFIED_ARTICLES_LIMIT = 300
worker_logger = logging.getLogger(__name__)
worker_logger.setLevel(logging.INFO) # Set the level as needed
worker_logger.addHandler(logging.StreamHandler())
class Worker: class Worker:
def __init__(self, item_to_monitor): def __init__(self, item_to_monitor):
@@ -57,32 +54,44 @@ class Worker:
def _has_words(self, text, word_list): def _has_words(self, text, word_list):
return any(word in text for word in word_list) return any(word in text for word in word_list)
def _title_has_excluded_words(self, article): def _title_has_excluded_words(self, article_title):
return self._has_words(article.get_title(), return self._has_words(article_title, self._item_monitoring.get_title_exclude())
self._item_monitoring.get_title_exclude())
def _description_has_excluded_words(self, article): def _description_has_excluded_words(self, article_description):
return self._has_words(article.get_description(), return self._has_words(article_description, self._item_monitoring.get_description_exclude())
self._item_monitoring.get_description_exclude())
def _title_has_required_words(self, article): def _title_has_required_words(self, article_title):
return not self._item_monitoring.get_title_must_include() \ return not self._item_monitoring.get_title_must_include() \
or self._has_words(article.get_title(), or self._has_words(article_title, self._item_monitoring.get_title_must_include())
self._item_monitoring.get_title_must_include())
def _description_has_required_words(self, article): def _description_has_required_words(self, article_description):
return not self._item_monitoring.get_description_must_include() \ return not self._item_monitoring.get_description_must_include() \
or self._has_words(article.get_description(), or self._has_words(article_description, self._item_monitoring.get_description_must_include())
self._item_monitoring.get_description_must_include())
def _title_first_word_is_excluded(self, article_title):
first_word = article_title.split()[0]
for excluded_word in self._item_monitoring.get_title_first_word_exclude():
if first_word == excluded_word:
return True
return False
def _meets_item_conditions(self, article): def _meets_item_conditions(self, article):
return ( if article in self._notified_articles:
self._title_has_required_words(article) and return False
self._description_has_required_words(article) and
not self._title_has_excluded_words(article) and article_title = article.get_title().lower()
not self._description_has_excluded_words(article) and article_description = article.get_description().lower()
article not in self._notified_articles if (
) self._title_has_required_words(article_title) and
self._description_has_required_words(article_description) and
not self._title_has_excluded_words(article_title) and
not self._description_has_excluded_words(article_description) and
not self._title_first_word_is_excluded(article_title)
):
return True
else:
self.logger.info(f"Excluded article: {article}")
return False
def work(self): def work(self):
exec_times = [] exec_times = []
@@ -94,12 +103,13 @@ class Worker:
if self._meets_item_conditions(article): if self._meets_item_conditions(article):
try: try:
self._telegram_handler.send_telegram_article(article) self._telegram_handler.send_telegram_article(article)
self._notified_articles.insert(0, article)
except Exception as e: except Exception as e:
self.logger.error(f"{self._item_monitoring.get_search_query()} worker crashed: {e}") self.logger.error(f"{self._item_monitoring.get_search_query()} worker crashed: {e}")
self._notified_articles.insert(0, article)
self._notified_articles = self._notified_articles[:NOTIFIED_ARTICLES_LIMIT]
time.sleep(REQUEST_SLEEP_TIME) time.sleep(REQUEST_SLEEP_TIME)
exec_times.append(time.time() - start_time) exec_times.append(time.time() - start_time)
self.logger.info(f"\'{self._item_monitoring.get_search_query()}\' node-> last: {exec_times[-1]}" self.logger.debug(f"\'{self._item_monitoring.get_search_query()}\' node-> last: {exec_times[-1]}"
f" max: {max(exec_times)} avg: {sum(exec_times) / len(exec_times)}") f" max: {max(exec_times)} avg: {sum(exec_times) / len(exec_times)}")
def run(self): def run(self):