Pending changes update
This commit is contained in:
18
article.py
18
article.py
@@ -1,12 +1,14 @@
|
|||||||
|
|
||||||
|
|
||||||
class Article:
|
class Article:
|
||||||
def __init__(self, id, title, description, price, currency, url):
|
def __init__(self, id, title, description, price, currency, location, allows_shipping, url):
|
||||||
self._id = id
|
self._id = id
|
||||||
self._title = title
|
self._title = title
|
||||||
self._description = description
|
self._description = description
|
||||||
self._price = price
|
self._price = price
|
||||||
self._currency = currency
|
self._currency = currency
|
||||||
|
self._location = location
|
||||||
|
self._allows_shipping = allows_shipping
|
||||||
self._url = url
|
self._url = url
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@@ -17,6 +19,8 @@ class Article:
|
|||||||
json_data['description'],
|
json_data['description'],
|
||||||
json_data['price'],
|
json_data['price'],
|
||||||
json_data['currency'],
|
json_data['currency'],
|
||||||
|
json_data['location']['city'],
|
||||||
|
json_data['shipping']['user_allows_shipping'],
|
||||||
json_data['web_slug']
|
json_data['web_slug']
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -35,8 +39,18 @@ class Article:
|
|||||||
def get_currency(self):
|
def get_currency(self):
|
||||||
return self._currency
|
return self._currency
|
||||||
|
|
||||||
|
def get_location(self):
|
||||||
|
return self._location
|
||||||
|
|
||||||
|
def get_allows_shipping(self):
|
||||||
|
return self._allows_shipping
|
||||||
|
|
||||||
def get_url(self):
|
def get_url(self):
|
||||||
return self._url
|
return self._url
|
||||||
|
|
||||||
def __eq__(self, article2):
|
def __eq__(self, article2):
|
||||||
return self.get_id() == article2.get_id()
|
return self.get_id() == article2.get_id()
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return f"Article(id={self._id}, title='{self._title}', description='{self._description}', " \
|
||||||
|
f"price={self._price} {self._currency}, url='{self._url}')"
|
||||||
@@ -3,7 +3,8 @@ import string
|
|||||||
class ItemMonitor:
|
class ItemMonitor:
|
||||||
def __init__(self, search_query, latitude, longitude, max_distance,
|
def __init__(self, search_query, latitude, longitude, max_distance,
|
||||||
condition, min_price, max_price, title_exclude,
|
condition, min_price, max_price, title_exclude,
|
||||||
description_exclude, title_must_include, description_must_include):
|
description_exclude, title_must_include, description_must_include,
|
||||||
|
title_first_word_exclude):
|
||||||
self._search_query = search_query
|
self._search_query = search_query
|
||||||
self._latitude = latitude
|
self._latitude = latitude
|
||||||
self._longitude = longitude
|
self._longitude = longitude
|
||||||
@@ -15,6 +16,7 @@ class ItemMonitor:
|
|||||||
self._description_exclude = description_exclude
|
self._description_exclude = description_exclude
|
||||||
self._title_must_include = title_must_include
|
self._title_must_include = title_must_include
|
||||||
self._description_must_include = description_must_include
|
self._description_must_include = description_must_include
|
||||||
|
self._title_first_word_exclude = title_first_word_exclude
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def load_from_json(cls, json_data):
|
def load_from_json(cls, json_data):
|
||||||
@@ -29,7 +31,8 @@ class ItemMonitor:
|
|||||||
json_data['title_exclude'],
|
json_data['title_exclude'],
|
||||||
json_data['description_exclude'],
|
json_data['description_exclude'],
|
||||||
json_data['title_must_include'],
|
json_data['title_must_include'],
|
||||||
json_data['description_must_include']
|
json_data['description_must_include'],
|
||||||
|
json_data['title_first_word_exclude']
|
||||||
)
|
)
|
||||||
|
|
||||||
def get_search_query(self):
|
def get_search_query(self):
|
||||||
@@ -63,4 +66,7 @@ class ItemMonitor:
|
|||||||
return self._title_must_include
|
return self._title_must_include
|
||||||
|
|
||||||
def get_description_must_include(self):
|
def get_description_must_include(self):
|
||||||
return self._description_must_include
|
return self._description_must_include
|
||||||
|
|
||||||
|
def get_title_first_word_exclude(self):
|
||||||
|
return self._title_first_word_exclude
|
||||||
17
main.py
17
main.py
@@ -1,12 +1,23 @@
|
|||||||
import json
|
import json
|
||||||
import threading
|
import threading
|
||||||
import logging
|
import logging
|
||||||
|
from logging.handlers import RotatingFileHandler
|
||||||
from item_monitor import ItemMonitor
|
from item_monitor import ItemMonitor
|
||||||
from worker import Worker
|
from worker import Worker
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO,
|
# Configure the console logger
|
||||||
format='%(asctime)s - %(levelname)s - %(message)s',
|
console_handler = logging.StreamHandler()
|
||||||
handlers=[logging.FileHandler('main_log.txt'), logging.StreamHandler()])
|
console_handler.setLevel(logging.INFO)
|
||||||
|
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
|
||||||
|
|
||||||
|
# Configure the file logger
|
||||||
|
file_handler = RotatingFileHandler('monitor.log', maxBytes=10e6)
|
||||||
|
file_handler.setLevel(logging.DEBUG)
|
||||||
|
file_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
|
||||||
|
|
||||||
|
# Configure the root logger with both handlers
|
||||||
|
logging.basicConfig(level=logging.NOTSET,
|
||||||
|
handlers=[console_handler, file_handler])
|
||||||
|
|
||||||
def parse_items_to_monitor():
|
def parse_items_to_monitor():
|
||||||
with open("args.json") as f:
|
with open("args.json") as f:
|
||||||
|
|||||||
@@ -6,11 +6,14 @@ import telegram
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
|
|
||||||
ITEM_TEXT = "*Artículo*: {}\n" \
|
ITEM_TEXT = "- *Artículo*: {}\n" \
|
||||||
"*Descripción*: {}\n" \
|
"- *Descripción*: {}\n" \
|
||||||
"*Precio*: {} {}\n" \
|
"- *Localidad*: {}\n" \
|
||||||
|
"- *Precio*: {} {}\n" \
|
||||||
|
"- *Acepta envíos*: {}\n" \
|
||||||
"[Ir al anuncio](https://es.wallapop.com/item/{})"
|
"[Ir al anuncio](https://es.wallapop.com/item/{})"
|
||||||
|
|
||||||
|
|
||||||
class TelegramHandler:
|
class TelegramHandler:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
token, channel = self.get_config()
|
token, channel = self.get_config()
|
||||||
@@ -37,8 +40,9 @@ class TelegramHandler:
|
|||||||
self._loop.run_until_complete(self.send_telegram_article_async(article))
|
self._loop.run_until_complete(self.send_telegram_article_async(article))
|
||||||
|
|
||||||
async def send_telegram_article_async(self, article):
|
async def send_telegram_article_async(self, article):
|
||||||
message = ITEM_TEXT.format(article.get_title(), article.get_description(),
|
message = ITEM_TEXT.format(article.get_title(), self.escape_markdown(article.get_description()),
|
||||||
article.get_price(), article.get_currency(),
|
self.escape_markdown(article.get_location()), article.get_price(),
|
||||||
|
article.get_currency(), article.get_allows_shipping(),
|
||||||
article.get_url())
|
article.get_url())
|
||||||
escaped_message = self.escape_markdown(message)
|
escaped_message = self.escape_markdown(message)
|
||||||
await self._bot.send_message(self._channel, text=escaped_message, parse_mode="MarkdownV2")
|
await self._bot.send_message(self._channel, text=escaped_message, parse_mode="MarkdownV2")
|
||||||
62
worker.py
62
worker.py
@@ -6,13 +6,10 @@ from telegram_handler import TelegramHandler
|
|||||||
import traceback
|
import traceback
|
||||||
import asyncio
|
import asyncio
|
||||||
|
|
||||||
REQUEST_SLEEP_TIME = 5
|
REQUEST_SLEEP_TIME = 10
|
||||||
REQUEST_RETRY_TIME = 3
|
REQUEST_RETRY_TIME = 3
|
||||||
ERROR_SLEEP_TIME = 10
|
ERROR_SLEEP_TIME = 10
|
||||||
|
NOTIFIED_ARTICLES_LIMIT = 300
|
||||||
worker_logger = logging.getLogger(__name__)
|
|
||||||
worker_logger.setLevel(logging.INFO) # Set the level as needed
|
|
||||||
worker_logger.addHandler(logging.StreamHandler())
|
|
||||||
|
|
||||||
class Worker:
|
class Worker:
|
||||||
def __init__(self, item_to_monitor):
|
def __init__(self, item_to_monitor):
|
||||||
@@ -57,32 +54,44 @@ class Worker:
|
|||||||
def _has_words(self, text, word_list):
|
def _has_words(self, text, word_list):
|
||||||
return any(word in text for word in word_list)
|
return any(word in text for word in word_list)
|
||||||
|
|
||||||
def _title_has_excluded_words(self, article):
|
def _title_has_excluded_words(self, article_title):
|
||||||
return self._has_words(article.get_title(),
|
return self._has_words(article_title, self._item_monitoring.get_title_exclude())
|
||||||
self._item_monitoring.get_title_exclude())
|
|
||||||
|
|
||||||
def _description_has_excluded_words(self, article):
|
def _description_has_excluded_words(self, article_description):
|
||||||
return self._has_words(article.get_description(),
|
return self._has_words(article_description, self._item_monitoring.get_description_exclude())
|
||||||
self._item_monitoring.get_description_exclude())
|
|
||||||
|
|
||||||
def _title_has_required_words(self, article):
|
def _title_has_required_words(self, article_title):
|
||||||
return not self._item_monitoring.get_title_must_include() \
|
return not self._item_monitoring.get_title_must_include() \
|
||||||
or self._has_words(article.get_title(),
|
or self._has_words(article_title, self._item_monitoring.get_title_must_include())
|
||||||
self._item_monitoring.get_title_must_include())
|
|
||||||
|
|
||||||
def _description_has_required_words(self, article):
|
def _description_has_required_words(self, article_description):
|
||||||
return not self._item_monitoring.get_description_must_include() \
|
return not self._item_monitoring.get_description_must_include() \
|
||||||
or self._has_words(article.get_description(),
|
or self._has_words(article_description, self._item_monitoring.get_description_must_include())
|
||||||
self._item_monitoring.get_description_must_include())
|
|
||||||
|
def _title_first_word_is_excluded(self, article_title):
|
||||||
|
first_word = article_title.split()[0]
|
||||||
|
for excluded_word in self._item_monitoring.get_title_first_word_exclude():
|
||||||
|
if first_word == excluded_word:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
def _meets_item_conditions(self, article):
|
def _meets_item_conditions(self, article):
|
||||||
return (
|
if article in self._notified_articles:
|
||||||
self._title_has_required_words(article) and
|
return False
|
||||||
self._description_has_required_words(article) and
|
|
||||||
not self._title_has_excluded_words(article) and
|
article_title = article.get_title().lower()
|
||||||
not self._description_has_excluded_words(article) and
|
article_description = article.get_description().lower()
|
||||||
article not in self._notified_articles
|
if (
|
||||||
)
|
self._title_has_required_words(article_title) and
|
||||||
|
self._description_has_required_words(article_description) and
|
||||||
|
not self._title_has_excluded_words(article_title) and
|
||||||
|
not self._description_has_excluded_words(article_description) and
|
||||||
|
not self._title_first_word_is_excluded(article_title)
|
||||||
|
):
|
||||||
|
return True
|
||||||
|
else:
|
||||||
|
self.logger.info(f"Excluded article: {article}")
|
||||||
|
return False
|
||||||
|
|
||||||
def work(self):
|
def work(self):
|
||||||
exec_times = []
|
exec_times = []
|
||||||
@@ -94,12 +103,13 @@ class Worker:
|
|||||||
if self._meets_item_conditions(article):
|
if self._meets_item_conditions(article):
|
||||||
try:
|
try:
|
||||||
self._telegram_handler.send_telegram_article(article)
|
self._telegram_handler.send_telegram_article(article)
|
||||||
self._notified_articles.insert(0, article)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"{self._item_monitoring.get_search_query()} worker crashed: {e}")
|
self.logger.error(f"{self._item_monitoring.get_search_query()} worker crashed: {e}")
|
||||||
|
self._notified_articles.insert(0, article)
|
||||||
|
self._notified_articles = self._notified_articles[:NOTIFIED_ARTICLES_LIMIT]
|
||||||
time.sleep(REQUEST_SLEEP_TIME)
|
time.sleep(REQUEST_SLEEP_TIME)
|
||||||
exec_times.append(time.time() - start_time)
|
exec_times.append(time.time() - start_time)
|
||||||
self.logger.info(f"\'{self._item_monitoring.get_search_query()}\' node-> last: {exec_times[-1]}"
|
self.logger.debug(f"\'{self._item_monitoring.get_search_query()}\' node-> last: {exec_times[-1]}"
|
||||||
f" max: {max(exec_times)} avg: {sum(exec_times) / len(exec_times)}")
|
f" max: {max(exec_times)} avg: {sum(exec_times) / len(exec_times)}")
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
|
|||||||
Reference in New Issue
Block a user