feat: default values, general item exclusions, images, and a queueManager that manages multi-worker messaging to Telegram to avoid opening too many connections

Signed-off-by: Omar Sánchez Pizarro <omar.sanchez@pistacero.net>
This commit is contained in:
Omar Sánchez Pizarro
2025-10-10 00:03:44 +02:00
parent 08c1577b2a
commit 0245b603b2
9 changed files with 275 additions and 114 deletions

View File

@@ -0,0 +1,32 @@
class GeneralMonitor:
    """Container for the general title/description filter lists.

    The instance stores five values (word lists when built through
    ``load_from_json``) and exposes each of them through a getter.
    """

    def __init__(self, title_exclude, description_exclude, title_must_include,
                 description_must_include, title_first_word_exclude):
        """Store the five filter values exactly as given."""
        self._title_exclude = title_exclude
        self._description_exclude = description_exclude
        self._title_must_include = title_must_include
        self._description_must_include = description_must_include
        self._title_first_word_exclude = title_first_word_exclude

    @classmethod
    def load_from_json(cls, json_data):
        """Build an instance from a parsed JSON mapping.

        Every key is optional; a missing key falls back to a fresh empty
        list. ``None`` (or any other falsy value) is treated as an empty
        mapping so that an absent configuration section yields an instance
        with all lists empty instead of raising ``AttributeError``.
        """
        json_data = json_data or {}
        return cls(
            json_data.get('title_exclude', []),
            json_data.get('description_exclude', []),
            json_data.get('title_must_include', []),
            json_data.get('description_must_include', []),
            json_data.get('title_first_word_exclude', []),
        )

    def get_title_exclude(self):
        """Return the stored ``title_exclude`` value."""
        return self._title_exclude

    def get_description_exclude(self):
        """Return the stored ``description_exclude`` value."""
        return self._description_exclude

    def get_title_must_include(self):
        """Return the stored ``title_must_include`` value."""
        return self._title_must_include

    def get_description_must_include(self):
        """Return the stored ``description_must_include`` value."""
        return self._description_must_include

    def get_title_first_word_exclude(self):
        """Return the stored ``title_first_word_exclude`` value."""
        return self._title_first_word_exclude

View File

@@ -1,9 +1,10 @@
class ItemMonitor:
def __init__(self, search_query, latitude, longitude, max_distance,
def __init__(self, name,search_query, latitude, longitude, max_distance,
condition, min_price, max_price, title_exclude,
description_exclude, title_must_include, description_must_include,
title_first_word_exclude):
title_first_word_exclude, check_every):
self._name = name
self._search_query = search_query
self._latitude = latitude
self._longitude = longitude
@@ -16,24 +17,33 @@ class ItemMonitor:
self._title_must_include = title_must_include
self._description_must_include = description_must_include
self._title_first_word_exclude = title_first_word_exclude
self._check_every = check_every
@classmethod
def load_from_json(cls, json_data):
    """Build an instance from a parsed JSON mapping.

    ``name`` and ``search_query`` are mandatory. Every other key is
    optional and falls back to the default shown in the call below.

    Raises:
        ValueError: if ``name`` or ``search_query`` is missing.
    """
    # Neither field has a sensible default, so report both the same way
    # instead of letting one of them surface as a bare KeyError.
    for mandatory in ('name', 'search_query'):
        if mandatory not in json_data:
            raise ValueError(f"Missing mandatory field: {mandatory}")
    return cls(
        json_data['name'],
        json_data['search_query'],
        json_data.get('latitude', 0),
        json_data.get('longitude', 0),
        json_data.get('max_distance', 0),
        json_data.get('condition', 'all'),
        json_data.get('min_price', 0),
        json_data.get('max_price', 0),
        json_data.get('title_exclude', []),
        json_data.get('description_exclude', []),
        json_data.get('title_must_include', []),
        json_data.get('description_must_include', []),
        json_data.get('title_first_word_exclude', []),
        json_data.get('check_every', 30),
    )
def get_name(self):
    """Return the stored ``_name`` value."""
    name = self._name
    return name
def get_search_query(self):
    """Return the stored ``_search_query`` value."""
    query = self._search_query
    return query
@@ -68,4 +78,7 @@ class ItemMonitor:
return self._description_must_include
def get_title_first_word_exclude(self):
    """Return the stored ``_title_first_word_exclude`` value."""
    # A second, identical return used to follow this one; it could never
    # run, so only a single return is kept.
    return self._title_first_word_exclude
def get_check_every(self):
    """Return the stored ``_check_every`` value."""
    interval = self._check_every
    return interval

View File

@@ -1,6 +1,8 @@
import datetime
import pandas as pd
class WallapopArticle:
def __init__(self, id, title, description, price, currency, location, allows_shipping, url):
def __init__(self, id, title, description, price, currency, location, allows_shipping, url, images, modified_at):
self._id = id
self._title = title
self._description = description
@@ -9,7 +11,8 @@ class WallapopArticle:
self._location = location
self._allows_shipping = allows_shipping
self._url = url
self._images = images
self._modified_at = modified_at
@classmethod
def load_from_json(cls, json_data):
return cls(
@@ -20,7 +23,9 @@ class WallapopArticle:
json_data['price']['currency'],
json_data['location']['city'],
json_data['shipping']['user_allows_shipping'],
json_data['web_slug']
json_data['web_slug'],
json_data['images'],
json_data['modified_at']
)
def get_id(self):
@@ -30,7 +35,8 @@ class WallapopArticle:
return self._title
def get_description(self, max_length=500):
    """Return the description, shortened when it is too long.

    Descriptions longer than ``max_length`` characters are cut to that
    length and suffixed with ``"..."``; shorter ones are returned unchanged.
    ``max_length`` defaults to 500.
    """
    description = self._description
    # An unconditional return of the full text used to sit ahead of the
    # truncation and made it unreachable; only the truncating path is kept.
    if len(description) > max_length:
        return description[:max_length] + "..."
    return description
def get_price(self):
    """Return the stored ``_price`` value."""
    price = self._price
    return price
@@ -42,14 +48,23 @@ class WallapopArticle:
return self._location
def get_allows_shipping(self):
# Returns the stored ``_allows_shipping`` value as-is.
return self._allows_shipping
# NOTE(review): as written this second return is never reached, and both
# branches of its conditional yield the empty string. Presumably this view
# merges the pre- and post-change lines and the two result strings lost
# their original characters in extraction - confirm against the repository.
return "" if self._allows_shipping else ""
def get_url(self):
    """Return the stored ``_url`` value."""
    url = self._url
    return url
def get_images(self, limit=3):
    """Return the ``medium`` URL of the first ``limit`` images.

    Each stored image entry is read as ``image['urls']['medium']``.
    ``limit`` defaults to 3. A missing image collection (``None`` or
    empty) yields an empty list instead of raising ``TypeError``.
    """
    images = self._images or []
    return [image['urls']['medium'] for image in images[:limit]]
def get_modified_at(self):
    """Return the modification time formatted as "YYYY-MM-DD HH:MM:SS".

    ``_modified_at`` is converted with ``int()`` and read as a timestamp in
    milliseconds; the result is rendered in the machine's local time.
    """
    timestamp_ms = int(self._modified_at)
    # datetime.date carries no time of day, so formatting it with
    # %H:%M:%S always printed 00:00:00. datetime.datetime keeps the time.
    moment = datetime.datetime.fromtimestamp(timestamp_ms / 1000)
    return moment.strftime("%Y-%m-%d %H:%M:%S")
def __eq__(self, article):
    """Compare two articles by the value of ``get_id()``.

    Returns ``NotImplemented`` when ``article`` has no callable ``get_id``,
    so comparing against an unrelated object (``None``, a string, ...)
    evaluates to ``False`` through Python's fallback instead of raising
    ``AttributeError``.
    """
    other_get_id = getattr(article, "get_id", None)
    if not callable(other_get_id):
        return NotImplemented
    return self.get_id() == other_get_id()
def __str__(self):
    """Return a one-line summary: id, title, price, currency, url and the
    raw ``_modified_at`` value."""
    # A leftover closing line without modified_at used to end the
    # expression early; the parts are now joined into one expression.
    return (
        f"Article(id={self._id}, title='{self._title}', "
        f"price={self._price} {self._currency}, url='{self._url}', "
        f"modified_at='{self._modified_at}')"
    )