Fix exclude words

This commit is contained in:
Daniel Huici Meseguer
2022-02-07 14:49:34 +00:00
parent 1654cac207
commit 39a973f69b
7 changed files with 341 additions and 279 deletions

View File

@@ -1,40 +1,40 @@
# Wallamonitor # Wallamonitor
Periodically checks Wallapop for new articles matching the specified parameters and sends notifications through a Telegram channel. Periodically checks Wallapop for new articles matching the specified parameters and sends notifications through a Telegram channel.
### Setup 🔧 ### Setup 🔧
``` ```
pip3 install -U python-dotenv pip3 install -U python-dotenv
pip3 install python-telegram-bot pip3 install python-telegram-bot
``` ```
You will also need to set the following parameters in your .env file: You will also need to set the following parameters in your .env file:
``` ```
TELEGRAM_CHANNEL_ID=@Your_Telegram_Channel_ID TELEGRAM_CHANNEL_ID=@Your_Telegram_Channel_ID
TELEGRAM_TOKEN=Your Telegram Token TELEGRAM_TOKEN=Your Telegram Token
``` ```
### Usage ### Usage
``` ```
$ python3 alert.py -h $ python3 alert.py -h
usage: alert.py [-h] --name NAME [--latitude LATITUDE] [--longitude LONGITUDE] [--condition CONDITION] usage: alert.py [-h] --name NAME [--latitude LATITUDE] [--longitude LONGITUDE] [--condition CONDITION]
[--min MIN_PRICE] [--max MAX_PRICE] [--min MIN_PRICE] [--max MAX_PRICE]
Arguments Arguments
optional arguments: optional arguments:
-h, --help show this help message and exit -h, --help show this help message and exit
--name NAME Article name --name NAME Article name
--latitude LATITUDE Latitude --latitude LATITUDE Latitude
--longitude LONGITUDE Longitude --longitude LONGITUDE Longitude
--condition CONDITION Item condition: all, new, as_good_as_new, good, fair, has_given_it_all --condition CONDITION Item condition: all, new, as_good_as_new, good, fair, has_given_it_all
--min MIN_PRICE Min price --min MIN_PRICE Min price
--max MAX_PRICE Max price --max MAX_PRICE Max price
``` ```
Example: Example:
``` ```
$ python3 alert.py --name ps5 --condition new --min 400 --max 600 $ python3 alert.py --name ps5 --condition new --min 400 --max 600
``` ```

Binary file not shown.

View File

@@ -1,28 +1,28 @@
# Wallamonitor # Wallamonitor
# 10/02/2021 # 10/02/2021
import time import time
import requests import requests
import json import json
import telegram import telegram
import argparse import argparse
from dotenv import load_dotenv from dotenv import load_dotenv
import os import os
load_dotenv() load_dotenv()
import threading import threading
from worker import Worker from worker import Worker
def parse_json_file(): def parse_json_file():
f = open("args.json") f = open("args.json")
return json.load(f) return json.load(f)
def main(): def main():
args = parse_json_file() args = parse_json_file()
for argument in args: for argument in args:
p = threading.Thread(target=Worker.run, args=(argument, )) p = threading.Thread(target=Worker.run, args=(argument, ))
p.start() p.start()
main() main()

144
args.json
View File

@@ -1,92 +1,52 @@
[ [
{ {
"product_name": "ps4", "product_name": "placa base",
"latitude": "40.4165", "latitude": "40.4165",
"longitude": "-3.70256", "longitude": "-3.70256",
"condition": "all", "condition": "all",
"min_price": "40", "min_price": "20",
"max_price": "80", "max_price": "75",
"title_key_word_exclude" : ["juego", "juegos", "Juego", "mando", "Mando", "DualShock"], "title_keyword_exclude" : [],
"exclude": [] "exclude": []
}, },
{ {
"product_name": "ps4", "product_name": "ram",
"latitude": "40.4165", "latitude": "40.4165",
"longitude": "-3.70256", "longitude": "-3.70256",
"condition": "has_given_it_all", "condition": "all",
"min_price": "20", "min_price": "10",
"max_price": "50", "max_price": "40",
"title_key_word_exclude" : [], "title_keyword_exclude" : [],
"exclude": [] "exclude": []
}, },
{ {
"product_name": "3ds", "product_name": "grafica",
"latitude": "40.4165", "latitude": "40.4165",
"longitude": "-3.70256", "longitude": "-3.70256",
"condition": "all", "condition": "all",
"min_price": "15", "min_price": "90",
"max_price": "60", "max_price": "200",
"title_key_word_exclude" : ["juego", "juegos", "Juego", "Juegos", "pokemon", "Pokemon"], "title_keyword_exclude" : [],
"exclude": [] "exclude": ["1050", "960"]
}, },
{ {
"product_name": "nvidia", "product_name": "nvidia",
"latitude": "40.4165", "latitude": "40.4165",
"longitude": "-3.70256", "longitude": "-3.70256",
"condition": "all", "condition": "all",
"min_price": "80", "min_price": "90",
"max_price": "160", "max_price": "200",
"title_key_word_exclude" : [], "title_keyword_exclude" : [],
"exclude": [] "exclude": ["1050", "960"]
}, },
{ {
"product_name": "gtx", "product_name": "gtx",
"latitude": "40.4165", "latitude": "40.4165",
"longitude": "-3.70256", "longitude": "-3.70256",
"condition": "all", "condition": "all",
"min_price": "80", "min_price": "90",
"max_price": "160", "max_price": "200",
"title_key_word_exclude" : [], "title_keyword_exclude" : [],
"exclude": ["1050", "950", "960"] "exclude": ["1050", "960"]
}, }
{ ]
"product_name": "grafica",
"latitude": "40.4165",
"longitude": "-3.70256",
"condition": "all",
"min_price": "80",
"max_price": "160",
"title_key_word_exclude" : [],
"exclude": ["1050", "950", "960"]
},
{
"product_name": "iphone",
"latitude": "40.4165",
"longitude": "-3.70256",
"condition": "all",
"min_price": "90",
"max_price": "200",
"title_key_word_exclude" : [],
"exclude": ["iphone 6", "iphone 7", "iPhone 7", "iPhone 8", "Iphone 6", "Iphone 7"]
},
{
"product_name": "mac",
"latitude": "40.4165",
"longitude": "-3.70256",
"condition": "all",
"min_price": "100",
"max_price": "200",
"title_key_word_exclude" : [],
"exclude": []
},
{
"product_name": "surface",
"latitude": "40.4165",
"longitude": "-3.70256",
"condition": "all",
"min_price": "100",
"max_price": "300",
"title_key_word_exclude" : [],
"exclude": [""]
}
]

92
back_args.json Normal file
View File

@@ -0,0 +1,92 @@
[
{
"product_name": "ps4",
"latitude": "40.4165",
"longitude": "-3.70256",
"condition": "all",
"min_price": "40",
"max_price": "80",
"title_key_word_exclude" : ["juego", "juegos", "Juego", "mando", "Mando", "DualShock"],
"exclude": []
},
{
"product_name": "ps4",
"latitude": "40.4165",
"longitude": "-3.70256",
"condition": "has_given_it_all",
"min_price": "20",
"max_price": "50",
"title_key_word_exclude" : [],
"exclude": []
},
{
"product_name": "3ds",
"latitude": "40.4165",
"longitude": "-3.70256",
"condition": "all",
"min_price": "15",
"max_price": "60",
"title_key_word_exclude" : ["juego", "juegos", "Juego", "Juegos", "pokemon", "Pokemon"],
"exclude": []
},
{
"product_name": "nvidia",
"latitude": "40.4165",
"longitude": "-3.70256",
"condition": "all",
"min_price": "80",
"max_price": "160",
"title_key_word_exclude" : [],
"exclude": []
},
{
"product_name": "gtx",
"latitude": "40.4165",
"longitude": "-3.70256",
"condition": "all",
"min_price": "80",
"max_price": "160",
"title_key_word_exclude" : [],
"exclude": ["1050", "950", "960"]
},
{
"product_name": "grafica",
"latitude": "40.4165",
"longitude": "-3.70256",
"condition": "all",
"min_price": "80",
"max_price": "160",
"title_key_word_exclude" : [],
"exclude": ["1050", "950", "960"]
},
{
"product_name": "iphone",
"latitude": "40.4165",
"longitude": "-3.70256",
"condition": "all",
"min_price": "90",
"max_price": "200",
"title_key_word_exclude" : [],
"exclude": ["iphone 6", "iphone 7", "iPhone 7", "iPhone 8", "Iphone 6", "Iphone 7"]
},
{
"product_name": "mac",
"latitude": "40.4165",
"longitude": "-3.70256",
"condition": "all",
"min_price": "100",
"max_price": "200",
"title_key_word_exclude" : [],
"exclude": []
},
{
"product_name": "surface",
"latitude": "40.4165",
"longitude": "-3.70256",
"condition": "all",
"min_price": "100",
"max_price": "300",
"title_key_word_exclude" : [],
"exclude": []
}
]

1
error_log.txt Normal file
View File

@@ -0,0 +1 @@
grafica worker crashed. 'title_key_word_exclude'grafica: Trying to parse 9jd5lyeq726k: portatil toshiba satelite pro i3 r50 .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse wzvlmp1dg46l: torre pc acer para piezas sin el disco duro .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse 8j3xn10dmlj9: Samsung Galaxy J5 2015 , SM-J500FN . Dorado .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse pzp1p4nql9z3: Memoria ram kingston hyperx ddr2 4 gb a 1.066 MH .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse 8z8gxdxyol63: MÓVILES HUAWEI P8 LITE .grafica worker crashed. 'title_key_word_exclude'grafica: Trying to parse 9jd5lyeq726k: portatil toshiba satelite pro i3 r50 .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse e6530nvvpgzo: Dos módulos de memoria RAM .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse qjwdo3ly5wzo: Torre AMD Athlon 64 X2 Dual Core 6000+ .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse wzy72wddwvz5: Memoria ram Kingston 3gb DDR2,800mhz y 667mhz .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse 36enl0qm3y6d: Servicio Técnico Apple Valencia .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse x6q90e52oozy: GALAXY J3 (2016) 8GB negro .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse p617rwnr7565: Memoria Ram DDR3 1600 mHz (2 módulos x 4GB) .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse pj9g19o1d06e: Ram ddr4 1gb .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse wzvlmp1dg46l: torre pc acer para piezas sin el disco duro .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse 8j3xn10dmlj9: Samsung Galaxy J5 2015 , SM-J500FN . Dorado .grafica worker crashed. 'title_key_word_exclude'grafica: Trying to parse mznv5n09ok6n: torre ordenador i5 8GB SSD 240GB HDMI .grafica worker crashed. 
'title_key_word_exclude'grafica: Trying to parse nzxyk71xg1j2: ASUS PH-GT1030-O2G GT 1030 2GB GDDR5 .grafica worker crashed. 'title_key_word_exclude'grafica: Trying to parse wzvlmpw7k46l: Ordenador portatil HP Probook (560) .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse e6530nvvpgzo: Dos módulos de memoria RAM .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse qjwdo3ly5wzo: Torre AMD Athlon 64 X2 Dual Core 6000+ .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse wzy72wddwvz5: Memoria ram Kingston 3gb DDR2,800mhz y 667mhz .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse 36enl0qm3y6d: Servicio Técnico Apple Valencia .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse x6q90e52oozy: GALAXY J3 (2016) 8GB negro .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse p617rwnr7565: Memoria Ram DDR3 1600 mHz (2 módulos x 4GB) .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse pj9g19o1d06e: Ram ddr4 1gb .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse wzvlmp1dg46l: torre pc acer para piezas sin el disco duro .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse 8j3xn10dmlj9: Samsung Galaxy J5 2015 , SM-J500FN . Dorado .grafica worker crashed. 'title_key_word_exclude'grafica: Trying to parse nzxyk71xg1j2: ASUS PH-GT1030-O2G GT 1030 2GB GDDR5 .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse e6530nvvpgzo: Dos módulos de memoria RAM .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse qjwdo3ly5wzo: Torre AMD Athlon 64 X2 Dual Core 6000+ .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse wzy72wddwvz5: Memoria ram Kingston 3gb DDR2,800mhz y 667mhz .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse 36enl0qm3y6d: Servicio Técnico Apple Valencia .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse x6q90e52oozy: GALAXY J3 (2016) 8GB negro .ram worker crashed. 
'title_key_word_exclude'ram: Trying to parse p617rwnr7565: Memoria Ram DDR3 1600 mHz (2 módulos x 4GB) .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse pj9g19o1d06e: Ram ddr4 1gb .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse wzvlmp1dg46l: torre pc acer para piezas sin el disco duro .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse 8j3xn10dmlj9: Samsung Galaxy J5 2015 , SM-J500FN . Dorado .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse e6530nvvpgzo: Dos módulos de memoria RAM .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse qjwdo3ly5wzo: Torre AMD Athlon 64 X2 Dual Core 6000+ .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse wzy72wddwvz5: Memoria ram Kingston 3gb DDR2,800mhz y 667mhz .

249
worker.py
View File

@@ -1,120 +1,129 @@
import time import time
import requests import requests
import json import json
import telegram import telegram
import argparse import argparse
from dotenv import load_dotenv from dotenv import load_dotenv
import os import os
load_dotenv() load_dotenv()
import threading import threading
from proxy_requests import ProxyRequests
TELEGRAM_CHANNEL_ID = os.getenv("TELEGRAM_CHANNEL_ID")
TELEGRAM_CHANNEL_ID = os.getenv("TELEGRAM_CHANNEL_ID") TELEGRAM_TOKEN = os.getenv("TELEGRAM_TOKEN")
TELEGRAM_TOKEN = os.getenv("TELEGRAM_TOKEN") SLEEP_TIME=30
SLEEP_TIME=30
class Worker:
class Worker:
def request(self, product_name, n_articles, latitude='40.4165', longitude='-3.70256', condition='all', min_price=0, max_price=10000000):
def request(self, product_name, n_articles, latitude='40.4165', longitude='-3.70256', condition='all', min_price=0, max_price=10000000): url = (f"http://api.wallapop.com/api/v3/general/search?keywords={product_name}"
url = (f"http://api.wallapop.com/api/v3/general/search?keywords={product_name}" f"&order_by=newest&latitude={latitude}"
f"&order_by=newest&latitude={latitude}" f"&longitude={longitude}"
f"&longitude={longitude}" f"&min_sale_price={min_price}"
f"&min_sale_price={min_price}" f"&max_sale_price={max_price}"
f"&max_sale_price={max_price}" f"&filters_source=quick_filters&language=es_ES")
f"&filters_source=quick_filters&language=es_ES")
if condition != "all":
if condition != "all": url = url + f"&condition={condition}" # new, as_good_as_new, good, fair, has_given_it_all
url = url + f"&condition={condition}" # new, as_good_as_new, good, fair, has_given_it_all
while True:
while True: response = requests.get(url)
response = ProxyRequests(url) try:
try: if response.status_code == 200:
response.get() break
if response.get_status_code() == 200: else:
break print(f"\'{product_name}\' -> Wallapop returned status {response.get_status_code() }. Illegal parameters or Wallapop service is down. Retrying...")
else: except Exception as e:
print(f"\'{product_name}\' -> Wallapop returned status {response.get_status_code() }. Illegal parameters or Wallapop service is down. Retrying...") print("Exception: "+e)
except: time.sleep(3)
time.sleep(3)
json_data=response.json()
json_data=json.loads(response.get_raw()) return json_data['search_objects']
return json_data['search_objects']
def first_run(self, args):
def first_run(self, args): list = []
list = [] articles = self.request(args['product_name'], 0, args['latitude'], args['longitude'], args['condition'], args['min_price'], args['max_price'])
articles = self.request(args['product_name'], 0, args['latitude'], args['longitude'], args['condition'], args['min_price'], args['max_price']) for article in articles:
for article in articles: list.insert(0, article['id'])
list.insert(0, article['id'])
return list
return list
def work(self, args, list):
def work(self, args, list): exec_times = []
exec_times = [] bot = telegram.Bot(token = TELEGRAM_TOKEN)
bot = telegram.Bot(token = TELEGRAM_TOKEN)
while True:
while True: start_time = time.time()
start_time = time.time() articles = self.request(args['product_name'], 0, args['latitude'], args['longitude'], args['condition'], args['min_price'], args['max_price'])
articles = self.request(args['product_name'], 0, args['latitude'], args['longitude'], args['condition'], args['min_price'], args['max_price']) for article in articles:
for article in articles: if not article['id'] in list:
if not article['id'] in list: try:
if not self.has_excluded_words(article['title'].lower(), article['description'].lower(), args['exclude']) and not self.is_title_key_word_excluded(article['title'].lower(), args['title_key_word_exclude']): if not self.has_excluded_words(article['title'].lower(), article['description'].lower(), args['exclude']) and not self.is_title_key_word_excluded(article['title'].lower(), args['title_keyword_exclude']):
try: try:
bot.send_message(TELEGRAM_CHANNEL_ID, f"*Artículo*: {article['title']}\n" bot.send_message(TELEGRAM_CHANNEL_ID, f"*Artículo*: {article['title']}\n"
f"*Descripción*: {article['description']}\n" f"*Descripción*: {article['description']}\n"
f"*Precio*: {article['price']} {article['currency']}\n" f"*Precio*: {article['price']} {article['currency']}\n"
f"[Ir al anuncio](https://es.wallapop.com/item/{article['web_slug']})" f"[Ir al anuncio](https://es.wallapop.com/item/{article['web_slug']})"
, "MARKDOWN") , "MARKDOWN")
except: except:
bot.send_message(TELEGRAM_CHANNEL_ID, f"*Artículo*: {article['title']}\n" bot.send_message(TELEGRAM_CHANNEL_ID, f"*Artículo*: {article['title']}\n"
f"*Descripción*: Descripción inválida\n" f"*Descripción*: Descripción inválida\n"
f"*Precio*: {article['price']} {article['currency']}\n" f"*Precio*: {article['price']} {article['currency']}\n"
f"[Ir al anuncio](https://es.wallapop.com/item/{article['web_slug']})" f"[Ir al anuncio](https://es.wallapop.com/item/{article['web_slug']})"
, "MARKDOWN") , "MARKDOWN")
time.sleep(3) # Avoid Telegram flood restriction time.sleep(1) # Avoid Telegram flood restriction
list.insert(0, article['id']) list.insert(0, article['id'])
exec_times.append(time.time() - start_time) except Exception as e:
print(f"\'{args['product_name']}\' node-> last: {exec_times[-1]} max: {self.get_max_time(exec_times)} avg: {self.get_average_time(exec_times)}") print("---------- EXCEPTION -----------")
f = open("error_log.txt", "a")
def has_excluded_words(self, title, description, excluded_words): f.write(f"{args['product_name']} worker crashed. {e}")
for word in excluded_words: f.write(f"{args['product_name']}: Trying to parse {article['id']}: {article['title']} .")
print("EXCLUDER: Checking '" + word + "' for title: '" + title) f.close()
if word in title or word in description:
print("EXCLUDE!")
return True time.sleep(5)
return False exec_times.append(time.time() - start_time)
print(f"\'{args['product_name']}\' node-> last: {exec_times[-1]} max: {self.get_max_time(exec_times)} avg: {self.get_average_time(exec_times)}")
def is_title_key_word_excluded(self, title, excluded_words):
for word in excluded_words: def has_excluded_words(self, title, description, excluded_words):
print("Checking '" + word + "' for title: '" + title) for word in excluded_words:
if word in title.split()[0]: print("EXCLUDER: Checking '" + word + "' for title: '" + title)
return True if word in title or word in description:
return False print("EXCLUDE!")
return True
def get_average_time(self, exec_times): return False
sum = 0
for i in exec_times: def is_title_key_word_excluded(self, title, excluded_words):
sum = sum + i for word in excluded_words:
print("Checking '" + word + "' for title: '" + title)
return sum / len(exec_times) if word in title.split()[0]:
return True
def get_max_time(self, exec_times): return False
largest = 0
for i in exec_times: def get_average_time(self, exec_times):
if i > largest: sum = 0
largest = i for i in exec_times:
return largest sum = sum + i
return sum / len(exec_times)
def run(args):
worker = Worker() def get_max_time(self, exec_times):
list = worker.first_run(args) largest = 0
while True: for i in exec_times:
try: if i > largest:
print(f"Wallapop monitor worker started. Checking for new items containing: \'{args['product_name']}\' with given parameters periodically") largest = i
worker.work(args, list) return largest
except Exception as e:
print(f"Exception: {e}")
print(f"{args['product_name']} worker crashed. Restarting worker...") def run(args):
time.sleep(15) worker = Worker()
list = worker.first_run(args)
while True:
#try:
print(f"Wallapop monitor worker started. Checking for new items containing: \'{args['product_name']}\' with given parameters periodically")
worker.work(args, list)
#except Exception as e:
# print(f"Exception: {e}")
# print(f"{args['product_name']} worker crashed. Restarting worker...")
# time.sleep(10)