Fix excluded-words filtering: use the `title_keyword_exclude` config key (was `title_key_word_exclude`)

This commit is contained in:
Daniel Huici Meseguer
2022-02-07 14:49:34 +00:00
parent 1654cac207
commit 39a973f69b
7 changed files with 341 additions and 279 deletions

Binary file not shown.

View File

@@ -1,92 +1,52 @@
[
{
"product_name": "ps4",
"product_name": "placa base",
"latitude": "40.4165",
"longitude": "-3.70256",
"condition": "all",
"min_price": "40",
"max_price": "80",
"title_key_word_exclude" : ["juego", "juegos", "Juego", "mando", "Mando", "DualShock"],
"exclude": []
},
{
"product_name": "ps4",
"latitude": "40.4165",
"longitude": "-3.70256",
"condition": "has_given_it_all",
"min_price": "20",
"max_price": "50",
"title_key_word_exclude" : [],
"exclude": []
"max_price": "75",
"title_keyword_exclude" : [],
"exclude": []
},
{
"product_name": "3ds",
{
"product_name": "ram",
"latitude": "40.4165",
"longitude": "-3.70256",
"condition": "all",
"min_price": "15",
"max_price": "60",
"title_key_word_exclude" : ["juego", "juegos", "Juego", "Juegos", "pokemon", "Pokemon"],
"exclude": []
"min_price": "10",
"max_price": "40",
"title_keyword_exclude" : [],
"exclude": []
},
{
"product_name": "nvidia",
"latitude": "40.4165",
"longitude": "-3.70256",
"condition": "all",
"min_price": "80",
"max_price": "160",
"title_key_word_exclude" : [],
"exclude": []
},
{
"product_name": "gtx",
"latitude": "40.4165",
"longitude": "-3.70256",
"condition": "all",
"min_price": "80",
"max_price": "160",
"title_key_word_exclude" : [],
"exclude": ["1050", "950", "960"]
},
{
{
"product_name": "grafica",
"latitude": "40.4165",
"longitude": "-3.70256",
"condition": "all",
"min_price": "80",
"max_price": "160",
"title_key_word_exclude" : [],
"exclude": ["1050", "950", "960"]
},
{
"product_name": "iphone",
"latitude": "40.4165",
"longitude": "-3.70256",
"condition": "all",
"min_price": "90",
"max_price": "200",
"title_key_word_exclude" : [],
"exclude": ["iphone 6", "iphone 7", "iPhone 7", "iPhone 8", "Iphone 6", "Iphone 7"]
"title_keyword_exclude" : [],
"exclude": ["1050", "960"]
},
{
"product_name": "mac",
{
"product_name": "nvidia",
"latitude": "40.4165",
"longitude": "-3.70256",
"condition": "all",
"min_price": "100",
"min_price": "90",
"max_price": "200",
"title_key_word_exclude" : [],
"exclude": []
"title_keyword_exclude" : [],
"exclude": ["1050", "960"]
},
{
"product_name": "surface",
{
"product_name": "gtx",
"latitude": "40.4165",
"longitude": "-3.70256",
"condition": "all",
"min_price": "100",
"max_price": "300",
"title_key_word_exclude" : [],
"exclude": [""]
"min_price": "90",
"max_price": "200",
"title_keyword_exclude" : [],
"exclude": ["1050", "960"]
}
]

92
back_args.json Normal file
View File

@@ -0,0 +1,92 @@
[
{
"product_name": "ps4",
"latitude": "40.4165",
"longitude": "-3.70256",
"condition": "all",
"min_price": "40",
"max_price": "80",
"title_key_word_exclude" : ["juego", "juegos", "Juego", "mando", "Mando", "DualShock"],
"exclude": []
},
{
"product_name": "ps4",
"latitude": "40.4165",
"longitude": "-3.70256",
"condition": "has_given_it_all",
"min_price": "20",
"max_price": "50",
"title_key_word_exclude" : [],
"exclude": []
},
{
"product_name": "3ds",
"latitude": "40.4165",
"longitude": "-3.70256",
"condition": "all",
"min_price": "15",
"max_price": "60",
"title_key_word_exclude" : ["juego", "juegos", "Juego", "Juegos", "pokemon", "Pokemon"],
"exclude": []
},
{
"product_name": "nvidia",
"latitude": "40.4165",
"longitude": "-3.70256",
"condition": "all",
"min_price": "80",
"max_price": "160",
"title_key_word_exclude" : [],
"exclude": []
},
{
"product_name": "gtx",
"latitude": "40.4165",
"longitude": "-3.70256",
"condition": "all",
"min_price": "80",
"max_price": "160",
"title_key_word_exclude" : [],
"exclude": ["1050", "950", "960"]
},
{
"product_name": "grafica",
"latitude": "40.4165",
"longitude": "-3.70256",
"condition": "all",
"min_price": "80",
"max_price": "160",
"title_key_word_exclude" : [],
"exclude": ["1050", "950", "960"]
},
{
"product_name": "iphone",
"latitude": "40.4165",
"longitude": "-3.70256",
"condition": "all",
"min_price": "90",
"max_price": "200",
"title_key_word_exclude" : [],
"exclude": ["iphone 6", "iphone 7", "iPhone 7", "iPhone 8", "Iphone 6", "Iphone 7"]
},
{
"product_name": "mac",
"latitude": "40.4165",
"longitude": "-3.70256",
"condition": "all",
"min_price": "100",
"max_price": "200",
"title_key_word_exclude" : [],
"exclude": []
},
{
"product_name": "surface",
"latitude": "40.4165",
"longitude": "-3.70256",
"condition": "all",
"min_price": "100",
"max_price": "300",
"title_key_word_exclude" : [],
"exclude": []
}
]

1
error_log.txt Normal file
View File

@@ -0,0 +1 @@
grafica worker crashed. 'title_key_word_exclude'grafica: Trying to parse 9jd5lyeq726k: portatil toshiba satelite pro i3 r50 .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse wzvlmp1dg46l: torre pc acer para piezas sin el disco duro .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse 8j3xn10dmlj9: Samsung Galaxy J5 2015 , SM-J500FN . Dorado .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse pzp1p4nql9z3: Memoria ram kingston hyperx ddr2 4 gb a 1.066 MH .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse 8z8gxdxyol63: MÓVILES HUAWEI P8 LITE .grafica worker crashed. 'title_key_word_exclude'grafica: Trying to parse 9jd5lyeq726k: portatil toshiba satelite pro i3 r50 .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse e6530nvvpgzo: Dos módulos de memoria RAM .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse qjwdo3ly5wzo: Torre AMD Athlon 64 X2 Dual Core 6000+ .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse wzy72wddwvz5: Memoria ram Kingston 3gb DDR2,800mhz y 667mhz .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse 36enl0qm3y6d: Servicio Técnico Apple Valencia .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse x6q90e52oozy: GALAXY J3 (2016) 8GB negro .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse p617rwnr7565: Memoria Ram DDR3 1600 mHz (2 módulos x 4GB) .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse pj9g19o1d06e: Ram ddr4 1gb .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse wzvlmp1dg46l: torre pc acer para piezas sin el disco duro .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse 8j3xn10dmlj9: Samsung Galaxy J5 2015 , SM-J500FN . Dorado .grafica worker crashed. 'title_key_word_exclude'grafica: Trying to parse mznv5n09ok6n: torre ordenador i5 8GB SSD 240GB HDMI .grafica worker crashed. 
'title_key_word_exclude'grafica: Trying to parse nzxyk71xg1j2: ASUS PH-GT1030-O2G GT 1030 2GB GDDR5 .grafica worker crashed. 'title_key_word_exclude'grafica: Trying to parse wzvlmpw7k46l: Ordenador portatil HP Probook (560) .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse e6530nvvpgzo: Dos módulos de memoria RAM .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse qjwdo3ly5wzo: Torre AMD Athlon 64 X2 Dual Core 6000+ .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse wzy72wddwvz5: Memoria ram Kingston 3gb DDR2,800mhz y 667mhz .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse 36enl0qm3y6d: Servicio Técnico Apple Valencia .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse x6q90e52oozy: GALAXY J3 (2016) 8GB negro .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse p617rwnr7565: Memoria Ram DDR3 1600 mHz (2 módulos x 4GB) .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse pj9g19o1d06e: Ram ddr4 1gb .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse wzvlmp1dg46l: torre pc acer para piezas sin el disco duro .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse 8j3xn10dmlj9: Samsung Galaxy J5 2015 , SM-J500FN . Dorado .grafica worker crashed. 'title_key_word_exclude'grafica: Trying to parse nzxyk71xg1j2: ASUS PH-GT1030-O2G GT 1030 2GB GDDR5 .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse e6530nvvpgzo: Dos módulos de memoria RAM .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse qjwdo3ly5wzo: Torre AMD Athlon 64 X2 Dual Core 6000+ .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse wzy72wddwvz5: Memoria ram Kingston 3gb DDR2,800mhz y 667mhz .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse 36enl0qm3y6d: Servicio Técnico Apple Valencia .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse x6q90e52oozy: GALAXY J3 (2016) 8GB negro .ram worker crashed. 
'title_key_word_exclude'ram: Trying to parse p617rwnr7565: Memoria Ram DDR3 1600 mHz (2 módulos x 4GB) .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse pj9g19o1d06e: Ram ddr4 1gb .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse wzvlmp1dg46l: torre pc acer para piezas sin el disco duro .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse 8j3xn10dmlj9: Samsung Galaxy J5 2015 , SM-J500FN . Dorado .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse e6530nvvpgzo: Dos módulos de memoria RAM .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse qjwdo3ly5wzo: Torre AMD Athlon 64 X2 Dual Core 6000+ .ram worker crashed. 'title_key_word_exclude'ram: Trying to parse wzy72wddwvz5: Memoria ram Kingston 3gb DDR2,800mhz y 667mhz .

View File

@@ -7,7 +7,6 @@ from dotenv import load_dotenv
import os
load_dotenv()
import threading
from proxy_requests import ProxyRequests
TELEGRAM_CHANNEL_ID = os.getenv("TELEGRAM_CHANNEL_ID")
@@ -28,17 +27,17 @@ class Worker:
url = url + f"&condition={condition}" # new, as_good_as_new, good, fair, has_given_it_all
while True:
response = ProxyRequests(url)
response = requests.get(url)
try:
response.get()
if response.get_status_code() == 200:
if response.status_code == 200:
break
else:
print(f"\'{product_name}\' -> Wallapop returned status {response.get_status_code() }. Illegal parameters or Wallapop service is down. Retrying...")
except:
except Exception as e:
print("Exception: "+e)
time.sleep(3)
json_data=json.loads(response.get_raw())
json_data=response.json()
return json_data['search_objects']
def first_run(self, args):
@@ -58,21 +57,31 @@ class Worker:
articles = self.request(args['product_name'], 0, args['latitude'], args['longitude'], args['condition'], args['min_price'], args['max_price'])
for article in articles:
if not article['id'] in list:
if not self.has_excluded_words(article['title'].lower(), article['description'].lower(), args['exclude']) and not self.is_title_key_word_excluded(article['title'].lower(), args['title_key_word_exclude']):
try:
bot.send_message(TELEGRAM_CHANNEL_ID, f"*Artículo*: {article['title']}\n"
f"*Descripción*: {article['description']}\n"
f"*Precio*: {article['price']} {article['currency']}\n"
f"[Ir al anuncio](https://es.wallapop.com/item/{article['web_slug']})"
, "MARKDOWN")
except:
bot.send_message(TELEGRAM_CHANNEL_ID, f"*Artículo*: {article['title']}\n"
f"*Descripción*: Descripción inválida\n"
f"*Precio*: {article['price']} {article['currency']}\n"
f"[Ir al anuncio](https://es.wallapop.com/item/{article['web_slug']})"
, "MARKDOWN")
time.sleep(3) # Avoid Telegram flood restriction
list.insert(0, article['id'])
try:
if not self.has_excluded_words(article['title'].lower(), article['description'].lower(), args['exclude']) and not self.is_title_key_word_excluded(article['title'].lower(), args['title_keyword_exclude']):
try:
bot.send_message(TELEGRAM_CHANNEL_ID, f"*Artículo*: {article['title']}\n"
f"*Descripción*: {article['description']}\n"
f"*Precio*: {article['price']} {article['currency']}\n"
f"[Ir al anuncio](https://es.wallapop.com/item/{article['web_slug']})"
, "MARKDOWN")
except:
bot.send_message(TELEGRAM_CHANNEL_ID, f"*Artículo*: {article['title']}\n"
f"*Descripción*: Descripción inválida\n"
f"*Precio*: {article['price']} {article['currency']}\n"
f"[Ir al anuncio](https://es.wallapop.com/item/{article['web_slug']})"
, "MARKDOWN")
time.sleep(1) # Avoid Telegram flood restriction
list.insert(0, article['id'])
except Exception as e:
print("---------- EXCEPTION -----------")
f = open("error_log.txt", "a")
f.write(f"{args['product_name']} worker crashed. {e}")
f.write(f"{args['product_name']}: Trying to parse {article['id']}: {article['title']} .")
f.close()
time.sleep(5)
exec_times.append(time.time() - start_time)
print(f"\'{args['product_name']}\' node-> last: {exec_times[-1]} max: {self.get_max_time(exec_times)} avg: {self.get_average_time(exec_times)}")
@@ -110,11 +119,11 @@ class Worker:
worker = Worker()
list = worker.first_run(args)
while True:
try:
print(f"Wallapop monitor worker started. Checking for new items containing: \'{args['product_name']}\' with given parameters periodically")
worker.work(args, list)
except Exception as e:
print(f"Exception: {e}")
print(f"{args['product_name']} worker crashed. Restarting worker...")
time.sleep(15)
#try:
print(f"Wallapop monitor worker started. Checking for new items containing: \'{args['product_name']}\' with given parameters periodically")
worker.work(args, list)
#except Exception as e:
# print(f"Exception: {e}")
# print(f"{args['product_name']} worker crashed. Restarting worker...")
# time.sleep(10)