"""Application entry point.

Bootstraps the monitor: materializes config files from their samples,
configures logging, parses the worker definitions and fans out one
worker thread per monitored item.
"""

import json
import logging
import os
import shutil
from concurrent.futures import ThreadPoolExecutor
from logging.handlers import RotatingFileHandler

import yaml

from datalayer.general_monitor import GeneralMonitor
from datalayer.item_monitor import ItemMonitor
from managers.article_cache import create_article_cache
from managers.queue_manager import QueueManager
from managers.worker import Worker


def initialize_config_files():
    """Create missing config files by copying their ``.sample`` counterparts.

    Looks for ``config.yaml`` and ``workers.json`` next to this file; any
    that is absent is seeded from its sample and the user is told to edit it.

    Raises:
        FileNotFoundError: if neither a config file nor its sample exists.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    config_files = [
        ('config.yaml', 'config.sample.yaml'),
        ('workers.json', 'workers.sample.json'),
    ]
    for config_file, sample_file in config_files:
        config_path = os.path.join(base_dir, config_file)
        sample_path = os.path.join(base_dir, sample_file)
        if os.path.exists(config_path):
            continue
        if os.path.exists(sample_path):
            # copy2 preserves metadata of the sample file.
            shutil.copy2(sample_path, config_path)
            print(f"✓ Archivo '{config_file}' creado desde '{sample_file}'")
            print(f" Por favor, edita '{config_file}' con tu configuración antes de continuar.")
        else:
            raise FileNotFoundError(
                f"No se encontró ni '{config_file}' ni '{sample_file}'. "
                f"Por favor, crea uno de estos archivos."
            )


def configure_logger():
    """Configure root logging: INFO to console, DEBUG to a rotating file."""
    # httpx is chatty at INFO; keep only warnings and above from it.
    logging.getLogger("httpx").setLevel(logging.WARNING)

    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)
    console_handler.setFormatter(
        logging.Formatter('%(levelname)s [%(asctime)s] %(name)s - %(message)s')
    )

    # Inside Docker use /app/logs if it exists; otherwise the working dir.
    if os.path.isdir('/app/logs'):
        log_path = '/app/logs/monitor.log'
    else:
        log_path = 'monitor.log'

    # Ensure the parent directory exists (exist_ok makes this race-safe;
    # no prior os.path.exists check is needed).
    log_dir = os.path.dirname(log_path)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    # BUG FIX: the previous RotatingFileHandler(log_path, maxBytes=10e6)
    # left backupCount at 0, and with backupCount == 0 rollover never
    # occurs (per the stdlib docs), so the log grew without bound.
    # Keep up to 5 rotated files of ~10 MB each; maxBytes is now an int.
    file_handler = RotatingFileHandler(
        log_path, maxBytes=10_000_000, backupCount=5
    )
    file_handler.setLevel(logging.DEBUG)
    file_handler.setFormatter(
        logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    )

    # Root logger passes everything through; handlers do the filtering.
    logging.basicConfig(
        level=logging.NOTSET, handlers=[console_handler, file_handler]
    )


def parse_items_to_monitor():
    """Load ``workers.json`` and build the monitor definitions.

    Returns:
        tuple: ``(items, general_args)`` where ``items`` is a list of
        ``ItemMonitor`` and ``general_args`` a ``GeneralMonitor``.

    Raises:
        ValueError: if the mandatory ``items`` field is absent.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    workers_path = os.path.join(base_dir, "workers.json")
    with open(workers_path, encoding="utf-8") as f:
        args = json.load(f)
    if 'items' not in args:
        raise ValueError("Missing mandatory field: items")
    items = [ItemMonitor.load_from_json(item) for item in args['items']]
    general_args = GeneralMonitor.load_from_json(args['general'])
    return items, general_args


def load_cache_config():
    """Read the ``cache`` section of ``config.yaml``.

    Returns:
        dict: kwargs for :func:`create_article_cache`, always containing a
        ``cache_type`` key. Falls back to an in-memory cache (limit 300) on
        any error or unknown cache type.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    config_path = os.path.join(base_dir, "config.yaml")
    logger = logging.getLogger(__name__)
    try:
        with open(config_path, 'r', encoding='utf-8') as f:
            # An empty YAML file yields None; treat it as an empty mapping
            # instead of letting .get() raise AttributeError.
            config = yaml.safe_load(f) or {}
        cache_config = config.get('cache', {})
        cache_type = cache_config.get('type', 'memory')
        if cache_type == 'memory':
            memory_config = cache_config.get('memory', {})
            return {
                'cache_type': 'memory',
                'limit': memory_config.get('limit', 300),
            }
        elif cache_type == 'redis':
            redis_config = cache_config.get('redis', {})
            return {
                'cache_type': 'redis',
                'redis_host': redis_config.get('host', 'localhost'),
                'redis_port': redis_config.get('port', 6379),
                'redis_db': redis_config.get('db', 0),
                'redis_password': redis_config.get('password'),
            }
        else:
            logger.warning(f"Tipo de cache desconocido: {cache_type}, usando 'memory'")
            return {'cache_type': 'memory', 'limit': 300}
    except Exception as e:
        # Deliberate best-effort: a missing/broken config.yaml must not
        # prevent startup — default to the in-memory cache and warn.
        logger.warning(f"Error cargando configuración de cache, usando valores por defecto (memory): {e}")
        return {'cache_type': 'memory', 'limit': 300}


if __name__ == "__main__":
    initialize_config_files()
    configure_logger()
    items, general_args = parse_items_to_monitor()

    # Build the shared article cache from config.yaml.
    cache_config = load_cache_config()
    cache_type = cache_config['cache_type']
    # Everything except 'cache_type' is forwarded as factory kwargs.
    cache_kwargs = {k: v for k, v in cache_config.items() if k != 'cache_type'}
    article_cache = create_article_cache(cache_type, **cache_kwargs)

    # One queue manager shared by all workers.
    queue_manager = QueueManager(article_cache)

    # One thread per monitored item; workers are I/O-bound so a large
    # pool is acceptable here.
    with ThreadPoolExecutor(max_workers=1000) as executor:
        for item in items:
            worker = Worker(item, general_args, queue_manager)
            executor.submit(worker.run)