74 lines
2.3 KiB
Python
74 lines
2.3 KiB
Python
from __future__ import annotations
|
|
|
|
import logging
|
|
import sys
|
|
import time
|
|
from typing import Optional
|
|
|
|
import requests
|
|
|
|
from .redis_queue import RedisConfigQueue
|
|
from .runner import FlowRunner
|
|
from .settings import Settings
|
|
from .storage import MongoRepository
|
|
from .variables import VariableService
|
|
from .xml_parser import XMLSiteParser
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class TemplateCrawlerApp:
    """Worker that turns queued XML locations into crawl runs.

    Each iteration fetches one XML location from the configuration queue,
    downloads the document over HTTP, parses it with ``XMLSiteParser`` and
    passes the parsed site to ``FlowRunner.run_site``.
    """

    # Seconds to pause after an unexpected failure before polling again, so a
    # persistent fault (e.g. an unreachable backend) does not become a hot loop.
    ERROR_BACKOFF_SECONDS = 3
    # Timeout, in seconds, passed to every XML download request.
    HTTP_TIMEOUT_SECONDS = 30

    def __init__(self, settings: Optional[Settings] = None) -> None:
        """Wire up the queue, storage, variable service, parser and runner.

        Args:
            settings: Application configuration. When omitted (or falsy),
                it is loaded with ``Settings.from_env()``.
        """
        self.settings = settings or Settings.from_env()
        self.queue = RedisConfigQueue(
            self.settings.redis_url,
            self.settings.redis_list_key,
            timeout=self.settings.redis_block_timeout,
        )
        self.mongo = MongoRepository(
            self.settings.mongo_uri,
            self.settings.mongo_database,
        )
        self.variable_service = VariableService(self.settings.variable_service_url)
        self.parser = XMLSiteParser()
        self.runner = FlowRunner(
            storage=self.mongo,
            variable_service=self.variable_service,
        )
        # One shared session so successive downloads can reuse connections.
        self.http = requests.Session()

    def close(self) -> None:
        """Close the HTTP session owned by this application."""
        self.http.close()

    def run_forever(self) -> None:
        """Process queue entries until the process is interrupted.

        Unexpected errors in a single iteration are logged and followed by a
        short back-off; they never terminate the loop. ``KeyboardInterrupt``
        ends the loop cleanly, and the HTTP session is closed on the way out.
        """
        logger.info("Template crawler started. Waiting for XML configurations...")
        try:
            while True:
                try:
                    self._iterate()
                except Exception:  # noqa: BLE001
                    logger.exception("Unexpected error during iteration.")
                    time.sleep(self.ERROR_BACKOFF_SECONDS)
        except KeyboardInterrupt:
            # Handled around the whole loop (not just ``_iterate``) so that an
            # interrupt arriving during the back-off sleep also shuts down
            # gracefully instead of escaping with a traceback.
            logger.info("Received interrupt; shutting down.")
        finally:
            self.close()

    def _iterate(self) -> None:
        """Handle at most one queue entry.

        Returns without doing anything when the queue yields a falsy value;
        otherwise downloads, parses and runs the referenced site definition.
        """
        xml_location = self.queue.fetch()
        if not xml_location:
            return
        logger.info("Fetched XML location: %s", xml_location)
        xml_payload = self._load_xml(xml_location)
        site = self.parser.parse(xml_payload)
        self.runner.run_site(site)

    def _load_xml(self, location: str) -> str:
        """Download the XML document at ``location`` and return its text.

        Args:
            location: URL of the XML document.

        Returns:
            The response body decoded to ``str``.

        Raises:
            requests.HTTPError: If the server answers with an error status
                (raised by ``raise_for_status``).
        """
        logger.debug("Downloading XML from %s", location)
        response = self.http.get(location, timeout=self.HTTP_TIMEOUT_SECONDS)
        response.raise_for_status()
        # NOTE(review): ``response.text`` is decoded with the charset requests
        # derives from the HTTP headers; confirm that this matches the
        # encoding declared inside the XML documents being served.
        return response.text
|
|
|
|
|
|
def configure_logging() -> None:
    """Set up root logging: INFO level, timestamped lines, written to stdout.

    Delegates to ``logging.basicConfig``, which leaves the root logger
    untouched if it already has handlers configured.
    """
    line_format = "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
    logging.basicConfig(stream=sys.stdout, format=line_format, level=logging.INFO)
|