Mercurial > silorider
view silorider/commands/process.py @ 15:cb1dc5c864d8 0.2.0
Specify URLs in the config file instead of in the CLI.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Sun, 29 Jul 2018 23:59:39 -0700 |
parents | a1b7a459326a |
children | a921cc2306bc |
line wrap: on
line source
import logging
from .utils import get_named_silos, get_named_urls
from ..silos.base import SiloPostingContext
from ..parse import parse_url


logger = logging.getLogger(__name__)

# Prefixes of the option names recognised in the `[filter]` config section.
# Whatever follows the prefix is the name of the entry property to test.
_INCLUDE_PREFIX = 'include_'
_EXCLUDE_PREFIX = 'exclude_'


def process_urls(ctx):
    """Run a `Processor` over each URL selected for this invocation.

    The URLs are whatever `get_named_urls` returns for `ctx.config` and
    `ctx.args.url`; each one is processed in turn.
    """
    for url in get_named_urls(ctx.config, ctx.args.url):
        logger.info("Processing %s", url)
        p = Processor(ctx, url)
        p.process()


class Processor:
    """Parses one URL and posts its entries to the selected silos."""

    def __init__(self, ctx, url):
        """Store the context and URL, and resolve the silos to post to.

        The silos are whatever `get_named_silos` returns for `ctx.silos`
        and `ctx.args.silo`.
        """
        self.ctx = ctx
        self.url = url
        self._silos = get_named_silos(ctx.silos, ctx.args.silo)

    @property
    def config(self):
        """The configuration object held by the context."""
        return self.ctx.config

    @property
    def silos(self):
        """The silos resolved when this processor was created."""
        return self._silos

    def process(self):
        """Parse the URL and process every entry of the resulting feed.

        `preProcess` runs first. Once it has completed, `postProcess` is
        guaranteed to run, even if parsing or posting raises, so that every
        silo that received `onPostStart` also receives `onPostEnd`.
        """
        self.preProcess()
        try:
            feed = parse_url(self.url)
            for entry in feed.entries:
                self.processEntry(entry)
        finally:
            self.postProcess()

    def preProcess(self):
        """Call `onPostStart` on every silo."""
        for silo in self.silos:
            silo.onPostStart()

    def postProcess(self):
        """Call `onPostEnd` on every silo."""
        for silo in self.silos:
            silo.onPostEnd()

    def processEntry(self, entry):
        """Post one entry to each silo that has not already received it.

        Entries rejected by `isEntryFiltered`, and entries without a `url`,
        are skipped. With `ctx.args.no_cache` set, the cache is not consulted
        and the entry is posted regardless. With `ctx.args.dry_run` set,
        nothing is posted or recorded; the intended post is only logged.
        """
        if self.isEntryFiltered(entry):
            logger.debug("Entry is filtered out: %s", entry.best_name)
            return

        entry_url = entry.get('url')
        if not entry_url:
            logger.warning("Found entry without a URL.")
            return

        postctx = SiloPostingContext(self.ctx)
        no_cache = self.ctx.args.no_cache
        logger.debug("Processing entry: %s", entry.best_name)
        for silo in self.silos:
            # `no_cache` is tested first so the cache lookup is skipped
            # entirely when the cache is being bypassed.
            if no_cache or not self.ctx.cache.wasPosted(silo.name, entry_url):
                if not self.ctx.args.dry_run:
                    silo.postEntry(entry, postctx)
                    self.ctx.cache.addPost(silo.name, entry_url)
                else:
                    logger.info("Would post entry on %s: %s",
                                silo.name, entry.best_name)
            else:
                logger.debug("Skipping already posted entry on %s: %s",
                             silo.name, entry.best_name)

    def isEntryFiltered(self, entry):
        """Return True if the `[filter]` config section rejects the entry.

        Each `include_<prop>` option requires at least one of its
        comma-separated values to be contained in the entry's `<prop>`
        value; each `exclude_<prop>` option rejects the entry when any of
        its values is contained in it. Containment uses the `in` operator
        on whatever `entry.get(<prop>)` returns.

        An entry lacking the property cannot satisfy an include rule (so it
        is filtered out) and cannot trigger an exclude rule. Returns False
        when there is no `[filter]` section.
        """
        if not self.config.has_section('filter'):
            return False

        for name, value in self.config.items('filter'):
            if name.startswith(_INCLUDE_PREFIX):
                propvalue = entry.get(name[len(_INCLUDE_PREFIX):])
                # A missing property would make `in` raise TypeError; treat
                # it as "no include value matched".
                if propvalue is None:
                    return True
                if not any(inc_val in propvalue
                           for inc_val in value.split(',')):
                    return True

            elif name.startswith(_EXCLUDE_PREFIX):
                propvalue = entry.get(name[len(_EXCLUDE_PREFIX):])
                # A missing property has nothing that could be excluded.
                if propvalue is None:
                    continue
                if any(excl_val in propvalue
                       for excl_val in value.split(',')):
                    return True

        return False