Mercurial > silorider
view silorider/commands/process.py @ 60:b7da3d97ea99
Add profile URL handlers
Silos register these handlers so that everybody knows if a hyperlink is
a mention to another user on a particular social network. If any handler
matches, silos not related to that social media will skip that link.
It's possible than in rare cases we want that link everywhere, but so
far I haven't needed it, compared to all the times I didn't want these
links.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Sat, 28 Oct 2023 11:57:04 -0700 |
parents | 805c7d768caa |
children | 4caf6720d1dd |
line wrap: on
line source
import logging import dateparser from .utils import get_named_silos, get_named_urls from ..silos.base import SiloPostingContext, upload_silo_media from ..parse import parse_url logger = logging.getLogger(__name__) def process_urls(ctx): for name, url in get_named_urls(ctx.config, ctx.args.url): logger.info("Processing %s" % url) p = Processor(ctx, name, url) p.process() class Processor: def __init__(self, ctx, name, url): self.ctx = ctx self.name = name self.url = url self._silos = get_named_silos(ctx.silos, ctx.args.silo) @property def config(self): return self.ctx.config @property def silos(self): return self._silos def process(self): self.preProcess() # Get all silos to return a profile URL handler. profile_url_handlers = {} for silo in self.ctx.silos: handler = silo.getProfileUrlHandler() if handler: profile_url_handlers[silo.SILO_TYPE] = handler postctx = SiloPostingContext(self.ctx, profile_url_handlers) feed = parse_url(self.url, self.name, self.config) for entry in feed.entries: self.processEntry(postctx, entry) self.postProcess() def preProcess(self): # Pre-parse the "since" and "until" dates/times. if self.ctx.args.since: self.ctx.args.since = dateparser.parse(self.ctx.args.since) if self.ctx.args.until: self.ctx.args.until = dateparser.parse(self.ctx.args.until) # Go over the silos needed for this command (i.e. potentially # filtered by passing `-s`) and call their `onPostStart`. for silo in self.silos: silo.onPostStart(self.ctx) def postProcess(self): for silo in self.silos: silo.onPostEnd(self.ctx) def processEntry(self, postctx, entry): entry_url = entry.get('url') if not entry_url: logger.warning("Found entry without a URL: %s" % repr(entry._mf_entry)) return if self.isEntryFiltered(entry): logger.debug("Entry is filtered out: %s" % entry_url) return no_cache = self.ctx.args.no_cache only_since = self.ctx.args.since only_until = self.ctx.args.until logger.debug("Processing entry: %s" % entry_url) for silo in self.silos: if only_since or only_until: entry_dt = entry.get('published') if not entry_dt: logger.warning( "Skipping entry with no published date/time " "for %s: %s" % (silo.name, entry_url)) continue # Strip entry datetime's time-zone information if we # don't have a time-zone info from the command line. if ((only_since and not only_since.tzinfo) or (only_until and not only_until.tzinfo)): entry_dt = entry_dt.replace(tzinfo=None) if only_since and entry_dt < only_since: logger.info( "Skipping entry older than specified date/time " "for %s: %s" % (silo.name, entry_url)) continue if only_until and entry_dt > only_until: logger.info( "Skipping entry newer than specified date/time " "for %s: %s" % (silo.name, entry_url)) continue if not no_cache and self.ctx.cache.wasPosted(silo.name, entry_url): logger.debug("Skipping already posted entry on %s: %s" % (silo.name, entry_url)) continue entry_card = silo.getEntryCard(entry, postctx) if not entry_card: logger.error("Can't find any content to use for entry: %s" % entry_url) continue media_callback = silo.mediaCallback if self.ctx.args.dry_run: media_callback = silo.dryRunMediaCallback media_ids = upload_silo_media(entry_card, 'photo', media_callback) if not self.ctx.args.dry_run: logger.debug("Posting to '%s': %s" % (silo.name, entry_url)) try: did_post = silo.postEntry(entry_card, media_ids, postctx) except Exception as ex: did_post = False logger.error("Error posting: %s" % entry_url) logger.error(ex) if self.ctx.args.verbose: raise if did_post is True or did_post is None: self.ctx.cache.addPost(silo.name, entry_url) else: logger.info("Would post to '%s': %s" % (silo.name, entry_url)) silo.dryRunPostEntry(entry_card, media_ids, postctx) def isEntryFiltered(self, entry): if not self.config.has_section('filter'): return False items = self.config.items('filter') for name, value in items: if name.startswith('include_'): propname = name[8:] propvalue = entry.get(propname) for inc_val in value.split(','): if inc_val in propvalue: break else: return True elif name.startswith('exclude_'): propname = name[8:] propvalue = entry.get(propname) for excl_val in value.split(','): if excl_val in propvalue: return True return False