Mercurial > silorider
changeset 21:b6a127ca3727
Add debug output for dry-run posts. Fix some URL formatting.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Sat, 19 Jan 2019 17:36:22 -0800 |
parents | a45587268314 |
children | 431cc200d526 |
files | silorider/commands/process.py silorider/format.py silorider/silos/base.py silorider/silos/mastodon.py silorider/silos/twitter.py |
diffstat | 5 files changed, 38 insertions(+), 26 deletions(-) [+] |
line wrap: on
line diff
--- a/silorider/commands/process.py Sat Jan 19 17:35:10 2019 -0800 +++ b/silorider/commands/process.py Sat Jan 19 17:36:22 2019 -0800 @@ -39,11 +39,11 @@ def preProcess(self): for silo in self.silos: - silo.onPostStart() + silo.onPostStart(self.ctx) def postProcess(self): for silo in self.silos: - silo.onPostEnd() + silo.onPostEnd(self.ctx) def processEntry(self, entry): entry_url = entry.get('url') @@ -74,6 +74,7 @@ else: logger.info("Would post entry on %s: %s" % (silo.name, entry_url)) + silo.dryRunPostEntry(entry, postctx) else: logger.debug("Skipping already posted entry on %s: %s" % (silo.name, entry_url))
--- a/silorider/format.py Sat Jan 19 17:35:10 2019 -0800 +++ b/silorider/format.py Sat Jan 19 17:36:22 2019 -0800 @@ -8,6 +8,8 @@ def format_entry(entry, limit=None, add_url='auto'): url = entry.url name = get_best_text(entry) + if not name: + raise Exception("Can't find best text for entry: %s" % url) do_add_url = ((add_url is True) or (add_url == 'auto' and not entry.is_micropost)) @@ -36,25 +38,25 @@ def get_best_text(entry, *, plain=True, inline_urls=True): - text = entry.get('title') - if not text: - text = entry.get('name') - if not text: - text = entry.get('content') + elem = entry.htmlFind(class_='p-title') + if not elem: + elem = entry.htmlFind(class_='p-name') + if not elem: + elem = entry.htmlFind(class_='e-content') - if text: + if elem: if not plain: - return text - return strip_html(text, inline_urls=inline_urls) + text = '\n'.join([str(c) for c in elem.contents]) + return str(text) + return strip_html(elem, inline_urls=inline_urls) return None -def strip_html(txt, *, inline_urls=True): +def strip_html(bs_elem, *, inline_urls=True): outtxt = '' ctx = _HtmlStripping() - soup = bs4.BeautifulSoup(txt, 'lxml' if has_lxml else 'html5lib') - for c in soup.children: + for c in bs_elem.children: outtxt += _do_strip_html(c, ctx) keys = ['url:%d' % i for i in range(len(ctx.urls))] @@ -86,13 +88,7 @@ if len(cnts) == 1: href_txt = cnts[0].string href_parsed = urllib.parse.urlparse(href) - print("Checking:", href_txt, href_parsed.hostname) - if href_txt in [ - href, - href_parsed.netloc, - '%s://%s' % (href_parsed.scheme, href_parsed.netloc), - '%s://%s%s' % (href_parsed.scheme, href_parsed.netloc, - href_parsed.path)]: + if href_txt in href: return href a_txt = ''.join([_do_strip_html(c, ctx)
--- a/silorider/silos/base.py Sat Jan 19 17:35:10 2019 -0800 +++ b/silorider/silos/base.py Sat Jan 19 17:36:22 2019 -0800 @@ -70,13 +70,16 @@ def authenticate(self, ctx): raise NotImplementedError() - def onPostStart(self): + def onPostStart(self, ctx): pass def postEntry(self, entry, ctx): raise NotImplementedError() - def onPostEnd(self): + def dryRunPostEntry(self, entry, ctx): + pass + + def onPostEnd(self, ctx): pass
--- a/silorider/silos/mastodon.py Sat Jan 19 17:35:10 2019 -0800 +++ b/silorider/silos/mastodon.py Sat Jan 19 17:36:22 2019 -0800 @@ -82,8 +82,9 @@ self.setCacheItem('accesstoken', access_token) - def onPostStart(self): - self._ensureApp() + def onPostStart(self, ctx): + if not ctx.args.dry_run: + self._ensureApp() def _ensureApp(self): if self.client is not None: @@ -121,6 +122,11 @@ self.client.status_post(toottxt, media_ids=media_ids, visibility=visibility) + def dryRunPostEntry(self, entry, ctx): + toottxt = self.formatEntry(entry, limit=500) + logger.info("Toot would be:") + logger.info(toottxt) + def _media_callback(self, tmpfile, mt): with open(tmpfile, 'rb') as tmpfp: return self.client.media_post(tmpfp, mt)
--- a/silorider/silos/twitter.py Sat Jan 19 17:35:10 2019 -0800 +++ b/silorider/silos/twitter.py Sat Jan 19 17:36:22 2019 -0800 @@ -37,8 +37,9 @@ access_token = '%s,%s' % (access_key, access_secret) self.setCacheItem('accesstoken', access_token) - def onPostStart(self): - self._ensureClient() + def onPostStart(self, ctx): + if not ctx.args.dry_run: + self._ensureClient() def _ensureClient(self): if self.client is not None: @@ -73,3 +74,8 @@ logger.debug("Posting tweet: %s" % tweettxt) media_urls = entry.get('photo', [], force_list=True) self.client.PostUpdate(tweettxt, media=media_urls) + + def dryRunPostEntry(self, entry, ctx): + tweettxt = self.formatEntry(entry, limit=280) + logger.info("Tweet would be:") + logger.info(tweettxt)