changeset 21:b6a127ca3727

Add debug output when doing dry-run posts. Fix some URL formatting.
author Ludovic Chabant <ludovic@chabant.com>
date Sat, 19 Jan 2019 17:36:22 -0800
parents a45587268314
children 431cc200d526
files silorider/commands/process.py silorider/format.py silorider/silos/base.py silorider/silos/mastodon.py silorider/silos/twitter.py
diffstat 5 files changed, 38 insertions(+), 26 deletions(-) [+]
line wrap: on
line diff
--- a/silorider/commands/process.py	Sat Jan 19 17:35:10 2019 -0800
+++ b/silorider/commands/process.py	Sat Jan 19 17:36:22 2019 -0800
@@ -39,11 +39,11 @@
 
     def preProcess(self):
         for silo in self.silos:
-            silo.onPostStart()
+            silo.onPostStart(self.ctx)
 
     def postProcess(self):
         for silo in self.silos:
-            silo.onPostEnd()
+            silo.onPostEnd(self.ctx)
 
     def processEntry(self, entry):
         entry_url = entry.get('url')
@@ -74,6 +74,7 @@
                 else:
                     logger.info("Would post entry on %s: %s" %
                                 (silo.name, entry_url))
+                    silo.dryRunPostEntry(entry, postctx)
             else:
                 logger.debug("Skipping already posted entry on %s: %s" %
                              (silo.name, entry_url))
--- a/silorider/format.py	Sat Jan 19 17:35:10 2019 -0800
+++ b/silorider/format.py	Sat Jan 19 17:36:22 2019 -0800
@@ -8,6 +8,8 @@
 def format_entry(entry, limit=None, add_url='auto'):
     url = entry.url
     name = get_best_text(entry)
+    if not name:
+        raise Exception("Can't find best text for entry: %s" % url)
 
     do_add_url = ((add_url is True) or
                   (add_url == 'auto' and not entry.is_micropost))
@@ -36,25 +38,25 @@
 
 
 def get_best_text(entry, *, plain=True, inline_urls=True):
-    text = entry.get('title')
-    if not text:
-        text = entry.get('name')
-        if not text:
-            text = entry.get('content')
+    elem = entry.htmlFind(class_='p-title')
+    if not elem:
+        elem = entry.htmlFind(class_='p-name')
+    if not elem:
+        elem = entry.htmlFind(class_='e-content')
 
-    if text:
+    if elem:
         if not plain:
-            return text
-        return strip_html(text, inline_urls=inline_urls)
+            text = '\n'.join([str(c) for c in elem.contents])
+            return str(text)
+        return strip_html(elem, inline_urls=inline_urls)
 
     return None
 
 
-def strip_html(txt, *, inline_urls=True):
+def strip_html(bs_elem, *, inline_urls=True):
     outtxt = ''
     ctx = _HtmlStripping()
-    soup = bs4.BeautifulSoup(txt, 'lxml' if has_lxml else 'html5lib')
-    for c in soup.children:
+    for c in bs_elem.children:
         outtxt += _do_strip_html(c, ctx)
 
     keys = ['url:%d' % i for i in range(len(ctx.urls))]
@@ -86,13 +88,7 @@
         if len(cnts) == 1:
             href_txt = cnts[0].string
             href_parsed = urllib.parse.urlparse(href)
-            print("Checking:", href_txt, href_parsed.hostname)
-            if href_txt in [
-                    href,
-                    href_parsed.netloc,
-                    '%s://%s' % (href_parsed.scheme, href_parsed.netloc),
-                    '%s://%s%s' % (href_parsed.scheme, href_parsed.netloc,
-                                   href_parsed.path)]:
+            if href_txt in href:
                 return href
 
         a_txt = ''.join([_do_strip_html(c, ctx)
--- a/silorider/silos/base.py	Sat Jan 19 17:35:10 2019 -0800
+++ b/silorider/silos/base.py	Sat Jan 19 17:36:22 2019 -0800
@@ -70,13 +70,16 @@
     def authenticate(self, ctx):
         raise NotImplementedError()
 
-    def onPostStart(self):
+    def onPostStart(self, ctx):
         pass
 
     def postEntry(self, entry, ctx):
         raise NotImplementedError()
 
-    def onPostEnd(self):
+    def dryRunPostEntry(self, entry, ctx):
+        pass
+
+    def onPostEnd(self, ctx):
         pass
 
 
--- a/silorider/silos/mastodon.py	Sat Jan 19 17:35:10 2019 -0800
+++ b/silorider/silos/mastodon.py	Sat Jan 19 17:36:22 2019 -0800
@@ -82,8 +82,9 @@
 
             self.setCacheItem('accesstoken', access_token)
 
-    def onPostStart(self):
-        self._ensureApp()
+    def onPostStart(self, ctx):
+        if not ctx.args.dry_run:
+            self._ensureApp()
 
     def _ensureApp(self):
         if self.client is not None:
@@ -121,6 +122,11 @@
         self.client.status_post(toottxt, media_ids=media_ids,
                                 visibility=visibility)
 
+    def dryRunPostEntry(self, entry, ctx):
+        toottxt = self.formatEntry(entry, limit=500)
+        logger.info("Toot would be:")
+        logger.info(toottxt)
+
     def _media_callback(self, tmpfile, mt):
         with open(tmpfile, 'rb') as tmpfp:
             return self.client.media_post(tmpfp, mt)
--- a/silorider/silos/twitter.py	Sat Jan 19 17:35:10 2019 -0800
+++ b/silorider/silos/twitter.py	Sat Jan 19 17:36:22 2019 -0800
@@ -37,8 +37,9 @@
             access_token = '%s,%s' % (access_key, access_secret)
             self.setCacheItem('accesstoken', access_token)
 
-    def onPostStart(self):
-        self._ensureClient()
+    def onPostStart(self, ctx):
+        if not ctx.args.dry_run:
+            self._ensureClient()
 
     def _ensureClient(self):
         if self.client is not None:
@@ -73,3 +74,8 @@
         logger.debug("Posting tweet: %s" % tweettxt)
         media_urls = entry.get('photo', [], force_list=True)
         self.client.PostUpdate(tweettxt, media=media_urls)
+
+    def dryRunPostEntry(self, entry, ctx):
+        tweettxt = self.formatEntry(entry, limit=280)
+        logger.info("Tweet would be:")
+        logger.info(tweettxt)