changeset 35:7965adc14569

Handle alt-attributes for images. This lets us upload media with a description text on Mastodon.
author Ludovic Chabant <ludovic@chabant.com>
date Wed, 10 May 2023 16:11:11 -0700
parents 8c513e43673d
children 8e8541ef85b7
files silorider/parse.py silorider/silos/base.py silorider/silos/mastodon.py silorider/silos/twitter.py tests/test_silos_mastodon.py
diffstat 5 files changed, 64 insertions(+), 12 deletions(-) [+]
line wrap: on
line diff
--- a/silorider/parse.py	Wed May 10 16:10:35 2023 -0700
+++ b/silorider/parse.py	Wed May 10 16:11:11 2023 -0700
@@ -44,7 +44,8 @@
         params = {'doc': obj}
     else:
         params = {'url': url_or_path}
-    return mf2py.Parser(html_parser='html5lib', **params)
+    return mf2py.Parser(
+            html_parser='html5lib', img_with_alt=True, **params)
 
 
 class InvalidEntryException(Exception):
@@ -172,3 +173,17 @@
             e_and_el = (e, els[next_el[entry_type]])
             self.entries.append(e_and_el)
             next_el[entry_type] += 1
+
+
+def strip_img_alt(photos):
+    if not isinstance(photos, list):
+        raise Exception("Expected list of media items, got: %s" % photos)
+    urls = []
+    for photo in photos:
+        if isinstance(photo, dict):
+            urls.append(photo['value'])
+        elif isinstance(photo, str):
+            urls.append(photo)
+        else:
+            raise Exception("Unexpected media item: %s" % photo)
+    return urls
--- a/silorider/silos/base.py	Wed May 10 16:10:35 2023 -0700
+++ b/silorider/silos/base.py	Wed May 10 16:11:11 2023 -0700
@@ -120,18 +120,21 @@
 
 
 def upload_silo_media(entry, propname, callback):
+    # The provided callback must take the parameters:
+    #  tmpfile path, mimetype, original media url, media description
     media_ids = None
-    urls = entry.get(propname, [], force_list=True)
-    if urls:
+    media_entries = entry.get(propname, [], force_list=True)
+    if media_entries:
         media_ids = []
-        for url in urls:
-            mid = _do_upload_silo_media(url, callback)
+        for media_entry in media_entries:
+            url, desc = _img_url_and_alt(media_entry)
+            mid = _do_upload_silo_media(url, desc, callback)
             if mid is not None:
                 media_ids.append(mid)
     return media_ids
 
 
-def _do_upload_silo_media(url, callback):
+def _do_upload_silo_media(url, desc, callback):
     logger.debug("Downloading %s for upload to silo..." % url)
     mt, enc = mimetypes.guess_type(url, strict=False)
     if not mt:
@@ -144,7 +147,18 @@
     try:
         tmpfile, headers = urllib.request.urlretrieve(url)
         logger.debug("Using temporary file: %s" % tmpfile)
-        return callback(tmpfile, mt)
+        return callback(tmpfile, mt, url, desc)
     finally:
         logger.debug("Cleaning up.")
         urllib.request.urlcleanup()
+
+
+def _img_url_and_alt(media_entry):
+    # If an image has an alt attribute, the entry comes as a dictionary
+    # with 'value' for the url and 'alt' for the description.
+    if isinstance(media_entry, str):
+        return media_entry, None
+    if isinstance(media_entry, dict):
+        logger.debug("Got alt text for image! %s" % media_entry['alt'])
+        return media_entry['value'], media_entry['alt']
+    raise Exception("Unexpected media entry: %s" % media_entry)
--- a/silorider/silos/mastodon.py	Wed May 10 16:10:35 2023 -0700
+++ b/silorider/silos/mastodon.py	Wed May 10 16:11:11 2023 -0700
@@ -1,3 +1,4 @@
+import time
 import getpass
 import logging
 import mastodon
@@ -118,9 +119,26 @@
 
         media_ids = upload_silo_media(entry, 'photo', self._media_callback)
 
+        tries_left = 5
         logger.debug("Posting toot: %s" % toottxt)
-        self.client.status_post(toottxt, media_ids=media_ids,
-                                visibility=visibility)
+        while tries_left > 0:
+            try:
+                self.client.status_post(toottxt, media_ids=media_ids,
+                                        visibility=visibility)
+                break # if we got here without an exception, it's all good!
+            except mastodon.MastodonAPIError as merr:
+                if merr.args[1] == 422 and media_ids:
+                    # Unprocessable entity error. This happens if we have
+                    # uploaded some big images and the server is still
+                    # processing them.  In this case, let's wait a second and
+                    # try again.
+                    logger.debug(
+                        "Server may still be processing media... waiting "
+                        "to retry")
+                    time.sleep(1)
+                    tries_left -= 1
+                    continue
+                raise
 
     def dryRunPostEntry(self, entry, ctx):
         toottxt = self.formatEntry(entry, limit=500)
@@ -130,6 +148,8 @@
         if media_urls:
             logger.info("...with photos: %s" % str(media_urls))
 
-    def _media_callback(self, tmpfile, mt):
+    def _media_callback(self, tmpfile, mt, url, desc):
         with open(tmpfile, 'rb') as tmpfp:
-            return self.client.media_post(tmpfp, mime_type=mt)
+            logger.debug("Uploading to mastodon with description: %s" % desc)
+            return self.client.media_post(
+                    tmpfp, mime_type=mt, description=desc)
--- a/silorider/silos/twitter.py	Wed May 10 16:10:35 2023 -0700
+++ b/silorider/silos/twitter.py	Wed May 10 16:11:11 2023 -0700
@@ -3,6 +3,7 @@
 import urllib.parse
 from .base import Silo
 from ..format import UrlFlattener
+from ..parse import strip_img_alt
 
 
 logger = logging.getLogger(__name__)
@@ -76,6 +77,7 @@
 
         logger.debug("Posting tweet: %s" % tweettxt)
         media_urls = entry.get('photo', [], force_list=True)
+        media_urls = strip_img_alt(media_urls)
         self.client.PostUpdate(tweettxt, media=media_urls)
 
     def dryRunPostEntry(self, entry, ctx):
@@ -84,6 +86,7 @@
         logger.info("Tweet would be:")
         logger.info(tweettxt)
         media_urls = entry.get('photo', [], force_list=True)
+        media_urls = strip_img_alt(media_urls)
         if media_urls:
             logger.info("...with photos: %s" % str(media_urls))
 
--- a/tests/test_silos_mastodon.py	Wed May 10 16:10:35 2023 -0700
+++ b/tests/test_silos_mastodon.py	Wed May 10 16:11:11 2023 -0700
@@ -125,7 +125,7 @@
     assert toot == ("This is yet another link http://example.org/blah", None, 'public')  # NOQA
 
 
-def _patched_media_callback(self, tmpfile, mt):
+def _patched_media_callback(self, tmpfile, mt, url, desc):
     return self.client.media_post(tmpfile, mt)