changeset 80:2a7fa4259fc8 draft default tip master

Improve upload of preview images.
author Ludovic Chabant <ludovic@chabant.com>
date Sat, 30 Nov 2024 14:14:54 -0800
parents 5c4c57aaf7b5
children
files silorider/commands/process.py silorider/silos/base.py silorider/silos/bluesky.py
diffstat 3 files changed, 49 insertions(+), 35 deletions(-) [+]
line wrap: on
line diff
--- a/silorider/commands/process.py	Sat Sep 28 09:31:09 2024 -0700
+++ b/silorider/commands/process.py	Sat Nov 30 14:14:54 2024 -0800
@@ -1,7 +1,7 @@
 import logging
 import dateparser
 from .utils import get_named_silos, get_named_urls
-from ..silos.base import SiloPostingContext, upload_silo_media
+from ..silos.base import SiloPostingContext, upload_silo_media_for_card
 from ..parse import parse_url
 
 
@@ -126,7 +126,7 @@
             if self.ctx.args.dry_run:
                 media_callback = silo.dryRunMediaCallback
                 max_size = None
-            media_ids = upload_silo_media(entry_card, 'photo', media_callback, max_size)
+            media_ids = upload_silo_media_for_card(entry_card, 'photo', media_callback, max_size)
 
             if not self.ctx.args.dry_run:
                 logger.debug("Posting to '%s': %s" % (silo.name, entry_url))
--- a/silorider/silos/base.py	Sat Sep 28 09:31:09 2024 -0700
+++ b/silorider/silos/base.py	Sat Nov 30 14:14:54 2024 -0800
@@ -1,6 +1,7 @@
 import os
 import os.path
 import uuid
+import signal
 import urllib.request
 import logging
 import tempfile
@@ -161,7 +162,7 @@
     return silos
 
 
-def upload_silo_media(card, propname, callback, max_size=None):
+def upload_silo_media_for_card(card, propname, callback, max_size=None):
     # The provided callback must take the parameters:
     #  tmpfile path, mimetype, original media url, media description
     with tempfile.TemporaryDirectory(prefix='SiloRider') as tmpdir:
@@ -186,6 +187,13 @@
     return media_ids
 
 
+def upload_silo_media_from_url(url, description, callback, max_size=None):
+    # The provided callback should fill the same requirements as in
+    # upload_silo_media_for_card.
+    with tempfile.TemporaryDirectory(prefix='SiloRider') as tmpdir:
+        return _do_upload_silo_media(tmpdir, url, description, callback, max_size)
+
+
 def _do_upload_silo_media(tmpdir, url, desc, callback, max_size=None):
     logger.debug("Downloading %s for upload to silo..." % url)
     mt, enc = mimetypes.guess_type(url, strict=False)
@@ -199,13 +207,14 @@
     try:
         tmpfile = os.path.join(tmpdir, str(uuid.uuid4()) + ext)
         logger.debug("Downloading photo to temporary file: %s" % tmpfile)
-        tmpfile, headers = urllib.request.urlretrieve(url, filename=tmpfile)
-        tmpfile = _ensure_file_not_too_large(tmpfile, max_size)
-        return callback(tmpfile, mt, url, desc)
+        with SignalTimeout(10, "Photo download timed out!") as sto:
+            tmpfile, headers = urllib.request.urlretrieve(url, filename=tmpfile)
+            tmpfile = _ensure_file_not_too_large(tmpfile, max_size)
+            return callback(tmpfile, mt, url, desc)
     finally:
         logger.debug("Cleaning up.")
         urllib.request.urlcleanup()
-
+    return None
 
 def _ensure_file_not_too_large(path, max_size):
     if max_size is None:
@@ -249,3 +258,20 @@
         logger.debug("Got alt text for image! %s" % media_entry['alt'])
         return media_entry['value'], media_entry['alt']
     raise Exception("Unexpected media entry: %s" % media_entry)
+
+
+class SignalTimeout:
+    def __init__(self, seconds, error_message):
+        self.seconds = seconds
+        self.error_message = error_message
+
+    def __enter__(self):
+        signal.signal(signal.SIGALRM, self._onTimeout)
+        signal.alarm(self.seconds)
+
+    def __exit__(self, type, value, traceback):
+        signal.alarm(0)
+
+    def _onTimeout(self, signum, frame):
+        raise TimeoutError(self.error_message)
+
--- a/silorider/silos/bluesky.py	Sat Sep 28 09:31:09 2024 -0700
+++ b/silorider/silos/bluesky.py	Sat Nov 30 14:14:54 2024 -0800
@@ -5,14 +5,13 @@
 import json
 import time
 import random
-import signal
 import urllib.error
 import urllib.parse
 import urllib.request
 import getpass
 import logging
 import datetime
-from .base import Silo
+from .base import Silo, SignalTimeout, upload_silo_media_from_url
 from ..config import has_lxml
 from ..format import CardProps, UrlFlattener, URLMODE_ERASE
 
@@ -122,6 +121,15 @@
             desc = ""
         return atprotomodels.AppBskyEmbedImages.Image(alt=desc, image=upload.blob)
 
+    def blobCallback(self, tmpfile, mt, url, _):
+        with open(tmpfile, 'rb') as tmpfp:
+            data = tmpfp.read()
+
+        logger.debug("Uploading blob to Bluesky (%d bytes)" % len(data))
+        upload = self.client.com.atproto.repo.upload_blob(data)
+
+        return upload.blob
+
     def postEntry(self, entry_card, media_ids, ctx):
         # Add images as an embed on the atproto record.
         embed = None
@@ -287,17 +295,13 @@
                 "image '%s'" % (embed_title, embed_description, embed_image))
 
         # Upload the thumbnail image to Bluesky.
-        embed_thumb_blob = None
+        # We use blobCallback here because the thumbnail is meant to be a raw
+        # blob, not an image, apparently.
+        embed_id = None
         if embed_image:
             try:
-                thumb_req = _build_http_request(embed_image)
-                with SignalTimeout(6, "urlopen timed out!") as sto:
-                    with urlopen(thumb_req, timeout=5) as thumb_resp:
-                        thumb_data = thumb_rest.read()
-                        logger.debug(
-                                "Uploading embed image '%s' to Bluesky (%d bytes)" %
-                                (embed_image, len(thumb_data)))
-                        embed_thumb_blob = self.client.com.atproto.repo.upload_blob(thumb_data)
+                embed_id = upload_silo_media_from_url(
+                        embed_image, None, self.blobCallback, self.PHOTO_LIMIT)
             except Exception as ex:
                 logger.warning(
                         "Couldn't fetch thumbnail URL '%s' to build Bluesky embed" %
@@ -310,7 +314,7 @@
                     title=embed_title,
                     description=embed_description,
                     uri=url,
-                    thumb=embed_thumb_blob))
+                    thumb=embed_id))
         return embed
 
 
@@ -323,22 +327,6 @@
     return req
 
 
-class SignalTimeout:
-    def __init__(self, seconds, error_message):
-        self.seconds = seconds
-        self.error_message = error_message
-
-    def __enter__(self):
-        signal.signal(signal.SIGALRM, self._onTimeout)
-        signal.alarm(self.seconds)
-
-    def __exit__(self, type, value, traceback):
-        signal.alarm(0)
-
-    def _onTimeout(self, signum, frame):
-        raise TimeoutError(self.error_message)
-
-
 _user_agents = [
         'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.3',
         'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',