changeset 46:ac5911e76ef2

Add Bluesky silo. Currently supports: - Uploading images - URL facets
author Ludovic Chabant <ludovic@chabant.com>
date Sun, 20 Aug 2023 11:20:37 -0700
parents 67fde62e3862
children 4be321bd5ed4
files silorider/silos/base.py silorider/silos/bluesky.py tests/test_silos_bluesky.py
diffstat 3 files changed, 355 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/silorider/silos/base.py	Sun Aug 20 11:20:07 2023 -0700
+++ b/silorider/silos/base.py	Sun Aug 20 11:20:37 2023 -0700
@@ -93,10 +93,11 @@
 
 def load_silos(config, cache):
     from .print import PrintSilo
+    from .bluesky import BlueskySilo
     from .mastodon import MastodonSilo
     from .twitter import TwitterSilo
     from .webmention import WebmentionSilo
-    silo_types = [PrintSilo, MastodonSilo, TwitterSilo, WebmentionSilo]
+    silo_types = [PrintSilo, BlueskySilo, MastodonSilo, TwitterSilo, WebmentionSilo]
     silo_dict = dict([(s.SILO_TYPE, s) for s in silo_types])
 
     silos = []
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/silorider/silos/bluesky.py	Sun Aug 20 11:20:37 2023 -0700
@@ -0,0 +1,169 @@
+import re
+import json
+import time
+import urllib.parse
+import getpass
+import logging
+import datetime
+from .base import Silo, upload_silo_media
+from ..format import UrlFlattener, URLMODE_ERASE
+
+import atproto
+import atproto.xrpc_client.models as atprotomodels
+
+
+logger = logging.getLogger(__name__)
+
+
+class _BlueskyClient(atproto.Client):
+    """atproto client whose `send_post` also accepts rich-text facets."""
+
+    def send_post(self, text, embed=None, facets=None):
+        # Override the atproto.Client send_post function because it
+        # doesn't support facets yet. The code is otherwise more or
+        # less identical.
+        # createdAt must carry a timezone, so stamp the record with an
+        # aware UTC time rather than a naive local one.
+        now = datetime.datetime.now(datetime.timezone.utc)
+        record = atprotomodels.AppBskyFeedPost.Main(
+            createdAt=now.isoformat(),
+            text=text,
+            facets=facets,
+            embed=embed,
+            langs=[atprotomodels.languages.DEFAULT_LANGUAGE_CODE1])
+        data = atprotomodels.ComAtprotoRepoCreateRecord.Data(
+            repo=self.me.did,
+            collection=atprotomodels.ids.AppBskyFeedPost,
+            record=record)
+        self.com.atproto.repo.create_record(data)
+
+
+class BlueskySilo(Silo):
+    """Silo that posts entries to Bluesky through an atproto client."""
+    SILO_TYPE = 'bluesky'
+    _DEFAULT_SERVER = 'bsky.app'
+    _CLIENT_CLASS = _BlueskyClient
+
+    def __init__(self, ctx):
+        super().__init__(ctx)
+        base_url = self.getConfigItem('url')
+        self.client = self._CLIENT_CLASS(base_url)
+
+    def authenticate(self, ctx):
+        """Prompt for credentials and cache them if none cached or forced."""
+        force = ctx.exec_ctx.args.force
+        password = self.getCacheItem('password')
+        if not password or force:
+            logger.info("Authenticating client app with Bluesky for %s" %
+                        self.ctx.silo_name)
+            email = input("Email: ")
+            password = getpass.getpass(prompt="Application password: ")
+            profile = self.client.login(email, password)
+
+            logger.info("Authenticated as %s" % profile.displayName)
+            self.setCacheItem('email', email)
+            self.setCacheItem('password', password)
+
+    def onPostStart(self, ctx):
+        """Log in with the cached credentials, unless this is a dry run."""
+        if not ctx.args.dry_run:
+            email = self.getCacheItem('email')
+            password = self.getCacheItem('password')
+            if not email or not password:
+                raise Exception("Please authenticate Bluesky silo %s" %
+                                self.ctx.silo_name)
+            self.client.login(email, password)
+
+    def postEntry(self, entry, ctx):
+        """Post the entry text, embedding its photos and linking its URLs."""
+        # We use URLMODE_ERASE to remove all hyperlinks from the
+        # formatted text, and we later add them as facets to the atproto
+        # record.
+        url_flattener = BlueskyUrlFlattener()
+        posttxt = self.formatEntry(
+            entry,
+            limit=256,
+            url_flattener=url_flattener,
+            url_mode=URLMODE_ERASE)
+        if not posttxt:
+            raise Exception("Can't find any content to use for the post!")
+
+        # Upload the images as blobs and add them as an embed on the
+        # atproto record.
+        images = upload_silo_media(entry, 'photo', self._media_callback)
+        embed = None
+        if images:
+            embed = atprotomodels.AppBskyEmbedImages.Main(images=images)
+
+        # Grab any URLs detected by our URL flattener and add them as
+        # facets on the atproto record.
+        facets = None
+        if url_flattener.urls:
+            facets = []
+            for start, end, url in url_flattener.urls:
+                # atproto requires an http or https scheme.
+                if not url.startswith(('http://', 'https://')):
+                    url = 'https://' + url
+
+                facet = atprotomodels.AppBskyRichtextFacet.Main(
+                    features=[atprotomodels.AppBskyRichtextFacet.Link(uri=url)],
+                    index=atprotomodels.AppBskyRichtextFacet.ByteSlice(
+                        byteStart=start, byteEnd=end),
+                    )
+                facets.append(facet)
+
+        # Create the record!
+        self.client.send_post(text=posttxt, embed=embed, facets=facets)
+
+    def dryRunPostEntry(self, entry, ctx):
+        """Log the post text and photo URLs instead of posting them."""
+        posttxt = self.formatEntry(entry, limit=256)
+        logger.info("Post would be:")
+        logger.info(posttxt)
+        media_urls = entry.get('photo', [], force_list=True)
+        if media_urls:
+            logger.info("...with photos: %s" % str(media_urls))
+
+    def _media_callback(self, tmpfile, mt, url, desc):
+        """Upload one image file as a blob and return its embed image."""
+        with open(tmpfile, 'rb') as tmpfp:
+            data = tmpfp.read()
+        logger.debug("Uploading image to Bluesky (%d bytes) with description: %s" %
+                     (len(data), desc))
+        upload = self.client.com.atproto.repo.upload_blob(data)
+        if desc is None:
+            desc = ""
+        return atprotomodels.AppBskyEmbedImages.Image(alt=desc, image=upload.blob)
+
+
+BLUESKY_NETLOC = 'bsky.app'
+
+# Match profile links by ID first (else only 'did' matches), then by name
+profile_path_re = re.compile(r'/profile/((did\:plc\:[\w\d]+)|[\w\d\.\-]+)')
+
+
+class BlueskyUrlFlattener(UrlFlattener):
+    """Collects (start, end, url) byte ranges of links for later facets."""
+    def __init__(self):
+        self.urls = []
+
+    def replaceHref(self, text, raw_url, ctx):
+        url = urllib.parse.urlparse(raw_url)
+        # If this is a Bluesky profile URL, replace it with a mention.
+        if url.netloc == BLUESKY_NETLOC:
+            m = profile_path_re.match(url.path)
+            if m:
+                return '@' + m.group(1)
+
+        # Otherwise, keep track of where the URL is so we can add a facet
+        # for it.
+        start = ctx.byte_length
+        end = start + len(text.encode())
+        self.urls.append((start, end, raw_url))
+        logger.debug("Gathered link: %s %s %s", start, end, raw_url)
+
+        # Always keep the text as-is.
+        return text
+
+    def measureUrl(self, url):
+        return len(url)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test_silos_bluesky.py	Sun Aug 20 11:20:37 2023 -0700
@@ -0,0 +1,184 @@
+import pytest
+import atproto.xrpc_client.models as atprotomodels
+from .mockutil import mock_urllib
+
+
+def test_one_article(cli, feedutil, bskymock):
+    feed = cli.createTempFeed(feedutil.makeFeed(
+        """<h1 class="p-name">A new article</h1>
+<div class="e-content">
+<p>This is the text of the article.</p>
+<p>It has 2 paragraphs.</p>
+</div>
+<a class="u-url" href="https://example.org/a-new-article">permalink</a>"""
+    ))
+    # Set up a 'bluesky' silo named 'test' and point the feed config at it.
+    cli.appendSiloConfig('test', 'bluesky')
+    cli.setFeedConfig('feed', feed)
+    bskymock.installCredentials(cli, 'test')
+    # An article is expected to post as its title followed by its permalink.
+    ctx, _ = cli.run('process')
+    assert ctx.cache.wasPosted('test', 'https://example.org/a-new-article')
+    post = ctx.silos[0].client.posts[0]
+    assert post == ('A new article https://example.org/a-new-article',
+                    None, None)
+
+
+def test_one_micropost(cli, feedutil, bskymock):
+    feed = cli.createTempFeed(feedutil.makeFeed(
+        """<p class="p-name">This is a quick update.</p>
+<a class="u-url" href="/01234.html">permalink</a>"""
+    ))
+    # Set up a 'bluesky' silo named 'test' and point the feed config at it.
+    cli.appendSiloConfig('test', 'bluesky')
+    cli.setFeedConfig('feed', feed)
+    bskymock.installCredentials(cli, 'test')
+    # A micropost is expected to post as its text only: no embed, no facets.
+    ctx, _ = cli.run('process')
+    assert ctx.cache.wasPosted('test', '/01234.html')
+    post = ctx.silos[0].client.posts[0]
+    assert post == ("This is a quick update.", None, None)
+
+
+def test_one_micropost_with_one_photo(cli, feedutil, bskymock, monkeypatch):
+    feed = cli.createTempFeed(feedutil.makeFeed(
+        """<p class="p-name">This is a quick photo update.</p>
+<div>
+    <a class="u-photo" href="/fullimg.jpg"><img src="/thumbimg.jpg"/></a>
+</div>
+<a class="u-url" href="/01234.html">permalink</a>"""
+    ))
+    # Set up a 'bluesky' silo named 'test' and point the feed config at it.
+    cli.appendSiloConfig('test', 'bluesky')
+    cli.setFeedConfig('feed', feed)
+    bskymock.installCredentials(cli, 'test')
+    # Patch urllib and the media callback so uploads land on the mock client.
+    with monkeypatch.context() as m:
+        import silorider.silos.bluesky
+        mock_urllib(m)
+        m.setattr(silorider.silos.bluesky.BlueskySilo, '_media_callback',
+                  _patched_media_callback)
+        ctx, _ = cli.run('process')
+    # Mock blobs are (path, desc); the embed holds their 1-based indices.
+    assert ctx.cache.wasPosted('test', '/01234.html')
+    blob = ctx.silos[0].client.blobs[0]
+    assert blob == ('/retrieved/fullimg.jpg', None)
+    post = ctx.silos[0].client.posts[0]
+    embed = atprotomodels.AppBskyEmbedImages.Main(images=[1])
+    assert post == ("This is a quick photo update.", embed, None)
+
+
+def test_one_micropost_with_two_photos(cli, feedutil, bskymock, monkeypatch):
+    feed = cli.createTempFeed(feedutil.makeFeed(
+        """<p class="p-name">This is a photo update with 2 photos.</p>
+<div>
+    <a class="u-photo" href="/fullimg1.jpg"><img src="/thumbimg1.jpg"/></a>
+    <a class="u-photo" href="/fullimg2.jpg"><img src="/thumbimg2.jpg"/></a>
+</div>
+<a class="u-url" href="/01234.html">permalink</a>"""
+    ))
+    # Set up a 'bluesky' silo named 'test' and point the feed config at it.
+    cli.appendSiloConfig('test', 'bluesky')
+    cli.setFeedConfig('feed', feed)
+    bskymock.installCredentials(cli, 'test')
+    # Patch urllib and the media callback so uploads land on the mock client.
+    with monkeypatch.context() as m:
+        import silorider.silos.bluesky
+        mock_urllib(m)
+        m.setattr(silorider.silos.bluesky.BlueskySilo, '_media_callback',
+                  _patched_media_callback)
+        ctx, _ = cli.run('process')
+    # Mock blobs are (path, desc); the embed holds their 1-based indices.
+    assert ctx.cache.wasPosted('test', '/01234.html')
+    blob = ctx.silos[0].client.blobs[0]
+    assert blob == ('/retrieved/fullimg1.jpg', None)
+    blob = ctx.silos[0].client.blobs[1]
+    assert blob == ('/retrieved/fullimg2.jpg', None)
+    post = ctx.silos[0].client.posts[0]
+    embed = atprotomodels.AppBskyEmbedImages.Main(images=[1, 2])
+    assert post == ("This is a photo update with 2 photos.", embed, None)
+
+
+def test_one_micropost_with_links(cli, feedutil, bskymock):
+    cli.appendSiloConfig('test', 'bluesky')
+    bskymock.installCredentials(cli, 'test')
+    # Case 1: a bare URL in the text (no <a> tag) is posted without facets.
+    feed = cli.createTempFeed(feedutil.makeFeed(
+        """<p class="p-name">This is a link: http://example.org/blah</p>
+<a class="u-url" href="/01234.html">permalink</a>"""))
+
+    cli.setFeedConfig('feed', feed)
+    ctx, _ = cli.run('process')
+    post = ctx.silos[0].client.posts[0]
+    assert post[0] == "This is a link: http://example.org/blah"
+    assert post[2] is None
+    # Case 2: a linked URL yields a facet over the link text's byte range.
+    feed = cli.createTempFeed(feedutil.makeFeed(
+        """<p class="e-content">This is another link: <a href="http://example.org/blah">http://example.org/blah</a></p>
+<a class="u-url" href="/01234.html">permalink</a>"""))  # NOQA
+    cli.setFeedConfig('feed', feed)
+    ctx, _ = cli.run('process')
+    post = ctx.silos[0].client.posts[0]
+    assert post[0] == "This is another link: http://example.org/blah"  # NOQA
+    facet = _make_link_facet('http://example.org/blah', 22, 45)
+    assert post[2] == [facet]
+    # Case 3: custom link text is kept as-is; the facet carries the URL.
+    feed = cli.createTempFeed(feedutil.makeFeed(
+        """<p class="e-content">This is yet <a href="http://example.org/blah">another link</a></p>
+<a class="u-url" href="/01234.html">permalink</a>"""))  # NOQA
+    cli.setFeedConfig('feed', feed)
+    ctx, _ = cli.run('process')
+    post = ctx.silos[0].client.posts[0]
+    assert post[0] == "This is yet another link"  # NOQA
+    facet = _make_link_facet('http://example.org/blah', 12, 24)
+    assert post[2] == [facet]
+
+
+def _make_link_facet(url, start, end):
+    # Build the facet expected for a link to `url` over bytes start..end.
+    facet_models = atprotomodels.AppBskyRichtextFacet
+    return facet_models.Main(
+        features=[facet_models.Link(uri=url)],
+        index=facet_models.ByteSlice(byteStart=start, byteEnd=end))
+
+
+def _patched_media_callback(self, tmpfile, mt, url, desc):
+    return self.client.upload_blob(tmpfile, desc)  # record path + desc only
+
+
+@pytest.fixture(scope='session')
+def bskymock():
+    import silorider.silos.bluesky as bluesky_silo
+    bluesky_silo.BlueskySilo._CLIENT_CLASS = BlueskyMock  # swap in the mock
+    return BlueskyMockUtil()
+
+
+class BlueskyMock:
+    """In-memory stand-in for the Bluesky client; records blobs and posts."""
+    def __init__(self, base_url):
+        self.posts, self.blobs = [], []  # base_url is unused here.
+
+    def login(self, email, password):
+        assert (email, password) == ('TEST_EMAIL', 'TEST_PASSWORD')
+
+    def upload_blob(self, tmpfile, desc):
+        # The 1-based position of the blob doubles as its return value.
+        self.blobs += [(tmpfile, desc)]
+        return len(self.blobs)
+
+    def send_post(self, text, embed=None, facets=None):
+        # Keep the arguments as a tuple so tests can compare them directly.
+        self.posts += [(text, embed, facets)]
+
+
+class BlueskyMockUtil:
+    """Helper that seeds the cache with the credentials BlueskyMock expects."""
+    def installCredentials(self, cli, silo_name):
+        def do_install_credentials(ctx):
+            # Cache keys are '<silo name>_email' and '<silo name>_password'.
+            credentials = (('%s_email', 'TEST_EMAIL'),
+                           ('%s_password', 'TEST_PASSWORD'))
+            for key_format, value in credentials:
+                ctx.cache.setCustomValue(key_format % silo_name, value)
+
+        cli.preExecHook(do_install_credentials)