Mercurial > silorider
changeset 46:ac5911e76ef2
Add Bluesky silo
Currently supports:
- Uploading images
- URL facets
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Sun, 20 Aug 2023 11:20:37 -0700 |
parents | 67fde62e3862 |
children | 4be321bd5ed4 |
files | silorider/silos/base.py silorider/silos/bluesky.py tests/test_silos_bluesky.py |
diffstat | 3 files changed, 355 insertions(+), 1 deletions(-) [+] |
line wrap: on
line diff
--- a/silorider/silos/base.py Sun Aug 20 11:20:07 2023 -0700 +++ b/silorider/silos/base.py Sun Aug 20 11:20:37 2023 -0700 @@ -93,10 +93,11 @@ def load_silos(config, cache): from .print import PrintSilo + from .bluesky import BlueskySilo from .mastodon import MastodonSilo from .twitter import TwitterSilo from .webmention import WebmentionSilo - silo_types = [PrintSilo, MastodonSilo, TwitterSilo, WebmentionSilo] + silo_types = [PrintSilo, BlueskySilo, MastodonSilo, TwitterSilo, WebmentionSilo] silo_dict = dict([(s.SILO_TYPE, s) for s in silo_types]) silos = []
"""Bluesky silo (silorider/silos/bluesky.py).

Posts entries to Bluesky through the ``atproto`` client. Images attached
to an entry are uploaded as blobs and embedded in the post; hyperlinks
found in the entry text are attached as link facets.
"""
import re
import json
import time
import urllib.parse
import getpass
import logging
import datetime
from .base import Silo, upload_silo_media
from ..format import UrlFlattener, URLMODE_ERASE

import atproto
import atproto.xrpc_client.models as atprotomodels


logger = logging.getLogger(__name__)


class _BlueskyClient(atproto.Client):
    """``atproto.Client`` whose ``send_post`` also accepts facets."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def send_post(self, text, embed=None, facets=None):
        """Create a post record in the logged-in user's repository.

        Overrides the atproto.Client send_post function because it
        doesn't support facets yet. The code is otherwise more or less
        identical.

        :param text: The text of the post.
        :param embed: Optional embed model (e.g. uploaded images).
        :param facets: Optional list of rich text facet models.
        :return: Whatever ``create_record`` returns.
        """
        repo = self.me.did
        langs = [atprotomodels.languages.DEFAULT_LANGUAGE_CODE1]
        # atproto datetimes must carry a timezone, so use an aware UTC
        # timestamp rather than a naive local one.
        created_at = datetime.datetime.now(datetime.timezone.utc).isoformat()
        data = atprotomodels.ComAtprotoRepoCreateRecord.Data(
            repo=repo,
            collection=atprotomodels.ids.AppBskyFeedPost,
            record=atprotomodels.AppBskyFeedPost.Main(
                createdAt=created_at,
                text=text,
                facets=facets,
                embed=embed,
                langs=langs)
        )
        return self.com.atproto.repo.create_record(data)


class BlueskySilo(Silo):
    """Silo that cross-posts entries to Bluesky."""

    SILO_TYPE = 'bluesky'
    _DEFAULT_SERVER = 'bsky.app'
    # Class used to build the API client; tests swap this for a mock.
    _CLIENT_CLASS = _BlueskyClient

    def __init__(self, ctx):
        super().__init__(ctx)

        base_url = self.getConfigItem('url')
        self.client = self._CLIENT_CLASS(base_url)

    def authenticate(self, ctx):
        """Prompt for credentials, log in, and cache them.

        Nothing happens when a password is already cached, unless the
        ``force`` argument was given on the command line.
        """
        force = ctx.exec_ctx.args.force

        password = self.getCacheItem('password')
        if password and not force:
            return

        logger.info("Authenticating client app with Bluesky for %s",
                    self.ctx.silo_name)
        email = input("Email: ")
        password = getpass.getpass(prompt="Application password: ")
        profile = self.client.login(email, password)

        logger.info("Authenticated as %s", profile.displayName)
        self.setCacheItem('email', email)
        self.setCacheItem('password', password)

    def onPostStart(self, ctx):
        """Log in with the cached credentials, except on a dry run.

        :raises Exception: If no email or password has been cached yet.
        """
        if ctx.args.dry_run:
            return

        email = self.getCacheItem('email')
        password = self.getCacheItem('password')
        if not email or not password:
            raise Exception("Please authenticate Bluesky silo %s" %
                            self.ctx.silo_name)
        self.client.login(email, password)

    def postEntry(self, entry, ctx):
        """Format the entry and send it as a Bluesky post.

        :raises Exception: If the entry yields no text to post.
        """
        # We use URLMODE_ERASE to remove all hyperlinks from the
        # formatted text, and we later add them as facets to the atproto
        # record.
        url_flattener = BlueskyUrlFlattener()
        posttxt = self.formatEntry(
            entry,
            limit=256,
            url_flattener=url_flattener,
            url_mode=URLMODE_ERASE)
        if not posttxt:
            raise Exception("Can't find any content to use for the post!")

        # Upload the images as blobs and add them as an embed on the
        # atproto record.
        images = upload_silo_media(entry, 'photo', self._media_callback)

        embed = None
        if images:
            embed = atprotomodels.AppBskyEmbedImages.Main(images=images)

        # Grab any URLs detected by our URL flattener and add them as
        # facets on the atproto record.
        facets = _make_link_facets(url_flattener.urls)

        # Create the record!
        self.client.send_post(text=posttxt, embed=embed, facets=facets)

    def dryRunPostEntry(self, entry, ctx):
        """Log what would be posted for the entry without posting it."""
        posttxt = self.formatEntry(entry, limit=256)
        logger.info("Post would be:")
        logger.info(posttxt)
        media_urls = entry.get('photo', [], force_list=True)
        if media_urls:
            logger.info("...with photos: %s", media_urls)

    def _media_callback(self, tmpfile, mt, url, desc):
        """Upload one image file as a blob and return its embed model.

        :param tmpfile: Path of the local file holding the image data.
        :param mt: Unused here.
        :param url: Unused here.
        :param desc: Alt text for the image, or ``None``.
        """
        with open(tmpfile, 'rb') as tmpfp:
            data = tmpfp.read()

        logger.debug(
            "Uploading image to Bluesky (%d bytes) with description: %s",
            len(data), desc)
        upload = self.client.com.atproto.repo.upload_blob(data)

        if desc is None:
            desc = ""
        return atprotomodels.AppBskyEmbedImages.Image(
            alt=desc, image=upload.blob)


def _make_link_facets(urls):
    """Build link facets from ``(byte_start, byte_end, url)`` tuples.

    Returns ``None`` when there are no URLs, so that no facets at all are
    set on the record.
    """
    if not urls:
        return None

    facets = []
    for start, end, url in urls:
        # atproto requires an http or https scheme. Check for the full
        # scheme prefix so that a scheme-less host that merely starts
        # with "http" (e.g. "httpbin.org") still gets one.
        if not url.startswith(('http://', 'https://')):
            url = 'https://' + url

        facets.append(atprotomodels.AppBskyRichtextFacet.Main(
            features=[atprotomodels.AppBskyRichtextFacet.Link(uri=url)],
            index=atprotomodels.AppBskyRichtextFacet.ByteSlice(
                byteStart=start, byteEnd=end),
        ))
    return facets


BLUESKY_NETLOC = 'bsky.app'

# Match both links to a profile by ID, and by name. The ID alternative
# must come first: otherwise the name alternative matches just the "did"
# prefix of a "did:plc:..." path and the rest of the ID is dropped.
# Names are domain names, so they may contain dots and hyphens.
profile_path_re = re.compile(r'/profile/(did:plc:\w+|[\w.-]+)')


class BlueskyUrlFlattener(UrlFlattener):
    """URL flattener that records link positions for later use as facets.

    After formatting, ``urls`` holds one ``(byte_start, byte_end, url)``
    tuple per recorded link.
    """

    def __init__(self):
        self.urls = []

    def replaceHref(self, text, raw_url, ctx):
        """Return the text that should stand in for a hyperlink.

        Links to a Bluesky profile are replaced with an ``@`` mention.
        Any other link keeps its text, and its position is recorded in
        ``urls``.
        """
        url = urllib.parse.urlparse(raw_url)

        # If this is a Bluesky profile URL, replace it with a mention.
        if url.netloc == BLUESKY_NETLOC:
            m = profile_path_re.match(url.path)
            if m:
                return '@' + m.group(1)

        # Otherwise, keep track of where the URL is so we can add a facet
        # for it. Facet indices are expressed in bytes, hence the encode.
        # NOTE(review): presumably ctx.byte_length is the byte length of
        # the text emitted so far -- confirm against the formatter.
        start = ctx.byte_length
        end = start + len(text.encode())
        self.urls.append((start, end, raw_url))
        logger.debug("Gathered link: %s %s %s", start, end, raw_url)

        # Always keep the text as-is.
        return text

    def measureUrl(self, url):
        """Return the length of the URL, in characters."""
        return len(url)
"""Tests for the Bluesky silo (tests/test_silos_bluesky.py).

The real API client is replaced with ``BlueskyMock``, which records the
posts and blobs it is given so the tests can inspect them.
"""
import pytest
import atproto.xrpc_client.models as atprotomodels
from .mockutil import mock_urllib


def test_one_article(cli, feedutil, bskymock):
    """An article is posted as its title followed by its permalink."""
    feed = cli.createTempFeed(feedutil.makeFeed(
        """<h1 class="p-name">A new article</h1>
<div class="e-content">
<p>This is the text of the article.</p>
<p>It has 2 paragraphs.</p>
</div>
<a class="u-url" href="https://example.org/a-new-article">permalink</a>"""
    ))

    cli.appendSiloConfig('test', 'bluesky')
    cli.setFeedConfig('feed', feed)
    bskymock.installCredentials(cli, 'test')

    ctx, _ = cli.run('process')
    assert ctx.cache.wasPosted('test', 'https://example.org/a-new-article')
    post = ctx.silos[0].client.posts[0]
    assert post == ('A new article https://example.org/a-new-article',
                    None, None)


def test_one_micropost(cli, feedutil, bskymock):
    """A micropost is posted as its text only."""
    feed = cli.createTempFeed(feedutil.makeFeed(
        """<p class="p-name">This is a quick update.</p>
<a class="u-url" href="/01234.html">permalink</a>"""
    ))

    cli.appendSiloConfig('test', 'bluesky')
    cli.setFeedConfig('feed', feed)
    bskymock.installCredentials(cli, 'test')

    ctx, _ = cli.run('process')
    assert ctx.cache.wasPosted('test', '/01234.html')
    post = ctx.silos[0].client.posts[0]
    assert post == ("This is a quick update.", None, None)


def test_one_micropost_with_one_photo(cli, feedutil, bskymock, monkeypatch):
    """A photo is uploaded as a blob and embedded in the post."""
    feed = cli.createTempFeed(feedutil.makeFeed(
        """<p class="p-name">This is a quick photo update.</p>
<div>
    <a class="u-photo" href="/fullimg.jpg"><img src="/thumbimg.jpg"/></a>
</div>
<a class="u-url" href="/01234.html">permalink</a>"""
    ))

    cli.appendSiloConfig('test', 'bluesky')
    cli.setFeedConfig('feed', feed)
    bskymock.installCredentials(cli, 'test')

    with monkeypatch.context() as m:
        import silorider.silos.bluesky
        mock_urllib(m)
        m.setattr(silorider.silos.bluesky.BlueskySilo, '_media_callback',
                  _patched_media_callback)
        ctx, _ = cli.run('process')

    assert ctx.cache.wasPosted('test', '/01234.html')
    blob = ctx.silos[0].client.blobs[0]
    assert blob == ('/retrieved/fullimg.jpg', None)
    post = ctx.silos[0].client.posts[0]
    embed = atprotomodels.AppBskyEmbedImages.Main(images=[1])
    assert post == ("This is a quick photo update.", embed, None)


def test_one_micropost_with_two_photos(cli, feedutil, bskymock, monkeypatch):
    """Two photos are uploaded as two blobs, in order."""
    feed = cli.createTempFeed(feedutil.makeFeed(
        """<p class="p-name">This is a photo update with 2 photos.</p>
<div>
    <a class="u-photo" href="/fullimg1.jpg"><img src="/thumbimg1.jpg"/></a>
    <a class="u-photo" href="/fullimg2.jpg"><img src="/thumbimg2.jpg"/></a>
</div>
<a class="u-url" href="/01234.html">permalink</a>"""
    ))

    cli.appendSiloConfig('test', 'bluesky')
    cli.setFeedConfig('feed', feed)
    bskymock.installCredentials(cli, 'test')

    with monkeypatch.context() as m:
        import silorider.silos.bluesky
        mock_urllib(m)
        m.setattr(silorider.silos.bluesky.BlueskySilo, '_media_callback',
                  _patched_media_callback)
        ctx, _ = cli.run('process')

    assert ctx.cache.wasPosted('test', '/01234.html')
    blob = ctx.silos[0].client.blobs[0]
    assert blob == ('/retrieved/fullimg1.jpg', None)
    blob = ctx.silos[0].client.blobs[1]
    assert blob == ('/retrieved/fullimg2.jpg', None)
    post = ctx.silos[0].client.posts[0]
    embed = atprotomodels.AppBskyEmbedImages.Main(images=[1, 2])
    assert post == ("This is a photo update with 2 photos.", embed, None)


def test_one_micropost_with_links(cli, feedutil, bskymock):
    """Hyperlinks become link facets; plain-text URLs do not."""
    cli.appendSiloConfig('test', 'bluesky')
    bskymock.installCredentials(cli, 'test')

    # A URL that is only plain text: no facet expected.
    feed = cli.createTempFeed(feedutil.makeFeed(
        """<p class="p-name">This is a link: http://example.org/blah</p>
<a class="u-url" href="/01234.html">permalink</a>"""))

    cli.setFeedConfig('feed', feed)
    ctx, _ = cli.run('process')
    post = ctx.silos[0].client.posts[0]
    assert post[0] == "This is a link: http://example.org/blah"
    assert post[2] is None

    # A hyperlink whose text is the URL itself.
    feed = cli.createTempFeed(feedutil.makeFeed(
        """<p class="e-content">This is another link: <a href="http://example.org/blah">http://example.org/blah</a></p>
<a class="u-url" href="/01234.html">permalink</a>"""))  # NOQA
    cli.setFeedConfig('feed', feed)
    ctx, _ = cli.run('process')
    post = ctx.silos[0].client.posts[0]
    assert post[0] == "This is another link: http://example.org/blah"  # NOQA
    facet = _make_link_facet('http://example.org/blah', 22, 45)
    assert post[2] == [facet]

    # A hyperlink with its own link text.
    feed = cli.createTempFeed(feedutil.makeFeed(
        """<p class="e-content">This is yet <a href="http://example.org/blah">another link</a></p>
<a class="u-url" href="/01234.html">permalink</a>"""))  # NOQA
    cli.setFeedConfig('feed', feed)
    ctx, _ = cli.run('process')
    post = ctx.silos[0].client.posts[0]
    assert post[0] == "This is yet another link"  # NOQA
    facet = _make_link_facet('http://example.org/blah', 12, 24)
    assert post[2] == [facet]


def _make_link_facet(url, start, end):
    """Build the link facet expected for ``url`` at bytes start..end."""
    return atprotomodels.AppBskyRichtextFacet.Main(
        features=[atprotomodels.AppBskyRichtextFacet.Link(uri=url)],
        index=atprotomodels.AppBskyRichtextFacet.ByteSlice(
            byteStart=start, byteEnd=end),
    )


def _patched_media_callback(self, tmpfile, mt, url, desc):
    """Stand-in for ``BlueskySilo._media_callback`` that uses the mock."""
    return self.client.upload_blob(tmpfile, desc)


@pytest.fixture(scope='session')
def bskymock():
    """Make ``BlueskySilo`` use ``BlueskyMock`` for the test session.

    The original client class is put back when the session ends, so the
    patched class attribute does not outlive the fixture.
    """
    from silorider.silos.bluesky import BlueskySilo
    original_client_class = BlueskySilo._CLIENT_CLASS
    BlueskySilo._CLIENT_CLASS = BlueskyMock
    try:
        yield BlueskyMockUtil()
    finally:
        BlueskySilo._CLIENT_CLASS = original_client_class


class BlueskyMock:
    """Fake client that records posts and blobs instead of sending them."""

    def __init__(self, base_url):
        # base_url is unused here.
        self.posts = []
        self.blobs = []

    def login(self, email, password):
        assert email == 'TEST_EMAIL'
        assert password == 'TEST_PASSWORD'

    def upload_blob(self, tmpfile, desc):
        # Return the 1-based index of the blob as a stand-in for the
        # image model, which is why tests expect images=[1] or [1, 2].
        self.blobs.append((tmpfile, desc))
        return len(self.blobs)

    def send_post(self, text, embed=None, facets=None):
        self.posts.append((text, embed, facets))


class BlueskyMockUtil:
    """Helper returned by the ``bskymock`` fixture."""

    def installCredentials(self, cli, silo_name):
        """Register a hook that caches the test email and password."""
        def do_install_credentials(ctx):
            ctx.cache.setCustomValue(
                '%s_email' % silo_name,
                'TEST_EMAIL')
            ctx.cache.setCustomValue(
                '%s_password' % silo_name,
                'TEST_PASSWORD')

        cli.preExecHook(do_install_credentials)