# HG changeset patch # User Ludovic Chabant # Date 1727541069 25200 # Node ID 5c4c57aaf7b5ce221cffa8a3bbfce06eaaf5e236 # Parent 932aa9922d98782af93a8a9551fceb7c1877a9e5 Use byte offsets for Bluesky's hashtag facets diff -r 932aa9922d98 -r 5c4c57aaf7b5 silorider/silos/bluesky.py --- a/silorider/silos/bluesky.py Wed Aug 28 09:38:33 2024 -0700 +++ b/silorider/silos/bluesky.py Sat Sep 28 09:31:09 2024 -0700 @@ -151,15 +151,21 @@ first_url = url # Look for hashtags and turn them into facets too. - for htm in re_hashtags.finditer(entry_card.text): + entry_text = entry_card.text + for htm in re_hashtags.finditer(entry_text): start = htm.start() end = htm.end() tagname = htm.group()[1:] # skip the hashtag character + # Not a very efficient way to get the byte offsets, but that will + # do for now. + byte_start = len(entry_text[:start].encode()) + byte_end = len(entry_text[:end].encode()) + facet = atprotomodels.AppBskyRichtextFacet.Main( features=[atprotomodels.AppBskyRichtextFacet.Tag(tag=tagname)], index=atprotomodels.AppBskyRichtextFacet.ByteSlice( - byteStart=start, byteEnd=end) + byteStart=byte_start, byteEnd=byte_end) ) facets.append(facet)