changeset 79:5c4c57aaf7b5

Use byte offsets for Bluesky's hashtag facets
author Ludovic Chabant <ludovic@chabant.com>
date Sat, 28 Sep 2024 09:31:09 -0700
parents 932aa9922d98
children 2a7fa4259fc8
files silorider/silos/bluesky.py
diffstat 1 files changed, 8 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/silorider/silos/bluesky.py	Wed Aug 28 09:38:33 2024 -0700
+++ b/silorider/silos/bluesky.py	Sat Sep 28 09:31:09 2024 -0700
@@ -151,15 +151,21 @@
                     first_url = url
 
         # Look for hashtags and turn them into facets too.
-        for htm in re_hashtags.finditer(entry_card.text):
+        entry_text = entry_card.text
+        for htm in re_hashtags.finditer(entry_text):
             start = htm.start()
             end = htm.end()
             tagname = htm.group()[1:]  # skip the hashtag character
 
+            # Facet indices are UTF-8 byte offsets, so re-encode each prefix to
+            # measure it; this is inefficient, but it will do for now.
+            byte_start = len(entry_text[:start].encode())
+            byte_end = len(entry_text[:end].encode())
+
             facet = atprotomodels.AppBskyRichtextFacet.Main(
                 features=[atprotomodels.AppBskyRichtextFacet.Tag(tag=tagname)],
                 index=atprotomodels.AppBskyRichtextFacet.ByteSlice(
-                    byteStart=start, byteEnd=end)
+                    byteStart=byte_start, byteEnd=byte_end)
                 )
             facets.append(facet)