Mercurial > silorider

--- a/silorider/format.py	Tue Dec 26 16:30:36 2023 -0800
+++ b/silorider/format.py	Tue Dec 26 16:32:17 2023 -0800
@@ -74,6 +74,7 @@
                 ctx.limit = limit
                 if url_flattener:
                     ctx.url_flattener = url_flattener
+                    url_flattener.reset()
                 card.text = get_best_text(entry, ctx)
             else:
                 # We need to shorten the blurb! We can't do much else besides
@@ -81,7 +82,12 @@
                 card.text = card.text[:limit]

     # Actually add the url to the original post now.
+    # We pass it through the URL flattener in case it needs to do extra
+    # processing on it (for instance, the Bluesky silo remembers the
+    # byte offsets so it can insert a hyperlink).
     if do_add_url and url:
+        ctx.reportAddedText(1)
+        url = _process_end_url(url, ctx)
         card.text += ' ' + url
     return card

@@ -122,6 +128,9 @@
     def measureUrl(self, url):
         raise NotImplementedError()

+    def reset(self):
+        pass
+

 class _NullUrlFlattener(UrlFlattener):
     def replaceHref(self, text, url, ctx):
@@ -156,36 +165,56 @@
         self.nosp_urls = []

         # Accumulated text length when accounting for shortened URLs
-        self.text_length = 0
+        self._text_length = 0
         # Same, but computed in bytes, as per UTF8 encoding
-        self.byte_length = 0
+        self._byte_length = 0
         # Whether limit was reached
-        self.limit_reached = False
+        self._limit_reached = False
+
+    @property
+    def text_length(self):
+        return self._text_length
+
+    @property
+    def byte_length(self):
+        return self._byte_length
+
+    @property
+    def limit_reached(self):
+        return self._limit_reached

     def processText(self, txt, allow_shorten=True):
         added_len = len(txt)
-        next_text_length = self.text_length + added_len
+        next_text_length = self._text_length + added_len
         if self.limit <= 0 or next_text_length <= self.limit:
-            self.text_length = next_text_length
-            self.byte_length += len(txt.encode())
+            self._text_length = next_text_length
+            self._byte_length += len(txt.encode())
             return txt

         if allow_shorten:
-            max_allowed = self.limit - self.text_length
+            max_allowed = self.limit - self._text_length
             short_txt = textwrap.shorten(
                 txt,
                 width=max_allowed,
                 expand_tabs=False,
                 replace_whitespace=False,
                 placeholder="...")
-            self.text_length += len(short_txt)
-            self.byte_length += len(short_txt.encode())
-            self.limit_reached = True
+            self._text_length += len(short_txt)
+            self._byte_length += len(short_txt.encode())
+            self._limit_reached = True
             return short_txt
         else:
-            self.limit_reached = True
+            self._limit_reached = True
             return ''

+    def reportSetText(self, charlen, bytelen=None):
+        self._text_length = charlen
+        self._byte_length = bytelen if bytelen is not None else charlen
+
+    def reportAddedText(self, added_chars, added_bytes=None):
+        self._text_length += added_chars
+        self._byte_length += added_bytes if added_bytes is not None else added_chars
+

 def get_best_text(entry, ctx=None, *, plain=True):
     elem = entry.htmlFind(class_='p-title')
@@ -220,7 +249,7 @@

     if desc:
         logger.debug("Found card info, description: %s (image: %s)" % (desc, img))
-        ctx.text_length = len(desc)
+        ctx.reportSetText(len(desc), len(desc.encode('utf8')))
         return CardInfo(entry, desc, img, 'card')
     return None

@@ -261,7 +290,7 @@
             #       too long because of this, but that's desirable.
             if outtxt[-1] not in string.whitespace:
                 outtxt += ' '
-            outtxt += ' '.join(ctx.urls)
+            outtxt += ' '.join([_process_end_url(url, ctx) for url in ctx.urls])
         elif ctx.url_mode == URLMODE_BOTTOM_LIST:
             # If the last character of the text is a whitespace, replace
             # it with a newline.
@@ -271,30 +300,33 @@
                 outtxt = outtxt[:-1] + '\n'
             else:
                 outtxt += '\n'
-            outtxt += '\n'.join(ctx.urls)
+            outtxt += '\n'.join([_process_end_url(url, ctx) for url in ctx.urls])
     # else, if url_mode is URLMODE_ERASE, don't do anything: we have
     # removed the markers and don't need to add the URLs anywhere.
+    # TODO: if using URLMODE_INLINE we don't process the URLs via the flatteners

     if ctx.url_mode != URLMODE_ERASE:
         # Add the length of URLs to the text length.
         for url in ctx.urls:
             url_len = ctx.url_flattener.measureUrl(url)
-            ctx.text_length += url_len
-            ctx.byte_length += url_len
+            ctx.reportAddedText(url_len)
         # Add spaces and other extra characters to the text length.
         if ctx.url_mode == URLMODE_INLINE:
             # One space per URL except the explicitly no-space-urls.
             added_spaces = len(ctx.urls) - len(ctx.nosp_urls)
-            ctx.text_length += added_spaces
-            ctx.byte_length += added_spaces
+            ctx.reportAddedText(added_spaces)
         else:
             # One space or newline per URL.
             added_spaces = len(ctx.urls)
-            ctx.text_length += added_spaces
-            ctx.byte_length += added_spaces
+            ctx.reportAddedText(added_spaces)
     return outtxt


+def _process_end_url(url, ctx):
+    new_url = ctx.url_flattener.replaceHref(url, url, ctx)
+    return new_url if new_url is not None else url
+
+
 def _escape_percents(txt):
     return txt.replace('%', '%%')

@@ -354,15 +386,10 @@
                 return a_txt

         # Use the URL flattener to reformat the hyperlink.
-        old_text_length = ctx.text_length
         href_flattened = ctx.url_flattener.replaceHref(a_txt, href, ctx)
         if href_flattened is not None:
-            # We have a reformatted URL. Use that, but check if the
-            # flattener computed a custom text length. If not, do the
-            # standard computation.
-            if ctx.text_length == old_text_length:
-                return ctx.processText(href_flattened, False)
-            return href_flattened
+            # We have a reformatted URL, use that.
+            return ctx.processText(href_flattened, False)

         # If we have a simple hyperlink where the text is a substring of
         # the target URL, just return the URL.
--- a/silorider/silos/bluesky.py	Tue Dec 26 16:30:36 2023 -0800
+++ b/silorider/silos/bluesky.py	Tue Dec 26 16:32:17 2023 -0800
@@ -175,3 +175,7 @@

     def measureUrl(self, url):
         return len(url)
+
+    def reset(self):
+        self.urls = []
+