view piecrust/admin/textutil.py @ 1188:a7c43131d871

bake: Fix file write flushing problem with Python 3.8+. Writing the cache files fails in Python 3.8 because it looks like the flushing behaviour has changed, so we need to flush explicitly. Even then, on very rare occasions, it looks like it can still run into race conditions, so we do a very hacky and ugly "retry" loop when fetching cached data :(
author Ludovic Chabant <ludovic@chabant.com>
date Tue, 15 Jun 2021 22:36:23 -0700
parents 5e91bc0e3b4d
children
line wrap: on
line source

from html.parser import HTMLParser


def text_preview(txt, length=100, *, max_length=None, offset=0):
    """Return a short preview of ``txt`` that tries to end on a word boundary.

    The preview starts at ``offset`` and contains at least ``length``
    characters (when that many are available).  If the text continues past
    that point, the preview is extended up to the next whitespace character
    so the last word is not cut in half, and ``'...'`` is appended.

    Args:
        txt: The text to preview.
        length: The nominal number of characters in the preview.
        max_length: Hard limit on the number of characters taken from
            ``txt`` when looking for a word boundary.  Defaults to
            ``length + 50`` when omitted (or falsy).
        offset: Index in ``txt`` at which the preview starts; expected to
            be non-negative.

    Returns:
        The preview string.  It ends with ``'...'`` whenever some of the
        text after ``offset`` was left out.
    """
    max_length = max_length or (length + 50)
    end = offset + length
    head = txt[offset:end]
    if len(txt) <= end:
        # Nothing follows the nominal window: no truncation happened.
        return head

    # Extend the preview to the next whitespace, without going past the
    # hard limit or the end of the text.
    limit = min(offset + max_length, len(txt))
    for i in range(end, limit):
        if txt[i] in ' \t\r\n':
            return head + txt[end:i] + '...'

    # No word boundary found within the allowed range.  Take what is
    # allowed, and still flag the truncation if some text was left out
    # (previously the preview was cut silently in that case).
    cut = max(end, limit)
    preview = head + txt[end:cut]
    if cut < len(txt):
        preview += '...'
    return preview


class MLStripper(HTMLParser):
    """HTML parser that discards the markup and collects the text content.

    Feed it markup with ``feed()`` and read the accumulated text back with
    ``get_data()``.  Calling ``reset()`` clears the accumulated text, so an
    instance can be reused for another document.
    """

    def __init__(self):
        # HTMLParser.__init__() calls reset(), which creates ``self.fed``.
        super().__init__(convert_charrefs=True)
        # Kept from the original implementation.
        # NOTE(review): presumably only meaningful for old HTMLParser
        # versions that had a strict mode -- confirm before removing.
        self.strict = False

    def reset(self):
        """Reset the parser state and forget any text collected so far."""
        super().reset()
        # List of text fragments, in document order.
        self.fed = []

    def handle_data(self, d):
        """Collect a run of text found between tags."""
        self.fed.append(d)

    def handle_entityref(self, name):
        """Collect a named entity reference, keeping its ``&name;`` form.

        HTMLParser only calls this when ``convert_charrefs`` is off;
        otherwise references are decoded and passed to ``handle_data()``.
        """
        self.fed.append('&%s;' % name)

    def get_data(self):
        """Return all the text collected so far as a single string."""
        return ''.join(self.fed)


def html_to_text(html):
    """Strip the markup from ``html`` and return the remaining text.

    Args:
        html: A string of HTML markup.

    Returns:
        The text content of the markup, with the tags removed.
    """
    s = MLStripper()
    s.feed(html)
    # The whole document has been fed, so tell the parser: until close()
    # is called it may hold back trailing text (e.g. text ending with an
    # unterminated "&", as in "AT&T") in case more input completes it.
    s.close()
    return s.get_data()