annotate piecrust/admin/textutil.py @ 1188:a7c43131d871

bake: Fix file write flushing problem with Python 3.8+. Writing the cache files fails in Python 3.8 because the flushing behaviour appears to have changed, so we need to flush explicitly. Even then, in very rare cases, it looks like it can still run into race conditions, so we do a very hacky and ugly "retry" loop when fetching cached data :(
author Ludovic Chabant <ludovic@chabant.com>
date Tue, 15 Jun 2021 22:36:23 -0700
parents 5e91bc0e3b4d
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
593
2713b54b5d76 admin: Add summary of page in source listing.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
1 from html.parser import HTMLParser
2713b54b5d76 admin: Add summary of page in source listing.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
2
2713b54b5d76 admin: Add summary of page in source listing.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
3
2713b54b5d76 admin: Add summary of page in source listing.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
def text_preview(txt, length=100, *, max_length=None, offset=0):
    """Return a short preview of ``txt``, ending on a word boundary if possible.

    The preview starts at ``offset`` and always contains the next ``length``
    characters. If more text follows, the preview is extended up to the next
    whitespace character so that the last word is not cut in half, and
    ``'...'`` is appended to show that text was left out.

    Args:
        txt: The text to build the preview from.
        length: The minimum number of characters to take from ``offset``.
        max_length: The hard limit, counted from ``offset``, for the search
            of a word boundary. Defaults to ``length + 50`` when not given
            (or when given as ``0``).
        offset: The index in ``txt`` at which the preview starts.

    Returns:
        The preview string. It ends with ``'...'`` whenever text following
        the preview was dropped, and it is returned as-is when the preview
        reaches the end of ``txt``.
    """
    max_length = max_length or (length + 50)
    start = offset + length
    extract = txt[offset:start]
    if len(txt) <= start:
        # Nothing follows the base extract, so there is nothing to truncate.
        return extract

    whitespace = (' ', '\t', '\r', '\n')
    limit = min(offset + max_length, len(txt))
    for i in range(start, limit):
        if txt[i] in whitespace:
            # Found a word boundary: finish the current word and stop here.
            return extract + txt[start:i] + '...'

    # No word boundary before the limit: take everything up to the limit.
    extract += txt[start:limit]
    if limit < len(txt):
        # The hard limit cut the text short, so flag the truncation just
        # like the word-boundary case does.
        extract += '...'
    return extract
2713b54b5d76 admin: Add summary of page in source listing.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
17
2713b54b5d76 admin: Add summary of page in source listing.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
18
2713b54b5d76 admin: Add summary of page in source listing.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
class MLStripper(HTMLParser):
    """HTML parser that discards markup and collects only the text.

    Text chunks are accumulated in ``self.fed`` as the parser runs, and
    ``get_data()`` returns them joined together.
    """

    def __init__(self):
        # The base constructor already resets the parser state, so no
        # explicit ``reset()`` call is needed afterwards.
        super().__init__(convert_charrefs=True)
        # Kept as a plain attribute for code that still reads it.
        self.strict = False
        self.fed = []

    def handle_data(self, d):
        """Record a chunk of text found between tags."""
        self.fed.append(d)

    def handle_entityref(self, name):
        """Record a named entity reference in its original ``&name;`` form.

        The standard library only calls this hook when ``convert_charrefs``
        is false; otherwise entities arrive already decoded via
        ``handle_data``.
        """
        self.fed.append('&{};'.format(name))

    def get_data(self):
        """Return all the text collected so far as a single string."""
        pieces = self.fed
        return ''.join(pieces)
2713b54b5d76 admin: Add summary of page in source listing.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
35
2713b54b5d76 admin: Add summary of page in source listing.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
36
2713b54b5d76 admin: Add summary of page in source listing.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
def html_to_text(html):
    """Strip the markup from ``html`` and return the remaining text.

    Args:
        html: The HTML string to convert.

    Returns:
        The text content of ``html``, with tags removed.
    """
    s = MLStripper()
    s.feed(html)
    # ``feed()`` may hold back trailing input it considers incomplete (for
    # instance text ending with an unterminated ``&``, as in "AT&T"), in
    # case more data is coming. ``close()`` tells the parser the input is
    # finished so that this buffered text is processed too.
    s.close()
    return s.get_data()
2713b54b5d76 admin: Add summary of page in source listing.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
41