Mercurial > piecrust2
annotate piecrust/admin/textutil.py @ 1188:a7c43131d871
bake: Fix file write flushing problem with Python 3.8+
Writing the cache files fails in Python 3.8 because it looks like flushing
behaviour has changed. We need to explicitly flush. And even then, in very
rare occurrences, it looks like it can still run into race conditions,
so we do a very hacky and ugly "retry" loop when fetching cached data :(
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Tue, 15 Jun 2021 22:36:23 -0700 |
parents | 5e91bc0e3b4d |
children |
rev | line source |
---|---|
593
2713b54b5d76
admin: Add summary of page in source listing.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
1 from html.parser import HTMLParser |
2713b54b5d76
admin: Add summary of page in source listing.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
2 |
2713b54b5d76
admin: Add summary of page in source listing.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
3 |
2713b54b5d76
admin: Add summary of page in source listing.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
def text_preview(txt, length=100, *, max_length=None, offset=0):
    """Return a short preview of ``txt`` that tries not to cut a word in half.

    The preview starts at ``offset`` and covers ``length`` characters. If
    more text follows, the preview is extended character by character until
    the next whitespace (space, tab, CR or LF), but never past
    ``offset + max_length``. Whenever some text is left out of the preview,
    ``'...'`` is appended so the truncation is visible.

    Args:
        txt: The text to preview.
        length: Number of characters always included (when available).
        max_length: Upper bound on how far the preview may be extended
            while looking for whitespace. A falsy value (``None`` or ``0``)
            means ``length + 50``. If it is not greater than ``length``,
            no extension takes place.
        offset: Index in ``txt`` at which the preview starts.

    Returns:
        The preview string, with a trailing ``'...'`` if ``txt`` continues
        beyond it.
    """
    max_length = max_length or (length + 50)
    soft_end = offset + length
    if len(txt) <= soft_end:
        # Everything from `offset` onwards fits: nothing is truncated.
        return txt[offset:soft_end]

    # Extend up to the next whitespace, without exceeding the hard limit.
    hard_end = min(offset + max_length, len(txt))
    cut = soft_end
    while cut < hard_end and txt[cut] not in (' ', '\t', '\r', '\n'):
        cut += 1

    extract = txt[offset:cut]
    if cut < len(txt):
        # Text remains after the cut (either we stopped at whitespace or we
        # hit the hard limit mid-word), so mark the preview as truncated.
        extract += '...'
    return extract
2713b54b5d76
admin: Add summary of page in source listing.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
17 |
2713b54b5d76
admin: Add summary of page in source listing.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
18 |
2713b54b5d76
admin: Add summary of page in source listing.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
class MLStripper(HTMLParser):
    """HTML parser that discards markup and accumulates only the text.

    Feed it HTML with ``feed()``, call ``close()`` once all input has been
    given, then read the collected text with ``get_data()``.
    """

    def __init__(self):
        # `HTMLParser.__init__` already calls `reset()`, so no explicit
        # reset is needed here. Character references are converted to their
        # Unicode characters before reaching `handle_data`.
        super().__init__(convert_charrefs=True)
        # Text fragments in the order they were encountered.
        self.fed = []

    def handle_data(self, d):
        """Collect a run of text found between tags."""
        self.fed.append(d)

    def handle_entityref(self, name):
        """Keep a named entity reference verbatim (``&name;``).

        The base parser does not call this while ``convert_charrefs`` is
        true; it is kept for callers that turn that option off.
        """
        self.fed.append('&%s;' % name)

    def get_data(self):
        """Return all the text collected so far as a single string."""
        return ''.join(self.fed)
2713b54b5d76
admin: Add summary of page in source listing.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
35 |
2713b54b5d76
admin: Add summary of page in source listing.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
36 |
2713b54b5d76
admin: Add summary of page in source listing.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
def html_to_text(html):
    """Strip the markup from ``html`` and return the remaining text.

    Args:
        html: A string of HTML.

    Returns:
        The text content of ``html`` with all tags removed.
    """
    s = MLStripper()
    s.feed(html)
    # The parser may hold back trailing text (for instance when it ends with
    # an unterminated `&`) while waiting for more input. `close()` forces
    # that buffered data to be processed so it isn't lost.
    s.close()
    return s.get_data()
2713b54b5d76
admin: Add summary of page in source listing.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
41 |