annotate piecrust/admin/textutil.py @ 1183:62900c42d6dd

admin: Don't pass a custom environment to sub-processes. Apparently I was trying to be too clever, it's causing problems in some cases.
author Ludovic Chabant <ludovic@chabant.com>
date Thu, 21 May 2020 22:10:04 -0700
parents 5e91bc0e3b4d
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
593
2713b54b5d76 admin: Add summary of page in source listing.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
1 from html.parser import HTMLParser
2713b54b5d76 admin: Add summary of page in source listing.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
2
2713b54b5d76 admin: Add summary of page in source listing.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
3
2713b54b5d76 admin: Add summary of page in source listing.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
def text_preview(txt, length=100, *, max_length=None, offset=0):
    """Return a short preview of ``txt`` that tries not to cut a word in half.

    The preview starts at ``offset`` and spans ``length`` characters. If
    more text follows, the preview is extended character by character until
    the current word ends (a space, tab, carriage return or newline is met)
    or until ``max_length`` characters have been taken, whichever comes
    first. An ellipsis (``'...'``) is appended whenever text remains beyond
    the returned extract.

    Args:
        txt: The text to preview.
        length: The nominal number of characters to keep.
        max_length: The hard limit on the number of characters taken from
            ``txt`` (the ellipsis is not counted). Defaults to
            ``length + 50`` when ``None`` or ``0``.
        offset: The index in ``txt`` at which the preview starts.

    Returns:
        The extract, followed by ``'...'`` if ``txt`` continues past it.
    """
    max_length = max_length or (length + 50)
    start = offset + length
    extract = txt[offset:start]
    if len(txt) <= start:
        # The nominal window already reaches the end of the text.
        return extract

    stop = min(offset + max_length, len(txt))
    for i in range(start, stop):
        c = txt[i]
        if c in (' ', '\t', '\r', '\n'):
            # The word ended and more text follows.
            truncated = True
            break
        extract += c
    else:
        # No whitespace was found: either the end of the text was reached
        # (nothing is missing) or the `max_length` budget ran out mid-word,
        # in which case the cut still has to be signalled.
        truncated = stop < len(txt)

    if truncated:
        extract += '...'
    return extract
2713b54b5d76 admin: Add summary of page in source listing.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
17
2713b54b5d76 admin: Add summary of page in source listing.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
18
2713b54b5d76 admin: Add summary of page in source listing.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
class MLStripper(HTMLParser):
    """HTML parser that discards markup and collects only the text content.

    Feed it HTML with `feed()`, call `close()` to flush any text the parser
    is still buffering, then read the accumulated text with `get_data()`.
    """

    def __init__(self):
        # `HTMLParser.__init__` already calls `reset()`, so no extra reset
        # is needed. Character references are converted by the parser
        # itself, which means they reach `handle_data` as plain text.
        super().__init__(convert_charrefs=True)
        # Text fragments in the order the parser reported them.
        self.fed = []

    def handle_data(self, d):
        """Record a run of text found between tags."""
        self.fed.append(d)

    def handle_entityref(self, name):
        """Record a named entity reference verbatim (as ``&name;``).

        The parser does not invoke this handler while `convert_charrefs`
        is enabled; it is kept for the case where that flag is turned off.
        """
        self.fed.append('&%s;' % name)

    def get_data(self):
        """Return all the text collected so far as a single string."""
        return ''.join(self.fed)
2713b54b5d76 admin: Add summary of page in source listing.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
35
2713b54b5d76 admin: Add summary of page in source listing.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
36
2713b54b5d76 admin: Add summary of page in source listing.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
def html_to_text(html):
    """Strip the markup from ``html`` and return the remaining text.

    Args:
        html: A string of HTML.

    Returns:
        The text content of ``html`` with all tags removed.
    """
    s = MLStripper()
    s.feed(html)
    # `feed()` may hold back trailing text (for instance text containing an
    # `&` that could be the start of a character reference) while waiting
    # for more input. `close()` tells the parser the input is complete so
    # that this buffered text is processed instead of being lost.
    s.close()
    return s.get_data()
2713b54b5d76 admin: Add summary of page in source listing.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
41