Mercurial > piecrust2
comparison foodtruck/textutil.py @ 593:2713b54b5d76
admin: Add summary of page in source listing.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Tue, 19 Jan 2016 21:33:31 -0800 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
592:9428bd0025eb | 593:2713b54b5d76 |
---|---|
1 from html.parser import HTMLParser | |
2 | |
3 | |
def text_preview(txt, length=100, *, max_length=None, offset=0):
    """Return a short preview of ``txt`` that avoids cutting a word in half.

    The preview starts at ``offset`` and is nominally ``length`` characters
    long. If that cut lands in the middle of a word, the preview is extended
    up to the next space, tab, carriage return or newline, but never beyond
    ``max_length`` characters from ``offset``.

    An ellipsis (``'...'``) is appended whenever some of ``txt`` remains
    after the preview, including when a very long word forced a hard cut at
    ``max_length``. Nothing is appended when the preview reaches the end of
    ``txt``.

    Args:
        txt: The string to preview.
        length: Preferred number of characters in the preview.
        max_length: Hard limit on the number of characters taken from
            ``txt``. Any falsy value (``None`` or ``0``) means
            ``length + 50``.
        offset: Index in ``txt`` at which the preview starts.

    Returns:
        The preview string, with a trailing ``'...'`` if it was truncated.
    """
    max_length = max_length or (length + 50)
    soft_end = offset + length
    extract = txt[offset:soft_end]
    if len(txt) <= soft_end:
        # The text ends within the preferred length: nothing was cut off.
        return extract

    word_breaks = (' ', '\t', '\r', '\n')
    hard_end = min(offset + max_length, len(txt))

    # Finish the word that straddles the preferred cut point, if any.
    overflow = []
    for i in range(soft_end, hard_end):
        c = txt[i]
        if c in word_breaks:
            break
        overflow.append(c)

    preview = extract + ''.join(overflow)
    # Signal truncation whenever there is text left after the preview, not
    # only when the cut happened to land on whitespace.
    if soft_end + len(overflow) < len(txt):
        preview += '...'
    return preview
17 | |
18 | |
class MLStripper(HTMLParser):
    """HTML parser that discards the markup and keeps only the text.

    Feed it HTML with ``feed()`` and retrieve the accumulated text with
    ``get_data()``.
    """

    def __init__(self):
        # The base constructor already calls reset(), so no explicit reset
        # is needed. Character references are decoded by the parser itself
        # before they reach handle_data().
        super().__init__(convert_charrefs=True)
        # Text fragments collected so far, in document order.
        self.fed = []

    def handle_data(self, d):
        """Record a run of text found between tags."""
        self.fed.append(d)

    def handle_entityref(self, name):
        """Record a named entity reference verbatim (e.g. ``&nbsp;``).

        The parser only calls this when ``convert_charrefs`` is disabled;
        otherwise entities arrive already decoded through ``handle_data``.
        """
        self.fed.append('&%s;' % name)

    def get_data(self):
        """Return all the text collected so far as a single string."""
        return ''.join(self.fed)
35 | |
36 | |
def html_to_text(html):
    """Strip the markup from ``html`` and return the remaining text.

    Args:
        html: A string of HTML.

    Returns:
        The text content of ``html`` with all tags removed.
    """
    s = MLStripper()
    s.feed(html)
    # The parser holds back input it cannot interpret yet, such as trailing
    # text with an unterminated '&' (e.g. "R&D") or an incomplete tag.
    # close() tells it that no more input is coming so that this buffered
    # text is flushed instead of being silently dropped.
    s.close()
    return s.get_data()
41 |