comparison foodtruck/textutil.py @ 593:2713b54b5d76

admin: Add summary of page in source listing.
author Ludovic Chabant <ludovic@chabant.com>
date Tue, 19 Jan 2016 21:33:31 -0800
parents
children
comparison
equal deleted inserted replaced
592:9428bd0025eb 593:2713b54b5d76
1 from html.parser import HTMLParser
2
3
def text_preview(txt, length=100, *, max_length=None, offset=0):
    """Return a short preview of `txt`, cut on a word boundary if possible.

    The preview starts at `offset` and contains at least `length`
    characters (when available). If more text follows, the preview is
    extended up to the next whitespace character so that the last word is
    not cut in half, but never beyond `offset + max_length`. Whenever some
    text is left out after the preview, `'...'` is appended.

    Args:
        txt: The text to preview.
        length: The nominal number of characters to include.
        max_length: The hard limit, counted from `offset`, up to which the
            preview may be extended while looking for whitespace. A falsy
            value (`None` or `0`) means `length + 50`.
        offset: The index in `txt` at which the preview starts.

    Returns:
        The preview string, with a trailing `'...'` if `txt` continues
        past the end of the preview.
    """
    max_length = max_length or (length + 50)
    start = offset + length
    extract = txt[offset:start]
    if len(txt) <= start:
        # Everything from `offset` on already fits: nothing was cut.
        return extract

    # Extend the preview to the end of the current word, within the limit.
    limit = min(offset + max_length, len(txt))
    end = start
    while end < limit and txt[end] not in ' \t\r\n':
        end += 1
    extract += txt[start:end]

    # Mark the truncation whether we stopped on whitespace or because the
    # hard limit was reached in the middle of a long word.
    if end < len(txt):
        extract += '...'
    return extract
17
18
class MLStripper(HTMLParser):
    """HTML parser that discards markup and collects only the text.

    Feed it HTML with `feed()`, call `close()` to flush any buffered
    input, then read the accumulated text with `get_data()`. Calling
    `reset()` clears the collected text so the instance can be reused.
    """

    def __init__(self):
        # The base constructor calls `reset()`, which creates `self.fed`.
        super(MLStripper, self).__init__()
        # NOTE(review): `strict` appears to matter only to older Python 3
        # releases of `HTMLParser`; kept for compatibility -- confirm.
        self.strict = False
        self.convert_charrefs = True

    def reset(self):
        """Reset the parser state and drop any text collected so far."""
        super(MLStripper, self).reset()
        self.fed = []

    def handle_data(self, d):
        """Collect a chunk of text found between tags."""
        self.fed.append(d)

    def handle_entityref(self, name):
        """Keep a named entity reference (e.g. `&amp;`) verbatim.

        Per the `html.parser` documentation this is not called while
        `convert_charrefs` is true; in that mode references arrive already
        decoded through `handle_data`.
        """
        self.fed.append('&%s;' % name)

    def get_data(self):
        """Return all the text collected so far as a single string."""
        return ''.join(self.fed)
35
36
def html_to_text(html):
    """Return the text content of `html` with all markup removed.

    Args:
        html: A string of HTML.

    Returns:
        The concatenated text found by `MLStripper` in `html`.
    """
    s = MLStripper()
    s.feed(html)
    # The parser may hold back trailing text (e.g. "AT&T", where the `&`
    # could be the start of a character reference) until it knows no more
    # input is coming; `close()` forces it to process that text.
    s.close()
    return s.get_data()
41