Mercurial > silorider
annotate tests/test_format.py @ 38:0f98784bcc40
Improve handling of whitespace and paragraphs in html stripping code
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Sun, 21 May 2023 09:40:00 -0700 |
parents | 9e4eb3f2754e |
children | 486affad656e |
rev | line source |
---|---|
0 | 1 import pytest |
27
c898b4df0f29
Use context for html stripping, with support for custom URL sizes
Ludovic Chabant <ludovic@chabant.com>
parents:
25
diff
changeset
|
2 from silorider.format import ( |
c898b4df0f29
Use context for html stripping, with support for custom URL sizes
Ludovic Chabant <ludovic@chabant.com>
parents:
25
diff
changeset
|
3 format_entry, strip_html, HtmlStrippingContext, |
c898b4df0f29
Use context for html stripping, with support for custom URL sizes
Ludovic Chabant <ludovic@chabant.com>
parents:
25
diff
changeset
|
4 URLMODE_INLINE, URLMODE_LAST, URLMODE_BOTTOM_LIST) |
0 | 5 |
6 | |
# Permalink assigned to every fake entry built by the tests in this module;
# it is also concatenated into the expected strings of the format tests.
test_url = 'https://example.org/article'
8 | |
9 | |
25
fb93d3fbff4e
Support transforming twitter profile URLs into mentions.
Ludovic Chabant <ludovic@chabant.com>
parents:
18
diff
changeset
|
def _make_test_entry(best_name, is_micropost):
    """Build a minimal stand-in entry object for the formatting tests.

    The returned object exposes ``is_micropost`` (as given) and ``url``
    (always the module-level ``test_url``).  Both ``get()`` and
    ``htmlFind()`` ignore their arguments and return ``best_name``.
    """
    class TestEntry:
        def __init__(self):
            self.url = test_url
            self.is_micropost = is_micropost

        def htmlFind(self, *args, **kwargs):
            # Whatever is being looked up, hand back the canned text.
            return best_name

        def get(self, _):
            # The requested key is deliberately ignored.
            return best_name

    return TestEntry()
24 | |
25 | |
18
a921cc2306bc
Do our own HTML parsing/stripping of micropost contents.
Ludovic Chabant <ludovic@chabant.com>
parents:
0
diff
changeset
|
@pytest.mark.parametrize("text, expected", [
    ('<p>Something</p>',
     'Something'),
    ('<p>Something with <em>emphasis</em> in it</p>',
     'Something with emphasis in it'),
    # The next two inputs intentionally have no closing </p>.
    ('<p>Something with <a href="http://example.org/blah">a link</a>',
     'Something with a link http://example.org/blah'),
    ('<p>Something with a link '
     '<a href="http://example.org/blah">http://example.org</a>',
     'Something with a link http://example.org/blah'),
    ('<p>Something with '
     '<a href="http://example.org/first">one link here</a> and '
     '<a href="http://example.org/second">another there</a>...</p>',
     'Something with one link here http://example.org/first '
     'and another there http://example.org/second...'),
])
def test_strip_html(text, expected):
    """Check strip_html() output when the context uses URLMODE_INLINE.

    In the expected strings each link's URL stays in place within the
    running text.
    """
    context = HtmlStrippingContext()
    context.url_mode = URLMODE_INLINE
    stripped = strip_html(text, context)
    # Echo both values, one per line, so they appear in pytest's captured
    # output when a case fails.
    print(stripped, expected, sep='\n')
    assert stripped == expected
a921cc2306bc
Do our own HTML parsing/stripping of micropost contents.
Ludovic Chabant <ludovic@chabant.com>
parents:
0
diff
changeset
|
45 |
a921cc2306bc
Do our own HTML parsing/stripping of micropost contents.
Ludovic Chabant <ludovic@chabant.com>
parents:
0
diff
changeset
|
46 |
a921cc2306bc
Do our own HTML parsing/stripping of micropost contents.
Ludovic Chabant <ludovic@chabant.com>
parents:
0
diff
changeset
|
@pytest.mark.parametrize("text, expected", [
    ('<p>Something with <a href="http://example.org/blah">a link</a></p>',
     'Something with a link\n'
     'http://example.org/blah'),
    ('<p>Something with a link '
     '<a href="http://example.org/blah">http://example.org</a></p>',
     'Something with a link\n'
     'http://example.org/blah'),
    ('<p>Something with '
     '<a href="http://example.org/first">one link here</a> and '
     '<a href="http://example.org/second">another there</a>...</p>',
     'Something with one link here and another there...\n'
     'http://example.org/first\n'
     'http://example.org/second'),
])
def test_strip_html_with_bottom_urls(text, expected):
    """Check strip_html() output when the context uses URLMODE_BOTTOM_LIST.

    In the expected strings the link URLs are listed after the text, one
    per line, in the order the links appear.
    """
    context = HtmlStrippingContext()
    context.url_mode = URLMODE_BOTTOM_LIST
    stripped = strip_html(text, context)
    # Echo both values, one per line, so they appear in pytest's captured
    # output when a case fails.
    print(stripped, expected, sep='\n')
    assert stripped == expected
a921cc2306bc
Do our own HTML parsing/stripping of micropost contents.
Ludovic Chabant <ludovic@chabant.com>
parents:
0
diff
changeset
|
62 |
a921cc2306bc
Do our own HTML parsing/stripping of micropost contents.
Ludovic Chabant <ludovic@chabant.com>
parents:
0
diff
changeset
|
63 |
# Shared fixtures for the long-form cases below.
_LONGFORM_TITLE = ('A test entry that is very very long because its title '
                   'has many many words in it for no good reason')
_LONGFORM_SHORT_WITH_URL = 'A test entry ' + test_url
_LONGFORM_CUT_WITH_URL = (
    'A test entry that is very very long because its... ' + test_url)


@pytest.mark.parametrize("title, limit, add_url, expected", [
    # Short title, no character limit.
    ('A test entry', None, False, 'A test entry'),
    ('A test entry', None, 'auto', _LONGFORM_SHORT_WITH_URL),
    ('A test entry', None, True, _LONGFORM_SHORT_WITH_URL),

    # Short title with an 80-character limit.
    ('A test entry', 80, False, 'A test entry'),
    ('A test entry', 80, 'auto', _LONGFORM_SHORT_WITH_URL),
    ('A test entry', 80, True, _LONGFORM_SHORT_WITH_URL),

    # Title longer than the limit: the expected text ends with '...'.
    (_LONGFORM_TITLE, 80, False,
     'A test entry that is very very long because its title has many '
     'many words in...'),
    (_LONGFORM_TITLE, 80, 'auto', _LONGFORM_CUT_WITH_URL),
    (_LONGFORM_TITLE, 80, True, _LONGFORM_CUT_WITH_URL),
])
def test_format_lonform_entry(title, limit, add_url, expected):
    """Check format_entry() on an entry whose ``is_micropost`` is False.

    In every case here, ``add_url='auto'`` is expected to append the URL
    just like ``add_url=True``.
    """
    # NOTE(review): 'lonform' looks like a typo for 'longform'; the name is
    # kept so existing test ids and -k selections keep matching.
    formatted = format_entry(_make_test_entry(title, False), limit, add_url)
    assert formatted == expected
88 | |
89 | |
# Shared fixtures for the micropost cases below.
_MICROPOST_TEXT = ('A test entry that is very very long because its title '
                   'has many many words in it for no good reason')
_MICROPOST_SHORT_WITH_URL = 'A test entry ' + test_url
_MICROPOST_CUT_WITH_URL = (
    'A test entry that is very very long because its... ' + test_url)


@pytest.mark.parametrize("text, limit, add_url, expected", [
    # Short text, no character limit: 'auto' is expected to add no URL.
    ('A test entry', None, False, 'A test entry'),
    ('A test entry', None, 'auto', 'A test entry'),
    ('A test entry', None, True, _MICROPOST_SHORT_WITH_URL),

    # Short text with an 80-character limit: same expectations.
    ('A test entry', 80, False, 'A test entry'),
    ('A test entry', 80, 'auto', 'A test entry'),
    ('A test entry', 80, True, _MICROPOST_SHORT_WITH_URL),

    # Text longer than the limit: the expected text ends with '...', and
    # 'auto' is expected to append the URL like True does.
    (_MICROPOST_TEXT, 80, False,
     'A test entry that is very very long because its title has many '
     'many words in...'),
    (_MICROPOST_TEXT, 80, 'auto', _MICROPOST_CUT_WITH_URL),
    (_MICROPOST_TEXT, 80, True, _MICROPOST_CUT_WITH_URL),
])
def test_format_micropost_entry(text, limit, add_url, expected):
    """Check format_entry() on an entry whose ``is_micropost`` is True."""
    formatted = format_entry(_make_test_entry(text, True), limit, add_url)
    assert formatted == expected