Mercurial > silorider
annotate tests/test_format.py @ 33:9e4eb3f2754e
Improve handling of character limits in html stripping
The code now tracks character counts more closely during html
stripping, and the resulting count should be exact. When the limit is
exceeded, it now restarts the stripping without any URLs to prevent
incorrect trimming. It also better preserves whitespace in the original post.
New tests are added for the Twitter silo to ensure it works as expected.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Wed, 10 May 2023 16:10:12 -0700 |
parents | c898b4df0f29 |
children | 486affad656e |
rev | line source |
---|---|
0 | 1 import pytest |
27
c898b4df0f29
Use context for html stripping, with support for custom URL sizes
Ludovic Chabant <ludovic@chabant.com>
parents:
25
diff
changeset
|
2 from silorider.format import ( |
c898b4df0f29
Use context for html stripping, with support for custom URL sizes
Ludovic Chabant <ludovic@chabant.com>
parents:
25
diff
changeset
|
3 format_entry, strip_html, HtmlStrippingContext, |
c898b4df0f29
Use context for html stripping, with support for custom URL sizes
Ludovic Chabant <ludovic@chabant.com>
parents:
25
diff
changeset
|
4 URLMODE_INLINE, URLMODE_LAST, URLMODE_BOTTOM_LIST) |
0 | 5 |
6 | |
7 test_url = 'https://example.org/article' | |
8 | |
9 | |
25
fb93d3fbff4e
Support transforming twitter profile URLs into mentions.
Ludovic Chabant <ludovic@chabant.com>
parents:
18
diff
changeset
|
10 def _make_test_entry(best_name, is_micropost): |
fb93d3fbff4e
Support transforming twitter profile URLs into mentions.
Ludovic Chabant <ludovic@chabant.com>
parents:
18
diff
changeset
|
11 class TestEntry: |
fb93d3fbff4e
Support transforming twitter profile URLs into mentions.
Ludovic Chabant <ludovic@chabant.com>
parents:
18
diff
changeset
|
12 def __init__(self): |
fb93d3fbff4e
Support transforming twitter profile URLs into mentions.
Ludovic Chabant <ludovic@chabant.com>
parents:
18
diff
changeset
|
13 self.is_micropost = is_micropost |
fb93d3fbff4e
Support transforming twitter profile URLs into mentions.
Ludovic Chabant <ludovic@chabant.com>
parents:
18
diff
changeset
|
14 self.url = test_url |
0 | 15 |
25
fb93d3fbff4e
Support transforming twitter profile URLs into mentions.
Ludovic Chabant <ludovic@chabant.com>
parents:
18
diff
changeset
|
16 def get(self, _): |
fb93d3fbff4e
Support transforming twitter profile URLs into mentions.
Ludovic Chabant <ludovic@chabant.com>
parents:
18
diff
changeset
|
17 return best_name |
fb93d3fbff4e
Support transforming twitter profile URLs into mentions.
Ludovic Chabant <ludovic@chabant.com>
parents:
18
diff
changeset
|
18 |
fb93d3fbff4e
Support transforming twitter profile URLs into mentions.
Ludovic Chabant <ludovic@chabant.com>
parents:
18
diff
changeset
|
19 def htmlFind(self, *args, **kwargs): |
fb93d3fbff4e
Support transforming twitter profile URLs into mentions.
Ludovic Chabant <ludovic@chabant.com>
parents:
18
diff
changeset
|
20 return best_name |
fb93d3fbff4e
Support transforming twitter profile URLs into mentions.
Ludovic Chabant <ludovic@chabant.com>
parents:
18
diff
changeset
|
21 |
0 | 22 entry = TestEntry() |
23 return entry | |
24 | |
25 | |
18
a921cc2306bc
Do our own HTML parsing/stripping of micropost contents.
Ludovic Chabant <ludovic@chabant.com>
parents:
0
diff
changeset
|
26 @pytest.mark.parametrize("text, expected", [ |
a921cc2306bc
Do our own HTML parsing/stripping of micropost contents.
Ludovic Chabant <ludovic@chabant.com>
parents:
0
diff
changeset
|
27 ("<p>Something</p>", |
a921cc2306bc
Do our own HTML parsing/stripping of micropost contents.
Ludovic Chabant <ludovic@chabant.com>
parents:
0
diff
changeset
|
28 "Something"), |
a921cc2306bc
Do our own HTML parsing/stripping of micropost contents.
Ludovic Chabant <ludovic@chabant.com>
parents:
0
diff
changeset
|
29 ("<p>Something with <em>emphasis</em> in it</p>", |
a921cc2306bc
Do our own HTML parsing/stripping of micropost contents.
Ludovic Chabant <ludovic@chabant.com>
parents:
0
diff
changeset
|
30 "Something with emphasis in it"), |
a921cc2306bc
Do our own HTML parsing/stripping of micropost contents.
Ludovic Chabant <ludovic@chabant.com>
parents:
0
diff
changeset
|
31 ("<p>Something with <a href=\"http://example.org/blah\">a link</a>", |
a921cc2306bc
Do our own HTML parsing/stripping of micropost contents.
Ludovic Chabant <ludovic@chabant.com>
parents:
0
diff
changeset
|
32 "Something with a link http://example.org/blah"), |
a921cc2306bc
Do our own HTML parsing/stripping of micropost contents.
Ludovic Chabant <ludovic@chabant.com>
parents:
0
diff
changeset
|
33 ("<p>Something with a link <a href=\"http://example.org/blah\">http://example.org</a>", # NOQA |
a921cc2306bc
Do our own HTML parsing/stripping of micropost contents.
Ludovic Chabant <ludovic@chabant.com>
parents:
0
diff
changeset
|
34 "Something with a link http://example.org/blah"), |
a921cc2306bc
Do our own HTML parsing/stripping of micropost contents.
Ludovic Chabant <ludovic@chabant.com>
parents:
0
diff
changeset
|
35 ("<p>Something with <a href=\"http://example.org/first\">one link here</a> and <a href=\"http://example.org/second\">another there</a>...</p>", # NOQA |
a921cc2306bc
Do our own HTML parsing/stripping of micropost contents.
Ludovic Chabant <ludovic@chabant.com>
parents:
0
diff
changeset
|
36 "Something with one link here http://example.org/first and another there http://example.org/second...") # NOQA |
a921cc2306bc
Do our own HTML parsing/stripping of micropost contents.
Ludovic Chabant <ludovic@chabant.com>
parents:
0
diff
changeset
|
37 ]) |
a921cc2306bc
Do our own HTML parsing/stripping of micropost contents.
Ludovic Chabant <ludovic@chabant.com>
parents:
0
diff
changeset
|
38 def test_strip_html(text, expected): |
27
c898b4df0f29
Use context for html stripping, with support for custom URL sizes
Ludovic Chabant <ludovic@chabant.com>
parents:
25
diff
changeset
|
39 ctx = HtmlStrippingContext() |
c898b4df0f29
Use context for html stripping, with support for custom URL sizes
Ludovic Chabant <ludovic@chabant.com>
parents:
25
diff
changeset
|
40 ctx.url_mode = URLMODE_INLINE |
c898b4df0f29
Use context for html stripping, with support for custom URL sizes
Ludovic Chabant <ludovic@chabant.com>
parents:
25
diff
changeset
|
41 actual = strip_html(text, ctx) |
c898b4df0f29
Use context for html stripping, with support for custom URL sizes
Ludovic Chabant <ludovic@chabant.com>
parents:
25
diff
changeset
|
42 print(actual) |
c898b4df0f29
Use context for html stripping, with support for custom URL sizes
Ludovic Chabant <ludovic@chabant.com>
parents:
25
diff
changeset
|
43 print(expected) |
18
a921cc2306bc
Do our own HTML parsing/stripping of micropost contents.
Ludovic Chabant <ludovic@chabant.com>
parents:
0
diff
changeset
|
44 assert actual == expected |
a921cc2306bc
Do our own HTML parsing/stripping of micropost contents.
Ludovic Chabant <ludovic@chabant.com>
parents:
0
diff
changeset
|
45 |
a921cc2306bc
Do our own HTML parsing/stripping of micropost contents.
Ludovic Chabant <ludovic@chabant.com>
parents:
0
diff
changeset
|
46 |
a921cc2306bc
Do our own HTML parsing/stripping of micropost contents.
Ludovic Chabant <ludovic@chabant.com>
parents:
0
diff
changeset
|
47 @pytest.mark.parametrize("text, expected", [ |
33
9e4eb3f2754e
Improve handling of character limits in html stripping
Ludovic Chabant <ludovic@chabant.com>
parents:
27
diff
changeset
|
48 ("<p>Something with <a href=\"http://example.org/blah\">a link</a></p>", |
18
a921cc2306bc
Do our own HTML parsing/stripping of micropost contents.
Ludovic Chabant <ludovic@chabant.com>
parents:
0
diff
changeset
|
49 "Something with a link\nhttp://example.org/blah"), |
33
9e4eb3f2754e
Improve handling of character limits in html stripping
Ludovic Chabant <ludovic@chabant.com>
parents:
27
diff
changeset
|
50 ("<p>Something with a link <a href=\"http://example.org/blah\">http://example.org</a></p>", # NOQA |
27
c898b4df0f29
Use context for html stripping, with support for custom URL sizes
Ludovic Chabant <ludovic@chabant.com>
parents:
25
diff
changeset
|
51 "Something with a link\nhttp://example.org/blah"), |
18
a921cc2306bc
Do our own HTML parsing/stripping of micropost contents.
Ludovic Chabant <ludovic@chabant.com>
parents:
0
diff
changeset
|
52 ("<p>Something with <a href=\"http://example.org/first\">one link here</a> and <a href=\"http://example.org/second\">another there</a>...</p>", # NOQA |
a921cc2306bc
Do our own HTML parsing/stripping of micropost contents.
Ludovic Chabant <ludovic@chabant.com>
parents:
0
diff
changeset
|
53 "Something with one link here and another there...\nhttp://example.org/first\nhttp://example.org/second") # NOQA |
a921cc2306bc
Do our own HTML parsing/stripping of micropost contents.
Ludovic Chabant <ludovic@chabant.com>
parents:
0
diff
changeset
|
54 ]) |
a921cc2306bc
Do our own HTML parsing/stripping of micropost contents.
Ludovic Chabant <ludovic@chabant.com>
parents:
0
diff
changeset
|
55 def test_strip_html_with_bottom_urls(text, expected): |
27
c898b4df0f29
Use context for html stripping, with support for custom URL sizes
Ludovic Chabant <ludovic@chabant.com>
parents:
25
diff
changeset
|
56 ctx = HtmlStrippingContext() |
c898b4df0f29
Use context for html stripping, with support for custom URL sizes
Ludovic Chabant <ludovic@chabant.com>
parents:
25
diff
changeset
|
57 ctx.url_mode = URLMODE_BOTTOM_LIST |
c898b4df0f29
Use context for html stripping, with support for custom URL sizes
Ludovic Chabant <ludovic@chabant.com>
parents:
25
diff
changeset
|
58 actual = strip_html(text, ctx) |
18
a921cc2306bc
Do our own HTML parsing/stripping of micropost contents.
Ludovic Chabant <ludovic@chabant.com>
parents:
0
diff
changeset
|
59 print(actual) |
a921cc2306bc
Do our own HTML parsing/stripping of micropost contents.
Ludovic Chabant <ludovic@chabant.com>
parents:
0
diff
changeset
|
60 print(expected) |
a921cc2306bc
Do our own HTML parsing/stripping of micropost contents.
Ludovic Chabant <ludovic@chabant.com>
parents:
0
diff
changeset
|
61 assert actual == expected |
a921cc2306bc
Do our own HTML parsing/stripping of micropost contents.
Ludovic Chabant <ludovic@chabant.com>
parents:
0
diff
changeset
|
62 |
a921cc2306bc
Do our own HTML parsing/stripping of micropost contents.
Ludovic Chabant <ludovic@chabant.com>
parents:
0
diff
changeset
|
63 |
0 | 64 @pytest.mark.parametrize("title, limit, add_url, expected", [ |
65 ('A test entry', None, False, 'A test entry'), | |
66 ('A test entry', None, 'auto', 'A test entry ' + test_url), | |
67 ('A test entry', None, True, 'A test entry ' + test_url), | |
68 | |
69 ('A test entry', 80, False, 'A test entry'), | |
70 ('A test entry', 80, 'auto', 'A test entry ' + test_url), | |
71 ('A test entry', 80, True, 'A test entry ' + test_url), | |
72 | |
73 ('A test entry that is very very long because its title has many many ' | |
74 'words in it for no good reason', 80, False, | |
75 'A test entry that is very very long because its title has many many ' | |
76 'words in...'), | |
77 ('A test entry that is very very long because its title has many many ' | |
78 'words in it for no good reason', 80, 'auto', | |
79 'A test entry that is very very long because its... ' + test_url), | |
80 ('A test entry that is very very long because its title has many many ' | |
81 'words in it for no good reason', 80, True, | |
82 'A test entry that is very very long because its... ' + test_url) | |
83 ]) | |
84 def test_format_lonform_entry(title, limit, add_url, expected): | |
85 entry = _make_test_entry(title, False) | |
86 actual = format_entry(entry, limit, add_url) | |
87 assert actual == expected | |
88 | |
89 | |
90 @pytest.mark.parametrize("text, limit, add_url, expected", [ | |
91 ('A test entry', None, False, 'A test entry'), | |
92 ('A test entry', None, 'auto', 'A test entry'), | |
93 ('A test entry', None, True, 'A test entry ' + test_url), | |
94 | |
95 ('A test entry', 80, False, 'A test entry'), | |
96 ('A test entry', 80, 'auto', 'A test entry'), | |
97 ('A test entry', 80, True, 'A test entry ' + test_url), | |
98 | |
99 ('A test entry that is very very long because its title has many many ' | |
100 'words in it for no good reason', 80, False, | |
101 'A test entry that is very very long because its title has many many ' | |
102 'words in...'), | |
103 ('A test entry that is very very long because its title has many many ' | |
104 'words in it for no good reason', 80, 'auto', | |
105 'A test entry that is very very long because its... ' + test_url), | |
106 ('A test entry that is very very long because its title has many many ' | |
107 'words in it for no good reason', 80, True, | |
108 'A test entry that is very very long because its... ' + test_url) | |
109 ]) | |
110 def test_format_micropost_entry(text, limit, add_url, expected): | |
111 entry = _make_test_entry(text, True) | |
112 actual = format_entry(entry, limit, add_url) | |
113 assert actual == expected |