Mercurial > silorider
annotate tests/test_silos_twitter.py @ 33:9e4eb3f2754e
Improve handling of character limits in html stripping
The code now more closely keeps track of character counts during html
stripping, and should be absolutely exact. When the limit is exceeded,
it now restarts the stripping without any URLs to prevent incorrect
trimming. It also better preserves whitespace in the original post.
New tests are added for Twitter silo to ensure it works as expected.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Wed, 10 May 2023 16:10:12 -0700 |
parents | fb93d3fbff4e |
children | c5f73ebb43a5 |
rev | line source |
---|---|
2 | 1 import pytest |
2 | |
3 | |
def test_one_article(cli, feedutil, tweetmock):
    """An article is tweeted as its title followed by its permalink.

    The feed entry has an ``h1.p-name`` title and a two-paragraph
    ``e-content`` body; the expected tweet contains only the title and
    the ``u-url`` permalink, with an empty media list.
    """
    feed = cli.createTempFeed(feedutil.makeFeed(
        """<h1 class="p-name">A new article</h1>
<div class="e-content">
<p>This is the text of the article.</p>
<p>It has 2 paragraphs.</p>
</div>
<a class="u-url" href="https://example.org/a-new-article">permalink</a>"""
    ))

    cli.appendSiloConfig('test', 'twitter', url='/blah')
    cli.setFeedConfig('feed', feed)
    tweetmock.installTokens(cli, 'test')

    ctx, _ = cli.run('process')
    assert ctx.cache.wasPosted('test', 'https://example.org/a-new-article')
    # Each recorded tweet is a (text, media) tuple.
    tweet = ctx.silos[0].client.tweets[0]
    assert tweet == ('A new article https://example.org/a-new-article', [])
2 | 22 |
23 | |
def test_one_micropost(cli, feedutil, tweetmock):
    """A short micropost is tweeted verbatim, without its permalink."""
    feed = cli.createTempFeed(feedutil.makeFeed(
        """<p class="p-name">This is a quick update.</p>
<a class="u-url" href="/01234.html">permalink</a>"""
    ))

    cli.appendSiloConfig('test', 'twitter', url='/blah')
    cli.setFeedConfig('feed', feed)
    tweetmock.installTokens(cli, 'test')

    ctx, _ = cli.run('process')
    assert ctx.cache.wasPosted('test', '/01234.html')
    # Each recorded tweet is a (text, media) tuple.
    tweet = ctx.silos[0].client.tweets[0]
    assert tweet == ("This is a quick update.", [])
2 | 38 |
39 | |
25
fb93d3fbff4e
Support transforming twitter profile URLs into mentions.
Ludovic Chabant <ludovic@chabant.com>
parents:
18
diff
changeset
|
def test_one_micropost_with_mention(cli, feedutil, tweetmock):
    """A link to a Twitter profile URL is replaced by an ``@`` mention.

    The anchor pointing at ``https://twitter.com/jack`` is expected to
    become ``@jack``, and the line break inside the paragraph is
    expected to survive in the tweet text.
    """
    # NOTE: the markup lines inside the literal start at column 0 on
    # purpose; the expected text below contains the newline as-is.
    feed = cli.createTempFeed(feedutil.makeFeed(
        """<p class="p-name">Hey <a href="https://twitter.com/jack">Jacky</a>
you should fix your stuff!</p>
<a class="u-url" href="/01234.html">permalink</a>"""
    ))

    cli.appendSiloConfig('test', 'twitter', url='/blah')
    cli.setFeedConfig('feed', feed)
    tweetmock.installTokens(cli, 'test')

    ctx, _ = cli.run('process')
    assert ctx.cache.wasPosted('test', '/01234.html')
    # Each recorded tweet is a (text, media) tuple.
    tweet = ctx.silos[0].client.tweets[0]
    assert tweet == ("Hey @jack\nyou should fix your stuff!", [])
25
fb93d3fbff4e
Support transforming twitter profile URLs into mentions.
Ludovic Chabant <ludovic@chabant.com>
parents:
18
diff
changeset
|
55 |
fb93d3fbff4e
Support transforming twitter profile URLs into mentions.
Ludovic Chabant <ludovic@chabant.com>
parents:
18
diff
changeset
|
56 |
def test_one_micropost_with_one_photo(cli, feedutil, tweetmock, monkeypatch):
    """A micropost with one ``u-photo`` is tweeted with that photo's URL.

    The media list is expected to hold the full-size image URL taken
    from the ``u-photo`` link's ``href``, not the thumbnail ``src``.
    """
    feed = cli.createTempFeed(feedutil.makeFeed(
        """<p class="p-name">This is a quick photo update.</p>
<div>
<a class="u-photo" href="/fullimg.jpg"><img src="/thumbimg.jpg"/></a>
</div>
<a class="u-url" href="/01234.html">permalink</a>"""
    ))

    cli.appendSiloConfig('test', 'twitter', url='/blah')
    cli.setFeedConfig('feed', feed)
    tweetmock.installTokens(cli, 'test')

    ctx, _ = cli.run('process')

    assert ctx.cache.wasPosted('test', '/01234.html')
    # Each recorded tweet is a (text, media) tuple.
    tweet = ctx.silos[0].client.tweets[0]
    assert tweet == ("This is a quick photo update.", ['/fullimg.jpg'])
2 | 75 |
76 | |
def test_one_micropost_with_two_photos(cli, feedutil, tweetmock, monkeypatch):
    """A micropost with two ``u-photo`` links is tweeted with both URLs.

    The media list is expected to hold both full-size image URLs, in
    the order they appear in the markup.
    """
    feed = cli.createTempFeed(feedutil.makeFeed(
        """<p class="p-name">This is a photo update with 2 photos.</p>
<div>
<a class="u-photo" href="/fullimg1.jpg"><img src="/thumbimg1.jpg"/></a>
<a class="u-photo" href="/fullimg2.jpg"><img src="/thumbimg2.jpg"/></a>
</div>
<a class="u-url" href="/01234.html">permalink</a>"""
    ))

    cli.appendSiloConfig('test', 'twitter', url='/blah')
    cli.setFeedConfig('feed', feed)
    tweetmock.installTokens(cli, 'test')

    ctx, _ = cli.run('process')

    assert ctx.cache.wasPosted('test', '/01234.html')
    # Each recorded tweet is a (text, media) tuple.
    tweet = ctx.silos[0].client.tweets[0]
    assert tweet == ("This is a photo update with 2 photos.",
                     ['/fullimg1.jpg', '/fullimg2.jpg'])
2 | 97 |
98 | |
33
9e4eb3f2754e
Improve handling of character limits in html stripping
Ludovic Chabant <ludovic@chabant.com>
parents:
25
diff
changeset
|
def test_micropost_with_long_text_and_link(cli, feedutil, tweetmock, monkeypatch):
    """A long micropost that still fits keeps its full text and link URL.

    The anchor is expected to be replaced by its link text, with the
    link's URL appended at the end of the tweet. The text is written to
    sit right at the character limit so that off-by-one errors in the
    length accounting show up.
    """
    # The literals below are length-sensitive: do not reflow or edit them.
    feed = cli.createTempFeed(feedutil.makeFeed(
        """<div class="p-name">
<p>This a pretty long text that has a link in it :) We want to make sure it gets to the limit of what Twitter allows, so that we can test there won't be any off-by-one errors in measurements. Here is a <a href="https://docs.python.org/3/library/textwrap.html">link to Python's textwrap module</a>, which is appropriate!!!</p>
</div>
<a class="u-url" href="/01234.html">permalink</a>"""
    ))

    cli.appendSiloConfig('test', 'twitter', url='/blah')
    cli.setFeedConfig('feed', feed)
    tweetmock.installTokens(cli, 'test')

    ctx, _ = cli.run('process')
    assert ctx.cache.wasPosted('test', '/01234.html')
    # Each recorded tweet is a (text, media) tuple.
    tweet = ctx.silos[0].client.tweets[0]
    assert tweet == ("This a pretty long text that has a link in it :) We want to make sure it gets to the limit of what Twitter allows, so that we can test there won't be any off-by-one errors in measurements. Here is a link to Python's textwrap module, which is appropriate!!! https://docs.python.org/3/library/textwrap.html",
                     [])
9e4eb3f2754e
Improve handling of character limits in html stripping
Ludovic Chabant <ludovic@chabant.com>
parents:
25
diff
changeset
|
116 |
9e4eb3f2754e
Improve handling of character limits in html stripping
Ludovic Chabant <ludovic@chabant.com>
parents:
25
diff
changeset
|
117 |
9e4eb3f2754e
Improve handling of character limits in html stripping
Ludovic Chabant <ludovic@chabant.com>
parents:
25
diff
changeset
|
def test_micropost_with_too_long_text_and_link_1(cli, feedutil, tweetmock, monkeypatch):
    """A slightly-too-long micropost ending in a short word is trimmed.

    The expected tweet drops the final short word ("yes"), ends the
    text with ``...``, omits the in-text link's URL, and appends the
    post's permalink instead.
    """
    # The literals below are length-sensitive: do not reflow or edit them.
    feed = cli.createTempFeed(feedutil.makeFeed(
        """<div class="p-name">
<p>This time we have a text that's slightly too long, with <a href="https://thisdoesntmatter.com">a link here</a>. We'll be one character too long, with a short word at the end to test the shortening algorithm. Otherwise, don't worry about it. Blah blah blah. Trying to get to the limit. Almost here yes</p>
</div>
<a class="u-url" href="/01234.html">permalink</a>"""
    ))

    cli.appendSiloConfig('test', 'twitter', url='/blah')
    cli.setFeedConfig('feed', feed)
    tweetmock.installTokens(cli, 'test')

    ctx, _ = cli.run('process')
    assert ctx.cache.wasPosted('test', '/01234.html')
    # Each recorded tweet is a (text, media) tuple.
    tweet = ctx.silos[0].client.tweets[0]
    assert tweet == ("This time we have a text that's slightly too long, with a link here. We'll be one character too long, with a short word at the end to test the shortening algorithm. Otherwise, don't worry about it. Blah blah blah. Trying to get to the limit. Almost here... /01234.html",
                     [])
9e4eb3f2754e
Improve handling of character limits in html stripping
Ludovic Chabant <ludovic@chabant.com>
parents:
25
diff
changeset
|
135 |
9e4eb3f2754e
Improve handling of character limits in html stripping
Ludovic Chabant <ludovic@chabant.com>
parents:
25
diff
changeset
|
136 |
9e4eb3f2754e
Improve handling of character limits in html stripping
Ludovic Chabant <ludovic@chabant.com>
parents:
25
diff
changeset
|
def test_micropost_with_too_long_text_and_link_2(cli, feedutil, tweetmock, monkeypatch):
    """A slightly-too-long micropost ending in a long word is trimmed.

    The expected tweet drops the whole final long word
    ("califragilisticastuff") rather than cutting it mid-word, ends the
    text with ``...``, omits the in-text link's URL, and appends the
    post's permalink instead.
    """
    # The literals below are length-sensitive: do not reflow or edit them.
    feed = cli.createTempFeed(feedutil.makeFeed(
        """<div class="p-name">
<p>This time we have a text that's slightly too long, with <a href="https://thisdoesntmatter.com">a link here</a>. We'll be one character too long, with a loooooong word at the end to test the shortening algorithm. Otherwise, don't worry about it. Blah blah blah. Our long word is: califragilisticastuff</p>
</div>
<a class="u-url" href="/01234.html">permalink</a>"""
    ))

    cli.appendSiloConfig('test', 'twitter', url='/blah')
    cli.setFeedConfig('feed', feed)
    tweetmock.installTokens(cli, 'test')

    ctx, _ = cli.run('process')
    assert ctx.cache.wasPosted('test', '/01234.html')
    # Each recorded tweet is a (text, media) tuple.
    tweet = ctx.silos[0].client.tweets[0]
    assert tweet == ("This time we have a text that's slightly too long, with a link here. We'll be one character too long, with a loooooong word at the end to test the shortening algorithm. Otherwise, don't worry about it. Blah blah blah. Our long word is:... /01234.html",
                     [])
9e4eb3f2754e
Improve handling of character limits in html stripping
Ludovic Chabant <ludovic@chabant.com>
parents:
25
diff
changeset
|
154 |
9e4eb3f2754e
Improve handling of character limits in html stripping
Ludovic Chabant <ludovic@chabant.com>
parents:
25
diff
changeset
|
155 |
@pytest.fixture(scope='session')
def tweetmock():
    """Swap the Twitter silo's client class for the mock and return a helper.

    Sets ``TwitterSilo._CLIENT_CLASS`` to ``TwitterMock`` and returns a
    ``TwitterMockUtil`` that tests use to install fake credentials.
    """
    # Imported lazily so the silo module is only loaded when the fixture
    # is actually requested.
    from silorider.silos.twitter import TwitterSilo
    # NOTE(review): the class attribute is not restored afterwards; with
    # session scope the override stays in place for the whole test run.
    TwitterSilo._CLIENT_CLASS = TwitterMock
    return TwitterMockUtil()
161 | |
162 | |
class TwitterMock:
    """In-memory stand-in for the Twitter client; records posted tweets.

    The constructor checks that it receives exactly the fake credentials
    installed by ``TwitterMockUtil.installTokens``.
    """

    def __init__(self, consumer_key, consumer_secret,
                 access_token_key, access_token_secret):
        # Fail fast if the credentials were not passed through unchanged.
        assert consumer_key == 'TEST_CLIENT_KEY'
        assert consumer_secret == 'TEST_CLIENT_SECRET'
        assert access_token_key == 'TEST_ACCESS_KEY'
        assert access_token_secret == 'TEST_ACCESS_SECRET'

        # List of (tweet, media) tuples, in posting order.
        self.tweets = []

    def PostUpdate(self, tweet, media=None):
        """Record a posted tweet as a ``(tweet, media)`` tuple."""
        self.tweets.append((tweet, media))
175 | |
176 | |
class TwitterMockUtil:
    """Helper handed to tests by the ``tweetmock`` fixture."""

    def installTokens(self, cli, silo_name):
        """Register a pre-exec hook that stores fake tokens for a silo.

        The hook writes two comma-separated credential strings into
        ``ctx.cache`` under ``<silo_name>_clienttoken`` and
        ``<silo_name>_accesstoken``.
        """
        def do_install_tokens(ctx):
            ctx.cache.setCustomValue(
                '%s_clienttoken' % silo_name,
                'TEST_CLIENT_KEY,TEST_CLIENT_SECRET')
            ctx.cache.setCustomValue(
                '%s_accesstoken' % silo_name,
                'TEST_ACCESS_KEY,TEST_ACCESS_SECRET')

        # Nothing is written here; the hook is only handed to the CLI.
        cli.preExecHook(do_install_tokens)