Mercurial > silorider
annotate tests/test_commands_populate.py @ 27:c898b4df0f29
Use context for HTML stripping, with support for custom URL sizes
For instance, on Twitter every URL counts as 23 characters because Twitter
uses its own URL shortening service. Without taking this into account, post
lengths would not be calculated correctly.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Wed, 19 Apr 2023 12:46:58 -0700 |
parents | a921cc2306bc |
children | b739ca5feb45 |
rev | line source |
---|---|
0 | 1 |
2 feed1 = """ | |
3 <html><body> | |
4 <article class="h-entry"> | |
5 <h1 class="p-name">A new article</h1> | |
6 <div class="e-content"> | |
7 <p>This is the text of the article.</p> | |
8 <p>It has 2 paragraphs.</p> | |
9 </div> | |
10 <a class="u-url" href="https://example.org/a-new-article">permalink</a> | |
11 </article> | |
12 </body></html>""" | |
13 | |
14 | |
15 def test_populate(cli): | |
18
a921cc2306bc
Do our own HTML parsing/stripping of micropost contents.
Ludovic Chabant <ludovic@chabant.com>
parents:
0
diff
changeset
|
16 feed = cli.createTempFeed(feed1) |
0 | 17 cli.appendSiloConfig('test', 'print', items='name') |
18
a921cc2306bc
Do our own HTML parsing/stripping of micropost contents.
Ludovic Chabant <ludovic@chabant.com>
parents:
0
diff
changeset
|
18 cli.setFeedConfig('feed', feed) |
a921cc2306bc
Do our own HTML parsing/stripping of micropost contents.
Ludovic Chabant <ludovic@chabant.com>
parents:
0
diff
changeset
|
19 ctx, _ = cli.run('populate', '-s', 'test') |
0 | 20 assert ctx.cache.wasPosted('test', 'https://example.org/a-new-article') |
21 | |
22 | |
23 feed2 = """ | |
24 <html><body> | |
25 <article class="h-entry"> | |
26 <h1 class="p-name">First article</h1> | |
27 <div><time class="dt-published" datetime="2018-01-07T09:30:00-00:00"></time></div> | |
28 <a class="u-url" href="https://example.org/first-article">permalink</a> | |
29 </article> | |
30 <article class="h-entry"> | |
31 <h1 class="p-name">Second article</h1> | |
32 <div><time class="dt-published" datetime="2018-01-08T09:30:00-00:00"></time></div> | |
33 <a class="u-url" href="https://example.org/second-article">permalink</a> | |
34 </article> | |
35 <article class="h-entry"> | |
36 <h1 class="p-name">Third article</h1> | |
37 <div><time class="dt-published" datetime="2018-01-09T09:30:00-00:00"></time></div> | |
38 <a class="u-url" href="https://example.org/third-article">permalink</a> | |
39 </article> | |
40 </body></html>""" # NOQA | |
41 | |
42 | |
43 def test_populate_until(cli): | |
18
a921cc2306bc
Do our own HTML parsing/stripping of micropost contents.
Ludovic Chabant <ludovic@chabant.com>
parents:
0
diff
changeset
|
44 feed = cli.createTempFeed(feed2) |
0 | 45 cli.appendSiloConfig('test', 'print', items='name') |
18
a921cc2306bc
Do our own HTML parsing/stripping of micropost contents.
Ludovic Chabant <ludovic@chabant.com>
parents:
0
diff
changeset
|
46 cli.setFeedConfig('feed', feed) |
a921cc2306bc
Do our own HTML parsing/stripping of micropost contents.
Ludovic Chabant <ludovic@chabant.com>
parents:
0
diff
changeset
|
47 ctx, _ = cli.run('populate', '-s', 'test', '--until', '2018-01-08') |
0 | 48 assert ctx.cache.wasPosted('test', 'https://example.org/first-article') |
49 assert ctx.cache.wasPosted('test', 'https://example.org/second-article') | |
50 assert not ctx.cache.wasPosted('test', 'https://example.org/third-article') |