Mercurial > silorider
view tests/test_commands_populate.py @ 27:c898b4df0f29
Use context for html stripping, with support for custom URL sizes
For instance, on Twitter, URLs are 23 characters long because Twitter uses its
own URL shortening service. Without taking this into account, post lengths
would not be calculated correctly.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Wed, 19 Apr 2023 12:46:58 -0700 |
parents | a921cc2306bc |
children | b739ca5feb45 |
line wrap: on
line source
# Tests for the `populate` command.
#
# Every test receives a `cli` fixture that is defined outside this file; it is
# used here to create a temporary feed, register a silo, point the feed
# configuration at the temporary feed and run a command.


# Feed holding a single h-entry with a name, two paragraphs of content and a
# permalink.
feed1 = (
    ' <html><body>'
    ' <article class="h-entry">'
    ' <h1 class="p-name">A new article</h1>'
    ' <div class="e-content">'
    ' <p>This is the text of the article.</p>'
    ' <p>It has 2 paragraphs.</p>'
    ' </div>'
    ' <a class="u-url" href="https://example.org/a-new-article">permalink</a>'
    ' </article>'
    ' </body></html>'
)


def test_populate(cli):
    """Run `populate` on a one-entry feed and check the entry is cached.

    After `populate -s test` runs, the cache must report the entry's
    permalink as already posted to the 'test' silo.
    """
    feed_path = cli.createTempFeed(feed1)
    cli.appendSiloConfig('test', 'print', items='name')
    cli.setFeedConfig('feed', feed_path)

    context, _ = cli.run('populate', '-s', 'test')

    posted = context.cache.wasPosted(
        'test', 'https://example.org/a-new-article')
    assert posted


# Feed holding three h-entries published on consecutive days
# (2018-01-07, 2018-01-08 and 2018-01-09), each with its own permalink.
feed2 = (
    ' <html><body>'
    ' <article class="h-entry">'
    ' <h1 class="p-name">First article</h1>'
    ' <div><time class="dt-published"'
    ' datetime="2018-01-07T09:30:00-00:00"></time></div>'
    ' <a class="u-url" href="https://example.org/first-article">permalink</a>'
    ' </article>'
    ' <article class="h-entry">'
    ' <h1 class="p-name">Second article</h1>'
    ' <div><time class="dt-published"'
    ' datetime="2018-01-08T09:30:00-00:00"></time></div>'
    ' <a class="u-url" href="https://example.org/second-article">permalink</a>'
    ' </article>'
    ' <article class="h-entry">'
    ' <h1 class="p-name">Third article</h1>'
    ' <div><time class="dt-published"'
    ' datetime="2018-01-09T09:30:00-00:00"></time></div>'
    ' <a class="u-url" href="https://example.org/third-article">permalink</a>'
    ' </article>'
    ' </body></html>'
)


def test_populate_until(cli):
    """Run `populate --until 2018-01-08` and check which entries are cached.

    The entries dated 2018-01-07 and 2018-01-08 must be reported as posted
    to the 'test' silo, while the entry dated 2018-01-09 must not be.
    """
    feed_path = cli.createTempFeed(feed2)
    cli.appendSiloConfig('test', 'print', items='name')
    cli.setFeedConfig('feed', feed_path)

    context, _ = cli.run('populate', '-s', 'test', '--until', '2018-01-08')

    expected_posted = (
        'https://example.org/first-article',
        'https://example.org/second-article',
    )
    for url in expected_posted:
        assert context.cache.wasPosted('test', url)

    assert not context.cache.wasPosted(
        'test', 'https://example.org/third-article')