view tests/test_commands_populate.py @ 18:a921cc2306bc

Do our own HTML parsing/stripping of micropost contents. - This lets us properly handle various forms of linking. - Add tests for processing posts with links. - Fix configuration in tests. - Basic error handling for processing posts.
author Ludovic Chabant <ludovic@chabant.com>
date Sun, 16 Sep 2018 21:16:20 -0700
parents a1b7a459326a
children b739ca5feb45
line wrap: on
line source


# HTML fixture containing a single microformats2 ``h-entry``: a name
# (``p-name``), two paragraphs of content (``e-content``) and a permalink
# (``u-url``) pointing at https://example.org/a-new-article.
feed1 = """
<html><body>
    <article class="h-entry">
      <h1 class="p-name">A new article</h1>
      <div class="e-content">
        <p>This is the text of the article.</p>
        <p>It has 2 paragraphs.</p>
      </div>
      <a class="u-url" href="https://example.org/a-new-article">permalink</a>
    </article>
</body></html>"""


def test_populate(cli):
    """Run ``populate -s test`` against ``feed1`` and check the cache.

    After the command completes, the permalink of the single entry in
    ``feed1`` must be reported as posted for the ``test`` silo.
    """
    # Arrange: a temporary feed plus a 'print' silo registered as 'test'.
    feed_path = cli.createTempFeed(feed1)
    cli.appendSiloConfig('test', 'print', items='name')
    cli.setFeedConfig('feed', feed_path)

    # Act: only the returned context is needed; the second value is unused.
    ctx, _unused = cli.run('populate', '-s', 'test')

    # Assert: the entry URL is recorded as posted for silo 'test'.
    entry_url = 'https://example.org/a-new-article'
    assert ctx.cache.wasPosted('test', entry_url)


# HTML fixture containing three microformats2 ``h-entry`` articles whose
# ``dt-published`` timestamps fall on consecutive days (2018-01-07,
# 2018-01-08 and 2018-01-09, all at 09:30). Each has its own ``u-url``
# permalink. The trailing NOQA silences the linter for the long lines.
feed2 = """
<html><body>
    <article class="h-entry">
      <h1 class="p-name">First article</h1>
      <div><time class="dt-published" datetime="2018-01-07T09:30:00-00:00"></time></div>
      <a class="u-url" href="https://example.org/first-article">permalink</a>
    </article>
    <article class="h-entry">
      <h1 class="p-name">Second article</h1>
      <div><time class="dt-published" datetime="2018-01-08T09:30:00-00:00"></time></div>
      <a class="u-url" href="https://example.org/second-article">permalink</a>
    </article>
    <article class="h-entry">
      <h1 class="p-name">Third article</h1>
      <div><time class="dt-published" datetime="2018-01-09T09:30:00-00:00"></time></div>
      <a class="u-url" href="https://example.org/third-article">permalink</a>
    </article>
</body></html>"""  # NOQA


def test_populate_until(cli):
    """Run ``populate -s test --until 2018-01-08`` against ``feed2``.

    The entries dated 2018-01-07 and 2018-01-08 must be reported as posted
    for the ``test`` silo, while the entry dated 2018-01-09 must not.
    """
    # Arrange: a temporary feed plus a 'print' silo registered as 'test'.
    feed_path = cli.createTempFeed(feed2)
    cli.appendSiloConfig('test', 'print', items='name')
    cli.setFeedConfig('feed', feed_path)

    # Act: only the returned context is needed; the second value is unused.
    ctx, _unused = cli.run('populate', '-s', 'test', '--until', '2018-01-08')

    # Assert: entries up to the cutoff date are recorded as posted...
    expected_posted = (
        'https://example.org/first-article',
        'https://example.org/second-article',
    )
    for url in expected_posted:
        assert ctx.cache.wasPosted('test', url)

    # ...and the entry published after the cutoff is not.
    assert not ctx.cache.wasPosted('test', 'https://example.org/third-article')