changeset 19:d3c4c5082bbc

Add Webmention silo.
author Ludovic Chabant <ludovic@chabant.com>
date Tue, 02 Oct 2018 22:22:31 -0700
parents a921cc2306bc
children a45587268314
files Pipfile Pipfile.lock silorider/config.py silorider/format.py silorider/silos/base.py silorider/silos/webmention.py tests/conftest.py tests/test_silos_webmention.py
diffstat 8 files changed, 175 insertions(+), 74 deletions(-) [+]
line wrap: on
line diff
--- a/Pipfile	Sun Sep 16 21:16:20 2018 -0700
+++ b/Pipfile	Tue Oct 02 22:22:31 2018 -0700
@@ -13,6 +13,7 @@
 "mf2util" = "*"
 python-dateutil = "*"
 python-twitter = "*"
+ronkyuu = "*"
 
 
 [dev-packages]
--- a/Pipfile.lock	Sun Sep 16 21:16:20 2018 -0700
+++ b/Pipfile.lock	Tue Oct 02 22:22:31 2018 -0700
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "f7bd62138b5af138cf517232c1ec83785d6b7af9adf957058f3bfef31a165440"
+            "sha256": "2107dacb7d1214bf5e04f786633d7e1b3e8315a9e4997c49129edb8ff58cb4ec"
         },
         "host-environment-markers": {
             "implementation_name": "cpython",
@@ -9,9 +9,9 @@
             "os_name": "posix",
             "platform_machine": "x86_64",
             "platform_python_implementation": "CPython",
-            "platform_release": "17.6.0",
+            "platform_release": "17.7.0",
             "platform_system": "Darwin",
-            "platform_version": "Darwin Kernel Version 17.6.0: Tue May  8 15:22:16 PDT 2018; root:xnu-4570.61.1~1/RELEASE_X86_64",
+            "platform_version": "Darwin Kernel Version 17.7.0: Thu Jun 21 22:53:14 PDT 2018; root:xnu-4570.71.2~1/RELEASE_X86_64",
             "python_full_version": "3.6.3",
             "python_version": "3.6",
             "sys_platform": "darwin"
@@ -36,20 +36,18 @@
         },
         "beautifulsoup4": {
             "hashes": [
-                "sha256:2545357585a6cc7d050d3c43a86eba2c0b91b9e7ac8a3965e64a6ead6a1a9a3d",
-                "sha256:5a3d659840960a4107047b6328d6d4cdaaee69939bf11adc07466a1856c99a80",
-                "sha256:4ddc90ad88bccc005a71d8ef32f7b1cd8f935475cd561c4122b2f87de45d28ab",
-                "sha256:272081ad78c5495ba67083a0e50920163701fa6fe67fbb5eefeb21b5dd88c40b",
-                "sha256:bd43a3b26d2886acd63070c43da821b60dea603eb6d45bab0294aac6129adbfa"
+                "sha256:f0abd31228055d698bb392a826528ea08ebb9959e6bea17c606fd9c9009db938",
+                "sha256:194ec62a25438adcb3fdb06378b26559eda1ea8a747367d34c33cef9c7f48d57",
+                "sha256:90f8e61121d6ae58362ce3bed8cd997efb00c914eae0ff3d363c32f9a9822d10"
             ],
-            "version": "==4.6.1"
+            "version": "==4.6.3"
         },
         "certifi": {
             "hashes": [
-                "sha256:9fa520c1bacfb634fa7af20a76bcbd3d5fb390481724c597da32c719a7dca4b0",
-                "sha256:13e698f54293db9f89122b0581843a782ad0934a4fe0172d2a980ba77fc61bb7"
+                "sha256:456048c7e371c089d0a77a5212fb37a2c2dce1e24146e3b7e0261736aaeaa22a",
+                "sha256:376690d6f16d32f9d1fe8932551d80b23e9d393a8578c5633a2ed39a64861638"
             ],
-            "version": "==2018.4.16"
+            "version": "==2018.8.24"
         },
         "cffi": {
             "hashes": [
@@ -104,27 +102,27 @@
         },
         "cryptography": {
             "hashes": [
-                "sha256:87d092a7c2a44e5f7414ab02fb4145723ebba411425e1a99773531dd4c0e9b8d",
-                "sha256:67f7f57eae8dede577f3f7775957f5bec93edd6bdb6ce597bb5b28e1bdf3d4fb",
-                "sha256:27bb401a20a838d6d0ea380f08c6ead3ccd8c9d8a0232dc9adcc0e4994576a66",
-                "sha256:c3d945b7b577f07a477700f618f46cbc287af3a9222cd73035c6ef527ef2c363",
-                "sha256:bc2301170986ad82d9349a91eb8884e0e191209c45f5541b16aa7c0cfb135978",
-                "sha256:7ba834564daef87557e7fcd35c3c3183a4147b0b3a57314e53317360b9b201b3",
-                "sha256:9449f5d4d7c516a6118fa9210c4a00f34384cb1d2028672100ee0c6cce49d7f6",
-                "sha256:d01dfc5c2b3495184f683574e03c70022674ca9a7be88589c5aba130d835ea90",
-                "sha256:2a35b7570d8f247889784010aac8b384fd2e4a47b33e15c4a60b45a7c1944120",
-                "sha256:5eb86f03f9c4f0ac2336ac5431271072ddf7ecc76b338e26366732cfac58aa19",
-                "sha256:29720c4253263cff9aea64585adbbe85013ba647f6e98367efff9db2d7193ded",
-                "sha256:82409f5150e529d699e5c33fa8fd85e965104db03bc564f5f4b6a9199e591f7c",
-                "sha256:7d7f084cbe1fdb82be5a0545062b59b1ad3637bc5a48612ac2eb428ff31b31ea",
-                "sha256:6ec84edcbc966ae460560a51a90046503ff0b5b66157a9efc61515c68059f6c8",
-                "sha256:8c56ef989342e42b9fcaba7c74b446f0cc9bed546dd00034fa7ad66fc00307ef",
-                "sha256:42c531a6a354407f42ee07fda5c2c0dc822cf6d52744949c182f2b295fbd4183",
-                "sha256:21af753934f2f6d1a10fe8f4c0a64315af209ef6adeaee63ca349797d747d687",
-                "sha256:cee18beb4c807b5c0b178f4fa2fae03cef9d51821a358c6890f8b23465b7e5d2",
-                "sha256:c132bab45d4bd0fff1d3fe294d92b0a6eb8404e93337b3127bdec9f21de117e6"
+                "sha256:17db09db9d7c5de130023657be42689d1a5f60502a14f6f745f6f65a6b8195c0",
+                "sha256:e4aecdd9d5a3d06c337894c9a6e2961898d3f64fe54ca920a72234a3de0f9cb3",
+                "sha256:10b48e848e1edb93c1d3b797c83c72b4c387ab0eb4330aaa26da8049a6cbede0",
+                "sha256:2cbaa03ac677db6c821dac3f4cdfd1461a32d0615847eedbb0df54bb7802e1f7",
+                "sha256:31db8febfc768e4b4bd826750a70c79c99ea423f4697d1dab764eb9f9f849519",
+                "sha256:7e3b4aecc4040928efa8a7cdaf074e868af32c58ffc9bb77e7bf2c1a16783286",
+                "sha256:02602e1672b62e803e08617ec286041cc453e8d43f093a5f4162095506bc0beb",
+                "sha256:dc2d3f3b1548f4d11786616cf0f4415e25b0fbecb8a1d2cd8c07568f13fdde38",
+                "sha256:76936ec70a9b72eb8c58314c38c55a0336a2b36de0c7ee8fb874a4547cadbd39",
+                "sha256:8229ceb79a1792823d87779959184a1bf95768e9248c93ae9f97c7a2f60376a1",
+                "sha256:8a19e9f2fe69f6a44a5c156968d9fc8df56d09798d0c6a34ccc373bb186cee86",
+                "sha256:8168bcb08403ef144ff1fb880d416f49e2728101d02aaadfe9645883222c0aa5",
+                "sha256:6a88d9004310a198c474d8a822ee96a6dd6c01efe66facdf17cb692512ae5bc0",
+                "sha256:be495b8ec5a939a7605274b6e59fbc35e76f5ad814ae010eb679529671c9e119",
+                "sha256:4a510d268e55e2e067715d728e4ca6cd26a8e9f1f3d174faf88e6f2cb6b6c395",
+                "sha256:227da3a896df1106b1a69b1e319dce218fa04395e8cc78be7e31ca94c21254bc",
+                "sha256:e79ab4485b99eacb2166f3212218dd858258f374855e1568f728462b0e6ee0d9",
+                "sha256:f995d3667301e1754c57b04e0bae6f0fa9d710697a9f8d6712e8cca02550910f",
+                "sha256:8d10113ca826a4c29d5b85b2c4e045ffa8bad74fb525ee0eceb1d38d4c70dfd6"
             ],
-            "version": "==2.3"
+            "version": "==2.3.1"
         },
         "decorator": {
             "hashes": [
@@ -168,17 +166,16 @@
         },
         "mastodon.py": {
             "hashes": [
-                "sha256:0d426c37795ed24cdf7affec7b3465cb76f9afc3f1d4dfbfd389b0b4459dbf4d",
-                "sha256:339a60c4ea505dd5b6c8f6ac076ce40f9e7bdfcd72d9466869da8bf631e4b9f5"
+                "sha256:b29cb6eb268f65951d22b8cd0afbb233402e15b26bd8568f1c2ef506dbeeb1c0",
+                "sha256:3f000f34e2740a90fdadd923d1a7dd100dd62d56906c026ae725ce43301f30f6"
             ],
-            "version": "==1.3.0"
+            "version": "==1.3.1"
         },
         "mf2py": {
             "hashes": [
-                "sha256:2dc2e2a18ac457829936c24ffce8d66b44ea63f834aea1ed48dfbdf7b4951585",
-                "sha256:64cadc8a271382e075a72bca1c0dc09297f8e1de62279849bd3915418f7087de"
+                "sha256:84f1f8f2ff3f1deb1c30be497e7ccd805452996a662fd4a77f09e0105bede2c9"
             ],
-            "version": "==1.1.1"
+            "version": "==1.1.2"
         },
         "mf2util": {
             "hashes": [
@@ -235,6 +232,13 @@
             ],
             "version": "==1.0.0"
         },
+        "ronkyuu": {
+            "hashes": [
+                "sha256:5aa77b39d301bc174ab99ba8a53954627771cb501651a12103c58f51b32e84bf",
+                "sha256:85b25fef7f5fb0c93afd5377ea35b5ff72b2458f926bafdf10f0c9a1e19cab10"
+            ],
+            "version": "==0.6"
+        },
         "six": {
             "hashes": [
                 "sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb",
@@ -260,25 +264,25 @@
     "develop": {
         "atomicwrites": {
             "hashes": [
-                "sha256:a24da68318b08ac9c9c45029f4a10371ab5b20e4226738e150e6e7c571630ae6",
-                "sha256:240831ea22da9ab882b551b31d4225591e5e447a68c5e188db5b89ca1d487585"
+                "sha256:0312ad34fcad8fac3704d441f7b317e50af620823353ec657a53e981f92920c0",
+                "sha256:ec9ae8adaae229e4f8446952d204a3e4b5fdd2d099f9be3aaf556120135fb3ee"
             ],
-            "version": "==1.1.5"
+            "version": "==1.2.1"
         },
         "attrs": {
             "hashes": [
-                "sha256:4b90b09eeeb9b88c35bc642cbac057e45a5fd85367b985bd2809c62b7b939265",
-                "sha256:e0d0eb91441a3b53dab4d9b743eafc1ac44476296a2053b6ca3af0b139faf87b"
+                "sha256:ca4be454458f9dec299268d472aaa5a11f67a4ff70093396e1ceae9c76cf4bbb",
+                "sha256:10cbf6e27dbce8c30807caf056c8eb50917e0eaafe86347671b57254006c3e69"
             ],
-            "version": "==18.1.0"
+            "version": "==18.2.0"
         },
         "more-itertools": {
             "hashes": [
-                "sha256:a18d870ef2ffca2b8463c0070ad17b5978056f403fb64e3f15fe62a52db21cc0",
-                "sha256:6703844a52d3588f951883005efcf555e49566a48afd4db4e965d69b883980d3",
-                "sha256:2b6b9893337bfd9166bee6a62c2b0c9fe7735dcf85948b387ec8cba30e85d8e8"
+                "sha256:fcbfeaea0be121980e15bc97b3817b5202ca73d0eae185b4550cbfce2a3ebb3d",
+                "sha256:c187a73da93e7a8acc0001572aebc7e3c69daf7bf6881a2cea10650bd4420092",
+                "sha256:c476b5d3a34e12d40130bc2f935028b5f636df8f372dc2c1c01dc19681b2039e"
             ],
-            "version": "==4.2.0"
+            "version": "==4.3.0"
         },
         "pluggy": {
             "hashes": [
@@ -289,17 +293,17 @@
         },
         "py": {
             "hashes": [
-                "sha256:e31fb2767eb657cbde86c454f02e99cb846d3cd9d61b318525140214fdc0e98e",
-                "sha256:3fd59af7435864e1a243790d322d763925431213b6b8529c6ca71081ace3bbf7"
+                "sha256:50402e9d1c9005d759426988a492e0edaadb7f4e68bcddfea586bc7432d009c6",
+                "sha256:06a30435d058473046be836d3fc4f27167fd84c45b99704f2fb5509ef61f9af1"
             ],
-            "version": "==1.5.4"
+            "version": "==1.6.0"
         },
         "pytest": {
             "hashes": [
-                "sha256:952c0389db115437f966c4c2079ae9d54714b9455190e56acebe14e8c38a7efa",
-                "sha256:341ec10361b64a24accaec3c7ba5f7d5ee1ca4cebea30f76fad3dd12db9f0541"
+                "sha256:ad0c7db7b5d4081631e0155f5c61b80ad76ce148551aaafe3a718d65a7508b18",
+                "sha256:2d7c49e931316cc7d1638a3e5f54f5d7b4e5225972b3c9838f3584788d27f349"
             ],
-            "version": "==3.6.4"
+            "version": "==3.7.4"
         },
         "pytest-runner": {
             "hashes": [
@@ -310,14 +314,14 @@
         },
         "setuptools-scm": {
             "hashes": [
-                "sha256:801b572ce15861f732cf35b583b1eee1a630fa6a8be3eff8ac5ec89a2eb5937b",
-                "sha256:032c4854409835afacedddbc8fae9ed708b31cf5b6a16de9bf3790b1e6424d08",
-                "sha256:2a99f4dc100b7fd6b190ccd870360d166cb2c2e0b201613a0b42347cd474139b",
-                "sha256:c7613068fd3fed346d14641a4305fcc3a79986985e168fb6f163b9406b9d018e",
-                "sha256:02de237097c9459984b7dfb7454c54a06235d91e707b9aa563f9f01ce670d27e",
-                "sha256:abd4b654950504f20d8ab7a50a36703c0197529831cd4f5ebc5bc682e37b95af"
+                "sha256:14db63c379b69393e9581df438e761b10d2d4060ae81b22a910d87751c0dc15a",
+                "sha256:cc6953d224a22f10e933fa2f55c95979317c55259016adcf93310ba2997febfa",
+                "sha256:c9e9e5ca820bc26793e55a06af56ad180aec3c355b77dc52959efcecda5af2d5",
+                "sha256:5be82a2168dcac8994b6db467c101019424b475e6239ec347dc719df08e6baf9",
+                "sha256:7eb101cc7412b1a2d2be9a9996508e147f91648bf2fee6222d476639b48626f5",
+                "sha256:1191f2a136b5e86f7ca8ab00a97ef7aef997131f1f6d4971be69a1ef387d8b40"
             ],
-            "version": "==3.0.4"
+            "version": "==3.1.0"
         },
         "six": {
             "hashes": [
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/silorider/config.py	Tue Oct 02 22:22:31 2018 -0700
@@ -0,0 +1,6 @@
+
+try:
+    import lxml  # NOQA
+    has_lxml = True
+except ImportError:
+    has_lxml = False
--- a/silorider/format.py	Sun Sep 16 21:16:20 2018 -0700
+++ b/silorider/format.py	Tue Oct 02 22:22:31 2018 -0700
@@ -2,12 +2,7 @@
 import urllib.parse
 import textwrap
 import bs4
-
-try:
-    import lxml  # NOQA
-    _bs_parser = 'xml'
-except ImportError:
-    _bs_parser = 'html.parser'
+from .config import has_lxml
 
 
 def format_entry(entry, limit=None, add_url='auto'):
@@ -58,7 +53,7 @@
 def strip_html(txt, *, inline_urls=True):
     outtxt = ''
     ctx = _HtmlStripping()
-    soup = bs4.BeautifulSoup(txt, _bs_parser)
+    soup = bs4.BeautifulSoup(txt, 'lxml' if has_lxml else 'html5lib')
     for c in soup.children:
         outtxt += _do_strip_html(c, ctx)
 
--- a/silorider/silos/base.py	Sun Sep 16 21:16:20 2018 -0700
+++ b/silorider/silos/base.py	Tue Oct 02 22:22:31 2018 -0700
@@ -92,7 +92,8 @@
     from .print import PrintSilo
     from .mastodon import MastodonSilo
     from .twitter import TwitterSilo
-    silo_types = [PrintSilo, MastodonSilo, TwitterSilo]
+    from .webmention import WebmentionSilo
+    silo_types = [PrintSilo, MastodonSilo, TwitterSilo, WebmentionSilo]
     silo_dict = dict([(s.SILO_TYPE, s) for s in silo_types])
 
     silos = []
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/silorider/silos/webmention.py	Tue Oct 02 22:22:31 2018 -0700
@@ -0,0 +1,27 @@
+import logging
+import ronkyuu
+from .base import Silo
+from ..config import has_lxml
+
+
+logger = logging.getLogger(__name__)
+
+
+class WebmentionSilo(Silo):
+    SILO_TYPE = 'webmention'
+
+    def __init__(self, ctx):
+        super().__init__(ctx)
+        self.client = None
+        ronkyuu.setParser('lxml' if has_lxml else 'html5lib')
+
+    def authenticate(self, ctx):
+        logger.info("Webmention silo doesn't require authentication.")
+
+    def postEntry(self, entry, ctx):
+        source_url = entry.url
+        logger.debug("Finding mentions in: %s" % source_url)
+        refs = ronkyuu.findMentions(source_url)
+        for r in refs.get('refs', []):
+            logger.debug("Sending webmention: %s -> %s" % (source_url, r))
+            ronkyuu.sendWebmention(source_url, r)
--- a/tests/conftest.py	Sun Sep 16 21:16:20 2018 -0700
+++ b/tests/conftest.py	Tue Oct 02 22:22:31 2018 -0700
@@ -8,6 +8,9 @@
 import silorider.main
 
 
+logger = logging.getLogger(__name__)
+
+
 # def pytest_collect_file(parent, path):
 #     if path.ext == ".html" and path.basename.startswith("feeds"):
 #         return FeedFile(path, parent)
@@ -91,7 +94,7 @@
         self._pre_hooks.append(hook)
 
     def run(self, *args):
-        pre_args = []
+        pre_args = ['-v']
         if self._cfgtxt or self._feedcfg:
             cfgtxt = self._cfgtxt
             cfgtxt += '\n\n[urls]\n'
@@ -99,11 +102,11 @@
                 cfgtxt += '%s=%s\n' % (n, u)
 
             tmpfd, tmpcfg = tempfile.mkstemp()
-            print("Creating temporary configuration file: %s" % tmpcfg)
+            logger.info("Creating temporary configuration file: %s" % tmpcfg)
             with os.fdopen(tmpfd, 'w') as tmpfp:
                 tmpfp.write(cfgtxt)
             self._cleanup.append(tmpcfg)
-            pre_args = ['-c', tmpcfg]
+            pre_args += ['-c', tmpcfg]
 
         captured = io.StringIO()
         handler = logging.StreamHandler(captured)
@@ -127,7 +130,7 @@
         silorider.main.post_exec_hook = post_exec_hook
 
         args = pre_args + list(args)
-        print("Running command: %s" % list(args))
+        logger.info("Running command: %s" % list(args))
         try:
             silorider.main._unsafe_main(args)
         finally:
@@ -136,7 +139,7 @@
 
             silorider_logger.removeHandler(handler)
 
-            print("Cleaning %d temporary files." % len(self._cleanup))
+            logger.info("Cleaning %d temporary files." % len(self._cleanup))
             for tmpname in self._cleanup:
                 try:
                     os.remove(tmpname)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test_silos_webmention.py	Tue Oct 02 22:22:31 2018 -0700
@@ -0,0 +1,64 @@
+import unittest.mock
+import requests
+
+
+def test_one_article_no_mentions(cli, feedutil):
+    feed = cli.createTempFeed(feedutil.makeFeed(
+        """<h1 class="p-name">A new article</h1>
+<div class="e-content">
+<p>This is the abstract of the article.</p>
+<p>Read more at <a class="u-url" href="https://example.org/a-new-article">permalink</a>.</p>
+</div>
+"""  # NOQA
+    ))
+
+    cli.appendSiloConfig('test', 'webmention', url='/blah')
+    cli.setFeedConfig('feed', feed)
+
+    with unittest.mock.patch('requests.get') as mock_get, \
+            unittest.mock.patch('requests.post') as mock_post:
+        mock_get.side_effect = [
+            _MockResponse('')]
+        mock_post.side_effect = []
+        ctx, _ = cli.run('process')
+        assert mock_get.call_args_list[0][0] == ('https://example.org/a-new-article',)  # NOQA
+
+
+def test_one_article_one_mention(cli, feedutil):
+    feed = cli.createTempFeed(feedutil.makeFeed(
+        """<h1 class="p-name">A new article</h1>
+<div class="e-content">
+<p>This is the abstract of the article.</p>
+<p>Read more at <a class="u-url" href="https://example.org/a-new-article">permalink</a>.</p>
+</div>
+"""  # NOQA
+    ))
+
+    cli.appendSiloConfig('test', 'webmention', url='/blah')
+    cli.setFeedConfig('feed', feed)
+
+    with unittest.mock.patch('requests.get') as mock_get, \
+            unittest.mock.patch('requests.post') as mock_post:
+        mock_get.side_effect = [
+            _MockResponse("""
+<p>This is a reply to <a href="https://other.org/article">another article<a>.</p>
+"""),  # NOQA
+            _MockResponse("""
+<html><head>
+    <link rel="webmention" href="https://other.org/webmention">
+</head><body>
+</body></html>""")]
+        mock_post.side_effect = [
+            _MockResponse('')]
+        ctx, _ = cli.run('process')
+        assert mock_get.call_args_list[0][0] == ('https://example.org/a-new-article',)  # NOQA
+        assert mock_get.call_args_list[1][0] == ('https://other.org/article',)  # NOQA
+        assert mock_post.call_args_list[0][0] == ('https://other.org/webmention',)  # NOQA
+
+
+class _MockResponse:
+    def __init__(self, txt):
+        self.status_code = requests.codes.ok
+        self.headers = {}
+        self.history = []
+        self.text = self.content = txt