annotate piecrust/admin/views/mentions.py @ 1111:98c7dd6ea4ac

admin: Early version of webmention endpoint.
author Ludovic Chabant <ludovic@chabant.com>
date Sun, 18 Feb 2018 20:37:54 -0800
parents
children 8af2ea1f5c34
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1111
98c7dd6ea4ac admin: Early version of webmention endpoint.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
1 import os
98c7dd6ea4ac admin: Early version of webmention endpoint.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
2 import os.path
98c7dd6ea4ac admin: Early version of webmention endpoint.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
3 import json
98c7dd6ea4ac admin: Early version of webmention endpoint.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
4 import time
98c7dd6ea4ac admin: Early version of webmention endpoint.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
5 import logging
98c7dd6ea4ac admin: Early version of webmention endpoint.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
6 import requests
98c7dd6ea4ac admin: Early version of webmention endpoint.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
7 from bs4 import BeautifulSoup
98c7dd6ea4ac admin: Early version of webmention endpoint.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
8 from flask import current_app, g, request, make_response, abort
98c7dd6ea4ac admin: Early version of webmention endpoint.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
9 from ..blueprint import foodtruck_bp
98c7dd6ea4ac admin: Early version of webmention endpoint.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
10 from piecrust.app import PieCrustFactory
98c7dd6ea4ac admin: Early version of webmention endpoint.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
11 from piecrust.serving.util import get_requested_page
98c7dd6ea4ac admin: Early version of webmention endpoint.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
12
98c7dd6ea4ac admin: Early version of webmention endpoint.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
13
98c7dd6ea4ac admin: Early version of webmention endpoint.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
14 logger = logging.getLogger(__name__)
98c7dd6ea4ac admin: Early version of webmention endpoint.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
15
98c7dd6ea4ac admin: Early version of webmention endpoint.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
16
98c7dd6ea4ac admin: Early version of webmention endpoint.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
@foodtruck_bp.route('/webmention', methods=['POST'])
def post_webmention():
    """Receive a webmention POSTed to this site.

    Reads the ``source`` and ``target`` form fields. If the site's
    ``webmention`` configuration has ``use_task_queue`` set to ``True``,
    the mention is written to a JSON task file under the site's ``_tasks``
    directory and the request returns immediately. Otherwise the mention is
    checked and recorded right away in a ``mentions.json`` file inside the
    target page's ``-assets`` directory.

    Returns:
        A 202 response once the mention has been queued or recorded (or was
        already recorded for that source).

    Aborts with:
        400 if source/target are missing or identical, if the source can't
        be fetched, or if the source doesn't link to the target.
        404 if no page can be resolved for the target URL.
    """
    # Basic validation of source/target.
    src_url = request.form.get('source')
    tgt_url = request.form.get('target')
    if not src_url or not tgt_url:
        logger.error("No source or target specified.")
        abort(400)
    if src_url.lower().rstrip('/') == tgt_url.lower().rstrip('/'):
        logger.error("Source and target are the same.")
        abort(400)

    # See if we need to do this synchronously or asynchronously, and other
    # things we should know up-front. The section may be absent from the
    # config, in which case we fall back to an empty one.
    wmcfg = g.site.piecrust_app.config.get('webmention') or {}
    if wmcfg.get('use_task_queue') is True:
        tasks_dir = os.path.join(g.site.piecrust_app.root_dir, '_tasks')
        _ensure_dir(tasks_dir)
        task_data = {
            'type': 'webmention',
            'data': {'source': src_url, 'target': tgt_url}}
        # NOTE(review): the file name only has one-second resolution, so two
        # mentions queued within the same second end up in the same file and
        # the later one wins. Confirm what the task runner expects before
        # changing the naming scheme.
        task_path = os.path.join(tasks_dir, '%s.json' % int(time.time()))
        with open(task_path, 'w', encoding='utf8') as fp:
            json.dump(task_data, fp)
        return make_response("Webmention queued.", 202, [])

    # Find if we have a page at the target URL.
    # To do that we need to spin up a PieCrust app that knows how the website
    # works. Because the website might have been baked with custom settings
    # (usually the site root URL) there's a good chance we need to apply
    # some variants, which the user can specify in the config.
    pcappfac = PieCrustFactory(
        current_app.config['FOODTRUCK_ROOT_DIR'],
        cache_key='webmention')
    if wmcfg.get('config_variant'):
        pcappfac.config_variants = [wmcfg.get('config_variant')]
    if wmcfg.get('config_variants'):
        pcappfac.config_variants = list(wmcfg.get('config_variants'))
    if wmcfg.get('config_values'):
        pcappfac.config_values = list(wmcfg.get('config_values').items())
    pcapp = pcappfac.create()
    try:
        req_page = get_requested_page(pcapp, tgt_url)
    except Exception as ex:
        logger.error("Can't check webmention target page: %s" % tgt_url)
        logger.exception(ex)
        abort(404)
    # Checked outside of the `try` so that the exception raised by `abort`
    # isn't caught and logged as if the lookup itself had crashed.
    if getattr(req_page, 'page', None) is None:
        logger.error("No page found for webmention target: %s" % tgt_url)
        abort(404)

    # Grab the source URL's contents and see if anything references the
    # target (ours) URL. The source URL comes straight from the request, so
    # never wait on it forever and don't let a failed fetch become a 500.
    fetch_timeout = 10  # Seconds.
    try:
        src_t = requests.get(src_url, timeout=fetch_timeout)
    except requests.RequestException as ex:
        logger.error("Can't fetch webmention source: %s" % src_url)
        logger.exception(ex)
        abort(400)
    src_html = BeautifulSoup(src_t.text, 'html.parser')
    links_to_target = any(
        link.get('href') == tgt_url for link in src_html.find_all('a'))
    if not links_to_target:
        logger.error("Source '%s' doesn't link to target: %s" %
                     (src_url, tgt_url))
        abort(400)

    # Find something to quote for this webmention. We find an `h-entry`
    # to get a title, excerpt, and/or text.
    blurb = None
    hentry = src_html.find(class_='h-entry')
    if hentry:
        try:
            pname = hentry.find(class_='p-name')
            pauthor = hentry.find(class_='p-author')
            blurb = {
                'pname': _bs4_contents_str(pname),
                'pauthor': _bs4_contents_str(pauthor)}
        except Exception:
            # Best effort only: the mention is still recorded, just without
            # the extra info (e.g. when one of the elements is missing).
            logger.error("Couldn't get h-entry info.")

    # Mentions are stored next to the page, in `<page path>-assets`.
    dirname, _ = os.path.splitext(req_page.page.content_spec)
    dirname += '-assets'
    _ensure_dir(dirname)
    mention_path = os.path.join(dirname, 'mentions.json')
    try:
        with open(mention_path, 'r', encoding='utf-8') as fp:
            mention = json.load(fp)
    except IOError:
        mention = {'mentions': []}

    # Only record a given source once. A response must still be returned,
    # otherwise Flask fails the request because the view returned nothing.
    for m in mention['mentions']:
        if m['source'] == src_url:
            logger.info("Webmention already received from: %s" % src_url)
            return make_response(("Webmention received.", 202, []))

    new_mention = {'source': src_url}
    if blurb:
        new_mention.update(blurb)

    mention['mentions'].append(new_mention)

    with open(mention_path, 'w', encoding='utf-8') as fp:
        json.dump(mention, fp)
    logger.info("Received webmention from: %s" % src_url)

    return make_response(("Webmention received.", 202, []))
98c7dd6ea4ac admin: Early version of webmention endpoint.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
119
98c7dd6ea4ac admin: Early version of webmention endpoint.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
120
98c7dd6ea4ac admin: Early version of webmention endpoint.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
121 def _bs4_contents_str(node):
98c7dd6ea4ac admin: Early version of webmention endpoint.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
122 return ''.join([str(c).strip() for c in node.contents])
98c7dd6ea4ac admin: Early version of webmention endpoint.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
123
98c7dd6ea4ac admin: Early version of webmention endpoint.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
124
98c7dd6ea4ac admin: Early version of webmention endpoint.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
125 def _ensure_dir(path, mode=0o775):
98c7dd6ea4ac admin: Early version of webmention endpoint.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
126 try:
98c7dd6ea4ac admin: Early version of webmention endpoint.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
127 os.makedirs(path, mode=mode, exist_ok=True)
98c7dd6ea4ac admin: Early version of webmention endpoint.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
128 except OSError:
98c7dd6ea4ac admin: Early version of webmention endpoint.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
129 pass
98c7dd6ea4ac admin: Early version of webmention endpoint.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
130