Mercurial > piecrust2
comparison piecrust/admin/views/mentions.py @ 1111:98c7dd6ea4ac
admin: Early version of webmention endpoint.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Sun, 18 Feb 2018 20:37:54 -0800 |
parents | |
children | 8af2ea1f5c34 |
comparison
equal
deleted
inserted
replaced
1110:05fba0b8e21c | 1111:98c7dd6ea4ac |
---|---|
1 import os | |
2 import os.path | |
3 import json | |
4 import time | |
5 import logging | |
6 import requests | |
7 from bs4 import BeautifulSoup | |
8 from flask import current_app, g, request, make_response, abort | |
9 from ..blueprint import foodtruck_bp | |
10 from piecrust.app import PieCrustFactory | |
11 from piecrust.serving.util import get_requested_page | |
12 | |
13 | |
14 logger = logging.getLogger(__name__) | |
15 | |
16 | |
@foodtruck_bp.route('/webmention', methods=['POST'])
def post_webmention():
    """Handle an incoming webmention POST.

    Reads the `source` and `target` form fields. When the site's
    `webmention` config has `use_task_queue` set to `True`, the mention is
    written as a JSON task file under `<root_dir>/_tasks` and the request
    returns immediately. Otherwise the mention is verified synchronously:
    the target must resolve to a page of this website, and the source
    document must contain an `<a>` whose `href` equals the target URL.
    Accepted mentions are appended to `mentions.json` in a directory named
    after the target page's content spec (extension stripped) with
    `-assets` appended.

    Returns:
        A Flask response with status 202.

    Aborts with:
        400 if source/target are missing or identical, if the source can't
        be fetched, or if the source doesn't link to the target.
        404 if the target URL doesn't resolve to a page.
    """
    # Basic validation of source/target.
    src_url = request.form.get('source')
    tgt_url = request.form.get('target')
    if not src_url or not tgt_url:
        logger.error("No source or target specified.")
        abort(400)
    if src_url.lower().rstrip('/') == tgt_url.lower().rstrip('/'):
        logger.error("Source and target are the same.")
        abort(400)

    # See if we need to do this synchronously or asynchronously, and other
    # things we should know up-front. The `webmention` config section may
    # be missing, so fall back to an empty mapping.
    wmcfg = g.site.piecrust_app.config.get('webmention') or {}
    if wmcfg.get('use_task_queue') is True:
        tasks_dir = os.path.join(g.site.piecrust_app.root_dir, '_tasks')
        _ensure_dir(tasks_dir)
        task_data = {
            'type': 'webmention',
            'data': {'source': src_url, 'target': tgt_url}}
        # NOTE(review): the file name has one-second resolution, so two
        # mentions queued within the same second share a path.
        task_path = os.path.join(tasks_dir, '%s.json' % int(time.time()))
        with open(task_path, 'w', encoding='utf8') as fp:
            json.dump(task_data, fp)
        return make_response("Webmention queued.", 202, [])

    # Find if we have a page at the target URL.
    # To do that we need to spin up a PieCrust app that knows how the website
    # works. Because the website might have been baked with custom settings
    # (usually the site root URL) there's a good chance we need to apply
    # some variants, which the user can specify in the config.
    pcappfac = PieCrustFactory(
        current_app.config['FOODTRUCK_ROOT_DIR'],
        cache_key='webmention')
    if wmcfg.get('config_variant'):
        pcappfac.config_variants = [wmcfg.get('config_variant')]
    if wmcfg.get('config_variants'):
        pcappfac.config_variants = list(wmcfg.get('config_variants'))
    if wmcfg.get('config_values'):
        pcappfac.config_values = list(wmcfg.get('config_values').items())
    pcapp = pcappfac.create()
    try:
        req_page = get_requested_page(pcapp, tgt_url)
    except Exception as ex:
        logger.error("Can't check webmention target page: %s", tgt_url)
        logger.exception(ex)
        abort(404)
    # Checked outside the `try` so the deliberate 404 isn't caught and
    # logged as an unexpected failure.
    if req_page.page is None:
        logger.error("Webmention target page not found: %s", tgt_url)
        abort(404)

    # Grab the source URL's contents and see if anything references the
    # target (ours) URL.
    # NOTE(review): `src_url` is caller-supplied, so this makes the server
    # fetch an arbitrary URL; consider restricting which hosts are allowed.
    try:
        # Without a timeout a slow source would block this worker
        # indefinitely.
        src_t = requests.get(src_url, timeout=10)
        src_t.raise_for_status()
    except requests.RequestException as ex:
        logger.error("Can't fetch webmention source '%s': %s", src_url, ex)
        abort(400)
    src_html = BeautifulSoup(src_t.text, 'html.parser')
    links_to_target = any(
        link.get('href') == tgt_url for link in src_html.find_all('a'))
    if not links_to_target:
        logger.error("Source '%s' doesn't link to target: %s",
                     src_url, tgt_url)
        abort(400)

    # Find something to quote for this webmention. We find an `h-entry`
    # to get a title, excerpt, and/or text.
    blurb = None
    hentry = src_html.find(class_='h-entry')
    if hentry is not None:
        try:
            pname = hentry.find(class_='p-name')
            pauthor = hentry.find(class_='p-author')
            # Keep the blurb shape stable (both keys or none): a missing
            # element becomes an empty string instead of dropping the
            # element that was found.
            if pname is not None or pauthor is not None:
                blurb = {
                    'pname': ('' if pname is None
                              else _bs4_contents_str(pname)),
                    'pauthor': ('' if pauthor is None
                                else _bs4_contents_str(pauthor))}
        except Exception:
            # The blurb is optional; record the mention without it.
            logger.exception("Couldn't get h-entry info.")

    dirname, _ = os.path.splitext(req_page.page.content_spec)
    dirname += '-assets'
    _ensure_dir(dirname)
    mention_path = os.path.join(dirname, 'mentions.json')
    try:
        with open(mention_path, 'r', encoding='utf-8') as fp:
            mention = json.load(fp)
    except IOError:
        mention = {'mentions': []}

    mentions = mention.setdefault('mentions', [])

    # A source that's already recorded is acknowledged without writing a
    # duplicate. A response must still be returned: a view returning
    # `None` makes Flask fail the request.
    for m in mentions:
        if m.get('source') == src_url:
            logger.info("Already have webmention from: %s", src_url)
            return make_response(("Webmention received.", 202, []))

    new_mention = {'source': src_url}
    if blurb:
        new_mention.update(blurb)

    mentions.append(new_mention)

    with open(mention_path, 'w', encoding='utf-8') as fp:
        json.dump(mention, fp)
    logger.info("Received webmention from: %s", src_url)

    return make_response(("Webmention received.", 202, []))
119 | |
120 | |
def _bs4_contents_str(node):
    """Return the children of `node` as one string.

    Each item of `node.contents` is converted with `str()`, stripped of
    surrounding whitespace, and the pieces are concatenated with no
    separator.

    `node` may be `None` (e.g. the result of a `find()` that matched
    nothing), in which case an empty string is returned.
    """
    if node is None:
        return ''
    return ''.join(str(c).strip() for c in node.contents)
123 | |
124 | |
def _ensure_dir(path, mode=0o775):
    """Create the directory `path`, and any missing parents, if needed.

    An already existing directory is not an error. This is best-effort: an
    `OSError` is logged and swallowed instead of raised, so the caller
    proceeds and only fails once it actually uses the directory.

    Args:
        path: Directory to create.
        mode: Permission bits passed to `os.makedirs`.
    """
    try:
        os.makedirs(path, mode=mode, exist_ok=True)
    except OSError as ex:
        # Leave a trace so that a later failure to open a file in this
        # directory can be linked back to its cause.
        logger.warning("Couldn't create directory '%s': %s", path, ex)
130 |