comparison piecrust/admin/views/mentions.py @ 1114:8af2ea1f5c34

tasks: Add new `tasks` command and infrastructure, with `mention` task. * The new command lets `chef` run tasks from a queue. * The webmention endpoint now adds a mention task. * Moved mention handling code to a task runner.
author Ludovic Chabant <ludovic@chabant.com>
date Thu, 22 Feb 2018 22:12:45 -0800
parents 98c7dd6ea4ac
children
comparison
equal deleted inserted replaced
1113:29c51b981c17 1114:8af2ea1f5c34
1 import os
2 import os.path
3 import json
4 import time
5 import logging 1 import logging
6 import requests 2 from flask import g, request, make_response, abort
7 from bs4 import BeautifulSoup
8 from flask import current_app, g, request, make_response, abort
9 from ..blueprint import foodtruck_bp 3 from ..blueprint import foodtruck_bp
10 from piecrust.app import PieCrustFactory 4 from piecrust.tasks.base import TaskManager
11 from piecrust.serving.util import get_requested_page
12 5
13 6
14 logger = logging.getLogger(__name__) 7 logger = logging.getLogger(__name__)
15 8
16 9
24 abort(400) 17 abort(400)
25 if src_url.lower().rstrip('/') == tgt_url.lower().rstrip('/'): 18 if src_url.lower().rstrip('/') == tgt_url.lower().rstrip('/'):
26 logger.error("Source and target are the same.") 19 logger.error("Source and target are the same.")
27 abort(400) 20 abort(400)
28 21
29 # See if we need to do this synchronously or asynchronously, and other 22 # Create the task for handling this mention.
30 # things we should know up-front. 23 pcapp = g.site.piecrust_app
31 wmcfg = g.site.piecrust_app.config.get('webmention') 24 task_manager = TaskManager(pcapp)
32 if wmcfg.get('use_task_queue') is True: 25 task_id = task_manager.createTask('mention', {
33 tasks_dir = os.path.join(g.site.piecrust_app.root_dir, '_tasks') 26 'source': src_url,
34 _ensure_dir(tasks_dir) 27 'target': tgt_url})
35 task_data = {
36 'type': 'webmention',
37 'data': {'source': src_url, 'target': tgt_url}}
38 task_path = os.path.join(tasks_dir, '%s.json' % int(time.time()))
39 with open(task_path, 'w', encoding='utf8') as fp:
40 json.dump(task_data, fp)
41 return make_response("Webmention queued.", 202, [])
42 28
43 # Find if we have a page at the target URL. 29 # Either run the task now in a background process (for cheap and simple
44 # To do that we need to spin up a PieCrust app that knows how the website 30 # setups), or leave the task there to be picked up later when someone
45 # works. Because the website might have been baked with custom settings 31 # runs the task queue eventually.
46 # (usually the site root URL) there's a good chance we need to apply 32 wmcfg = pcapp.config.get('webmention')
47 # some variants, which the user can specify in the config. 33 if not wmcfg.get('use_task_queue'):
48 pcappfac = PieCrustFactory( 34 g.site.runTask(task_id)
49 current_app.config['FOODTRUCK_ROOT_DIR'],
50 cache_key='webmention')
51 if wmcfg.get('config_variant'):
52 pcappfac.config_variants = [wmcfg.get('config_variant')]
53 if wmcfg.get('config_variants'):
54 pcappfac.config_variants = list(wmcfg.get('config_variants'))
55 if wmcfg.get('config_values'):
56 pcappfac.config_values = list(wmcfg.get('config_values').items())
57 pcapp = pcappfac.create()
58 try:
59 req_page = get_requested_page(pcapp, tgt_url)
60 if req_page.page is None:
61 abort(404)
62 except Exception as ex:
63 logger.error("Can't check webmention target page: %s" % tgt_url)
64 logger.exception(ex)
65 abort(404)
66 35
67 # Grab the source URL's contents and see if anything references the 36 return make_response("Webmention queued.", 202, [])
68 # target (ours) URL.
69 src_t = requests.get(src_url)
70 src_html = BeautifulSoup(src_t.text, 'html.parser')
71 for link in src_html.find_all('a'):
72 href = link.get('href')
73 if href == tgt_url:
74 break
75 else:
76 logger.error("Source '%s' doesn't link to target: %s" %
77 (src_url, tgt_url))
78 abort(400)
79
80 # Find something to quote for this webmention. We find an `h-entry`
81 # to get a title, excerpt, and/or text.
82 blurb = None
83 hentry = src_html.find(class_='h-entry')
84 if hentry:
85 try:
86 pname = hentry.find(class_='p-name')
87 pauthor = hentry.find(class_='p-author')
88 blurb = {
89 'pname': _bs4_contents_str(pname),
90 'pauthor': _bs4_contents_str(pauthor)}
91 except: # NOQA
92 logger.error("Couldn't get h-entry info.")
93
94 dirname, _ = os.path.splitext(req_page.page.content_spec)
95 dirname += '-assets'
96 _ensure_dir(dirname)
97 mention_path = os.path.join(dirname, 'mentions.json')
98 try:
99 with open(mention_path, 'r', encoding='utf-8') as fp:
100 mention = json.load(fp)
101 except IOError:
102 mention = {'mentions': []}
103
104 for m in mention['mentions']:
105 if m['source'] == src_url:
106 return
107
108 new_mention = {'source': src_url}
109 if blurb:
110 new_mention.update(blurb)
111
112 mention['mentions'].append(new_mention)
113
114 with open(mention_path, 'w', encoding='utf-8') as fp:
115 json.dump(mention, fp)
116 logger.info("Received webmention from: %s" % src_url)
117
118 return make_response(("Webmention received.", 202, []))
119
120
121 def _bs4_contents_str(node):
122 return ''.join([str(c).strip() for c in node.contents])
123
124
125 def _ensure_dir(path, mode=0o775):
126 try:
127 os.makedirs(path, mode=mode, exist_ok=True)
128 except OSError:
129 pass
130