Mercurial > piecrust2
comparison piecrust/admin/views/mentions.py @ 1114:8af2ea1f5c34
tasks: Add new `tasks` command and infrastructure, with `mention` task.
* The new command lets `chef` run tasks from a queue.
* The webmention endpoint now adds a mention task.
* Moved mention handling code to a task runner.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Thu, 22 Feb 2018 22:12:45 -0800 |
parents | 98c7dd6ea4ac |
children |
comparison legend:
- equal
- deleted
- inserted
- replaced
1113:29c51b981c17 | 1114:8af2ea1f5c34 |
---|---|
1 import os | |
2 import os.path | |
3 import json | |
4 import time | |
5 import logging | 1 import logging |
6 import requests | 2 from flask import g, request, make_response, abort |
7 from bs4 import BeautifulSoup | |
8 from flask import current_app, g, request, make_response, abort | |
9 from ..blueprint import foodtruck_bp | 3 from ..blueprint import foodtruck_bp |
10 from piecrust.app import PieCrustFactory | 4 from piecrust.tasks.base import TaskManager |
11 from piecrust.serving.util import get_requested_page | |
12 | 5 |
13 | 6 |
14 logger = logging.getLogger(__name__) | 7 logger = logging.getLogger(__name__) |
15 | 8 |
16 | 9 |
24 abort(400) | 17 abort(400) |
25 if src_url.lower().rstrip('/') == tgt_url.lower().rstrip('/'): | 18 if src_url.lower().rstrip('/') == tgt_url.lower().rstrip('/'): |
26 logger.error("Source and target are the same.") | 19 logger.error("Source and target are the same.") |
27 abort(400) | 20 abort(400) |
28 | 21 |
29 # See if we need to do this synchronously or asynchronously, and other | 22 # Create the task for handling this mention. |
30 # things we should know up-front. | 23 pcapp = g.site.piecrust_app |
31 wmcfg = g.site.piecrust_app.config.get('webmention') | 24 task_manager = TaskManager(pcapp) |
32 if wmcfg.get('use_task_queue') is True: | 25 task_id = task_manager.createTask('mention', { |
33 tasks_dir = os.path.join(g.site.piecrust_app.root_dir, '_tasks') | 26 'source': src_url, |
34 _ensure_dir(tasks_dir) | 27 'target': tgt_url}) |
35 task_data = { | |
36 'type': 'webmention', | |
37 'data': {'source': src_url, 'target': tgt_url}} | |
38 task_path = os.path.join(tasks_dir, '%s.json' % int(time.time())) | |
39 with open(task_path, 'w', encoding='utf8') as fp: | |
40 json.dump(task_data, fp) | |
41 return make_response("Webmention queued.", 202, []) | |
42 | 28 |
43 # Find if we have a page at the target URL. | 29 # Either run the task now in a background process (for cheap and simple |
44 # To do that we need to spin up a PieCrust app that knows how the website | 30 # setups), or leave the task there to be picked up later when someone |
45 # works. Because the website might have been baked with custom settings | 31 # runs the task queue eventually. |
46 # (usually the site root URL) there's a good chance we need to apply | 32 wmcfg = pcapp.config.get('webmention') |
47 # some variants, which the user can specify in the config. | 33 if not wmcfg.get('use_task_queue'): |
48 pcappfac = PieCrustFactory( | 34 g.site.runTask(task_id) |
49 current_app.config['FOODTRUCK_ROOT_DIR'], | |
50 cache_key='webmention') | |
51 if wmcfg.get('config_variant'): | |
52 pcappfac.config_variants = [wmcfg.get('config_variant')] | |
53 if wmcfg.get('config_variants'): | |
54 pcappfac.config_variants = list(wmcfg.get('config_variants')) | |
55 if wmcfg.get('config_values'): | |
56 pcappfac.config_values = list(wmcfg.get('config_values').items()) | |
57 pcapp = pcappfac.create() | |
58 try: | |
59 req_page = get_requested_page(pcapp, tgt_url) | |
60 if req_page.page is None: | |
61 abort(404) | |
62 except Exception as ex: | |
63 logger.error("Can't check webmention target page: %s" % tgt_url) | |
64 logger.exception(ex) | |
65 abort(404) | |
66 | 35 |
67 # Grab the source URL's contents and see if anything references the | 36 return make_response("Webmention queued.", 202, []) |
68 # target (ours) URL. | |
69 src_t = requests.get(src_url) | |
70 src_html = BeautifulSoup(src_t.text, 'html.parser') | |
71 for link in src_html.find_all('a'): | |
72 href = link.get('href') | |
73 if href == tgt_url: | |
74 break | |
75 else: | |
76 logger.error("Source '%s' doesn't link to target: %s" % | |
77 (src_url, tgt_url)) | |
78 abort(400) | |
79 | |
80 # Find something to quote for this webmention. We find an `h-entry` | |
81 # to get a title, excerpt, and/or text. | |
82 blurb = None | |
83 hentry = src_html.find(class_='h-entry') | |
84 if hentry: | |
85 try: | |
86 pname = hentry.find(class_='p-name') | |
87 pauthor = hentry.find(class_='p-author') | |
88 blurb = { | |
89 'pname': _bs4_contents_str(pname), | |
90 'pauthor': _bs4_contents_str(pauthor)} | |
91 except: # NOQA | |
92 logger.error("Couldn't get h-entry info.") | |
93 | |
94 dirname, _ = os.path.splitext(req_page.page.content_spec) | |
95 dirname += '-assets' | |
96 _ensure_dir(dirname) | |
97 mention_path = os.path.join(dirname, 'mentions.json') | |
98 try: | |
99 with open(mention_path, 'r', encoding='utf-8') as fp: | |
100 mention = json.load(fp) | |
101 except IOError: | |
102 mention = {'mentions': []} | |
103 | |
104 for m in mention['mentions']: | |
105 if m['source'] == src_url: | |
106 return | |
107 | |
108 new_mention = {'source': src_url} | |
109 if blurb: | |
110 new_mention.update(blurb) | |
111 | |
112 mention['mentions'].append(new_mention) | |
113 | |
114 with open(mention_path, 'w', encoding='utf-8') as fp: | |
115 json.dump(mention, fp) | |
116 logger.info("Received webmention from: %s" % src_url) | |
117 | |
118 return make_response(("Webmention received.", 202, [])) | |
119 | |
120 | |
121 def _bs4_contents_str(node): | |
122 return ''.join([str(c).strip() for c in node.contents]) | |
123 | |
124 | |
125 def _ensure_dir(path, mode=0o775): | |
126 try: | |
127 os.makedirs(path, mode=mode, exist_ok=True) | |
128 except OSError: | |
129 pass | |
130 |