Mercurial > piecrust2
annotate piecrust/processing/sitemap.py @ 411:e7b865f8f335
bake: Enable multiprocess baking.
Baking is now done by running a worker per CPU, and sending jobs to them.
This changes several things across the codebase:
* Ability to not cache things related to pages other than the 'main' page
(i.e. the page at the bottom of the execution stack).
* Decouple the baking process from the bake records, so only the main process
keeps track of (and modifies) the bake record.
* Remove the need for 'batch page getters' and loading a page directly from
the page factories.
Various smaller changes are also included here, such as support for
scope performance timers that are saved with the bake record and can be
printed out to the console. Yes I got carried away.
For testing, the in-memory 'mock' file-system doesn't work anymore, since
we're spawning processes, so this is replaced by a 'tmpfs' file-system which
is saved in temporary files on disk and deleted after tests have run.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Fri, 12 Jun 2015 17:09:19 -0700 |
parents | 7081a5f88e10 |
children | 21e26ed867b6 |
rev | line source |
---|---|
34 | 1 import time |
2 import logging | |
3 import yaml | |
4 from piecrust.processing.base import SimpleFileProcessor | |
5 | |
6 | |
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Opening and closing markup of the generated sitemap document.
SITEMAP_HEADER = (
    '<?xml version="1.0" encoding="utf-8"?>\n'
    '<urlset\n'
    'xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
)
SITEMAP_FOOTER = '</urlset>\n'

# %-format templates for the pieces of a single `<url>` entry. The ones
# with a placeholder take exactly one value: `%s` for text, `%f` for the
# (numeric) priority.
SITEURL_HEADER = ' <url>\n'
SITEURL_LOC = ' <loc>%s</loc>\n'
SITEURL_LASTMOD = ' <lastmod>%s</lastmod>\n'
SITEURL_CHANGEFREQ = ' <changefreq>%s</changefreq>\n'
SITEURL_PRIORITY = ' <priority>%f</priority>\n'
SITEURL_FOOTER = ' </url>\n'
23 | |
24 | |
class SitemapProcessor(SimpleFileProcessor):
    """Processor that turns a YAML sitemap description into an XML sitemap.

    The input YAML mapping may contain two optional keys:

    * ``locations``: a list of mappings written out as entries as-is. Each
      must have a ``url`` key and may have ``lastmod``, ``changefreq`` and
      ``priority``.
    * ``autogen``: a list of page source names. One entry is generated for
      every page returned by each named source.
    """
    PROCESSOR_NAME = 'sitemap'

    def __init__(self):
        # NOTE(review): presumably maps the 'sitemap' input extension to an
        # 'xml' output extension -- confirm against SimpleFileProcessor.
        super(SitemapProcessor, self).__init__({'sitemap': 'xml'})
        self._start_time = None

    def onPipelineStart(self, pipeline):
        """Record the wall-clock time at which the pipeline started."""
        self._start_time = time.time()

    def _doProcess(self, in_path, out_path):
        """Read the YAML file at `in_path` and write the sitemap XML to
        `out_path`.

        Always returns True.
        """
        with open(in_path, 'r', encoding='utf-8') as fp:
            # `safe_load` only builds plain YAML types; a bare `yaml.load`
            # can instantiate arbitrary objects and is rejected by recent
            # PyYAML versions when no Loader is given. An empty file loads
            # as None, so fall back to an empty mapping.
            sitemap = yaml.safe_load(fp) or {}

        # SITEMAP_HEADER declares the document as UTF-8, so write it as
        # such instead of relying on the platform's default encoding.
        with open(out_path, 'w', encoding='utf-8') as fp:
            fp.write(SITEMAP_HEADER)
            self._writeManualLocs(sitemap, fp)
            self._writeAutoLocs(sitemap, fp)
            fp.write(SITEMAP_FOOTER)

        return True

    def _writeManualLocs(self, sitemap, fp):
        """Write one entry per item of the ``locations`` list, if any."""
        locs = sitemap.get('locations')
        if not locs:
            return

        logger.debug("Generating manual sitemap entries.")
        for loc in locs:
            self._writeEntry(loc, fp)

    def _writeAutoLocs(self, sitemap, fp):
        """Write one entry per page of every source named in ``autogen``.

        Raises:
            Exception: if a listed source name is unknown to the app.
        """
        source_names = sitemap.get('autogen')
        if not source_names:
            return

        for name in source_names:
            logger.debug(
                "Generating automatic sitemap entries for '%s'.", name)
            source = self.app.getSource(name)
            if source is None:
                raise Exception("No such source: %s" % name)

            for page in source.getPages():
                route = self.app.getRoute(source.name, page.source_metadata)
                uri = route.getUri(page.source_metadata, provider=page)

                t = page.datetime.timestamp()
                sm_cfg = page.config.get('sitemap')

                args = {'url': uri, 'lastmod': strftime_iso8601(t)}
                if sm_cfg:
                    # The page's own `sitemap` config overrides or extends
                    # the generated values.
                    args.update(sm_cfg)

                self._writeEntry(args, fp)

    def _writeEntry(self, args, fp):
        """Write a single `<url>` element to `fp`.

        `args` must contain ``url``; ``lastmod``, ``changefreq`` and
        ``priority`` are written only when present.
        """
        # NOTE(review): values are inserted without XML escaping, so a URL
        # containing `&` yields invalid XML unless it is already escaped
        # in the input -- confirm what callers provide before changing.
        fp.write(SITEURL_HEADER)
        fp.write(SITEURL_LOC % args['url'])
        if 'lastmod' in args:
            fp.write(SITEURL_LASTMOD % args['lastmod'])
        if 'changefreq' in args:
            fp.write(SITEURL_CHANGEFREQ % args['changefreq'])
        if 'priority' in args:
            fp.write(SITEURL_PRIORITY % args['priority'])
        fp.write(SITEURL_FOOTER)
91 | |
92 | |
def strftime_iso8601(t):
    """Format the POSIX timestamp `t` as a UTC `YYYY-MM-DDTHH:MM:SSZ`
    string.
    """
    utc_fields = time.gmtime(t)
    return time.strftime('%Y-%m-%dT%H:%M:%SZ', utc_fields)
95 |