Mercurial > piecrust2
annotate piecrust/processing/sitemap.py @ 380:f33712c4cfab
routing: Fix bugs with matching URLs with correct route but missing metadata.
When matching a route like `/foo/%slug%` against a URL like `/foo`, the route
will (correctly) return a match, but it will be completely missing the `slug`
metadata, resulting in problems elsewhere. This change makes it so that any
missing route metadata will be filled in with an empty string.
And because this means generated URLs may differ from the incoming URL when
using trailing slashes (`/foo/` _vs._ `/foo`), we make the assert in the
chef server handle those discrepancies.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Sun, 10 May 2015 00:34:21 -0700 |
parents | 7081a5f88e10 |
children | 21e26ed867b6 |
rev | line source |
---|---|
34 | 1 import time |
2 import logging | |
3 import yaml | |
4 from piecrust.processing.base import SimpleFileProcessor | |
5 | |
6 | |
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# NOTE(review): whitespace inside the literals below is reproduced exactly as
# it appears in this view of the file; confirm the intended indentation of the
# generated XML against the raw source.

# Text written once at the top and once at the bottom of the generated file.
SITEMAP_HEADER = (
    '<?xml version="1.0" encoding="utf-8"?>\n'
    '<urlset\n'
    'xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
)
SITEMAP_FOOTER = "</urlset>\n"

# Templates for a single `<url>` entry. The `%s` / `%f` placeholders are
# filled with the `%` operator by `SitemapProcessor._writeEntry`.
SITEURL_HEADER = " <url>\n"
SITEURL_LOC = " <loc>%s</loc>\n"
SITEURL_LASTMOD = " <lastmod>%s</lastmod>\n"
SITEURL_CHANGEFREQ = " <changefreq>%s</changefreq>\n"
SITEURL_PRIORITY = " <priority>%f</priority>\n"
SITEURL_FOOTER = " </url>\n"
23 | |
24 | |
class SitemapProcessor(SimpleFileProcessor):
    """Generate an XML sitemap from a YAML description file.

    The input YAML document may contain two optional keys:

    * ``locations``: a list of mappings, each written out as one ``<url>``
      entry. Every mapping must have a ``url`` key and may have ``lastmod``,
      ``changefreq`` and ``priority`` keys.
    * ``autogen``: a list of page source names. One entry is generated for
      every page returned by each of those sources.
    """
    PROCESSOR_NAME = 'sitemap'

    def __init__(self):
        # Presumably maps the input extension to the output extension;
        # confirm against `SimpleFileProcessor`.
        super(SitemapProcessor, self).__init__({'sitemap': 'xml'})
        # Set in `onPipelineStart`; not read anywhere in this class.
        self._start_time = None

    def onPipelineStart(self, pipeline):
        """Record the wall-clock time at which the pipeline started."""
        self._start_time = time.time()

    def _doProcess(self, in_path, out_path):
        """Read the YAML file at `in_path` and write the sitemap XML to
        `out_path`.

        Always returns ``True``.
        """
        with open(in_path, 'r', encoding='utf8') as fp:
            # `safe_load` only builds plain Python objects (a sitemap needs
            # nothing more) and, unlike a bare `yaml.load(fp)`, works on
            # PyYAML versions that require an explicit loader. An empty
            # file parses to `None`, so fall back to an empty mapping.
            sitemap = yaml.safe_load(fp) or {}

        # The XML declaration in `SITEMAP_HEADER` says UTF-8, so the file
        # must be written as UTF-8 regardless of the platform default.
        with open(out_path, 'w', encoding='utf8') as fp:
            fp.write(SITEMAP_HEADER)
            self._writeManualLocs(sitemap, fp)
            self._writeAutoLocs(sitemap, fp)
            fp.write(SITEMAP_FOOTER)

        return True

    def _writeManualLocs(self, sitemap, fp):
        """Write one entry per item of the ``locations`` list, if any."""
        locs = sitemap.get('locations')
        if not locs:
            return

        logger.debug("Generating manual sitemap entries.")
        for loc in locs:
            self._writeEntry(loc, fp)

    def _writeAutoLocs(self, sitemap, fp):
        """Write one entry per page of every source named in ``autogen``.

        Raises:
            Exception: if a listed source name is unknown to the app.
        """
        source_names = sitemap.get('autogen')
        if not source_names:
            return

        for name in source_names:
            logger.debug(
                "Generating automatic sitemap entries for '%s'.", name)
            source = self.app.getSource(name)
            if source is None:
                raise Exception("No such source: %s" % name)

            for page in source.getPages():
                route = self.app.getRoute(source.name, page.source_metadata)
                uri = route.getUri(page.source_metadata, provider=page)

                t = page.datetime.timestamp()
                sm_cfg = page.config.get('sitemap')

                args = {'url': uri, 'lastmod': strftime_iso8601(t)}
                if sm_cfg:
                    # Per-page `sitemap` settings override the generated
                    # values and may add `changefreq` / `priority`.
                    args.update(sm_cfg)

                self._writeEntry(args, fp)

    def _writeEntry(self, args, fp):
        """Write a single ``<url>`` element to `fp`.

        `args` must contain ``url``; ``lastmod``, ``changefreq`` and
        ``priority`` are written only when present. Text values are
        XML-escaped, so they must be given unescaped (e.g. a raw ``&`` in
        a URL, not ``&amp;``).
        """
        # Imported locally so this class does not depend on a change to
        # the module's import block.
        from xml.sax.saxutils import escape

        fp.write(SITEURL_HEADER)
        fp.write(SITEURL_LOC % escape(str(args['url'])))
        if 'lastmod' in args:
            # `str()` first: YAML may have parsed the value as a date.
            fp.write(SITEURL_LASTMOD % escape(str(args['lastmod'])))
        if 'changefreq' in args:
            fp.write(SITEURL_CHANGEFREQ % escape(str(args['changefreq'])))
        if 'priority' in args:
            fp.write(SITEURL_PRIORITY % args['priority'])
        fp.write(SITEURL_FOOTER)
91 | |
92 | |
def strftime_iso8601(t):
    """Format the epoch timestamp `t` (in seconds) as a UTC date-time
    string of the form ``YYYY-MM-DDTHH:MM:SSZ``.
    """
    utc_parts = time.gmtime(t)
    iso_format = '%Y-%m-%dT%H:%M:%SZ'
    return time.strftime(iso_format, utc_parts)
95 |