Mercurial > piecrust2
annotate piecrust/processing/sitemap.py @ 173:0a86a7a6b284
routes: Actually match metadata when finding routes, fix problems with paths.
When we look for a route that matches a given page, we now look at the
source metadata that comes with that page, and compare it to the metadata
we need to build URIs.
Also, when matching URIs, we handle the case where a 'path'-component in
our pattern may be completely empty, and thus we may be missing some trailing
slashes in the URI.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Sat, 03 Jan 2015 21:10:44 -0800 |
parents | ab6e7e0e9d44 |
children | 1c4078ec3011 |
rev | line source |
---|---|
34 | 1 import time |
2 import logging | |
3 import yaml | |
4 from piecrust.processing.base import SimpleFileProcessor | |
5 | |
6 | |
7 logger = logging.getLogger(__name__) | |
8 | |
9 | |
# XML prologue and opening <urlset> tag written once at the start of the
# generated sitemap; SITEMAP_FOOTER closes that element at the end.
10 SITEMAP_HEADER = \ | |
11 """<?xml version="1.0" encoding="utf-8"?> | |
12 <urlset | |
13 xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> | |
14 """ | |
15 SITEMAP_FOOTER = "</urlset>\n" | |
16 | |
# Fragments making up a single <url> entry. The %s / %f placeholders are
# filled in by SitemapProcessor._writeEntry from the entry's 'url',
# 'lastmod', 'changefreq' and 'priority' values ('priority' must be numeric
# because it is formatted with %f).
17 SITEURL_HEADER = " <url>\n" | |
18 SITEURL_LOC = " <loc>%s</loc>\n" | |
19 SITEURL_LASTMOD = " <lastmod>%s</lastmod>\n" | |
20 SITEURL_CHANGEFREQ = " <changefreq>%s</changefreq>\n" | |
21 SITEURL_PRIORITY = " <priority>%f</priority>\n" | |
22 SITEURL_FOOTER = " </url>\n" | |
23 | |
24 | |
class SitemapProcessor(SimpleFileProcessor):
    """Processor that turns a YAML sitemap description into a sitemap XML file.

    The input file is parsed as YAML and may define two optional keys:

    * ``locations``: a list of entry mappings written out as given. Each
      needs a ``url`` key; ``lastmod``, ``changefreq`` and ``priority`` are
      optional.
    * ``autogen``: a list of page source names. One entry is generated for
      every page of each named source.
    """
    PROCESSOR_NAME = 'sitemap'

    def __init__(self):
        # Presumably maps the 'sitemap' input extension to an 'xml' output
        # extension -- confirm against SimpleFileProcessor.
        super(SitemapProcessor, self).__init__({'sitemap': 'xml'})
        self._start_time = None

    def onPipelineStart(self, pipeline):
        """Record the wall-clock time at which the pipeline started."""
        self._start_time = time.time()

    def _doProcess(self, in_path, out_path):
        """Convert the YAML sitemap at `in_path` into XML at `out_path`.

        Manual entries are written first, followed by auto-generated ones.
        Always returns True.
        """
        with open(in_path, 'r') as fp:
            # NOTE(review): yaml.load() without an explicit Loader can build
            # arbitrary Python objects; if sitemap files may come from an
            # untrusted origin, consider yaml.safe_load() instead.
            # An empty file parses to None, so fall back to an empty mapping
            # rather than crashing on the key lookups below.
            sitemap = yaml.load(fp) or {}

        with open(out_path, 'w') as fp:
            fp.write(SITEMAP_HEADER)
            self._writeManualLocs(sitemap, fp)
            self._writeAutoLocs(sitemap, fp)
            fp.write(SITEMAP_FOOTER)

        return True

    def _writeManualLocs(self, sitemap, fp):
        """Write one entry per item of the sitemap's ``locations`` list.

        Does nothing when the key is missing or empty.
        """
        locs = sitemap.get('locations')
        if not locs:
            return

        logger.debug("Generating manual sitemap entries.")
        for loc in locs:
            self._writeEntry(loc, fp)

    def _writeAutoLocs(self, sitemap, fp):
        """Write one entry per page of every source named under ``autogen``.

        Each entry gets the page's URI and its date as ``lastmod``; values
        from the page's own ``sitemap`` config section, if any, are merged
        on top and may override both.

        Raises:
            Exception: if a listed source name is unknown to the app.
        """
        source_names = sitemap.get('autogen')
        if not source_names:
            return

        for name in source_names:
            logger.debug(
                "Generating automatic sitemap entries for '%s'.", name)
            source = self.app.getSource(name)
            if source is None:
                raise Exception("No such source: %s" % name)

            for page in source.getPages():
                route = self.app.getRoute(source.name, page.source_metadata)
                uri = route.getUri(page.source_metadata, page)

                t = page.datetime.timestamp()
                sm_cfg = page.config.get('sitemap')

                args = {'url': uri, 'lastmod': strftime_iso8601(t)}
                if sm_cfg:
                    # Fixed: this previously referenced the undefined name
                    # `cm_cfg`, raising NameError for any page that carried
                    # a 'sitemap' config section.
                    args.update(sm_cfg)

                self._writeEntry(args, fp)

    def _writeEntry(self, args, fp):
        """Write a single ``<url>`` element described by `args` to `fp`.

        `args` must contain ``url``; ``lastmod``, ``changefreq`` and
        ``priority`` are emitted only when present.
        """
        fp.write(SITEURL_HEADER)
        fp.write(SITEURL_LOC % args['url'])
        if 'lastmod' in args:
            fp.write(SITEURL_LASTMOD % args['lastmod'])
        if 'changefreq' in args:
            fp.write(SITEURL_CHANGEFREQ % args['changefreq'])
        if 'priority' in args:
            fp.write(SITEURL_PRIORITY % args['priority'])
        fp.write(SITEURL_FOOTER)
91 | |
92 | |
def strftime_iso8601(t):
    """Format the POSIX timestamp `t` as a UTC ISO 8601 string.

    The result has the form ``YYYY-MM-DDTHH:MM:SSZ``.
    """
    utc_parts = time.gmtime(t)
    return time.strftime('%Y-%m-%dT%H:%M:%SZ', utc_parts)
95 |