Mercurial > piecrust2
annotate piecrust/processing/sitemap.py @ 264:74bea91c9630
bake: Don't store internal config values in the bake record.
We sometimes store actual objects in the page config (for instance, page
linkers), and we don't want those to be pickled.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Tue, 24 Feb 2015 23:18:23 -0800 |
parents | 1c4078ec3011 |
children | 7081a5f88e10 |
rev | line source |
---|---|
34 | 1 import time |
2 import logging | |
3 import yaml | |
4 from piecrust.processing.base import SimpleFileProcessor | |
5 | |
6 | |
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Opening of the generated document: the XML declaration followed by the
# `<urlset>` root element in the sitemaps.org 0.9 namespace.
SITEMAP_HEADER = (
    '<?xml version="1.0" encoding="utf-8"?>\n'
    '<urlset\n'
    'xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
)
# Closes the `<urlset>` root element.
SITEMAP_FOOTER = '</urlset>\n'

# Templates for a single `<url>` entry. The `%s` / `%f` placeholders are
# filled in with the `%` operator when an entry is written.
SITEURL_HEADER = ' <url>\n'
SITEURL_LOC = ' <loc>%s</loc>\n'
SITEURL_LASTMOD = ' <lastmod>%s</lastmod>\n'
SITEURL_CHANGEFREQ = ' <changefreq>%s</changefreq>\n'
SITEURL_PRIORITY = ' <priority>%f</priority>\n'
SITEURL_FOOTER = ' </url>\n'
23 | |
24 | |
class SitemapProcessor(SimpleFileProcessor):
    """Processor that converts a YAML sitemap description into an XML sitemap.

    The input YAML document may contain two optional keys:

    * ``locations``: a list of mappings, each holding a ``url`` and,
      optionally, ``lastmod``, ``changefreq`` and ``priority``. Each one is
      written out as a ``<url>`` entry.
    * ``autogen``: a list of page source names. One ``<url>`` entry is
      written for every page returned by each named source.
    """
    PROCESSOR_NAME = 'sitemap'

    def __init__(self):
        # NOTE(review): the dict is presumably an input-extension to
        # output-extension mapping understood by SimpleFileProcessor.
        super(SitemapProcessor, self).__init__({'sitemap': 'xml'})
        self._start_time = None

    def onPipelineStart(self, pipeline):
        """Record the wall-clock time at which the pipeline started."""
        self._start_time = time.time()

    def _doProcess(self, in_path, out_path):
        """Read the YAML file at `in_path` and write the XML sitemap.

        Manual entries are written first, then the auto-generated ones.
        Always returns ``True``.
        """
        with open(in_path, 'r', encoding='utf-8') as fp:
            # `safe_load` only builds plain Python objects; a bare
            # `yaml.load(fp)` can construct arbitrary objects and is
            # rejected outright (missing `Loader`) by PyYAML >= 6.
            # An empty document parses to `None`, hence the fallback.
            sitemap = yaml.safe_load(fp) or {}

        # SITEMAP_HEADER declares utf-8, so write with that encoding rather
        # than whatever the platform default happens to be.
        with open(out_path, 'w', encoding='utf-8') as fp:
            fp.write(SITEMAP_HEADER)
            self._writeManualLocs(sitemap, fp)
            self._writeAutoLocs(sitemap, fp)
            fp.write(SITEMAP_FOOTER)

        return True

    def _writeManualLocs(self, sitemap, fp):
        """Write one entry per item of the optional ``locations`` list."""
        locs = sitemap.get('locations')
        if not locs:
            return

        logger.debug("Generating manual sitemap entries.")
        for loc in locs:
            self._writeEntry(loc, fp)

    def _writeAutoLocs(self, sitemap, fp):
        """Write one entry per page of each source named under ``autogen``.

        Raises:
            Exception: if a listed source name is unknown to the app.
        """
        source_names = sitemap.get('autogen')
        if not source_names:
            return

        for name in source_names:
            logger.debug(
                "Generating automatic sitemap entries for '%s'.", name)
            source = self.app.getSource(name)
            if source is None:
                raise Exception("No such source: %s" % name)

            for page in source.getPages():
                route = self.app.getRoute(source.name, page.source_metadata)
                uri = route.getUri(page.source_metadata, page)

                t = page.datetime.timestamp()
                sm_cfg = page.config.get('sitemap')

                args = {'url': uri, 'lastmod': strftime_iso8601(t)}
                if sm_cfg:
                    # Per-page `sitemap` settings override the defaults.
                    args.update(sm_cfg)

                self._writeEntry(args, fp)

    def _writeEntry(self, args, fp):
        """Write a single ``<url>`` element described by the `args` mapping.

        ``url`` is required (a missing key raises ``KeyError``); ``lastmod``,
        ``changefreq`` and ``priority`` are written only when present.
        Text values are XML-escaped so characters such as ``&`` in a URL do
        not produce a malformed document.
        """
        from xml.sax.saxutils import escape

        fp.write(SITEURL_HEADER)
        fp.write(SITEURL_LOC % escape(str(args['url'])))
        if 'lastmod' in args:
            fp.write(SITEURL_LASTMOD % escape(str(args['lastmod'])))
        if 'changefreq' in args:
            fp.write(SITEURL_CHANGEFREQ % escape(str(args['changefreq'])))
        if 'priority' in args:
            # Accept numeric strings such as "0.8" as well as numbers.
            fp.write(SITEURL_PRIORITY % float(args['priority']))
        fp.write(SITEURL_FOOTER)
92 | |
def strftime_iso8601(t):
    """Format the POSIX timestamp `t` as a UTC date-time string.

    The result has the form ``YYYY-MM-DDTHH:MM:SSZ``.
    """
    iso_format = '%Y-%m-%dT%H:%M:%SZ'
    utc_fields = time.gmtime(t)
    return time.strftime(iso_format, utc_fields)
95 |