comparison piecrust/processing/sitemap.py @ 34:bdb103c57168

Add `sitemap` processor.
author Ludovic Chabant <ludovic@chabant.com>
date Wed, 20 Aug 2014 14:55:23 -0700
parents
children 371a6c879ab9
comparison
equal deleted inserted replaced
33:62c7a97c8340 34:bdb103c57168
1 import time
2 import logging
3 import yaml
4 from piecrust.processing.base import SimpleFileProcessor
5
6
# Module-level logger named after this module.
logger = logging.getLogger(__name__)


# Opening and closing of the sitemap document; every <url> entry is written
# between these two.
SITEMAP_HEADER = (
    '<?xml version="1.0" encoding="utf-8"?>\n'
    '<urlset\n'
    'xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
)
SITEMAP_FOOTER = '</urlset>\n'

# Templates for one <url> entry.  Those containing a `%` placeholder are
# filled in with the `%` operator; `%f` means the priority must be numeric.
SITEURL_HEADER = ' <url>\n'
SITEURL_LOC = ' <loc>%s</loc>\n'
SITEURL_LASTMOD = ' <lastmod>%s</lastmod>\n'
SITEURL_CHANGEFREQ = ' <changefreq>%s</changefreq>\n'
SITEURL_PRIORITY = ' <priority>%f</priority>\n'
SITEURL_FOOTER = ' </url>\n'
23
24
class SitemapProcessor(SimpleFileProcessor):
    """Render a YAML sitemap description into a sitemap XML file.

    The input file is parsed as YAML and may define two optional keys:

    * ``locations``: a list of mappings, each with a ``url`` and optionally
      ``lastmod``, ``changefreq`` and ``priority``; one ``<url>`` entry is
      written per mapping.
    * ``autogen``: a list of page source names; one ``<url>`` entry is
      written for every page factory of each named source.
    """
    PROCESSOR_NAME = 'sitemap'

    def __init__(self):
        # NOTE(review): the mapping presumably tells the base class to turn
        # `.sitemap` inputs into `.xml` outputs -- confirm against
        # SimpleFileProcessor.
        super(SitemapProcessor, self).__init__({'sitemap': 'xml'})
        self._start_time = None

    def onPipelineStart(self, pipeline):
        """Remember the time at which the pipeline started."""
        self._start_time = time.time()

    def _doProcess(self, in_path, out_path):
        """Read the YAML file at `in_path` and write the XML to `out_path`.

        Returns:
            True once the output file has been written.

        Raises:
            Exception: if `autogen` names a source the app doesn't know.
        """
        with open(in_path, 'r', encoding='utf-8') as fp:
            # `safe_load` only builds plain Python objects (and, unlike a
            # bare `yaml.load(fp)`, still works on PyYAML >= 6).  An empty
            # file parses to None, hence the fallback to an empty mapping.
            sitemap = yaml.safe_load(fp) or {}

        # The XML declaration in SITEMAP_HEADER promises UTF-8, so don't
        # leave the actual encoding up to the platform default.
        with open(out_path, 'w', encoding='utf-8') as fp:
            fp.write(SITEMAP_HEADER)
            self._writeManualLocs(sitemap, fp)
            self._writeAutoLocs(sitemap, fp)
            fp.write(SITEMAP_FOOTER)

        return True

    def _writeManualLocs(self, sitemap, fp):
        """Write one entry per item of the `locations` list, if any."""
        locs = sitemap.get('locations')
        if not locs:
            return

        logger.debug("Generating manual sitemap entries.")
        for loc in locs:
            self._writeEntry(loc, fp)

    def _writeAutoLocs(self, sitemap, fp):
        """Write one entry per page of each source listed in `autogen`.

        Each entry gets the page's route URI and its date/time as `lastmod`;
        the page's own `sitemap` config, when present, overrides or extends
        those values.

        Raises:
            Exception: if a listed source name is unknown to the app.
        """
        source_names = sitemap.get('autogen')
        if not source_names:
            return

        for name in source_names:
            logger.debug(
                "Generating automatic sitemap entries for '%s'.", name)
            source = self.app.getSource(name)
            if source is None:
                raise Exception("No such source: %s" % name)

            for fac in source.getPageFactories():
                route = self.app.getRoute(source.name, fac.metadata)
                uri = route.getUri(fac.metadata)

                page = fac.buildPage()
                t = page.datetime.timestamp()
                sm_cfg = page.config.get('sitemap')

                args = {'url': uri, 'lastmod': strftime_iso8601(t)}
                if sm_cfg:
                    # Per-page settings win over the generated defaults.
                    args.update(sm_cfg)

                self._writeEntry(args, fp)

    def _writeEntry(self, args, fp):
        """Write a single `<url>` element to `fp`.

        `args` must contain `url` (given unescaped) and may contain
        `lastmod`, `changefreq` and `priority`; absent keys produce no
        element.
        """
        # Imported here so this class doesn't depend on a module-level
        # import being added elsewhere in the file.
        from xml.sax.saxutils import escape

        fp.write(SITEURL_HEADER)
        # The sitemap protocol requires URLs to be entity-escaped; a raw
        # `&` would otherwise make the document malformed.
        fp.write(SITEURL_LOC % escape(str(args['url'])))
        if 'lastmod' in args:
            fp.write(SITEURL_LASTMOD % args['lastmod'])
        if 'changefreq' in args:
            fp.write(SITEURL_CHANGEFREQ % args['changefreq'])
        if 'priority' in args:
            fp.write(SITEURL_PRIORITY % args['priority'])
        fp.write(SITEURL_FOOTER)
92
93
def strftime_iso8601(t):
    """Format `t`, in seconds since the epoch, as an ISO 8601 UTC string.

    The result has the form ``YYYY-MM-DDTHH:MM:SSZ``.
    """
    utc_fields = time.gmtime(t)
    iso_format = '%Y-%m-%dT%H:%M:%SZ'
    return time.strftime(iso_format, utc_fields)
96