annotate piecrust/processing/sitemap.py @ 434:6238dcfc7a78

reporting: Print errors that occurred during pipeline processing.
author Ludovic Chabant <ludovic@chabant.com>
date Sat, 27 Jun 2015 21:47:48 -0700
parents 21e26ed867b6
children 62274d805a6e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
34
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
1 import time
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
2 import logging
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
3 import yaml
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
4 from piecrust.processing.base import SimpleFileProcessor
430
21e26ed867b6 internal: Create full route metadata in one place.
Ludovic Chabant <ludovic@chabant.com>
parents: 287
diff changeset
5 from piecrust.routing import create_route_metadata
34
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
6
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
7
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Templates for the sitemap document as a whole.
# NOTE(review): whitespace inside these templates is reproduced as it appears
# in this view of the file -- confirm against the repository copy.
SITEMAP_HEADER = (
    '<?xml version="1.0" encoding="utf-8"?>\n'
    '<urlset\n'
    'xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
)
SITEMAP_FOOTER = '</urlset>\n'

# Templates for a single <url> entry; the ones containing a placeholder are
# filled in with the `%` operator (`%s` for text, `%f` for the priority).
SITEURL_HEADER = ' <url>\n'
SITEURL_LOC = ' <loc>%s</loc>\n'
SITEURL_LASTMOD = ' <lastmod>%s</lastmod>\n'
SITEURL_CHANGEFREQ = ' <changefreq>%s</changefreq>\n'
SITEURL_PRIORITY = ' <priority>%f</priority>\n'
SITEURL_FOOTER = ' </url>\n'
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
24
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
25
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
class SitemapProcessor(SimpleFileProcessor):
    """Build a sitemap XML file from a YAML ``sitemap`` description.

    The YAML document is expected to be a mapping with two optional keys:

    * ``locations``: a list of mappings, each written out as one ``<url>``
      entry. ``url`` is required; ``lastmod``, ``changefreq`` and
      ``priority`` are optional.
    * ``autogen``: a list of page source names. One ``<url>`` entry is
      generated for every page returned by each named source.
    """
    PROCESSOR_NAME = 'sitemap'

    def __init__(self):
        # The mapping is handed to the base class as-is; presumably it maps
        # the input extension to the output extension (confirm against
        # SimpleFileProcessor).
        super(SitemapProcessor, self).__init__({'sitemap': 'xml'})
        self._start_time = None

    def onPipelineStart(self, pipeline):
        """Record the wall-clock time at which the pipeline started."""
        self._start_time = time.time()

    def _doProcess(self, in_path, out_path):
        """Read the YAML file at `in_path` and write the XML to `out_path`.

        Manual entries are written first, then auto-generated ones.
        Always returns True.
        """
        with open(in_path, 'r', encoding='utf8') as fp:
            # `safe_load` rather than `load`: the sitemap file only needs
            # plain YAML types, `load` can construct arbitrary Python
            # objects, and recent PyYAML releases reject `load` without an
            # explicit Loader. An empty file parses to None, so fall back
            # to an empty mapping.
            sitemap = yaml.safe_load(fp) or {}

        # The XML header declares UTF-8, so write the file as UTF-8 instead
        # of relying on the platform's default encoding.
        with open(out_path, 'w', encoding='utf8') as fp:
            fp.write(SITEMAP_HEADER)
            self._writeManualLocs(sitemap, fp)
            self._writeAutoLocs(sitemap, fp)
            fp.write(SITEMAP_FOOTER)

        return True

    def _writeManualLocs(self, sitemap, fp):
        """Write one entry per item of the ``locations`` list, if any."""
        # `.get` instead of `.setdefault`: nothing needs to be stored back
        # into the parsed document.
        locs = sitemap.get('locations')
        if not locs:
            return

        logger.debug("Generating manual sitemap entries.")
        for loc in locs:
            self._writeEntry(loc, fp)

    def _writeAutoLocs(self, sitemap, fp):
        """Write one entry per page of every source listed in ``autogen``.

        Raises:
            Exception: if a listed source name is unknown to the app.
        """
        source_names = sitemap.get('autogen')
        if not source_names:
            return

        for name in source_names:
            # Pass `name` as a logging argument so the message is only
            # formatted when debug logging is enabled.
            logger.debug("Generating automatic sitemap entries for '%s'.",
                         name)
            source = self.app.getSource(name)
            if source is None:
                raise Exception("No such source: %s" % name)

            for page in source.getPages():
                route_metadata = create_route_metadata(page)
                route = self.app.getRoute(source.name, route_metadata)
                uri = route.getUri(route_metadata)

                t = page.datetime.timestamp()
                sm_cfg = page.config.get('sitemap')

                # Defaults first; the page's own `sitemap` config (if any)
                # is applied on top and may override them.
                args = {'url': uri, 'lastmod': strftime_iso8601(t)}
                if sm_cfg:
                    args.update(sm_cfg)

                self._writeEntry(args, fp)

    def _writeEntry(self, args, fp):
        """Write a single ``<url>`` element described by the `args` mapping.

        `args` must contain ``url``; ``lastmod``, ``changefreq`` and
        ``priority`` are written only when present.

        Raises:
            KeyError: if `args` has no ``url`` key.
        """
        # Local import so this class does not depend on a change to the
        # module's import block.
        from xml.sax.saxutils import escape

        def _text(value):
            # Text placed inside an XML element must have `&`, `<` and `>`
            # entity-escaped (URLs with query strings commonly contain `&`).
            return escape(str(value))

        fp.write(SITEURL_HEADER)
        fp.write(SITEURL_LOC % _text(args['url']))
        if 'lastmod' in args:
            fp.write(SITEURL_LASTMOD % _text(args['lastmod']))
        if 'changefreq' in args:
            fp.write(SITEURL_CHANGEFREQ % _text(args['changefreq']))
        if 'priority' in args:
            fp.write(SITEURL_PRIORITY % args['priority'])
        fp.write(SITEURL_FOOTER)
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
93
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
94
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
def strftime_iso8601(t):
    """Format the POSIX timestamp `t` as a UTC ISO 8601 string.

    The result has the form ``YYYY-MM-DDTHH:MM:SSZ``; any fractional part
    of `t` is not represented in the output.
    """
    utc_fields = time.gmtime(t)
    return time.strftime('%Y-%m-%dT%H:%M:%SZ', utc_fields)
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
97