annotate piecrust/processing/sitemap.py @ 550:6f216c1ab6b1

bake: Add a flag to know which record entries got collapsed from last run. This makes it possible to find entries for things that were actually baked during the current run, as opposed to skipped because they were "clean".
author Ludovic Chabant <ludovic@chabant.com>
date Tue, 04 Aug 2015 21:22:30 -0700
parents 62274d805a6e
children 4850f8c21b6e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
34
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
1 import time
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
2 import logging
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
3 import yaml
437
62274d805a6e bake: Tweaks to the `sitemap` processor. Add tests.
Ludovic Chabant <ludovic@chabant.com>
parents: 430
diff changeset
4 from piecrust.data.iterators import PageIterator
34
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
5 from piecrust.processing.base import SimpleFileProcessor
430
21e26ed867b6 internal: Create full route metadata in one place.
Ludovic Chabant <ludovic@chabant.com>
parents: 287
diff changeset
6 from piecrust.routing import create_route_metadata
34
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
7
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
8
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
9 logger = logging.getLogger(__name__)
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
10
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
11
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
12 SITEMAP_HEADER = \
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
13 """<?xml version="1.0" encoding="utf-8"?>
437
62274d805a6e bake: Tweaks to the `sitemap` processor. Add tests.
Ludovic Chabant <ludovic@chabant.com>
parents: 430
diff changeset
14 <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
34
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
15 """
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
16 SITEMAP_FOOTER = "</urlset>\n"
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
17
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
18 SITEURL_HEADER = " <url>\n"
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
19 SITEURL_LOC = " <loc>%s</loc>\n"
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
20 SITEURL_LASTMOD = " <lastmod>%s</lastmod>\n"
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
21 SITEURL_CHANGEFREQ = " <changefreq>%s</changefreq>\n"
437
62274d805a6e bake: Tweaks to the `sitemap` processor. Add tests.
Ludovic Chabant <ludovic@chabant.com>
parents: 430
diff changeset
22 SITEURL_PRIORITY = " <priority>%0.1f</priority>\n"
34
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
23 SITEURL_FOOTER = " </url>\n"
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
24
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
25
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
class SitemapProcessor(SimpleFileProcessor):
    """Processor that converts a YAML sitemap description into sitemap XML.

    The input document may define two optional keys:

    * ``locations``: a list of hand-written entries, each a mapping with a
      ``url`` key and optional ``lastmod``, ``changefreq`` and ``priority``.
    * ``autogen``: a list of page source names; one entry is generated for
      every page yielded by each source.
    """
    PROCESSOR_NAME = 'sitemap'

    def __init__(self):
        # Presumably maps the `sitemap` input extension to an `xml` output
        # extension -- confirm against `SimpleFileProcessor`.
        super(SitemapProcessor, self).__init__({'sitemap': 'xml'})
        self._start_time = None

    def onPipelineStart(self, pipeline):
        """Record the wall-clock time at which the pipeline started."""
        self._start_time = time.time()

    def _doProcess(self, in_path, out_path):
        """Read the YAML file at `in_path` and write the XML to `out_path`.

        Returns True once the output file has been written. Raises an
        Exception if the document is not a mapping, or if an `autogen`
        entry names an unknown source.
        """
        with open(in_path, 'r', encoding='utf8') as fp:
            # NOTE: this used to be `yaml.load(fp)`, which can construct
            # arbitrary Python objects and requires an explicit `Loader` on
            # recent PyYAML versions. A sitemap description only needs plain
            # mappings, lists and scalars, so the safe loader is enough.
            # An empty file loads as None; treat it as an empty sitemap.
            sitemap = yaml.safe_load(fp) or {}

        if not isinstance(sitemap, dict):
            raise Exception(
                    "Sitemap file must contain a mapping: %s" % in_path)

        # The XML header declares UTF-8, so the file has to be written as
        # UTF-8 regardless of the platform's default encoding.
        with open(out_path, 'w', encoding='utf8') as fp:
            fp.write(SITEMAP_HEADER)
            self._writeManualLocs(sitemap, fp)
            self._writeAutoLocs(sitemap, fp)
            fp.write(SITEMAP_FOOTER)

        return True

    def _writeManualLocs(self, sitemap, fp):
        """Write one entry per item of the `locations` list, if any."""
        locs = sitemap.get('locations')
        if not locs:
            return

        logger.debug("Generating manual sitemap entries.")
        for loc in locs:
            self._writeEntry(loc, fp)

    def _writeAutoLocs(self, sitemap, fp):
        """Write one entry per page of every source listed under `autogen`.

        Raises an Exception if a listed source name is unknown to the app.
        """
        source_names = sitemap.get('autogen')
        if not source_names:
            return

        # All generated entries share the same `lastmod`: the current time.
        cur_time = strftime_iso8601(time.time())
        for name in source_names:
            logger.debug("Generating automatic sitemap entries for '%s'.",
                         name)
            source = self.app.getSource(name)
            if source is None:
                raise Exception("No such source: %s" % name)

            for page in PageIterator(source):
                args = {'url': page['url'], 'lastmod': cur_time}
                # A page's own `sitemap` settings override the defaults.
                sm_cfg = page.get('sitemap')
                if sm_cfg:
                    args.update(sm_cfg)

                self._writeEntry(args, fp)

    def _writeEntry(self, args, fp):
        """Write a single `<url>` element described by the `args` mapping.

        `url` is required; `lastmod`, `changefreq` and `priority` are only
        written when present.
        """
        # NOTE(review): values are interpolated as-is, without XML escaping;
        # a URL containing `&` would produce invalid XML. Left unchanged
        # because hand-written entries may already be escaped -- confirm.
        fp.write(SITEURL_HEADER)
        fp.write(SITEURL_LOC % args['url'])
        if 'lastmod' in args:
            fp.write(SITEURL_LASTMOD % args['lastmod'])
        if 'changefreq' in args:
            fp.write(SITEURL_CHANGEFREQ % args['changefreq'])
        if 'priority' in args:
            fp.write(SITEURL_PRIORITY % args['priority'])
        fp.write(SITEURL_FOOTER)
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
91
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
92
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
def strftime_iso8601(t):
    """Format the epoch timestamp `t` as a UTC ISO 8601 string.

    The result looks like `2015-08-04T21:22:30Z`.
    """
    utc_parts = time.gmtime(t)
    return time.strftime('%Y-%m-%dT%H:%M:%SZ', utc_parts)
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
95