Mercurial > piecrust2
annotate piecrust/processing/sitemap.py @ 550:6f216c1ab6b1
bake: Add a flag to know which record entries got collapsed from last run.
This makes it possible to find entries for things that were actually baked
during the current run, as opposed to skipped because they were "clean".
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Tue, 04 Aug 2015 21:22:30 -0700 |
parents | 62274d805a6e |
children | 4850f8c21b6e |
rev | line source |
---|---|
34 | 1 import time |
2 import logging | |
3 import yaml | |
437
62274d805a6e
bake: Tweaks to the `sitemap` processor. Add tests.
Ludovic Chabant <ludovic@chabant.com>
parents:
430
diff
changeset
|
4 from piecrust.data.iterators import PageIterator |
34 | 5 from piecrust.processing.base import SimpleFileProcessor |
430
21e26ed867b6
internal: Create full route metadata in one place.
Ludovic Chabant <ludovic@chabant.com>
parents:
287
diff
changeset
|
6 from piecrust.routing import create_route_metadata |
34 | 7 |
8 | |
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Opening and closing text of the generated sitemap document.
SITEMAP_HEADER = (
    '<?xml version="1.0" encoding="utf-8"?>\n'
    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
)
SITEMAP_FOOTER = "</urlset>\n"

# Templates for one `<url>` entry; the `%`-placeholders are filled in by
# the code that writes each entry.
SITEURL_HEADER = " <url>\n"
SITEURL_LOC = " <loc>%s</loc>\n"
SITEURL_LASTMOD = " <lastmod>%s</lastmod>\n"
SITEURL_CHANGEFREQ = " <changefreq>%s</changefreq>\n"
SITEURL_PRIORITY = " <priority>%0.1f</priority>\n"
SITEURL_FOOTER = " </url>\n"
24 | |
25 | |
class SitemapProcessor(SimpleFileProcessor):
    """Processor that turns a YAML sitemap description into sitemap XML.

    The input file is parsed as YAML and may contain two optional keys:

    * ``locations``: a list of mappings, each written as one ``<url>``
      entry. Each mapping must have a ``url`` key and may have
      ``lastmod``, ``changefreq`` and ``priority`` keys.
    * ``autogen``: a list of page source names. Every page found in each
      of those sources gets one ``<url>`` entry.

    The ``{'sitemap': 'xml'}`` mapping is handed to the base class;
    presumably it maps the input extension to the output extension
    (confirm against ``SimpleFileProcessor``).
    """
    PROCESSOR_NAME = 'sitemap'

    def __init__(self):
        super(SitemapProcessor, self).__init__({'sitemap': 'xml'})
        # Set in `onPipelineStart`; not read anywhere else in this class.
        self._start_time = None

    def onPipelineStart(self, pipeline):
        """Record the time at which the processing pipeline started."""
        self._start_time = time.time()

    def _doProcess(self, in_path, out_path):
        """Read the YAML file at `in_path`, write sitemap XML to `out_path`.

        Always returns True once the output file has been written.
        """
        # `safe_load` only builds plain Python objects, so a sitemap file
        # cannot trigger arbitrary object construction. It also avoids the
        # Loader-less `yaml.load()` call that newer PyYAML versions reject.
        with open(in_path, 'r', encoding='utf8') as fp:
            sitemap = yaml.safe_load(fp)

        # An empty YAML document loads as None; treat it as "no entries"
        # so an empty (but valid) sitemap is still produced.
        if sitemap is None:
            sitemap = {}

        # The XML header declares UTF-8, so write with that encoding
        # rather than whatever the platform default happens to be.
        with open(out_path, 'w', encoding='utf8') as fp:
            fp.write(SITEMAP_HEADER)
            self._writeManualLocs(sitemap, fp)
            self._writeAutoLocs(sitemap, fp)
            fp.write(SITEMAP_FOOTER)

        return True

    def _writeManualLocs(self, sitemap, fp):
        """Write the entries listed under the `locations` key, if any."""
        locs = sitemap.get('locations')
        if not locs:
            return

        logger.debug("Generating manual sitemap entries.")
        for loc in locs:
            self._writeEntry(loc, fp)

    def _writeAutoLocs(self, sitemap, fp):
        """Write one entry per page of each source listed under `autogen`.

        Each entry gets the page's `url`, and a `lastmod` set to the
        current time. A page's own `sitemap` settings, when present,
        override those values.

        Raises:
            Exception: if a listed source name is unknown to the app.
        """
        source_names = sitemap.get('autogen')
        if not source_names:
            return

        # One timestamp for the whole run, shared by all generated entries.
        cur_time = strftime_iso8601(time.time())
        for name in source_names:
            logger.debug(
                "Generating automatic sitemap entries for '%s'.", name)
            # `self.app` is not set in this class; presumably it is
            # provided by the base processor (confirm).
            source = self.app.getSource(name)
            if source is None:
                raise Exception("No such source: %s" % name)

            for page in PageIterator(source):
                args = {'url': page['url'], 'lastmod': cur_time}
                sm_cfg = page.get('sitemap')
                if sm_cfg:
                    args.update(sm_cfg)

                self._writeEntry(args, fp)

    def _writeEntry(self, args, fp):
        """Write one `<url>` element described by the `args` mapping.

        `args['url']` is required; `lastmod`, `changefreq` and `priority`
        are written only when present.
        """
        from xml.sax.saxutils import escape

        fp.write(SITEURL_HEADER)
        # URLs commonly contain `&` in query strings. It must be
        # entity-escaped, as must `<` and `>`, or the XML is invalid.
        fp.write(SITEURL_LOC % escape(str(args['url'])))
        if 'lastmod' in args:
            fp.write(SITEURL_LASTMOD % args['lastmod'])
        if 'changefreq' in args:
            fp.write(SITEURL_CHANGEFREQ % args['changefreq'])
        if 'priority' in args:
            fp.write(SITEURL_PRIORITY % args['priority'])
        fp.write(SITEURL_FOOTER)
91 | |
92 | |
def strftime_iso8601(t):
    """Format the POSIX timestamp `t` as an ISO 8601 string in UTC.

    The result looks like ``2015-08-05T04:22:30Z``.
    """
    utc_fields = time.gmtime(t)
    iso_format = '%Y-%m-%dT%H:%M:%SZ'
    return time.strftime(iso_format, utc_fields)
95 |