Mercurial > piecrust2
annotate piecrust/processing/sitemap.py @ 661:2f780b191541
internal: Fix a bug with registering taxonomy terms that are not strings.
Some objects, like the blog data provider's taxonomy entries, can render as
strings, but are objects themselves. When registering them as "used terms", we
need to use their string representation.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Tue, 01 Mar 2016 22:26:09 -0800 |
parents | 62274d805a6e |
children | 4850f8c21b6e |
rev | line source |
---|---|
34 | 1 import time |
2 import logging | |
3 import yaml | |
437
62274d805a6e
bake: Tweaks to the `sitemap` processor. Add tests.
Ludovic Chabant <ludovic@chabant.com>
parents:
430
diff
changeset
|
4 from piecrust.data.iterators import PageIterator |
34 | 5 from piecrust.processing.base import SimpleFileProcessor |
430
21e26ed867b6
internal: Create full route metadata in one place.
Ludovic Chabant <ludovic@chabant.com>
parents:
287
diff
changeset
|
6 from piecrust.routing import create_route_metadata |
34 | 7 |
8 | |
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Start of the generated document: the XML declaration followed by the
# opening tag of the root `urlset` element.
SITEMAP_HEADER = (
    '<?xml version="1.0" encoding="utf-8"?>\n'
    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
)
# Closes the root element opened by `SITEMAP_HEADER`.
SITEMAP_FOOTER = "</urlset>\n"

# Templates for one `<url>` entry. The `%s` / `%0.1f` placeholders are
# filled with the `%` operator by the code that writes each entry.
SITEURL_HEADER = " <url>\n"
SITEURL_LOC = " <loc>%s</loc>\n"
SITEURL_LASTMOD = " <lastmod>%s</lastmod>\n"
SITEURL_CHANGEFREQ = " <changefreq>%s</changefreq>\n"
SITEURL_PRIORITY = " <priority>%0.1f</priority>\n"
SITEURL_FOOTER = " </url>\n"
24 | |
25 | |
class SitemapProcessor(SimpleFileProcessor):
    """Turn a YAML sitemap description into a sitemap XML file.

    The YAML document is expected to be a mapping with two optional keys:

    * ``locations``: a list of mappings, each written out as one ``<url>``
      entry. Every mapping needs a ``url`` key and may also carry
      ``lastmod``, ``changefreq`` and ``priority``.
    * ``autogen``: a list of page source names. One entry is written for
      every page obtained by iterating each named source.
    """
    PROCESSOR_NAME = 'sitemap'

    def __init__(self):
        # NOTE(review): the mapping is presumably "input extension ->
        # output extension" as understood by `SimpleFileProcessor`;
        # confirm against the base class.
        super(SitemapProcessor, self).__init__({'sitemap': 'xml'})
        self._start_time = None

    def onPipelineStart(self, pipeline):
        """Record the wall-clock time at which the pipeline started."""
        self._start_time = time.time()

    def _doProcess(self, in_path, out_path):
        """Read the YAML file at `in_path` and write the XML to `out_path`.

        Returns `True` once the output file has been written.
        """
        with open(in_path, 'r', encoding='utf-8') as fp:
            # `safe_load` only builds plain Python data. A bare
            # `yaml.load(fp)` can construct arbitrary objects and is
            # rejected by recent PyYAML releases for lacking a `Loader`.
            # An empty file loads as `None`; treat it as an empty mapping
            # so a valid (empty) sitemap is still produced.
            sitemap = yaml.safe_load(fp) or {}

        # The XML declaration in `SITEMAP_HEADER` says UTF-8, so the file
        # must be written as UTF-8 regardless of the locale's encoding.
        with open(out_path, 'w', encoding='utf-8') as fp:
            fp.write(SITEMAP_HEADER)
            self._writeManualLocs(sitemap, fp)
            self._writeAutoLocs(sitemap, fp)
            fp.write(SITEMAP_FOOTER)

        return True

    def _writeManualLocs(self, sitemap, fp):
        """Write the entries listed under the `locations` key, if any."""
        locs = sitemap.get('locations')
        if not locs:
            return

        logger.debug("Generating manual sitemap entries.")
        for loc in locs:
            self._writeEntry(loc, fp)

    def _writeAutoLocs(self, sitemap, fp):
        """Write one entry per page of every source named under `autogen`.

        Each generated entry gets the page's `url` and a `lastmod` set to
        the current time; the page's own `sitemap` mapping, when present
        and non-empty, overrides or extends those values.

        Raises `Exception` if a named source does not exist.
        """
        source_names = sitemap.get('autogen')
        if not source_names:
            return

        # Computed once so every generated entry shares the same timestamp.
        cur_time = strftime_iso8601(time.time())
        for name in source_names:
            logger.debug("Generating automatic sitemap entries for '%s'.",
                         name)
            source = self.app.getSource(name)
            if source is None:
                raise Exception("No such source: %s" % name)

            for page in PageIterator(source):
                uri = page['url']
                sm_cfg = page.get('sitemap')

                args = {'url': uri, 'lastmod': cur_time}
                if sm_cfg:
                    args.update(sm_cfg)

                self._writeEntry(args, fp)

    def _writeEntry(self, args, fp):
        """Write a single `<url>` element described by the `args` mapping.

        `args['url']` is required (a missing key raises `KeyError`);
        `lastmod`, `changefreq` and `priority` are written only when
        present. Text values are XML-escaped, so URLs should be given
        unescaped (a literal `&`, not `&amp;`).
        """
        # Imported here so this class does not depend on a module-level
        # import being added elsewhere in the file.
        from xml.sax.saxutils import escape

        # The sitemap protocol asks for quotes to be entity-escaped in
        # addition to the `&`, `<` and `>` that `escape` always handles.
        quotes = {"'": "&apos;", '"': "&quot;"}

        fp.write(SITEURL_HEADER)
        fp.write(SITEURL_LOC % escape(str(args['url']), quotes))
        if 'lastmod' in args:
            fp.write(SITEURL_LASTMOD % escape(str(args['lastmod']), quotes))
        if 'changefreq' in args:
            fp.write(SITEURL_CHANGEFREQ %
                     escape(str(args['changefreq']), quotes))
        if 'priority' in args:
            # `float()` lets a quoted YAML value such as "0.5" work with
            # the `%0.1f` template instead of raising `TypeError`.
            fp.write(SITEURL_PRIORITY % float(args['priority']))
        fp.write(SITEURL_FOOTER)
91 | |
92 | |
def strftime_iso8601(t):
    """Format `t`, in seconds since the epoch, as a UTC timestamp.

    The result looks like `2016-03-01T22:26:09Z`.
    """
    utc_fields = time.gmtime(t)
    return time.strftime('%Y-%m-%dT%H:%M:%SZ', utc_fields)
95 |