comparison piecrust/processing/sitemap.py @ 437:62274d805a6e

bake: Tweaks to the `sitemap` processor. Add tests. * Now using a `PageIterator` to get the expected behaviour from a given page source, such as skipping taxonomy pages. * Fix formatting of priority. * Use the bake time for the `lastmod` attribute.
author Ludovic Chabant <ludovic@chabant.com>
date Sat, 27 Jun 2015 21:50:36 -0700
parents 21e26ed867b6
children 4850f8c21b6e
comparison
equal deleted inserted replaced
436:2aa879d63133 437:62274d805a6e
1 import time 1 import time
2 import logging 2 import logging
3 import yaml 3 import yaml
4 from piecrust.data.iterators import PageIterator
4 from piecrust.processing.base import SimpleFileProcessor 5 from piecrust.processing.base import SimpleFileProcessor
5 from piecrust.routing import create_route_metadata 6 from piecrust.routing import create_route_metadata
6 7
7 8
8 logger = logging.getLogger(__name__) 9 logger = logging.getLogger(__name__)
9 10
10 11
11 SITEMAP_HEADER = \ 12 SITEMAP_HEADER = \
12 """<?xml version="1.0" encoding="utf-8"?> 13 """<?xml version="1.0" encoding="utf-8"?>
13 <urlset 14 <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
14 xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
15 """ 15 """
16 SITEMAP_FOOTER = "</urlset>\n" 16 SITEMAP_FOOTER = "</urlset>\n"
17 17
18 SITEURL_HEADER = " <url>\n" 18 SITEURL_HEADER = " <url>\n"
19 SITEURL_LOC = " <loc>%s</loc>\n" 19 SITEURL_LOC = " <loc>%s</loc>\n"
20 SITEURL_LASTMOD = " <lastmod>%s</lastmod>\n" 20 SITEURL_LASTMOD = " <lastmod>%s</lastmod>\n"
21 SITEURL_CHANGEFREQ = " <changefreq>%s</changefreq>\n" 21 SITEURL_CHANGEFREQ = " <changefreq>%s</changefreq>\n"
22 SITEURL_PRIORITY = " <priority>%f</priority>\n" 22 SITEURL_PRIORITY = " <priority>%0.1f</priority>\n"
23 SITEURL_FOOTER = " </url>\n" 23 SITEURL_FOOTER = " </url>\n"
24 24
25 25
26 class SitemapProcessor(SimpleFileProcessor): 26 class SitemapProcessor(SimpleFileProcessor):
27 PROCESSOR_NAME = 'sitemap' 27 PROCESSOR_NAME = 'sitemap'
57 def _writeAutoLocs(self, sitemap, fp): 57 def _writeAutoLocs(self, sitemap, fp):
58 source_names = sitemap.setdefault('autogen', None) 58 source_names = sitemap.setdefault('autogen', None)
59 if not source_names: 59 if not source_names:
60 return 60 return
61 61
62 cur_time = strftime_iso8601(time.time())
62 for name in source_names: 63 for name in source_names:
63 logger.debug("Generating automatic sitemap entries for '%s'." % 64 logger.debug("Generating automatic sitemap entries for '%s'." %
64 name) 65 name)
65 source = self.app.getSource(name) 66 source = self.app.getSource(name)
66 if source is None: 67 if source is None:
67 raise Exception("No such source: %s" % name) 68 raise Exception("No such source: %s" % name)
68 69
69 for page in source.getPages(): 70 it = PageIterator(source)
70 route_metadata = create_route_metadata(page) 71 for page in it:
71 route = self.app.getRoute(source.name, route_metadata) 72 uri = page['url']
72 uri = route.getUri(route_metadata) 73 sm_cfg = page.get('sitemap')
73 74
74 t = page.datetime.timestamp() 75 args = {'url': uri, 'lastmod': cur_time}
75 sm_cfg = page.config.get('sitemap')
76
77 args = {'url': uri, 'lastmod': strftime_iso8601(t)}
78 if sm_cfg: 76 if sm_cfg:
79 args.update(sm_cfg) 77 args.update(sm_cfg)
80 78
81 self._writeEntry(args, fp) 79 self._writeEntry(args, fp)
82 80