Mercurial > piecrust2
changeset 34:bdb103c57168
Add `sitemap` processor.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Wed, 20 Aug 2014 14:55:23 -0700 |
parents | 62c7a97c8340 |
children | e4c345dcf33c |
files | piecrust/plugins/builtin.py piecrust/processing/sitemap.py |
diffstat | 2 files changed, 99 insertions(+), 1 deletions(-) [+] |
line wrap: on
line diff
--- a/piecrust/plugins/builtin.py Tue Aug 19 15:36:28 2014 -0700 +++ b/piecrust/plugins/builtin.py Wed Aug 20 14:55:23 2014 -0700 @@ -9,6 +9,7 @@ from piecrust.plugins.base import PieCrustPlugin from piecrust.processing.base import CopyFileProcessor from piecrust.processing.less import LessProcessor +from piecrust.processing.sitemap import SitemapProcessor from piecrust.sources.base import DefaultPageSource from piecrust.sources.posts import (FlatPostsSource, ShallowPostsSource, HierarchyPostsSource) @@ -60,5 +61,6 @@ def getProcessors(self): return [ CopyFileProcessor(), - LessProcessor()] + LessProcessor(), + SitemapProcessor()]
# piecrust/processing/sitemap.py
"""Processor that renders a YAML sitemap description into a sitemap XML file.

The input file may contain two optional top-level keys:

* ``locations``: a list of mappings written out verbatim as ``<url>``
  entries (each needs a ``url`` key; ``lastmod``, ``changefreq`` and
  ``priority`` are optional).
* ``autogen``: a list of page source names; one ``<url>`` entry is generated
  for every page factory of each named source.
"""
import time
import logging
import yaml
from piecrust.processing.base import SimpleFileProcessor


logger = logging.getLogger(__name__)


SITEMAP_HEADER = \
"""<?xml version="1.0" encoding="utf-8"?>
<urlset
  xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
"""
SITEMAP_FOOTER = "</urlset>\n"

SITEURL_HEADER = "  <url>\n"
SITEURL_LOC = "    <loc>%s</loc>\n"
SITEURL_LASTMOD = "    <lastmod>%s</lastmod>\n"
SITEURL_CHANGEFREQ = "    <changefreq>%s</changefreq>\n"
SITEURL_PRIORITY = "    <priority>%f</priority>\n"
SITEURL_FOOTER = "  </url>\n"


class SitemapProcessor(SimpleFileProcessor):
    """Turns a YAML sitemap description into a sitemap XML document."""

    PROCESSOR_NAME = 'sitemap'

    def __init__(self):
        # NOTE(review): the mapping is presumably input extension -> output
        # extension; confirm against SimpleFileProcessor.
        super(SitemapProcessor, self).__init__({'sitemap': 'xml'})
        self._start_time = None

    def onPipelineStart(self, pipeline):
        """Record the time at which the processing pipeline started."""
        self._start_time = time.time()

    def _doProcess(self, in_path, out_path):
        """Read the YAML file at `in_path` and write the XML to `out_path`.

        Returns True once the output file has been written.
        """
        with open(in_path, 'r', encoding='utf-8') as fp:
            # `safe_load` instead of `load`: the file only needs plain
            # mappings/lists, and a bare `yaml.load(fp)` both allows
            # arbitrary object construction and is rejected by recent
            # PyYAML versions (the Loader argument is mandatory there).
            # An empty file parses to None, so fall back to an empty dict.
            sitemap = yaml.safe_load(fp) or {}

        # The XML declaration advertises utf-8, so write with that encoding
        # rather than whatever the platform default happens to be.
        with open(out_path, 'w', encoding='utf-8') as fp:
            fp.write(SITEMAP_HEADER)
            self._writeManualLocs(sitemap, fp)
            self._writeAutoLocs(sitemap, fp)
            fp.write(SITEMAP_FOOTER)

        return True

    def _writeManualLocs(self, sitemap, fp):
        """Write one entry per item listed under the `locations` key."""
        locs = sitemap.get('locations')
        if not locs:
            return

        logger.debug("Generating manual sitemap entries.")
        for loc in locs:
            self._writeEntry(loc, fp)

    def _writeAutoLocs(self, sitemap, fp):
        """Write one entry per page of every source named under `autogen`.

        Raises:
            Exception: if a listed source name is unknown to the app.
        """
        source_names = sitemap.get('autogen')
        if not source_names:
            return

        for name in source_names:
            logger.debug(
                "Generating automatic sitemap entries for '%s'.", name)
            # NOTE(review): `self.app` is not set in this file; presumably
            # the base class or the pipeline provides it.
            source = self.app.getSource(name)
            if source is None:
                raise Exception("No such source: %s" % name)

            for fac in source.getPageFactories():
                route = self.app.getRoute(source.name, fac.metadata)
                uri = route.getUri(fac.metadata)

                page = fac.buildPage()
                t = page.datetime.timestamp()
                sm_cfg = page.config.get('sitemap')

                args = {'url': uri, 'lastmod': strftime_iso8601(t)}
                if sm_cfg:
                    # Per-page `sitemap` config overrides the defaults.
                    # (Was `cm_cfg`, an undefined name -> NameError.)
                    args.update(sm_cfg)

                self._writeEntry(args, fp)

    def _writeEntry(self, args, fp):
        """Write a single `<url>` element described by the `args` mapping.

        `args['url']` is required; `lastmod`, `changefreq` and `priority`
        are emitted only when present.
        """
        # NOTE(review): values are interpolated as-is, without XML escaping;
        # a URL containing `&` would yield malformed XML. Confirm whether
        # callers are expected to pre-escape.
        fp.write(SITEURL_HEADER)
        fp.write(SITEURL_LOC % args['url'])
        if 'lastmod' in args:
            fp.write(SITEURL_LASTMOD % args['lastmod'])
        if 'changefreq' in args:
            fp.write(SITEURL_CHANGEFREQ % args['changefreq'])
        if 'priority' in args:
            fp.write(SITEURL_PRIORITY % args['priority'])
        fp.write(SITEURL_FOOTER)


def strftime_iso8601(t):
    """Format the POSIX timestamp `t` as a UTC `YYYY-MM-DDTHH:MM:SSZ` string."""
    return time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime(t))