changeset 34:bdb103c57168

Add `sitemap` processor.
author Ludovic Chabant <ludovic@chabant.com>
date Wed, 20 Aug 2014 14:55:23 -0700
parents 62c7a97c8340
children e4c345dcf33c
files piecrust/plugins/builtin.py piecrust/processing/sitemap.py
diffstat 2 files changed, 99 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/piecrust/plugins/builtin.py	Tue Aug 19 15:36:28 2014 -0700
+++ b/piecrust/plugins/builtin.py	Wed Aug 20 14:55:23 2014 -0700
@@ -9,6 +9,7 @@
 from piecrust.plugins.base import PieCrustPlugin
 from piecrust.processing.base import CopyFileProcessor
 from piecrust.processing.less import LessProcessor
+from piecrust.processing.sitemap import SitemapProcessor
 from piecrust.sources.base import DefaultPageSource
 from piecrust.sources.posts import (FlatPostsSource, ShallowPostsSource,
         HierarchyPostsSource)
@@ -60,5 +61,6 @@
     def getProcessors(self):
         return [
                 CopyFileProcessor(),
-                LessProcessor()]
+                LessProcessor(),
+                SitemapProcessor()]
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/piecrust/processing/sitemap.py	Wed Aug 20 14:55:23 2014 -0700
@@ -0,0 +1,96 @@
+import time
+import logging
+import yaml
+from piecrust.processing.base import SimpleFileProcessor
+
+
+logger = logging.getLogger(__name__)
+
+
+# Text that opens and closes the whole <urlset> document.
+SITEMAP_HEADER = (
+    '<?xml version="1.0" encoding="utf-8"?>\n'
+    '<urlset\n'
+    '  xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n')
+SITEMAP_FOOTER = "</urlset>\n"
+# Fragments of a single <url> entry; each "%" slot takes one value.
+SITEURL_HEADER = "  <url>\n"
+SITEURL_LOC = "    <loc>%s</loc>\n"
+SITEURL_LASTMOD = "    <lastmod>%s</lastmod>\n"
+SITEURL_CHANGEFREQ = "    <changefreq>%s</changefreq>\n"
+SITEURL_PRIORITY = "    <priority>%f</priority>\n"
+SITEURL_FOOTER = "  </url>\n"
+
+
+class SitemapProcessor(SimpleFileProcessor):
+    """Writes a sitemaps.org XML file from a YAML sitemap description."""
+    PROCESSOR_NAME = 'sitemap'
+
+    def __init__(self):
+        # NOTE(review): presumably `.sitemap` in, `.xml` out -- check base class.
+        super(SitemapProcessor, self).__init__({'sitemap': 'xml'})
+        self._start_time = None  # filled in by onPipelineStart()
+
+    def onPipelineStart(self, pipeline):
+        self._start_time = time.time()  # epoch seconds; `pipeline` is unused
+
+    def _doProcess(self, in_path, out_path):
+        """Render YAML `in_path` as sitemap XML at `out_path`; returns True."""
+        with open(in_path, 'r') as fp:
+            # safe_load: plain data only, and works without a Loader argument.
+            sitemap = yaml.safe_load(fp) or {}  # empty file parses to None
+        with open(out_path, 'w') as fp:
+            fp.write(SITEMAP_HEADER)
+            self._writeManualLocs(sitemap, fp)
+            self._writeAutoLocs(sitemap, fp)
+            fp.write(SITEMAP_FOOTER)
+        return True
+
+    def _writeManualLocs(self, sitemap, fp):
+        """Write one entry per item of the optional `locations` list."""
+        locs = sitemap.get('locations')
+        if not locs:
+            return
+        logger.debug("Generating manual sitemap entries.")
+        for loc in locs:
+            self._writeEntry(loc, fp)
+
+    def _writeAutoLocs(self, sitemap, fp):
+        """Write one entry per page of each source named in `autogen`."""
+        source_names = sitemap.get('autogen')
+        if not source_names:
+            return
+        for name in source_names:
+            logger.debug("Generating automatic sitemap entries for '%s'.", name)
+            source = self.app.getSource(name)
+            if source is None:
+                raise Exception("No such source: %s" % name)
+            for fac in source.getPageFactories():
+                route = self.app.getRoute(source.name, fac.metadata)
+                uri = route.getUri(fac.metadata)
+                page = fac.buildPage()
+                t = page.datetime.timestamp()
+                sm_cfg = page.config.get('sitemap')
+                args = {'url': uri, 'lastmod': strftime_iso8601(t)}
+                if sm_cfg:
+                    # The page's own `sitemap` config overrides the defaults.
+                    args.update(sm_cfg)
+                self._writeEntry(args, fp)
+
+    def _writeEntry(self, args, fp):
+        """Write one <url> element; `url` is the only required key."""
+        # NOTE(review): values are interpolated verbatim, without XML escaping.
+        fp.write(SITEURL_HEADER)
+        fp.write(SITEURL_LOC % args['url'])
+        if 'lastmod' in args:
+            fp.write(SITEURL_LASTMOD % args['lastmod'])
+        if 'changefreq' in args:
+            fp.write(SITEURL_CHANGEFREQ % args['changefreq'])
+        if 'priority' in args:
+            fp.write(SITEURL_PRIORITY % args['priority'])
+        fp.write(SITEURL_FOOTER)
+
+
+def strftime_iso8601(t):  # t: seconds since the epoch
+    utc_fields = time.gmtime(t)  # broken-down UTC time, hence the literal "Z"
+    return time.strftime('%Y-%m-%dT%H:%M:%SZ', utc_fields)