34
|
1 import time
|
|
2 import logging
|
|
3 import yaml
|
|
4 from piecrust.processing.base import SimpleFileProcessor
|
|
5
|
|
6
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Opening of the sitemap document: XML declaration plus the <urlset> root
# element bound to the sitemaps.org 0.9 schema namespace.
SITEMAP_HEADER = (
    '<?xml version="1.0" encoding="utf-8"?>\n'
    '<urlset\n'
    ' xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
)
# Closes the <urlset> root element.
SITEMAP_FOOTER = "</urlset>\n"

# Templates for a single <url> entry. Those containing a conversion
# specifier are filled in with the %-operator.
SITEURL_HEADER = " <url>\n"
SITEURL_LOC = " <loc>%s</loc>\n"
SITEURL_LASTMOD = " <lastmod>%s</lastmod>\n"
SITEURL_CHANGEFREQ = " <changefreq>%s</changefreq>\n"
SITEURL_PRIORITY = " <priority>%f</priority>\n"  # %f: needs a number
SITEURL_FOOTER = " </url>\n"
|
|
23
|
|
24
|
|
class SitemapProcessor(SimpleFileProcessor):
    """Processor that turns a YAML ``.sitemap`` file into an XML sitemap.

    The YAML document may contain two optional keys:

    * ``locations``: a list of mappings written out as-is, each with a
      mandatory ``url`` and optional ``lastmod``, ``changefreq`` and
      ``priority`` entries.
    * ``autogen``: a list of page source names; one entry is generated for
      every page factory of each named source.
    """

    PROCESSOR_NAME = 'sitemap'

    def __init__(self):
        # Maps the input extension `sitemap` to the output extension `xml`.
        super(SitemapProcessor, self).__init__({'sitemap': 'xml'})
        self._start_time = None

    def onPipelineStart(self, pipeline):
        """Record the wall-clock time at which the pipeline started."""
        self._start_time = time.time()

    def _doProcess(self, in_path, out_path):
        """Read the YAML sitemap at `in_path` and write XML to `out_path`.

        Always returns ``True`` once the output file has been written.
        """
        with open(in_path, 'r', encoding='utf-8') as fp:
            # NOTE(review): this used to be `yaml.load(fp)`, which can
            # construct arbitrary Python objects on old PyYAML and raises
            # TypeError (missing Loader) on PyYAML >= 6. A sitemap only
            # needs plain mappings/lists/scalars, so the safe loader is
            # used instead.
            # An empty file parses to None; treat it as an empty sitemap.
            sitemap = yaml.safe_load(fp) or {}

        # The XML declaration in SITEMAP_HEADER promises UTF-8, so do not
        # rely on the platform's default encoding.
        with open(out_path, 'w', encoding='utf-8') as fp:
            fp.write(SITEMAP_HEADER)
            self._writeManualLocs(sitemap, fp)
            self._writeAutoLocs(sitemap, fp)
            fp.write(SITEMAP_FOOTER)

        return True

    def _writeManualLocs(self, sitemap, fp):
        """Write the entries listed under the ``locations`` key, if any."""
        locs = sitemap.get('locations')
        if not locs:
            return

        logger.debug("Generating manual sitemap entries.")
        for loc in locs:
            self._writeEntry(loc, fp)

    def _writeAutoLocs(self, sitemap, fp):
        """Write one entry per page of each source named under ``autogen``.

        Raises:
            Exception: if a listed source name is unknown to the app.
        """
        source_names = sitemap.get('autogen')
        if not source_names:
            return

        for name in source_names:
            logger.debug(
                "Generating automatic sitemap entries for '%s'.", name)
            source = self.app.getSource(name)
            if source is None:
                raise Exception("No such source: %s" % name)

            for fac in source.getPageFactories():
                route = self.app.getRoute(source.name, fac.metadata)
                uri = route.getUri(fac.metadata)

                page = fac.buildPage()
                t = page.datetime.timestamp()
                sm_cfg = page.config.get('sitemap')

                args = {'url': uri, 'lastmod': strftime_iso8601(t)}
                if sm_cfg:
                    # Per-page `sitemap` config overrides the generated
                    # defaults. (This previously referenced the undefined
                    # name `cm_cfg` and raised NameError.)
                    args.update(sm_cfg)

                self._writeEntry(args, fp)

    def _writeEntry(self, args, fp):
        """Write a single ``<url>`` element described by the `args` mapping.

        ``args['url']`` is required; ``lastmod``, ``changefreq`` and
        ``priority`` are written only when present. ``priority`` is
        formatted with ``%f`` and must therefore be numeric.
        """
        fp.write(SITEURL_HEADER)
        fp.write(SITEURL_LOC % args['url'])
        if 'lastmod' in args:
            fp.write(SITEURL_LASTMOD % args['lastmod'])
        if 'changefreq' in args:
            fp.write(SITEURL_CHANGEFREQ % args['changefreq'])
        if 'priority' in args:
            fp.write(SITEURL_PRIORITY % args['priority'])
        fp.write(SITEURL_FOOTER)
|
|
92
|
|
93
|
|
def strftime_iso8601(t):
    """Format the POSIX timestamp `t` as ``YYYY-MM-DDTHH:MM:SSZ`` in UTC."""
    utc_fields = time.gmtime(t)
    return time.strftime('%Y-%m-%dT%H:%M:%SZ', utc_fields)
|
|
96
|