Mercurial > piecrust2
annotate piecrust/processing/sitemap.py @ 578:683be25cbdb2
internal: Rename `raw_content` to `segments` since it's what it is.
| author | Ludovic Chabant <ludovic@chabant.com> |
|---|---|
| date | Wed, 30 Dec 2015 14:46:34 -0800 |
| parents | 62274d805a6e |
| children | 4850f8c21b6e |
| rev | line source |
|---|---|
| 34 | 1 import time |
| 2 import logging | |
| 3 import yaml | |
|
437
62274d805a6e
bake: Tweaks to the `sitemap` processor. Add tests.
Ludovic Chabant <ludovic@chabant.com>
parents:
430
diff
changeset
|
4 from piecrust.data.iterators import PageIterator |
| 34 | 5 from piecrust.processing.base import SimpleFileProcessor |
|
430
21e26ed867b6
internal: Create full route metadata in one place.
Ludovic Chabant <ludovic@chabant.com>
parents:
287
diff
changeset
|
6 from piecrust.routing import create_route_metadata |
| 34 | 7 |
| 8 | |
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Opening of the sitemap document: the XML declaration followed by the
# `<urlset>` root element in the sitemaps.org 0.9 namespace.
SITEMAP_HEADER = (
    '<?xml version="1.0" encoding="utf-8"?>\n'
    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
)
# Closes the `<urlset>` root element.
SITEMAP_FOOTER = "</urlset>\n"

# Fragments for a single `<url>` entry.  The ones containing a conversion
# specifier are filled in with the `%` operator by the code writing entries.
SITEURL_HEADER = " <url>\n"
SITEURL_LOC = " <loc>%s</loc>\n"
SITEURL_LASTMOD = " <lastmod>%s</lastmod>\n"
SITEURL_CHANGEFREQ = " <changefreq>%s</changefreq>\n"
# Priority is rendered as a float with a single decimal.
SITEURL_PRIORITY = " <priority>%0.1f</priority>\n"
SITEURL_FOOTER = " </url>\n"
| 24 | |
| 25 | |
class SitemapProcessor(SimpleFileProcessor):
    """Processor that converts a YAML sitemap description into sitemap XML.

    The input document is expected to be a mapping that may contain:

    * ``locations``: an iterable of mappings, each with a ``url`` key and
      optional ``lastmod``, ``changefreq`` and ``priority`` keys.  Each one
      is written as a ``<url>`` entry.
    * ``autogen``: an iterable of page source names.  Every page iterated
      from each named source gets a ``<url>`` entry.
    """
    PROCESSOR_NAME = 'sitemap'

    def __init__(self):
        # NOTE(review): the dict is presumably an input -> output file
        # extension mapping understood by `SimpleFileProcessor`; confirm
        # against the base class.
        super(SitemapProcessor, self).__init__({'sitemap': 'xml'})
        # Set in `onPipelineStart`; not read anywhere else in this class.
        self._start_time = None

    def onPipelineStart(self, pipeline):
        """Record the time at which the pipeline started."""
        self._start_time = time.time()

    def _doProcess(self, in_path, out_path):
        """Read the YAML at `in_path` and write the sitemap to `out_path`.

        Manual entries are written first, followed by auto-generated ones.
        Always returns True.
        """
        with open(in_path, 'r', encoding='utf8') as fp:
            # `safe_load` only builds plain data types, which is all a
            # sitemap description needs; a bare `yaml.load(fp)` can build
            # arbitrary objects and is rejected by recent PyYAML releases.
            # An empty file loads as None, so fall back to an empty mapping.
            sitemap = yaml.safe_load(fp) or {}

        # The XML declaration in SITEMAP_HEADER announces UTF-8, so the file
        # must be encoded that way regardless of the locale default.
        with open(out_path, 'w', encoding='utf8') as fp:
            fp.write(SITEMAP_HEADER)
            self._writeManualLocs(sitemap, fp)
            self._writeAutoLocs(sitemap, fp)
            fp.write(SITEMAP_FOOTER)

        return True

    def _writeManualLocs(self, sitemap, fp):
        """Write one entry per item of the `locations` list, if any."""
        locs = sitemap.get('locations')
        if not locs:
            return

        logger.debug("Generating manual sitemap entries.")
        for loc in locs:
            self._writeEntry(loc, fp)

    def _writeAutoLocs(self, sitemap, fp):
        """Write one entry per page of every source named in `autogen`.

        Each entry defaults to the page URL with the current time as
        `lastmod`; a truthy `sitemap` value on the page overrides or extends
        those defaults.

        Raises:
            Exception: if a listed source name is unknown to the app.
        """
        source_names = sitemap.get('autogen')
        if not source_names:
            return

        # Computed once so that every auto-generated entry carries the same
        # timestamp.
        cur_time = strftime_iso8601(time.time())
        for name in source_names:
            logger.debug(
                "Generating automatic sitemap entries for '%s'.", name)
            source = self.app.getSource(name)
            if source is None:
                raise Exception("No such source: %s" % name)

            for page in PageIterator(source):
                uri = page['url']
                sm_cfg = page.get('sitemap')

                args = {'url': uri, 'lastmod': cur_time}
                if sm_cfg:
                    args.update(sm_cfg)

                self._writeEntry(args, fp)

    def _writeEntry(self, args, fp):
        """Write a single `<url>` element described by `args` to `fp`.

        `args` must contain `url`; `lastmod`, `changefreq` and `priority`
        are written only when present.  Text values are XML-escaped so that
        characters such as `&` or `<` cannot produce a malformed document.
        """
        # Imported locally so this class does not depend on a module-level
        # import being added elsewhere in the file.
        from xml.sax.saxutils import escape

        fp.write(SITEURL_HEADER)
        fp.write(SITEURL_LOC % escape(str(args['url'])))
        if 'lastmod' in args:
            fp.write(SITEURL_LASTMOD % escape(str(args['lastmod'])))
        if 'changefreq' in args:
            fp.write(SITEURL_CHANGEFREQ % escape(str(args['changefreq'])))
        if 'priority' in args:
            # Rendered with a float conversion, so no escaping is needed.
            fp.write(SITEURL_PRIORITY % args['priority'])
        fp.write(SITEURL_FOOTER)
| 91 | |
| 92 | |
def strftime_iso8601(t):
    """Format `t`, in seconds since the epoch, as a UTC ISO 8601 string.

    The result looks like ``1970-01-01T00:00:00Z``.
    """
    utc_time = time.gmtime(t)
    return time.strftime('%Y-%m-%dT%H:%M:%SZ', utc_time)
| 95 |
