Mercurial > piecrust2
annotate piecrust/processing/sitemap.py @ 852:4850f8c21b6e
core: Start of the big refactor for PieCrust 3.0.
* Everything is a `ContentSource`, including assets directories.
* Most content sources are subclasses of the base file-system source.
* A source is processed by a "pipeline", and there are 2 built-in pipelines,
one for assets and one for pages. The asset pipeline is vaguely functional,
but the page pipeline is completely broken right now.
* Rewrite the baking process as just running appropriate pipelines on each
content item. This should allow for better parallelization.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Wed, 17 May 2017 00:11:48 -0700 |
parents | 62274d805a6e |
children | f070a4fc033c |
rev | line source |
---|---|
34 | 1 import time |
2 import logging | |
3 import yaml | |
437
62274d805a6e
bake: Tweaks to the `sitemap` processor. Add tests.
Ludovic Chabant <ludovic@chabant.com>
parents:
430
diff
changeset
|
4 from piecrust.data.iterators import PageIterator |
34 | 5 from piecrust.processing.base import SimpleFileProcessor |
6 | |
7 | |
logger = logging.getLogger(__name__)


# Opening of the generated document: the XML declaration followed by the
# `<urlset>` root element in the sitemaps.org 0.9 namespace.
SITEMAP_HEADER = (
    '<?xml version="1.0" encoding="utf-8"?>\n'
    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
)
# Closes the `<urlset>` root element.
SITEMAP_FOOTER = "</urlset>\n"

# `%`-format templates for the pieces of a single `<url>` entry.
# NOTE(review): the leading whitespace in these literals is copied from an
# annotated listing that may have collapsed runs of spaces -- confirm the
# intended indentation against the repository.
SITEURL_HEADER = " <url>\n"
SITEURL_LOC = " <loc>%s</loc>\n"
SITEURL_LASTMOD = " <lastmod>%s</lastmod>\n"
SITEURL_CHANGEFREQ = " <changefreq>%s</changefreq>\n"
# The priority is rendered with exactly one decimal place.
SITEURL_PRIORITY = " <priority>%0.1f</priority>\n"
SITEURL_FOOTER = " </url>\n"
23 | |
24 | |
class SitemapProcessor(SimpleFileProcessor):
    """Processor that turns a YAML sitemap description into an XML sitemap.

    The input mapping is read for two optional keys:

    * ``locations``: an iterable of mappings, each with a ``url`` key and
      optionally ``lastmod``, ``changefreq`` and ``priority``. Each one is
      written out as a `<url>` entry.
    * ``autogen``: an iterable of content source names. One `<url>` entry
      is written for every page iterated from each of those sources.
    """
    PROCESSOR_NAME = 'sitemap'

    def __init__(self):
        # NOTE(review): presumably maps the `sitemap` input extension to an
        # `xml` output extension -- confirm against `SimpleFileProcessor`.
        super(SitemapProcessor, self).__init__({'sitemap': 'xml'})
        self._start_time = None

    def onPipelineStart(self, ctx):
        """Remember the time at which this hook was invoked."""
        self._start_time = time.time()

    def _doProcess(self, in_path, out_path):
        """Read the YAML file at `in_path` and write the sitemap to `out_path`.

        Always returns `True`.
        """
        with open(in_path, 'r', encoding='utf8') as fp:
            # `safe_load` only builds plain Python objects and, unlike a
            # bare `yaml.load(fp)`, does not need an explicit loader on
            # recent PyYAML releases. An empty file parses to `None`, so
            # fall back to an empty mapping instead of crashing below.
            sitemap = yaml.safe_load(fp) or {}

        # The XML declaration in `SITEMAP_HEADER` promises UTF-8, so do not
        # rely on the platform's default encoding for the output.
        with open(out_path, 'w', encoding='utf8') as fp:
            fp.write(SITEMAP_HEADER)
            self._writeManualLocs(sitemap, fp)
            self._writeAutoLocs(sitemap, fp)
            fp.write(SITEMAP_FOOTER)

        return True

    def _writeManualLocs(self, sitemap, fp):
        """Write one entry per item of the ``locations`` key, if any."""
        locs = sitemap.get('locations')
        if not locs:
            return

        logger.debug("Generating manual sitemap entries.")
        for loc in locs:
            self._writeEntry(loc, fp)

    def _writeAutoLocs(self, sitemap, fp):
        """Write one entry per page of each source named in ``autogen``.

        Raises:
            Exception: if a listed name does not match any source.
        """
        source_names = sitemap.get('autogen')
        if not source_names:
            return

        # Every generated entry shares the same `lastmod` default: the time
        # at which generation happens. A page's own `sitemap` settings may
        # override it below.
        cur_time = strftime_iso8601(time.time())
        for name in source_names:
            logger.debug(
                "Generating automatic sitemap entries for '%s'.", name)
            source = self.app.getSource(name)
            if source is None:
                raise Exception("No such source: %s" % name)

            it = PageIterator(source)
            for page in it:
                uri = page['url']
                sm_cfg = page.get('sitemap')

                args = {'url': uri, 'lastmod': cur_time}
                if sm_cfg:
                    # Per-page settings win over the defaults above.
                    args.update(sm_cfg)

                self._writeEntry(args, fp)

    def _writeEntry(self, args, fp):
        """Write a single `<url>` element described by the mapping `args`.

        ``url`` is required; ``lastmod``, ``changefreq`` and ``priority``
        are emitted only when present. Values are inserted into the
        templates verbatim, with no XML escaping.
        """
        fp.write(SITEURL_HEADER)
        fp.write(SITEURL_LOC % args['url'])
        if 'lastmod' in args:
            fp.write(SITEURL_LASTMOD % args['lastmod'])
        if 'changefreq' in args:
            fp.write(SITEURL_CHANGEFREQ % args['changefreq'])
        if 'priority' in args:
            fp.write(SITEURL_PRIORITY % args['priority'])
        fp.write(SITEURL_FOOTER)
90 | |
91 | |
def strftime_iso8601(t):
    """Format the POSIX timestamp `t` as a UTC date-time string.

    The result has the form ``YYYY-MM-DDTHH:MM:SSZ``; for example, a
    timestamp of ``0`` gives ``1970-01-01T00:00:00Z``.
    """
    utc_fields = time.gmtime(t)
    return time.strftime('%Y-%m-%dT%H:%M:%SZ', utc_fields)
94 |