annotate piecrust/processing/sitemap.py @ 1182:31e8ee0bf5b2

prepare: Refactor scaffolding code.
author Ludovic Chabant <ludovic@chabant.com>
date Thu, 21 May 2020 22:08:23 -0700
parents 727110ea112a
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
853
f070a4fc033c core: Continue PieCrust3 refactor, simplify pages.
Ludovic Chabant <ludovic@chabant.com>
parents: 852
diff changeset
1 import os
f070a4fc033c core: Continue PieCrust3 refactor, simplify pages.
Ludovic Chabant <ludovic@chabant.com>
parents: 852
diff changeset
2 import os.path
34
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
3 import time
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
4 import logging
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
5 import yaml
854
08e02c2a2a1a core: Keep refactoring, this time to prepare for generator sources.
Ludovic Chabant <ludovic@chabant.com>
parents: 853
diff changeset
6 from piecrust.dataproviders.pageiterator import PageIterator
34
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
7 from piecrust.processing.base import SimpleFileProcessor
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
8
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
9
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
logger = logging.getLogger(__name__)


# Opening of the generated document: the XML declaration followed by the
# start tag of the root <urlset> element.
SITEMAP_HEADER = (
    '<?xml version="1.0" encoding="utf-8"?>\n'
    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
)
# Closes the root <urlset> element.
SITEMAP_FOOTER = "</urlset>\n"

# %-format templates for the pieces of a single <url> entry.
# NOTE(review): the leading whitespace inside these literals is reproduced
# as seen; confirm the intended indentation of the emitted XML.
SITEURL_HEADER = " <url>\n"
SITEURL_LOC = " <loc>%s</loc>\n"
SITEURL_LASTMOD = " <lastmod>%s</lastmod>\n"
SITEURL_CHANGEFREQ = " <changefreq>%s</changefreq>\n"
SITEURL_PRIORITY = " <priority>%0.1f</priority>\n"
SITEURL_FOOTER = " </url>\n"
34
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
25
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
26
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
class SitemapProcessor(SimpleFileProcessor):
    """Processor that turns a YAML sitemap description into sitemap XML.

    The input file is parsed as YAML and may contain two optional keys:

    * ``locations``: a list of hand-written entries, each a mapping with a
      ``url`` key and optional ``lastmod``, ``changefreq`` and ``priority``
      keys.
    * ``autogen``: a list of content source names; one entry is written for
      every page yielded by iterating each source.
    """
    PROCESSOR_NAME = 'sitemap'

    def __init__(self):
        # Presumably maps the `sitemap` input extension to an `xml` output
        # extension -- confirm against `SimpleFileProcessor`.
        super(SitemapProcessor, self).__init__({'sitemap': 'xml'})
        # Set in `onPipelineStart`; not read anywhere else in this class.
        self._start_time = None

    def onPipelineStart(self, ctx):
        """Record the wall-clock time at which the pipeline started."""
        self._start_time = time.time()

    def _doProcess(self, in_path, out_path):
        """Read the YAML file at `in_path` and write XML to `out_path`.

        Returns `True` on success. If anything goes wrong while writing,
        the partially written output file is removed and the original
        exception is re-raised.
        """
        # Read explicitly as UTF-8 rather than the platform default.
        with open(in_path, 'r', encoding='utf8') as fp:
            # An empty YAML document loads as `None`; treat it as an empty
            # mapping so the lookups below don't fail.
            sitemap = yaml.safe_load(fp) or {}

        try:
            # SITEMAP_HEADER declares the document as UTF-8, so the file
            # must really be written with that encoding.
            with open(out_path, 'w', encoding='utf8') as fp:
                fp.write(SITEMAP_HEADER)
                self._writeManualLocs(sitemap, fp)
                self._writeAutoLocs(sitemap, fp)
                fp.write(SITEMAP_FOOTER)
        except BaseException:
            # If an exception occurs, delete the output file otherwise
            # the pipeline will think the output was correctly produced.
            # Everything is caught (and always re-raised) so the cleanup
            # also happens on interruption.
            if os.path.isfile(out_path):
                logger.debug("Error occured, removing output sitemap.")
                os.unlink(out_path)
            raise

        return True

    def _writeManualLocs(self, sitemap, fp):
        """Write one entry per item of the `locations` list, if any."""
        locs = sitemap.get('locations')
        if not locs:
            return

        logger.debug("Generating manual sitemap entries.")
        for loc in locs:
            self._writeEntry(loc, fp)

    def _writeAutoLocs(self, sitemap, fp):
        """Write one entry per page of each source named in `autogen`.

        Every generated entry gets the current UTC time as `lastmod`; a
        page's own `sitemap` mapping, when present, overrides or extends
        the generated values.

        Raises `Exception` if a named source does not exist.
        """
        source_names = sitemap.get('autogen')
        if not source_names:
            return

        # Computed once so all generated entries share the same timestamp.
        cur_time = strftime_iso8601(time.time())
        for name in source_names:
            logger.debug(
                "Generating automatic sitemap entries for '%s'.", name)
            source = self.app.getSource(name)
            if source is None:
                raise Exception("No such source: %s" % name)

            for page in PageIterator(source):
                args = {'url': page['url'], 'lastmod': cur_time}
                sm_cfg = page.get('sitemap')
                if sm_cfg:
                    args.update(sm_cfg)

                self._writeEntry(args, fp)

    def _writeEntry(self, args, fp):
        """Write a single `<url>` element described by `args` to `fp`.

        `args` must contain `url`; `lastmod`, `changefreq` and `priority`
        are written only when present.
        """
        # NOTE(review): values are written verbatim. The sitemap protocol
        # requires entity-escaped URLs, but hand-written entries may already
        # be escaped, so escaping is not applied here -- confirm.
        fp.write(SITEURL_HEADER)
        fp.write(SITEURL_LOC % args['url'])
        if 'lastmod' in args:
            fp.write(SITEURL_LASTMOD % args['lastmod'])
        if 'changefreq' in args:
            fp.write(SITEURL_CHANGEFREQ % args['changefreq'])
        if 'priority' in args:
            fp.write(SITEURL_PRIORITY % args['priority'])
        fp.write(SITEURL_FOOTER)
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
100
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
101
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
def strftime_iso8601(t):
    """Format the epoch timestamp `t` (in seconds) as a UTC date/time
    string of the form ``YYYY-MM-DDTHH:MM:SSZ``.
    """
    utc_fields = time.gmtime(t)
    return time.strftime('%Y-%m-%dT%H:%M:%SZ', utc_fields)
bdb103c57168 Add `sitemap` processor.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
104