Mercurial > piecrust2
annotate piecrust/processing/sitemap.py @ 1145:e94737572542
serve: Fix an issue where false positive matches were rendered as the requested page.
Now we try to render the page, but also try to detect for the most common "empty" pages.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Tue, 05 Jun 2018 22:08:51 -0700 |
parents | 45ad976712ec |
children | 727110ea112a |
rev | line source |
---|---|
853
f070a4fc033c
core: Continue PieCrust3 refactor, simplify pages.
Ludovic Chabant <ludovic@chabant.com>
parents:
852
diff
changeset
|
1 import os |
f070a4fc033c
core: Continue PieCrust3 refactor, simplify pages.
Ludovic Chabant <ludovic@chabant.com>
parents:
852
diff
changeset
|
2 import os.path |
34 | 3 import time |
4 import logging | |
5 import yaml | |
854
08e02c2a2a1a
core: Keep refactoring, this time to prepare for generator sources.
Ludovic Chabant <ludovic@chabant.com>
parents:
853
diff
changeset
|
6 from piecrust.dataproviders.pageiterator import PageIterator |
34 | 7 from piecrust.processing.base import SimpleFileProcessor |
8 | |
9 | |
10 logger = logging.getLogger(__name__) | |
11 | |
12 | |
# Opening of the generated document: the XML declaration followed by the
# root `urlset` element in the sitemaps.org 0.9 namespace.
SITEMAP_HEADER = (
    '<?xml version="1.0" encoding="utf-8"?>\n'
    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
)
# Closes the root element opened by SITEMAP_HEADER.
SITEMAP_FOOTER = '</urlset>\n'

# Fragments of a single `<url>` entry. The ones containing a placeholder are
# filled in with the `%` operator when an entry is written.
# NOTE(review): the leading whitespace inside these literals is reproduced
# exactly as it appears in the visible source -- confirm against the
# repository if the output indentation matters.
SITEURL_HEADER = ' <url>\n'
SITEURL_LOC = ' <loc>%s</loc>\n'
SITEURL_LASTMOD = ' <lastmod>%s</lastmod>\n'
SITEURL_CHANGEFREQ = ' <changefreq>%s</changefreq>\n'
SITEURL_PRIORITY = ' <priority>%0.1f</priority>\n'
SITEURL_FOOTER = ' </url>\n'
34 | 25 |
26 | |
class SitemapProcessor(SimpleFileProcessor):
    """Generate a sitemap XML file from a YAML description.

    The YAML document may contain two optional keys:

    * ``locations``: a list of manually written entries. Each entry is a
      mapping that must have a ``url`` key and may have ``lastmod``,
      ``changefreq`` and ``priority`` keys.
    * ``autogen``: a list of page source names. One entry is written for
      every page yielded by iterating each of those sources.
    """
    PROCESSOR_NAME = 'sitemap'

    def __init__(self):
        # NOTE(review): the mapping presumably tells the base class to turn
        # `.sitemap` inputs into `.xml` outputs -- confirm against
        # `SimpleFileProcessor`.
        super(SitemapProcessor, self).__init__({'sitemap': 'xml'})
        # Set when the pipeline starts; not read anywhere in this class.
        self._start_time = None

    def onPipelineStart(self, ctx):
        """Record the wall-clock time at which the pipeline started."""
        self._start_time = time.time()

    def _doProcess(self, in_path, out_path):
        """Read the YAML sitemap at `in_path` and write XML to `out_path`.

        Returns `True` on success. If anything goes wrong while writing,
        the partially written output file is removed and the exception is
        re-raised.
        """
        # `safe_load` only builds plain Python objects, and unlike a bare
        # `yaml.load(fp)` it does not need an explicit `Loader` argument.
        # An empty file parses to `None`; treat it as an empty mapping.
        with open(in_path, 'r', encoding='utf8') as fp:
            sitemap = yaml.safe_load(fp) or {}

        try:
            # The XML declaration in SITEMAP_HEADER says `utf-8`, so the
            # file must be encoded that way regardless of platform default.
            with open(out_path, 'w', encoding='utf8') as fp:
                fp.write(SITEMAP_HEADER)
                self._writeManualLocs(sitemap, fp)
                self._writeAutoLocs(sitemap, fp)
                fp.write(SITEMAP_FOOTER)
        except BaseException:
            # If an exception occurs, delete the output file otherwise
            # the pipeline will think the output was correctly produced.
            if os.path.isfile(out_path):
                logger.debug("Error occured, removing output sitemap.")
                os.unlink(out_path)
            raise

        return True

    def _writeManualLocs(self, sitemap, fp):
        """Write one entry per item of the `locations` list, if any."""
        locs = sitemap.get('locations')
        if not locs:
            return

        logger.debug("Generating manual sitemap entries.")
        for loc in locs:
            self._writeEntry(loc, fp)

    def _writeAutoLocs(self, sitemap, fp):
        """Write one entry per page of each source listed in `autogen`.

        Every generated entry gets the current time as `lastmod`; a page's
        own `sitemap` settings, when present, override the defaults.

        Raises `Exception` if a listed source does not exist.
        """
        source_names = sitemap.get('autogen')
        if not source_names:
            return

        # Computed once so all generated entries share the same timestamp.
        cur_time = strftime_iso8601(time.time())
        for name in source_names:
            logger.debug(
                "Generating automatic sitemap entries for '%s'.", name)
            source = self.app.getSource(name)
            if source is None:
                raise Exception("No such source: %s" % name)

            for page in PageIterator(source):
                args = {'url': page['url'], 'lastmod': cur_time}
                sm_cfg = page.get('sitemap')
                if sm_cfg:
                    args.update(sm_cfg)

                self._writeEntry(args, fp)

    def _writeEntry(self, args, fp):
        """Write a single `<url>` element described by `args` to `fp`.

        `args['url']` is required; `lastmod`, `changefreq` and `priority`
        are written only when present. Values are interpolated verbatim,
        without any XML escaping.
        """
        fp.write(SITEURL_HEADER)
        fp.write(SITEURL_LOC % args['url'])
        if 'lastmod' in args:
            fp.write(SITEURL_LASTMOD % args['lastmod'])
        if 'changefreq' in args:
            fp.write(SITEURL_CHANGEFREQ % args['changefreq'])
        if 'priority' in args:
            fp.write(SITEURL_PRIORITY % args['priority'])
        fp.write(SITEURL_FOOTER)
100 | |
101 | |
def strftime_iso8601(t):
    """Format the epoch timestamp `t` as a UTC date-time string.

    The result has the form ``YYYY-MM-DDTHH:MM:SSZ``.
    """
    utc_fields = time.gmtime(t)
    return time.strftime('%Y-%m-%dT%H:%M:%SZ', utc_fields)
104 |