piecrust2: comparison of piecrust/baking/single.py @ 411:e7b865f8f335
bake: Enable multiprocess baking.
Baking is now done by running one worker per CPU and sending jobs to them
(a rough sketch of this model is shown below). This changes several things
across the codebase:
* Ability to not cache things related to pages other than the 'main' page
  (i.e. the page at the bottom of the execution stack).
* Decouple the baking process from the bake records, so that only the main
  process keeps track of (and modifies) the bake record.
* Remove the need for 'batch page getters'; pages are now loaded directly
  from the page factories.
There are various smaller changes included here too, including support for
scoped performance timers that are saved with the bake record and can be
printed out to the console. Yes, I got carried away.
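
As a rough illustration of the worker-per-CPU model described above, here is a
minimal sketch. It is not the actual piecrust worker code; bake_page_job and
the job tuples are hypothetical stand-ins for the real job payloads and record
entries:

    # Minimal sketch of "one worker per CPU, send jobs to them".
    # Not piecrust code: bake_page_job and the job format are made up.
    import multiprocessing


    def bake_page_job(job):
        # A real job would identify a page to bake and return a record
        # entry; here we just echo the job back with a success flag.
        source_name, rel_path = job
        return {'source': source_name, 'path': rel_path, 'ok': True}


    def bake_all(jobs):
        # One worker process per CPU; only the main process collects the
        # results (and, in piecrust, updates the bake record).
        with multiprocessing.Pool(processes=multiprocessing.cpu_count()) as pool:
            return pool.map(bake_page_job, jobs)


    if __name__ == '__main__':
        print(bake_all([('posts', '2015/06/hello.md'), ('pages', 'about.md')]))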
For testing, the in-memory 'mock' file system doesn't work anymore, since
we're now spawning processes, so it is replaced by a 'tmpfs' file system
whose contents are saved in temporary files on disk and deleted after the
tests have run.
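
The 'tmpfs' test file system mentioned above can be thought of as real files
under a throwaway temporary directory. The sketch below is only an illustration
under that assumption; TempFileSystem is a hypothetical helper, not piecrust's
actual test fixture:

    # Hypothetical sketch of a temporary, on-disk test file system.
    # Unlike an in-memory mock, files written here are visible to any
    # worker processes spawned during a bake.
    import os
    import shutil
    import tempfile


    class TempFileSystem:
        def __init__(self):
            self.root = tempfile.mkdtemp(prefix='piecrust_test_')

        def write(self, rel_path, contents):
            # Create parent directories as needed and write the file.
            path = os.path.join(self.root, rel_path)
            os.makedirs(os.path.dirname(path), exist_ok=True)
            with open(path, 'w', encoding='utf8') as fp:
                fp.write(contents)
            return path

        def cleanup(self):
            # Delete everything once the test is done.
            shutil.rmtree(self.root, ignore_errors=True)


    fs = TempFileSystem()
    try:
        fs.write('kitchen/pages/about.md', "---\ntitle: About\n---\nHello.")
    finally:
        fs.cleanup()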
author   | Ludovic Chabant <ludovic@chabant.com>
date     | Fri, 12 Jun 2015 17:09:19 -0700
parents  | 4b1019bb2533
children | 0e9a94b7fdfa
comparison
410:d1a472464e57 (old) | 411:e7b865f8f335 (new)
1 import os.path | 1 import os.path |
2 import copy | |
3 import shutil | 2 import shutil |
4 import codecs | 3 import codecs |
5 import logging | 4 import logging |
6 import urllib.parse | 5 import urllib.parse |
7 from piecrust.baking.records import ( | 6 from piecrust.baking.records import ( |
8 BakeRecordPassInfo, BakeRecordPageEntry, BakeRecordSubPageEntry) | 7 PageBakeInfo, SubPageBakeInfo, BakePassInfo) |
9 from piecrust.data.filters import ( | |
10 PaginationFilter, HasFilterClause, | |
11 IsFilterClause, AndBooleanClause, | |
12 page_value_accessor) | |
13 from piecrust.rendering import ( | 8 from piecrust.rendering import ( |
14 QualifiedPage, PageRenderingContext, render_page, | 9 QualifiedPage, PageRenderingContext, render_page, |
15 PASS_FORMATTING, PASS_RENDERING) | 10 PASS_FORMATTING) |
16 from piecrust.sources.base import ( | |
17 PageFactory, | |
18 REALM_NAMES, REALM_USER, REALM_THEME) | |
19 from piecrust.uriutil import split_uri | 11 from piecrust.uriutil import split_uri |
20 | 12 |
21 | 13 |
22 logger = logging.getLogger(__name__) | 14 logger = logging.getLogger(__name__) |
23 | 15 |
33 class BakingError(Exception): | 25 class BakingError(Exception): |
34 pass | 26 pass |
35 | 27 |
36 | 28 |
37 class PageBaker(object): | 29 class PageBaker(object): |
38 def __init__(self, app, out_dir, force=False, record=None, | 30 def __init__(self, app, out_dir, force=False, copy_assets=True): |
39 copy_assets=True): | |
40 self.app = app | 31 self.app = app |
41 self.out_dir = out_dir | 32 self.out_dir = out_dir |
42 self.force = force | 33 self.force = force |
43 self.record = record | |
44 self.copy_assets = copy_assets | 34 self.copy_assets = copy_assets |
45 self.site_root = app.config.get('site/root') | 35 self.site_root = app.config.get('site/root') |
46 self.pretty_urls = app.config.get('site/pretty_urls') | 36 self.pretty_urls = app.config.get('site/pretty_urls') |
47 | 37 |
48 def getOutputPath(self, uri): | 38 def getOutputPath(self, uri): |
58 else: | 48 else: |
59 bake_path.append(decoded_uri) | 49 bake_path.append(decoded_uri) |
60 | 50 |
61 return os.path.normpath(os.path.join(*bake_path)) | 51 return os.path.normpath(os.path.join(*bake_path)) |
62 | 52 |
63 def bake(self, factory, route, record_entry): | 53 def bake(self, factory, route, route_metadata, prev_entry, |
54 first_render_info, dirty_source_names, tax_info=None): | |
64 # Get the page. | 55 # Get the page. |
65 page = factory.buildPage() | 56 page = factory.buildPage() |
66 route_metadata = copy.deepcopy(factory.metadata) | |
67 | |
68 # Add taxonomy info in the template data and route metadata if needed. | |
69 bake_taxonomy_info = None | |
70 if record_entry.taxonomy_info: | |
71 tax_name, tax_term, tax_source_name = record_entry.taxonomy_info | |
72 taxonomy = self.app.getTaxonomy(tax_name) | |
73 slugified_term = route.slugifyTaxonomyTerm(tax_term) | |
74 route_metadata[taxonomy.term_name] = slugified_term | |
75 bake_taxonomy_info = (taxonomy, tax_term) | |
76 | |
77 # Generate the URI. | |
78 uri = route.getUri(route_metadata, provider=page) | |
79 | |
80 # See if this URL has been overriden by a previously baked page. | |
81 # If that page is from another realm (e.g. a user page vs. a theme | |
82 # page), we silently skip this page. If they're from the same realm, | |
83 # we don't allow overriding and raise an error (this is probably | |
84 # because of a misconfigured configuration that allows for ambiguous | |
85 # URLs between 2 routes or sources). | |
86 override = self.record.getOverrideEntry(factory, uri) | |
87 if override is not None: | |
88 override_source = self.app.getSource(override.source_name) | |
89 if override_source.realm == factory.source.realm: | |
90 raise BakingError( | |
91 "Page '%s' maps to URL '%s' but is overriden by page" | |
92 "'%s:%s'." % (factory.ref_spec, uri, | |
93 override.source_name, | |
94 override.rel_path)) | |
95 logger.debug("'%s' [%s] is overriden by '%s:%s'. Skipping" % | |
96 (factory.ref_spec, uri, override.source_name, | |
97 override.rel_path)) | |
98 record_entry.flags |= BakeRecordPageEntry.FLAG_OVERRIDEN | |
99 return | |
100 | |
101 # Setup the record entry. | |
102 record_entry.config = copy_public_page_config(page.config) | |
103 | 57 |
104 # Start baking the sub-pages. | 58 # Start baking the sub-pages. |
105 cur_sub = 1 | 59 cur_sub = 1 |
106 has_more_subs = True | 60 has_more_subs = True |
107 force_this = self.force | 61 report = PageBakeInfo() |
108 invalidate_formatting = False | |
109 prev_record_entry = self.record.getPreviousEntry( | |
110 factory.source.name, factory.rel_path, | |
111 record_entry.taxonomy_info) | |
112 | |
113 logger.debug("Baking '%s'..." % uri) | |
114 | 62 |
115 while has_more_subs: | 63 while has_more_subs: |
116 # Get the URL and path for this sub-page. | 64 # Get the URL and path for this sub-page. |
117 sub_uri = route.getUri(route_metadata, sub_num=cur_sub, | 65 sub_uri = route.getUri(route_metadata, sub_num=cur_sub, |
118 provider=page) | 66 provider=page) |
67 logger.debug("Baking '%s' [%d]..." % (sub_uri, cur_sub)) | |
119 out_path = self.getOutputPath(sub_uri) | 68 out_path = self.getOutputPath(sub_uri) |
120 | 69 |
121 # Create the sub-entry for the bake record. | 70 # Create the sub-entry for the bake record. |
122 record_sub_entry = BakeRecordSubPageEntry(sub_uri, out_path) | 71 sub_entry = SubPageBakeInfo(sub_uri, out_path) |
123 record_entry.subs.append(record_sub_entry) | 72 report.subs.append(sub_entry) |
124 | 73 |
125 # Find a corresponding sub-entry in the previous bake record. | 74 # Find a corresponding sub-entry in the previous bake record. |
126 prev_record_sub_entry = None | 75 prev_sub_entry = None |
127 if prev_record_entry: | 76 if prev_entry: |
128 try: | 77 try: |
129 prev_record_sub_entry = prev_record_entry.getSub(cur_sub) | 78 prev_sub_entry = prev_entry.getSub(cur_sub) |
130 except IndexError: | 79 except IndexError: |
131 pass | 80 pass |
132 | 81 |
133 # Figure out what to do with this page. | 82 # Figure out if we need to invalidate or force anything. |
134 if (prev_record_sub_entry and | 83 force_this_sub, invalidate_formatting = _compute_force_flags( |
135 (prev_record_sub_entry.was_baked_successfully or | 84 prev_sub_entry, sub_entry, dirty_source_names) |
136 prev_record_sub_entry.was_clean)): | 85 force_this_sub = force_this_sub or self.force |
137 # If the current page is known to use pages from other sources, | |
138 # see if any of those got baked, or are going to be baked for | |
139 # some reason. If so, we need to bake this one too. | |
140 # (this happens for instance with the main page of a blog). | |
141 dirty_src_names, invalidated_render_passes = ( | |
142 self._getDirtySourceNamesAndRenderPasses( | |
143 prev_record_sub_entry)) | |
144 if len(invalidated_render_passes) > 0: | |
145 logger.debug( | |
146 "'%s' is known to use sources %s, which have " | |
147 "items that got (re)baked. Will force bake this " | |
148 "page. " % (uri, dirty_src_names)) | |
149 record_sub_entry.flags |= \ | |
150 BakeRecordSubPageEntry.FLAG_FORCED_BY_SOURCE | |
151 force_this = True | |
152 | |
153 if PASS_FORMATTING in invalidated_render_passes: | |
154 logger.debug( | |
155 "Will invalidate cached formatting for '%s' " | |
156 "since sources were using during that pass." | |
157 % uri) | |
158 invalidate_formatting = True | |
159 elif (prev_record_sub_entry and | |
160 prev_record_sub_entry.errors): | |
161 # Previous bake failed. We'll have to bake it again. | |
162 logger.debug( | |
163 "Previous record entry indicates baking failed for " | |
164 "'%s'. Will bake it again." % uri) | |
165 record_sub_entry.flags |= \ | |
166 BakeRecordSubPageEntry.FLAG_FORCED_BY_PREVIOUS_ERRORS | |
167 force_this = True | |
168 elif not prev_record_sub_entry: | |
169 # No previous record. We'll have to bake it. | |
170 logger.debug("No previous record entry found for '%s'. Will " | |
171 "force bake it." % uri) | |
172 record_sub_entry.flags |= \ | |
173 BakeRecordSubPageEntry.FLAG_FORCED_BY_NO_PREVIOUS | |
174 force_this = True | |
175 | 86 |
176 # Check for up-to-date outputs. | 87 # Check for up-to-date outputs. |
177 do_bake = True | 88 do_bake = True |
178 if not force_this: | 89 if not force_this_sub: |
179 try: | 90 try: |
180 in_path_time = page.path_mtime | 91 in_path_time = page.path_mtime |
181 out_path_time = os.path.getmtime(out_path) | 92 out_path_time = os.path.getmtime(out_path) |
182 if out_path_time >= in_path_time: | 93 if out_path_time >= in_path_time: |
183 do_bake = False | 94 do_bake = False |
186 pass | 97 pass |
187 | 98 |
188 # If this page didn't bake because it's already up-to-date. | 99 # If this page didn't bake because it's already up-to-date. |
189 # Keep trying for as many subs as we know this page has. | 100 # Keep trying for as many subs as we know this page has. |
190 if not do_bake: | 101 if not do_bake: |
191 prev_record_sub_entry.collapseRenderPasses(record_sub_entry) | 102 prev_sub_entry.collapseRenderPasses(sub_entry) |
192 record_sub_entry.flags = BakeRecordSubPageEntry.FLAG_NONE | 103 sub_entry.flags = SubPageBakeInfo.FLAG_NONE |
193 | 104 |
194 if prev_record_entry.num_subs >= cur_sub + 1: | 105 if prev_entry.num_subs >= cur_sub + 1: |
195 cur_sub += 1 | 106 cur_sub += 1 |
196 has_more_subs = True | 107 has_more_subs = True |
197 logger.debug(" %s is up to date, skipping to next " | 108 logger.debug(" %s is up to date, skipping to next " |
198 "sub-page." % out_path) | 109 "sub-page." % out_path) |
199 continue | 110 continue |
205 try: | 116 try: |
206 if invalidate_formatting: | 117 if invalidate_formatting: |
207 cache_key = sub_uri | 118 cache_key = sub_uri |
208 self.app.env.rendered_segments_repository.invalidate( | 119 self.app.env.rendered_segments_repository.invalidate( |
209 cache_key) | 120 cache_key) |
210 record_sub_entry.flags |= \ | 121 sub_entry.flags |= \ |
211 BakeRecordSubPageEntry.FLAG_FORMATTING_INVALIDATED | 122 SubPageBakeInfo.FLAG_FORMATTING_INVALIDATED |
212 | 123 |
213 logger.debug(" p%d -> %s" % (cur_sub, out_path)) | 124 logger.debug(" p%d -> %s" % (cur_sub, out_path)) |
214 qp = QualifiedPage(page, route, route_metadata) | 125 qp = QualifiedPage(page, route, route_metadata) |
215 ctx, rp = self._bakeSingle(qp, cur_sub, out_path, | 126 ctx, rp = self._bakeSingle(qp, cur_sub, out_path, tax_info) |
216 bake_taxonomy_info) | |
217 except Exception as ex: | 127 except Exception as ex: |
218 if self.app.debug: | 128 if self.app.debug: |
219 logger.exception(ex) | 129 logger.exception(ex) |
220 page_rel_path = os.path.relpath(page.path, self.app.root_dir) | 130 page_rel_path = os.path.relpath(page.path, self.app.root_dir) |
221 raise BakingError("%s: error baking '%s'." % | 131 raise BakingError("%s: error baking '%s'." % |
222 (page_rel_path, uri)) from ex | 132 (page_rel_path, sub_uri)) from ex |
223 | 133 |
224 # Record what we did. | 134 # Record what we did. |
225 record_sub_entry.flags |= BakeRecordSubPageEntry.FLAG_BAKED | 135 sub_entry.flags |= SubPageBakeInfo.FLAG_BAKED |
226 self.record.dirty_source_names.add(record_entry.source_name) | 136 # self.record.dirty_source_names.add(record_entry.source_name) |
227 for p, pinfo in ctx.render_passes.items(): | 137 for p, pinfo in ctx.render_passes.items(): |
228 brpi = BakeRecordPassInfo() | 138 bpi = BakePassInfo() |
229 brpi.used_source_names = set(pinfo.used_source_names) | 139 bpi.used_source_names = set(pinfo.used_source_names) |
230 brpi.used_taxonomy_terms = set(pinfo.used_taxonomy_terms) | 140 bpi.used_taxonomy_terms = set(pinfo.used_taxonomy_terms) |
231 record_sub_entry.render_passes[p] = brpi | 141 sub_entry.render_passes[p] = bpi |
232 if prev_record_sub_entry: | 142 if prev_sub_entry: |
233 prev_record_sub_entry.collapseRenderPasses(record_sub_entry) | 143 prev_sub_entry.collapseRenderPasses(sub_entry) |
144 | |
145 # If this page has had its first sub-page rendered already, we | |
146 # have that information from the baker. Otherwise (e.g. for | |
147 # taxonomy pages), we have that information from the result | |
148 # of the render. | |
149 info = ctx | |
150 if cur_sub == 1 and first_render_info is not None: | |
151 info = first_render_info | |
234 | 152 |
235 # Copy page assets. | 153 # Copy page assets. |
236 if (cur_sub == 1 and self.copy_assets and | 154 if cur_sub == 1 and self.copy_assets and info.used_assets: |
237 ctx.used_assets is not None): | |
238 if self.pretty_urls: | 155 if self.pretty_urls: |
239 out_assets_dir = os.path.dirname(out_path) | 156 out_assets_dir = os.path.dirname(out_path) |
240 else: | 157 else: |
241 out_assets_dir, out_name = os.path.split(out_path) | 158 out_assets_dir, out_name = os.path.split(out_path) |
242 if sub_uri != self.site_root: | 159 if sub_uri != self.site_root: |
243 out_name_noext, _ = os.path.splitext(out_name) | 160 out_name_noext, _ = os.path.splitext(out_name) |
244 out_assets_dir += out_name_noext | 161 out_assets_dir += out_name_noext |
245 | 162 |
246 logger.debug("Copying page assets to: %s" % out_assets_dir) | 163 logger.debug("Copying page assets to: %s" % out_assets_dir) |
247 if not os.path.isdir(out_assets_dir): | 164 _ensure_dir_exists(out_assets_dir) |
248 os.makedirs(out_assets_dir, 0o755) | 165 |
249 for ap in ctx.used_assets: | 166 used_assets = info.used_assets |
167 for ap in used_assets: | |
250 dest_ap = os.path.join(out_assets_dir, | 168 dest_ap = os.path.join(out_assets_dir, |
251 os.path.basename(ap)) | 169 os.path.basename(ap)) |
252 logger.debug(" %s -> %s" % (ap, dest_ap)) | 170 logger.debug(" %s -> %s" % (ap, dest_ap)) |
253 shutil.copy(ap, dest_ap) | 171 shutil.copy(ap, dest_ap) |
254 record_entry.assets.append(ap) | 172 report.assets.append(ap) |
255 | 173 |
256 # Figure out if we have more work. | 174 # Figure out if we have more work. |
257 has_more_subs = False | 175 has_more_subs = False |
258 if ctx.used_pagination is not None: | 176 if info.pagination_has_more: |
259 if ctx.used_pagination.has_more: | 177 cur_sub += 1 |
260 cur_sub += 1 | 178 has_more_subs = True |
261 has_more_subs = True | 179 |
262 | 180 return report |
263 def _bakeSingle(self, qualified_page, num, out_path, taxonomy_info=None): | 181 |
182 def _bakeSingle(self, qualified_page, num, out_path, tax_info=None): | |
264 ctx = PageRenderingContext(qualified_page, page_num=num) | 183 ctx = PageRenderingContext(qualified_page, page_num=num) |
265 if taxonomy_info: | 184 if tax_info: |
266 ctx.setTaxonomyFilter(taxonomy_info[0], taxonomy_info[1]) | 185 tax = self.app.getTaxonomy(tax_info.taxonomy_name) |
186 ctx.setTaxonomyFilter(tax, tax_info.term) | |
267 | 187 |
268 rp = render_page(ctx) | 188 rp = render_page(ctx) |
269 | 189 |
270 out_dir = os.path.dirname(out_path) | 190 out_dir = os.path.dirname(out_path) |
271 if not os.path.isdir(out_dir): | 191 _ensure_dir_exists(out_dir) |
272 os.makedirs(out_dir, 0o755) | |
273 | 192 |
274 with codecs.open(out_path, 'w', 'utf8') as fp: | 193 with codecs.open(out_path, 'w', 'utf8') as fp: |
275 fp.write(rp.content) | 194 fp.write(rp.content) |
276 | 195 |
277 return ctx, rp | 196 return ctx, rp |
278 | 197 |
279 def _getDirtySourceNamesAndRenderPasses(self, record_sub_entry): | 198 |
280 dirty_src_names = set() | 199 def _compute_force_flags(prev_sub_entry, sub_entry, dirty_source_names): |
281 invalidated_render_passes = set() | 200 # Figure out what to do with this page. |
282 for p, pinfo in record_sub_entry.render_passes.items(): | 201 force_this_sub = False |
283 for src_name in pinfo.used_source_names: | 202 invalidate_formatting = False |
284 is_dirty = (src_name in self.record.dirty_source_names) | 203 sub_uri = sub_entry.out_uri |
285 if is_dirty: | 204 if (prev_sub_entry and |
286 invalidated_render_passes.add(p) | 205 (prev_sub_entry.was_baked_successfully or |
287 dirty_src_names.add(src_name) | 206 prev_sub_entry.was_clean)): |
288 break | 207 # If the current page is known to use pages from other sources, |
289 return dirty_src_names, invalidated_render_passes | 208 # see if any of those got baked, or are going to be baked for |
290 | 209 # some reason. If so, we need to bake this one too. |
210 # (this happens for instance with the main page of a blog). | |
211 dirty_for_this, invalidated_render_passes = ( | |
212 _get_dirty_source_names_and_render_passes( | |
213 prev_sub_entry, dirty_source_names)) | |
214 if len(invalidated_render_passes) > 0: | |
215 logger.debug( | |
216 "'%s' is known to use sources %s, which have " | |
217 "items that got (re)baked. Will force bake this " | |
218 "page. " % (sub_uri, dirty_for_this)) | |
219 sub_entry.flags |= \ | |
220 SubPageBakeInfo.FLAG_FORCED_BY_SOURCE | |
221 force_this_sub = True | |
222 | |
223 if PASS_FORMATTING in invalidated_render_passes: | |
224 logger.debug( | |
225 "Will invalidate cached formatting for '%s' " | |
226 "since sources were using during that pass." | |
227 % sub_uri) | |
228 invalidate_formatting = True | |
229 elif (prev_sub_entry and | |
230 prev_sub_entry.errors): | |
231 # Previous bake failed. We'll have to bake it again. | |
232 logger.debug( | |
233 "Previous record entry indicates baking failed for " | |
234 "'%s'. Will bake it again." % sub_uri) | |
235 sub_entry.flags |= \ | |
236 SubPageBakeInfo.FLAG_FORCED_BY_PREVIOUS_ERRORS | |
237 force_this_sub = True | |
238 elif not prev_sub_entry: | |
239 # No previous record. We'll have to bake it. | |
240 logger.debug("No previous record entry found for '%s'. Will " | |
241 "force bake it." % sub_uri) | |
242 sub_entry.flags |= \ | |
243 SubPageBakeInfo.FLAG_FORCED_BY_NO_PREVIOUS | |
244 force_this_sub = True | |
245 | |
246 return force_this_sub, invalidate_formatting | |
247 | |
248 | |
249 def _get_dirty_source_names_and_render_passes( | |
250 sub_entry, dirty_source_names): | |
251 dirty_for_this = set() | |
252 invalidated_render_passes = set() | |
253 for p, pinfo in sub_entry.render_passes.items(): | |
254 for src_name in pinfo.used_source_names: | |
255 is_dirty = (src_name in dirty_source_names) | |
256 if is_dirty: | |
257 invalidated_render_passes.add(p) | |
258 dirty_for_this.add(src_name) | |
259 break | |
260 return dirty_for_this, invalidated_render_passes | |
261 | |
262 | |
263 def _ensure_dir_exists(path): | |
264 try: | |
265 os.makedirs(path, mode=0o755, exist_ok=True) | |
266 except OSError: | |
267 # In a multiprocess environment, several processes may very |
268 # occasionally try to create the same directory at the same time. |
269 # Let's ignore any error and if something's really wrong (like file |
270 # access permissions or whatever), then it will more legitimately fail |
271 # just after this when we try to write files. | |
272 pass | |
273 |