comparison piecrust/baking/single.py @ 411:e7b865f8f335

bake: Enable multiprocess baking.

Baking is now done by running a worker per CPU and sending jobs to them.
This changes several things across the codebase:

* Ability to not cache things related to pages other than the 'main' page
  (i.e. the page at the bottom of the execution stack).
* Decouple the baking process from the bake records, so that only the main
  process keeps track of (and modifies) the bake record.
* Remove the need for 'batch page getters'; pages are now loaded directly
  from the page factories.

There are various smaller changes included here too, such as support for
scoped performance timers that are saved with the bake record and can be
printed out to the console. Yes, I got carried away.

For testing, the in-memory 'mock' file-system doesn't work anymore since
we're spawning processes, so it is replaced by a 'tmpfs' file-system that
is saved in temporary files on disk and deleted after the tests have run.
author Ludovic Chabant <ludovic@chabant.com>
date Fri, 12 Jun 2015 17:09:19 -0700
parents 4b1019bb2533
children 0e9a94b7fdfa
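
The worker model described in the message above can be pictured with a short,
hypothetical sketch (this is not the actual PieCrust worker code; the
bake_worker/bake_all names, the job tuples and the report dicts are made up
for illustration): one process per CPU pulls jobs from a queue and sends a
small result report back to the parent, so only the main process ever touches
the bake record.

    # Hypothetical sketch of the per-CPU worker model; not the actual
    # PieCrust worker code. Job and report shapes are made up.
    import multiprocessing


    def bake_worker(job_queue, result_queue):
        # Pull (source_name, rel_path) jobs until the None sentinel shows up,
        # bake each one, and send a small report back to the main process.
        # Workers never touch the bake record.
        while True:
            job = job_queue.get()
            if job is None:
                break
            source_name, rel_path = job
            # ... load the page, render it, write the output file ...
            result_queue.put({'source_name': source_name,
                              'rel_path': rel_path,
                              'errors': []})


    def bake_all(jobs):
        job_queue = multiprocessing.Queue()
        result_queue = multiprocessing.Queue()
        workers = [multiprocessing.Process(target=bake_worker,
                                           args=(job_queue, result_queue))
                   for _ in range(multiprocessing.cpu_count())]
        for w in workers:
            w.start()
        for job in jobs:
            job_queue.put(job)
        for _ in workers:
            job_queue.put(None)     # one sentinel per worker
        # Collect one report per job before joining, so the queues drain.
        reports = [result_queue.get() for _ in jobs]
        for w in workers:
            w.join()
        return reports

The diff below is the PageBaker side of that split: it no longer receives or
mutates a bake record, and instead returns a PageBakeInfo report that the
parent process can merge.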
--- piecrust/baking/single.py (410:d1a472464e57)
+++ piecrust/baking/single.py (411:e7b865f8f335)
@@ -1,23 +1,15 @@
 import os.path
-import copy
 import shutil
 import codecs
 import logging
 import urllib.parse
 from piecrust.baking.records import (
-        BakeRecordPassInfo, BakeRecordPageEntry, BakeRecordSubPageEntry)
-from piecrust.data.filters import (
-        PaginationFilter, HasFilterClause,
-        IsFilterClause, AndBooleanClause,
-        page_value_accessor)
+        PageBakeInfo, SubPageBakeInfo, BakePassInfo)
 from piecrust.rendering import (
         QualifiedPage, PageRenderingContext, render_page,
-        PASS_FORMATTING, PASS_RENDERING)
-from piecrust.sources.base import (
-        PageFactory,
-        REALM_NAMES, REALM_USER, REALM_THEME)
+        PASS_FORMATTING)
 from piecrust.uriutil import split_uri


 logger = logging.getLogger(__name__)

@@ -33,16 +25,14 @@
 class BakingError(Exception):
     pass


 class PageBaker(object):
-    def __init__(self, app, out_dir, force=False, record=None,
-                 copy_assets=True):
+    def __init__(self, app, out_dir, force=False, copy_assets=True):
         self.app = app
         self.out_dir = out_dir
         self.force = force
-        self.record = record
         self.copy_assets = copy_assets
         self.site_root = app.config.get('site/root')
         self.pretty_urls = app.config.get('site/pretty_urls')

     def getOutputPath(self, uri):
@@ -58,126 +48,47 @@
         else:
             bake_path.append(decoded_uri)

         return os.path.normpath(os.path.join(*bake_path))

-    def bake(self, factory, route, record_entry):
+    def bake(self, factory, route, route_metadata, prev_entry,
+             first_render_info, dirty_source_names, tax_info=None):
         # Get the page.
         page = factory.buildPage()
-        route_metadata = copy.deepcopy(factory.metadata)
-
-        # Add taxonomy info in the template data and route metadata if needed.
-        bake_taxonomy_info = None
-        if record_entry.taxonomy_info:
-            tax_name, tax_term, tax_source_name = record_entry.taxonomy_info
-            taxonomy = self.app.getTaxonomy(tax_name)
-            slugified_term = route.slugifyTaxonomyTerm(tax_term)
-            route_metadata[taxonomy.term_name] = slugified_term
-            bake_taxonomy_info = (taxonomy, tax_term)
-
-        # Generate the URI.
-        uri = route.getUri(route_metadata, provider=page)
-
-        # See if this URL has been overriden by a previously baked page.
-        # If that page is from another realm (e.g. a user page vs. a theme
-        # page), we silently skip this page. If they're from the same realm,
-        # we don't allow overriding and raise an error (this is probably
-        # because of a misconfigured configuration that allows for ambiguous
-        # URLs between 2 routes or sources).
-        override = self.record.getOverrideEntry(factory, uri)
-        if override is not None:
-            override_source = self.app.getSource(override.source_name)
-            if override_source.realm == factory.source.realm:
-                raise BakingError(
-                        "Page '%s' maps to URL '%s' but is overriden by page"
-                        "'%s:%s'." % (factory.ref_spec, uri,
-                                      override.source_name,
-                                      override.rel_path))
-            logger.debug("'%s' [%s] is overriden by '%s:%s'. Skipping" %
-                         (factory.ref_spec, uri, override.source_name,
-                          override.rel_path))
-            record_entry.flags |= BakeRecordPageEntry.FLAG_OVERRIDEN
-            return
-
-        # Setup the record entry.
-        record_entry.config = copy_public_page_config(page.config)

         # Start baking the sub-pages.
         cur_sub = 1
         has_more_subs = True
-        force_this = self.force
-        invalidate_formatting = False
-        prev_record_entry = self.record.getPreviousEntry(
-                factory.source.name, factory.rel_path,
-                record_entry.taxonomy_info)
-
-        logger.debug("Baking '%s'..." % uri)
+        report = PageBakeInfo()

         while has_more_subs:
             # Get the URL and path for this sub-page.
             sub_uri = route.getUri(route_metadata, sub_num=cur_sub,
                                    provider=page)
+            logger.debug("Baking '%s' [%d]..." % (sub_uri, cur_sub))
             out_path = self.getOutputPath(sub_uri)

             # Create the sub-entry for the bake record.
-            record_sub_entry = BakeRecordSubPageEntry(sub_uri, out_path)
-            record_entry.subs.append(record_sub_entry)
+            sub_entry = SubPageBakeInfo(sub_uri, out_path)
+            report.subs.append(sub_entry)

             # Find a corresponding sub-entry in the previous bake record.
-            prev_record_sub_entry = None
-            if prev_record_entry:
+            prev_sub_entry = None
+            if prev_entry:
                 try:
-                    prev_record_sub_entry = prev_record_entry.getSub(cur_sub)
+                    prev_sub_entry = prev_entry.getSub(cur_sub)
                 except IndexError:
                     pass

-            # Figure out what to do with this page.
-            if (prev_record_sub_entry and
-                    (prev_record_sub_entry.was_baked_successfully or
-                     prev_record_sub_entry.was_clean)):
-                # If the current page is known to use pages from other sources,
-                # see if any of those got baked, or are going to be baked for
-                # some reason. If so, we need to bake this one too.
-                # (this happens for instance with the main page of a blog).
-                dirty_src_names, invalidated_render_passes = (
-                        self._getDirtySourceNamesAndRenderPasses(
-                            prev_record_sub_entry))
-                if len(invalidated_render_passes) > 0:
-                    logger.debug(
-                            "'%s' is known to use sources %s, which have "
-                            "items that got (re)baked. Will force bake this "
-                            "page. " % (uri, dirty_src_names))
-                    record_sub_entry.flags |= \
-                            BakeRecordSubPageEntry.FLAG_FORCED_BY_SOURCE
-                    force_this = True
-
-                    if PASS_FORMATTING in invalidated_render_passes:
-                        logger.debug(
-                                "Will invalidate cached formatting for '%s' "
-                                "since sources were using during that pass."
-                                % uri)
-                        invalidate_formatting = True
-            elif (prev_record_sub_entry and
-                    prev_record_sub_entry.errors):
-                # Previous bake failed. We'll have to bake it again.
-                logger.debug(
-                        "Previous record entry indicates baking failed for "
-                        "'%s'. Will bake it again." % uri)
-                record_sub_entry.flags |= \
-                        BakeRecordSubPageEntry.FLAG_FORCED_BY_PREVIOUS_ERRORS
-                force_this = True
-            elif not prev_record_sub_entry:
-                # No previous record. We'll have to bake it.
-                logger.debug("No previous record entry found for '%s'. Will "
-                             "force bake it." % uri)
-                record_sub_entry.flags |= \
-                        BakeRecordSubPageEntry.FLAG_FORCED_BY_NO_PREVIOUS
-                force_this = True
+            # Figure out if we need to invalidate or force anything.
+            force_this_sub, invalidate_formatting = _compute_force_flags(
+                    prev_sub_entry, sub_entry, dirty_source_names)
+            force_this_sub = force_this_sub or self.force

             # Check for up-to-date outputs.
             do_bake = True
-            if not force_this:
+            if not force_this_sub:
                 try:
                     in_path_time = page.path_mtime
                     out_path_time = os.path.getmtime(out_path)
                     if out_path_time >= in_path_time:
                         do_bake = False
@@ -186,14 +97,14 @@
                     pass

             # If this page didn't bake because it's already up-to-date.
             # Keep trying for as many subs as we know this page has.
             if not do_bake:
-                prev_record_sub_entry.collapseRenderPasses(record_sub_entry)
-                record_sub_entry.flags = BakeRecordSubPageEntry.FLAG_NONE
+                prev_sub_entry.collapseRenderPasses(sub_entry)
+                sub_entry.flags = SubPageBakeInfo.FLAG_NONE

-                if prev_record_entry.num_subs >= cur_sub + 1:
+                if prev_entry.num_subs >= cur_sub + 1:
                     cur_sub += 1
                     has_more_subs = True
                     logger.debug("  %s is up to date, skipping to next "
                                  "sub-page." % out_path)
                     continue
@@ -205,86 +116,158 @@
             try:
                 if invalidate_formatting:
                     cache_key = sub_uri
                     self.app.env.rendered_segments_repository.invalidate(
                             cache_key)
-                    record_sub_entry.flags |= \
-                            BakeRecordSubPageEntry.FLAG_FORMATTING_INVALIDATED
+                    sub_entry.flags |= \
+                            SubPageBakeInfo.FLAG_FORMATTING_INVALIDATED

                 logger.debug("  p%d -> %s" % (cur_sub, out_path))
                 qp = QualifiedPage(page, route, route_metadata)
-                ctx, rp = self._bakeSingle(qp, cur_sub, out_path,
-                                           bake_taxonomy_info)
+                ctx, rp = self._bakeSingle(qp, cur_sub, out_path, tax_info)
             except Exception as ex:
                 if self.app.debug:
                     logger.exception(ex)
                 page_rel_path = os.path.relpath(page.path, self.app.root_dir)
                 raise BakingError("%s: error baking '%s'." %
-                                  (page_rel_path, uri)) from ex
+                                  (page_rel_path, sub_uri)) from ex

             # Record what we did.
-            record_sub_entry.flags |= BakeRecordSubPageEntry.FLAG_BAKED
-            self.record.dirty_source_names.add(record_entry.source_name)
+            sub_entry.flags |= SubPageBakeInfo.FLAG_BAKED
+            # self.record.dirty_source_names.add(record_entry.source_name)
             for p, pinfo in ctx.render_passes.items():
-                brpi = BakeRecordPassInfo()
-                brpi.used_source_names = set(pinfo.used_source_names)
-                brpi.used_taxonomy_terms = set(pinfo.used_taxonomy_terms)
-                record_sub_entry.render_passes[p] = brpi
-            if prev_record_sub_entry:
-                prev_record_sub_entry.collapseRenderPasses(record_sub_entry)
+                bpi = BakePassInfo()
+                bpi.used_source_names = set(pinfo.used_source_names)
+                bpi.used_taxonomy_terms = set(pinfo.used_taxonomy_terms)
+                sub_entry.render_passes[p] = bpi
+            if prev_sub_entry:
+                prev_sub_entry.collapseRenderPasses(sub_entry)
+
+            # If this page has had its first sub-page rendered already, we
+            # have that information from the baker. Otherwise (e.g. for
+            # taxonomy pages), we have that information from the result
+            # of the render.
+            info = ctx
+            if cur_sub == 1 and first_render_info is not None:
+                info = first_render_info

             # Copy page assets.
-            if (cur_sub == 1 and self.copy_assets and
-                    ctx.used_assets is not None):
+            if cur_sub == 1 and self.copy_assets and info.used_assets:
                 if self.pretty_urls:
                     out_assets_dir = os.path.dirname(out_path)
                 else:
                     out_assets_dir, out_name = os.path.split(out_path)
                     if sub_uri != self.site_root:
                         out_name_noext, _ = os.path.splitext(out_name)
                         out_assets_dir += out_name_noext

                 logger.debug("Copying page assets to: %s" % out_assets_dir)
-                if not os.path.isdir(out_assets_dir):
-                    os.makedirs(out_assets_dir, 0o755)
-                for ap in ctx.used_assets:
+                _ensure_dir_exists(out_assets_dir)
+
+                used_assets = info.used_assets
+                for ap in used_assets:
                     dest_ap = os.path.join(out_assets_dir,
                                            os.path.basename(ap))
                     logger.debug("  %s -> %s" % (ap, dest_ap))
                     shutil.copy(ap, dest_ap)
-                    record_entry.assets.append(ap)
+                    report.assets.append(ap)

             # Figure out if we have more work.
             has_more_subs = False
-            if ctx.used_pagination is not None:
-                if ctx.used_pagination.has_more:
-                    cur_sub += 1
-                    has_more_subs = True
-
-    def _bakeSingle(self, qualified_page, num, out_path, taxonomy_info=None):
+            if info.pagination_has_more:
+                cur_sub += 1
+                has_more_subs = True
+
+        return report
+
+    def _bakeSingle(self, qualified_page, num, out_path, tax_info=None):
         ctx = PageRenderingContext(qualified_page, page_num=num)
-        if taxonomy_info:
-            ctx.setTaxonomyFilter(taxonomy_info[0], taxonomy_info[1])
+        if tax_info:
+            tax = self.app.getTaxonomy(tax_info.taxonomy_name)
+            ctx.setTaxonomyFilter(tax, tax_info.term)

         rp = render_page(ctx)

         out_dir = os.path.dirname(out_path)
-        if not os.path.isdir(out_dir):
-            os.makedirs(out_dir, 0o755)
+        _ensure_dir_exists(out_dir)

         with codecs.open(out_path, 'w', 'utf8') as fp:
             fp.write(rp.content)

         return ctx, rp

-    def _getDirtySourceNamesAndRenderPasses(self, record_sub_entry):
-        dirty_src_names = set()
-        invalidated_render_passes = set()
-        for p, pinfo in record_sub_entry.render_passes.items():
-            for src_name in pinfo.used_source_names:
-                is_dirty = (src_name in self.record.dirty_source_names)
-                if is_dirty:
-                    invalidated_render_passes.add(p)
-                    dirty_src_names.add(src_name)
-                    break
-        return dirty_src_names, invalidated_render_passes
-
+
+def _compute_force_flags(prev_sub_entry, sub_entry, dirty_source_names):
+    # Figure out what to do with this page.
+    force_this_sub = False
+    invalidate_formatting = False
+    sub_uri = sub_entry.out_uri
+    if (prev_sub_entry and
+            (prev_sub_entry.was_baked_successfully or
+             prev_sub_entry.was_clean)):
+        # If the current page is known to use pages from other sources,
+        # see if any of those got baked, or are going to be baked for
+        # some reason. If so, we need to bake this one too.
+        # (this happens for instance with the main page of a blog).
+        dirty_for_this, invalidated_render_passes = (
+                _get_dirty_source_names_and_render_passes(
+                    prev_sub_entry, dirty_source_names))
+        if len(invalidated_render_passes) > 0:
+            logger.debug(
+                    "'%s' is known to use sources %s, which have "
+                    "items that got (re)baked. Will force bake this "
+                    "page. " % (sub_uri, dirty_for_this))
+            sub_entry.flags |= \
+                    SubPageBakeInfo.FLAG_FORCED_BY_SOURCE
+            force_this_sub = True
+
+            if PASS_FORMATTING in invalidated_render_passes:
+                logger.debug(
+                        "Will invalidate cached formatting for '%s' "
+                        "since sources were using during that pass."
+                        % sub_uri)
+                invalidate_formatting = True
+    elif (prev_sub_entry and
+            prev_sub_entry.errors):
+        # Previous bake failed. We'll have to bake it again.
+        logger.debug(
+                "Previous record entry indicates baking failed for "
+                "'%s'. Will bake it again." % sub_uri)
+        sub_entry.flags |= \
+                SubPageBakeInfo.FLAG_FORCED_BY_PREVIOUS_ERRORS
+        force_this_sub = True
+    elif not prev_sub_entry:
+        # No previous record. We'll have to bake it.
+        logger.debug("No previous record entry found for '%s'. Will "
+                     "force bake it." % sub_uri)
+        sub_entry.flags |= \
+                SubPageBakeInfo.FLAG_FORCED_BY_NO_PREVIOUS
+        force_this_sub = True
+
+    return force_this_sub, invalidate_formatting
+
+
+def _get_dirty_source_names_and_render_passes(
+        sub_entry, dirty_source_names):
+    dirty_for_this = set()
+    invalidated_render_passes = set()
+    for p, pinfo in sub_entry.render_passes.items():
+        for src_name in pinfo.used_source_names:
+            is_dirty = (src_name in dirty_source_names)
+            if is_dirty:
+                invalidated_render_passes.add(p)
+                dirty_for_this.add(src_name)
+                break
+    return dirty_for_this, invalidated_render_passes
+
+
+def _ensure_dir_exists(path):
+    try:
+        os.makedirs(path, mode=0o755, exist_ok=True)
+    except OSError:
+        # In a multiprocess environment, several process may very
+        # occasionally try to create the same directory at the same time.
+        # Let's ignore any error and if something's really wrong (like file
+        # acces permissions or whatever), then it will more legitimately fail
+        # just after this when we try to write files.
+        pass
+
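
Since PageBaker.bake() now returns a PageBakeInfo report instead of writing
into self.record (note the commented-out dirty_source_names line and the new
"return report"), folding results back into the bake record has to happen in
the main process. A minimal sketch of what that merge might look like,
assuming hypothetical record/record_entry objects with the same fields the
old code used (subs, assets, dirty_source_names, source_name); this is not
the actual PieCrust merge code:

    # Hypothetical sketch; `record` and `record_entry` stand in for the bake
    # record objects owned by the main process, not PieCrust's real classes.
    def merge_page_report(record, record_entry, report):
        # Fold the PageBakeInfo returned by PageBaker.bake() back into the
        # record entry, and mark the page's source dirty if anything baked.
        for sub_entry in report.subs:
            record_entry.subs.append(sub_entry)
            if sub_entry.flags & sub_entry.FLAG_BAKED:
                record.dirty_source_names.add(record_entry.source_name)
        for asset_path in report.assets:
            record_entry.assets.append(asset_path)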