comparison piecrust/baking/baker.py @ 150:91dcbb5fe1e8

Split baking code in smaller files.
author Ludovic Chabant <ludovic@chabant.com>
date Sun, 30 Nov 2014 21:46:42 -0800
parents ab6e7e0e9d44
children fd146f54bdaa
149:ea4a17831242 150:91dcbb5fe1e8
1 import time 1 import time
2 import os.path 2 import os.path
3 import codecs
4 import shutil 3 import shutil
5 import hashlib 4 import hashlib
6 import logging 5 import logging
7 import threading 6 import threading
8 import urllib.request, urllib.error, urllib.parse
9 from piecrust.baking.records import (TransitionalBakeRecord, 7 from piecrust.baking.records import (TransitionalBakeRecord,
10 BakeRecordPageEntry, 8 BakeRecordPageEntry)
11 FLAG_OVERRIDEN, FLAG_SOURCE_MODIFIED) 9 from piecrust.baking.scheduler import BakeScheduler
10 from piecrust.baking.single import (BakingError, PageBaker)
12 from piecrust.chefutil import format_timed, log_friendly_exception 11 from piecrust.chefutil import format_timed, log_friendly_exception
13 from piecrust.data.filters import (PaginationFilter, HasFilterClause,
14 IsFilterClause, AndBooleanClause)
15 from piecrust.rendering import (PageRenderingContext, render_page,
16 PASS_FORMATTING, PASS_RENDERING)
17 from piecrust.sources.base import (PageFactory, 12 from piecrust.sources.base import (PageFactory,
18 REALM_NAMES, REALM_USER, REALM_THEME) 13 REALM_NAMES, REALM_USER, REALM_THEME)
19 14
20 15
21 logger = logging.getLogger(__name__) 16 logger = logging.getLogger(__name__)
22
23
24 class BakingError(Exception):
25 pass
26
27
28 class PageBaker(object):
29 def __init__(self, app, out_dir, force=False, record=None,
30 copy_assets=True):
31 self.app = app
32 self.out_dir = out_dir
33 self.force = force
34 self.record = record
35 self.copy_assets = copy_assets
36 self.site_root = app.config.get('site/root')
37 self.pretty_urls = app.config.get('site/pretty_urls')
38 self.pagination_suffix = app.config.get('site/pagination_suffix')
39
40 def getOutputUri(self, uri, num):
41 suffix = self.pagination_suffix.replace('%num%', str(num))
42 if self.pretty_urls:
43 # Output will be:
44 # - `uri/name`
45 # - `uri/name/2`
46 # - `uri/name.ext`
47 # - `uri/name.ext/2`
48 if num <= 1:
49 return uri
50 return uri + suffix
51 else:
52 # Output will be:
53 # - `uri/name.html`
54 # - `uri/name/2.html`
55 # - `uri/name.ext`
56 # - `uri/name/2.ext`
57 if uri == '/':
58 if num <= 1:
59 return '/'
60 return '/' + suffix.lstrip('/')
61 else:
62 if num <= 1:
63 return uri
64 #TODO: watch out for tags with dots in them.
65 base_uri, ext = os.path.splitext(uri)
66 return base_uri + suffix + ext
67
68 def getOutputPath(self, uri):
69 bake_path = [self.out_dir]
70 decoded_uri = urllib.parse.unquote(uri.lstrip('/'))
71 if self.pretty_urls:
72 bake_path.append(decoded_uri)
73 bake_path.append('index.html')
74 else:
75 name, ext = os.path.splitext(decoded_uri)
76 if decoded_uri == '':
77 bake_path.append('index.html')
78 elif ext:
79 bake_path.append(decoded_uri)
80 else:
81 bake_path.append(decoded_uri + '.html')
82
83 return os.path.normpath(os.path.join(*bake_path))
84
85 def bake(self, factory, route, record_entry,
86 taxonomy_name=None, taxonomy_term=None):
87 custom_data = None
88 pagination_filter = None
89 route_metadata = dict(factory.metadata)
90 if taxonomy_name and taxonomy_term:
91 # Must bake a taxonomy listing page... we'll have to add a
92 # pagination filter to only get matching posts, and the output
93 # URL will be a bit different.
94 tax = self.app.getTaxonomy(taxonomy_name)
95 pagination_filter = PaginationFilter()
96 if tax.is_multiple:
97 if isinstance(taxonomy_term, tuple):
98 abc = AndBooleanClause()
99 for t in taxonomy_term:
100 abc.addClause(HasFilterClause(taxonomy_name, t))
101 pagination_filter.addClause(abc)
102 slugified_term = '/'.join(taxonomy_term)
103 else:
104 pagination_filter.addClause(HasFilterClause(taxonomy_name,
105 taxonomy_term))
106 slugified_term = taxonomy_term
107 else:
108 pagination_filter.addClause(IsFilterClause(taxonomy_name,
109 taxonomy_term))
110 slugified_term = taxonomy_term
111 custom_data = {tax.term_name: taxonomy_term}
112 route_metadata.update({tax.term_name: slugified_term})
113
114 # Generate the URL using the route.
115 page = factory.buildPage()
116 uri = route.getUri(route_metadata, page)
117
118 override = self.record.getOverrideEntry(factory, uri)
119 if override is not None:
120 override_source = self.app.getSource(override.source_name)
121 if override_source.realm == factory.source.realm:
122 raise BakingError(
123 "Page '%s' maps to URL '%s' but is overriden by page"
124 "'%s:%s'." % (factory.ref_spec, uri,
125 override.source_name, override.rel_path))
126 logger.debug("'%s' [%s] is overriden by '%s:%s'. Skipping" %
127 (factory.ref_spec, uri, override.source_name,
128 override.rel_path))
129 record_entry.flags |= FLAG_OVERRIDEN
130 return
131
132 cur_sub = 1
133 has_more_subs = True
134 force_this = self.force
135 invalidate_formatting = False
136 record_entry.config = page.config.get().copy()
137 prev_record_entry = self.record.getPreviousEntry(
138 factory.source.name, factory.rel_path,
139 taxonomy_name, taxonomy_term)
140
141 logger.debug("Baking '%s'..." % uri)
142
143 # If the current page is known to use pages from other sources,
144 # see if any of those got baked, or are going to be baked for some
145 # reason. If so, we need to bake this one too.
146 # (This happens, for instance, with the main page of a blog.)
147 if prev_record_entry and prev_record_entry.was_baked_successfully:
148 invalidated_render_passes = set()
149 used_src_names = list(prev_record_entry.used_source_names)
150 for src_name, rdr_pass in used_src_names:
151 entries = self.record.getCurrentEntries(src_name)
152 for e in entries:
153 if e.was_baked or e.flags & FLAG_SOURCE_MODIFIED:
154 invalidated_render_passes.add(rdr_pass)
155 break
156 if len(invalidated_render_passes) > 0:
157 logger.debug("'%s' is known to use sources %s, at least one "
158 "of which got baked. Will force bake this page. "
159 % (uri, used_src_names))
160 force_this = True
161 if PASS_FORMATTING in invalidated_render_passes:
162 logger.debug("Will invalidate cached formatting for '%s' "
163 "since sources were using during that pass."
164 % uri)
165 invalidate_formatting = True
166
167 while has_more_subs:
168 sub_uri = self.getOutputUri(uri, cur_sub)
169 out_path = self.getOutputPath(sub_uri)
170
171 # Check for up-to-date outputs.
172 do_bake = True
173 if not force_this:
174 try:
175 in_path_time = record_entry.path_mtime
176 out_path_time = os.path.getmtime(out_path)
177 if out_path_time > in_path_time:
178 do_bake = False
179 except OSError:
180 # File doesn't exist, we'll need to bake.
181 pass
182
183 # If this page didn't bake because it's already up-to-date,
184 # keep trying for as many subs as we know this page has.
185 if not do_bake:
186 if (prev_record_entry is not None and
187 prev_record_entry.num_subs < cur_sub):
188 logger.debug("")
189 cur_sub += 1
190 has_more_subs = True
191 logger.debug(" %s is up to date, skipping to next "
192 "sub-page." % out_path)
193 continue
194
195 # We don't know how many subs to expect... just skip.
196 logger.debug(" %s is up to date, skipping bake." % out_path)
197 break
198
199 # All good, proceed.
200 try:
201 if invalidate_formatting:
202 cache_key = '%s:%s' % (uri, cur_sub)
203 self.app.env.rendered_segments_repository.invalidate(
204 cache_key)
205
206 logger.debug(" p%d -> %s" % (cur_sub, out_path))
207 ctx, rp = self._bakeSingle(page, sub_uri, cur_sub, out_path,
208 pagination_filter, custom_data)
209 except Exception as ex:
210 if self.app.debug:
211 logger.exception(ex)
212 page_rel_path = os.path.relpath(page.path, self.app.root_dir)
213 raise BakingError("%s: error baking '%s'." %
214 (page_rel_path, uri)) from ex
215
216 # Copy page assets.
217 if (cur_sub == 1 and self.copy_assets and
218 ctx.used_assets is not None):
219 if self.pretty_urls:
220 out_assets_dir = os.path.dirname(out_path)
221 else:
222 out_assets_dir, out_name = os.path.split(out_path)
223 if sub_uri != self.site_root:
224 out_name_noext, _ = os.path.splitext(out_name)
225 out_assets_dir = os.path.join(out_assets_dir, out_name_noext)
226
227 logger.debug("Copying page assets to: %s" % out_assets_dir)
228 if not os.path.isdir(out_assets_dir):
229 os.makedirs(out_assets_dir, 0o755)
230 for ap in ctx.used_assets:
231 dest_ap = os.path.join(out_assets_dir, os.path.basename(ap))
232 logger.debug(" %s -> %s" % (ap, dest_ap))
233 shutil.copy(ap, dest_ap)
234
235 # Record what we did and figure out if we have more work.
236 record_entry.out_uris.append(sub_uri)
237 record_entry.out_paths.append(out_path)
238 record_entry.used_source_names |= ctx.used_source_names
239 record_entry.used_taxonomy_terms |= ctx.used_taxonomy_terms
240
241 has_more_subs = False
242 if (ctx.used_pagination is not None and
243 ctx.used_pagination.has_more):
244 cur_sub += 1
245 has_more_subs = True
246
247 def _bakeSingle(self, page, sub_uri, num, out_path,
248 pagination_filter=None, custom_data=None):
249 ctx = PageRenderingContext(page, sub_uri)
250 ctx.page_num = num
251 if pagination_filter:
252 ctx.pagination_filter = pagination_filter
253 if custom_data:
254 ctx.custom_data = custom_data
255
256 rp = render_page(ctx)
257
258 out_dir = os.path.dirname(out_path)
259 if not os.path.isdir(out_dir):
260 os.makedirs(out_dir, 0o755)
261
262 with codecs.open(out_path, 'w', 'utf8') as fp:
263 fp.write(rp.content)
264
265 return ctx, rp
266 17
267 18
268 class Baker(object): 19 class Baker(object):
269 def __init__(self, app, out_dir, force=False, portable=False, 20 def __init__(self, app, out_dir, force=False, portable=False,
270 no_assets=False, num_workers=4): 21 no_assets=False, num_workers=4):
552 for e in excs: 303 for e in excs:
553 log_friendly_exception(logger, e) 304 log_friendly_exception(logger, e)
554 raise BakingError("Baking was aborted due to errors.") 305 raise BakingError("Baking was aborted due to errors.")
555 306
556 307
557 class BakeScheduler(object):
558 _EMPTY = object()
559 _WAIT = object()
560
561 def __init__(self, record, jobs=None):
562 self.record = record
563 self.jobs = list(jobs) if jobs is not None else []
564 self._active_jobs = []
565 self._lock = threading.Lock()
566 self._added_event = threading.Event()
567 self._done_event = threading.Event()
568
569 def addJob(self, job):
570 logger.debug("Queuing job '%s:%s'." % (
571 job.factory.source.name, job.factory.rel_path))
572 with self._lock:
573 self.jobs.append(job)
574 self._added_event.set()
575
576 def onJobFinished(self, job):
577 logger.debug("Removing job '%s:%s'." % (
578 job.factory.source.name, job.factory.rel_path))
579 with self._lock:
580 self._active_jobs.remove(job)
581 self._done_event.set()
582
583 def getNextJob(self, wait_timeout=None, empty_timeout=None):
584 self._added_event.clear()
585 self._done_event.clear()
586 job = self._doGetNextJob()
587 while job in (self._EMPTY, self._WAIT):
588 if job == self._EMPTY:
589 if empty_timeout is None:
590 return None
591 logger.debug("Waiting for a new job to be added...")
592 res = self._added_event.wait(empty_timeout)
593 elif job == self._WAIT:
594 if wait_timeout is None:
595 return None
596 logger.debug("Waiting for a job to be finished...")
597 res = self._done_event.wait(wait_timeout)
598 if not res:
599 logger.debug("Timed-out. No job found.")
600 return None
601 job = self._doGetNextJob()
602 return job
603
604 def _doGetNextJob(self):
605 with self._lock:
606 if len(self.jobs) == 0:
607 return self._EMPTY
608
609 job = self.jobs.pop(0)
610 first_job = job
611 while True:
612 ready, wait_on_src = self._isJobReady(job)
613 if ready:
614 break
615
616 logger.debug("Job '%s:%s' isn't ready yet: waiting on pages "
617 "from source '%s' to finish baking." %
618 (job.factory.source.name,
619 job.factory.rel_path, wait_on_src))
620 self.jobs.append(job)
621 job = self.jobs.pop(0)
622 if job == first_job:
623 # None of the jobs are ready... we need to wait.
624 self.jobs.append(job)
625 return self._WAIT
626
627 logger.debug("Job '%s:%s' is ready to go, moving to active "
628 "queue." % (job.factory.source.name, job.factory.rel_path))
629 self._active_jobs.append(job)
630 return job
631
632 def _isJobReady(self, job):
633 e = self.record.getPreviousEntry(job.factory.source.name,
634 job.factory.rel_path)
635 if not e:
636 return (True, None)
637 for sn, rp in e.used_source_names:
638 if sn == job.factory.source.name:
639 continue
640 if any(filter(lambda j: j.factory.source.name == sn, self.jobs)):
641 return (False, sn)
642 if any(filter(lambda j: j.factory.source.name == sn,
643 self._active_jobs)):
644 return (False, sn)
645 return (True, None)
646
647
648 class BakeWorkerContext(object): 308 class BakeWorkerContext(object):
649 def __init__(self, app, out_dir, force, record, work_queue, 309 def __init__(self, app, out_dir, force, record, work_queue,
650 abort_event): 310 abort_event):
651 self.app = app 311 self.app = app
652 self.out_dir = out_dir 312 self.out_dir = out_dir