piecrust2 (Mercurial repository)
comparison: piecrust/baking/baker.py @ 150:91dcbb5fe1e8

Split baking code in smaller files.

author:   Ludovic Chabant <ludovic@chabant.com>
date:     Sun, 30 Nov 2014 21:46:42 -0800
parents:  ab6e7e0e9d44
children: fd146f54bdaa
comparing 149:ea4a17831242 (before) with 150:91dcbb5fe1e8 (after)
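This changeset moves PageBaker (together with its BakingError exception) out to piecrust.baking.single and BakeScheduler out to piecrust.baking.scheduler, leaving baker.py to the Baker driver. As a minimal sketch of the resulting layout, based only on the import lines visible in the diff below (the app, out_dir, and record objects are placeholders, not repository code):

    # Post-split module layout, as implied by the new imports in this diff.
    from piecrust.baking.records import TransitionalBakeRecord
    from piecrust.baking.scheduler import BakeScheduler
    from piecrust.baking.single import BakingError, PageBaker

    baker = PageBaker(app, out_dir, force=True)   # previously defined in baker.py
    scheduler = BakeScheduler(record)             # previously defined in baker.py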
--- a/piecrust/baking/baker.py
+++ b/piecrust/baking/baker.py
@@ -1,270 +1,21 @@
 import time
 import os.path
-import codecs
 import shutil
 import hashlib
 import logging
 import threading
-import urllib.request, urllib.error, urllib.parse
 from piecrust.baking.records import (TransitionalBakeRecord,
-        BakeRecordPageEntry,
-        FLAG_OVERRIDEN, FLAG_SOURCE_MODIFIED)
+        BakeRecordPageEntry)
+from piecrust.baking.scheduler import BakeScheduler
+from piecrust.baking.single import (BakingError, PageBaker)
 from piecrust.chefutil import format_timed, log_friendly_exception
-from piecrust.data.filters import (PaginationFilter, HasFilterClause,
-        IsFilterClause, AndBooleanClause)
-from piecrust.rendering import (PageRenderingContext, render_page,
-        PASS_FORMATTING, PASS_RENDERING)
 from piecrust.sources.base import (PageFactory,
         REALM_NAMES, REALM_USER, REALM_THEME)
 
 
 logger = logging.getLogger(__name__)
-
-
-class BakingError(Exception):
-    pass
-
-
-class PageBaker(object):
-    def __init__(self, app, out_dir, force=False, record=None,
-            copy_assets=True):
-        self.app = app
-        self.out_dir = out_dir
-        self.force = force
-        self.record = record
-        self.copy_assets = copy_assets
-        self.site_root = app.config.get('site/root')
-        self.pretty_urls = app.config.get('site/pretty_urls')
-        self.pagination_suffix = app.config.get('site/pagination_suffix')
-
-    def getOutputUri(self, uri, num):
-        suffix = self.pagination_suffix.replace('%num%', str(num))
-        if self.pretty_urls:
-            # Output will be:
-            # - `uri/name`
-            # - `uri/name/2`
-            # - `uri/name.ext`
-            # - `uri/name.ext/2`
-            if num <= 1:
-                return uri
-            return uri + suffix
-        else:
-            # Output will be:
-            # - `uri/name.html`
-            # - `uri/name/2.html`
-            # - `uri/name.ext`
-            # - `uri/name/2.ext`
-            if uri == '/':
-                if num <= 1:
-                    return '/'
-                return '/' + suffix.lstrip('/')
-            else:
-                if num <= 1:
-                    return uri
-                #TODO: watch out for tags with dots in them.
-                base_uri, ext = os.path.splitext(uri)
-                return base_uri + suffix + ext
-
-    def getOutputPath(self, uri):
-        bake_path = [self.out_dir]
-        decoded_uri = urllib.parse.unquote(uri.lstrip('/'))
-        if self.pretty_urls:
-            bake_path.append(decoded_uri)
-            bake_path.append('index.html')
-        else:
-            name, ext = os.path.splitext(decoded_uri)
-            if decoded_uri == '':
-                bake_path.append('index.html')
-            elif ext:
-                bake_path.append(decoded_uri)
-            else:
-                bake_path.append(decoded_uri + '.html')
-
-        return os.path.normpath(os.path.join(*bake_path))
-
-    def bake(self, factory, route, record_entry,
-            taxonomy_name=None, taxonomy_term=None):
-        custom_data = None
-        pagination_filter = None
-        route_metadata = dict(factory.metadata)
-        if taxonomy_name and taxonomy_term:
-            # Must bake a taxonomy listing page... we'll have to add a
-            # pagination filter for only get matching posts, and the output
-            # URL will be a bit different.
-            tax = self.app.getTaxonomy(taxonomy_name)
-            pagination_filter = PaginationFilter()
-            if tax.is_multiple:
-                if isinstance(taxonomy_term, tuple):
-                    abc = AndBooleanClause()
-                    for t in taxonomy_term:
-                        abc.addClause(HasFilterClause(taxonomy_name, t))
-                    pagination_filter.addClause(abc)
-                    slugified_term = '/'.join(taxonomy_term)
-                else:
-                    pagination_filter.addClause(HasFilterClause(taxonomy_name,
-                            taxonomy_term))
-                    slugified_term = taxonomy_term
-            else:
-                pagination_filter.addClause(IsFilterClause(taxonomy_name,
-                        taxonomy_term))
-                slugified_term = taxonomy_term
-            custom_data = {tax.term_name: taxonomy_term}
-            route_metadata.update({tax.term_name: slugified_term})
-
-        # Generate the URL using the route.
-        page = factory.buildPage()
-        uri = route.getUri(route_metadata, page)
-
-        override = self.record.getOverrideEntry(factory, uri)
-        if override is not None:
-            override_source = self.app.getSource(override.source_name)
-            if override_source.realm == factory.source.realm:
-                raise BakingError(
-                        "Page '%s' maps to URL '%s' but is overriden by page"
-                        "'%s:%s'." % (factory.ref_spec, uri,
-                            override.source_name, override.rel_path))
-            logger.debug("'%s' [%s] is overriden by '%s:%s'. Skipping" %
-                    (factory.ref_spec, uri, override.source_name,
-                        override.rel_path))
-            record_entry.flags |= FLAG_OVERRIDEN
-            return
-
-        cur_sub = 1
-        has_more_subs = True
-        force_this = self.force
-        invalidate_formatting = False
-        record_entry.config = page.config.get().copy()
-        prev_record_entry = self.record.getPreviousEntry(
-                factory.source.name, factory.rel_path,
-                taxonomy_name, taxonomy_term)
-
-        logger.debug("Baking '%s'..." % uri)
-
-        # If the current page is known to use pages from other sources,
-        # see if any of those got baked, or are going to be baked for some
-        # reason. If so, we need to bake this one too.
-        # (this happens for instance with the main page of a blog).
-        if prev_record_entry and prev_record_entry.was_baked_successfully:
-            invalidated_render_passes = set()
-            used_src_names = list(prev_record_entry.used_source_names)
-            for src_name, rdr_pass in used_src_names:
-                entries = self.record.getCurrentEntries(src_name)
-                for e in entries:
-                    if e.was_baked or e.flags & FLAG_SOURCE_MODIFIED:
-                        invalidated_render_passes.add(rdr_pass)
-                        break
-            if len(invalidated_render_passes) > 0:
-                logger.debug("'%s' is known to use sources %s, at least one "
-                             "of which got baked. Will force bake this page. "
-                             % (uri, used_src_names))
-                force_this = True
-                if PASS_FORMATTING in invalidated_render_passes:
-                    logger.debug("Will invalidate cached formatting for '%s' "
-                                 "since sources were using during that pass."
-                                 % uri)
-                    invalidate_formatting = True
-
-        while has_more_subs:
-            sub_uri = self.getOutputUri(uri, cur_sub)
-            out_path = self.getOutputPath(sub_uri)
-
-            # Check for up-to-date outputs.
-            do_bake = True
-            if not force_this:
-                try:
-                    in_path_time = record_entry.path_mtime
-                    out_path_time = os.path.getmtime(out_path)
-                    if out_path_time > in_path_time:
-                        do_bake = False
-                except OSError:
-                    # File doesn't exist, we'll need to bake.
-                    pass
-
-            # If this page didn't bake because it's already up-to-date.
-            # Keep trying for as many subs as we know this page has.
-            if not do_bake:
-                if (prev_record_entry is not None and
-                        prev_record_entry.num_subs < cur_sub):
-                    logger.debug("")
-                    cur_sub += 1
-                    has_more_subs = True
-                    logger.debug(" %s is up to date, skipping to next "
-                                 "sub-page." % out_path)
-                    continue
-
-                # We don't know how many subs to expect... just skip.
-                logger.debug(" %s is up to date, skipping bake." % out_path)
-                break
-
-            # All good, proceed.
-            try:
-                if invalidate_formatting:
-                    cache_key = '%s:%s' % (uri, cur_sub)
-                    self.app.env.rendered_segments_repository.invalidate(
-                            cache_key)
-
-                logger.debug(" p%d -> %s" % (cur_sub, out_path))
-                ctx, rp = self._bakeSingle(page, sub_uri, cur_sub, out_path,
-                        pagination_filter, custom_data)
-            except Exception as ex:
-                if self.app.debug:
-                    logger.exception(ex)
-                page_rel_path = os.path.relpath(page.path, self.app.root_dir)
-                raise BakingError("%s: error baking '%s'." %
-                        (page_rel_path, uri)) from ex
-
-            # Copy page assets.
-            if (cur_sub == 1 and self.copy_assets and
-                    ctx.used_assets is not None):
-                if self.pretty_urls:
-                    out_assets_dir = os.path.dirname(out_path)
-                else:
-                    out_assets_dir, out_name = os.path.split(out_path)
-                    if sub_uri != self.site_root:
-                        out_name_noext, _ = os.path.splitext(out_name)
-                        out_assets_dir += out_name_noext
-
-                logger.debug("Copying page assets to: %s" % out_assets_dir)
-                if not os.path.isdir(out_assets_dir):
-                    os.makedirs(out_assets_dir, 0o755)
-                for ap in ctx.used_assets:
-                    dest_ap = os.path.join(out_assets_dir, os.path.basename(ap))
-                    logger.debug(" %s -> %s" % (ap, dest_ap))
-                    shutil.copy(ap, dest_ap)
-
-            # Record what we did and figure out if we have more work.
-            record_entry.out_uris.append(sub_uri)
-            record_entry.out_paths.append(out_path)
-            record_entry.used_source_names |= ctx.used_source_names
-            record_entry.used_taxonomy_terms |= ctx.used_taxonomy_terms
-
-            has_more_subs = False
-            if (ctx.used_pagination is not None and
-                    ctx.used_pagination.has_more):
-                cur_sub += 1
-                has_more_subs = True
-
-    def _bakeSingle(self, page, sub_uri, num, out_path,
-            pagination_filter=None, custom_data=None):
-        ctx = PageRenderingContext(page, sub_uri)
-        ctx.page_num = num
-        if pagination_filter:
-            ctx.pagination_filter = pagination_filter
-        if custom_data:
-            ctx.custom_data = custom_data
-
-        rp = render_page(ctx)
-
-        out_dir = os.path.dirname(out_path)
-        if not os.path.isdir(out_dir):
-            os.makedirs(out_dir, 0o755)
-
-        with codecs.open(out_path, 'w', 'utf8') as fp:
-            fp.write(rp.content)
-
-        return ctx, rp
 
 
 class Baker(object):
     def __init__(self, app, out_dir, force=False, portable=False,
             no_assets=False, num_workers=4):
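The bulk of the removal above is the PageBaker class, which now lives in piecrust.baking.single. Its getOutputUri() method encodes the sub-page (pagination) URL scheme; here is a standalone restatement of that logic with worked examples. The '/%num%' default suffix is an assumption for illustration only; the real value comes from the site/pagination_suffix setting.

    import os.path

    def get_output_uri(uri, num, pretty_urls, pagination_suffix='/%num%'):
        # Restates PageBaker.getOutputUri() from the code removed above.
        suffix = pagination_suffix.replace('%num%', str(num))
        if pretty_urls:
            # `/foo` for sub-page 1, `/foo/2` for sub-page 2, and so on.
            return uri if num <= 1 else uri + suffix
        if uri == '/':
            return '/' if num <= 1 else '/' + suffix.lstrip('/')
        if num <= 1:
            return uri
        # Non-pretty URLs keep the extension: `/foo/2.html`, not `/foo.html/2`.
        base_uri, ext = os.path.splitext(uri)
        return base_uri + suffix + ext

    assert get_output_uri('/blog', 1, True) == '/blog'
    assert get_output_uri('/blog', 2, True) == '/blog/2'
    assert get_output_uri('/blog.html', 2, False) == '/blog/2.html'
    assert get_output_uri('/', 2, False) == '/2'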
@@ -552,101 +303,10 @@
         for e in excs:
             log_friendly_exception(logger, e)
         raise BakingError("Baking was aborted due to errors.")
 
 
-class BakeScheduler(object):
-    _EMPTY = object()
-    _WAIT = object()
-
-    def __init__(self, record, jobs=None):
-        self.record = record
-        self.jobs = list(jobs) if jobs is not None else []
-        self._active_jobs = []
-        self._lock = threading.Lock()
-        self._added_event = threading.Event()
-        self._done_event = threading.Event()
-
-    def addJob(self, job):
-        logger.debug("Queuing job '%s:%s'." % (
-                job.factory.source.name, job.factory.rel_path))
-        with self._lock:
-            self.jobs.append(job)
-        self._added_event.set()
-
-    def onJobFinished(self, job):
-        logger.debug("Removing job '%s:%s'." % (
-                job.factory.source.name, job.factory.rel_path))
-        with self._lock:
-            self._active_jobs.remove(job)
-        self._done_event.set()
-
-    def getNextJob(self, wait_timeout=None, empty_timeout=None):
-        self._added_event.clear()
-        self._done_event.clear()
-        job = self._doGetNextJob()
-        while job in (self._EMPTY, self._WAIT):
-            if job == self._EMPTY:
-                if empty_timeout is None:
-                    return None
-                logger.debug("Waiting for a new job to be added...")
-                res = self._added_event.wait(empty_timeout)
-            elif job == self._WAIT:
-                if wait_timeout is None:
-                    return None
-                logger.debug("Waiting for a job to be finished...")
-                res = self._done_event.wait(wait_timeout)
-            if not res:
-                logger.debug("Timed-out. No job found.")
-                return None
-            job = self._doGetNextJob()
-        return job
-
-    def _doGetNextJob(self):
-        with self._lock:
-            if len(self.jobs) == 0:
-                return self._EMPTY
-
-            job = self.jobs.pop(0)
-            first_job = job
-            while True:
-                ready, wait_on_src = self._isJobReady(job)
-                if ready:
-                    break
-
-                logger.debug("Job '%s:%s' isn't ready yet: waiting on pages "
-                             "from source '%s' to finish baking." %
-                             (job.factory.source.name,
-                              job.factory.rel_path, wait_on_src))
-                self.jobs.append(job)
-                job = self.jobs.pop(0)
-                if job == first_job:
-                    # None of the jobs are ready... we need to wait.
-                    self.jobs.append(job)
-                    return self._WAIT
-
-            logger.debug("Job '%s:%s' is ready to go, moving to active "
-                    "queue." % (job.factory.source.name, job.factory.rel_path))
-            self._active_jobs.append(job)
-            return job
-
-    def _isJobReady(self, job):
-        e = self.record.getPreviousEntry(job.factory.source.name,
-                                         job.factory.rel_path)
-        if not e:
-            return (True, None)
-        for sn, rp in e.used_source_names:
-            if sn == job.factory.source.name:
-                continue
-            if any(filter(lambda j: j.factory.source.name == sn, self.jobs)):
-                return (False, sn)
-            if any(filter(lambda j: j.factory.source.name == sn,
-                          self._active_jobs)):
-                return (False, sn)
-        return (True, None)
-
-
 class BakeWorkerContext(object):
     def __init__(self, app, out_dir, force, record, work_queue,
             abort_event):
         self.app = app
         self.out_dir = out_dir
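The removed BakeScheduler (now in piecrust.baking.scheduler) only hands out a job once the sources it depends on have finished baking, parking not-ready jobs behind its _EMPTY and _WAIT sentinels. A sketch of a consumer loop against that API, assuming a hypothetical bake_one callable; the real worker code lives in the elided Baker internals and may differ:

    def worker_loop(scheduler, bake_one):
        # `bake_one` is a placeholder that bakes a single job.
        while True:
            # Waits up to 1s for new jobs or for dependencies to clear;
            # returns None once the queue stays empty or blocked.
            job = scheduler.getNextJob(wait_timeout=1, empty_timeout=1)
            if job is None:
                break
            try:
                bake_one(job)
            finally:
                # Wakes jobs whose _isJobReady() check was waiting on
                # pages from this job's source.
                scheduler.onJobFinished(job)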