Mercurial > piecrust2
comparison piecrust/sources/autoconfig.py @ 852:4850f8c21b6e
core: Start of the big refactor for PieCrust 3.0.
* Everything is a `ContentSource`, including assets directories.
* Most content sources are subclasses of the base file-system source.
* A source is processed by a "pipeline", and there are 2 built-in pipelines,
one for assets and one for pages. The asset pipeline is vaguely functional,
but the page pipeline is completely broken right now.
* Rewrite the baking process as just running appropriate pipelines on each
content item. This should allow for better parallelization.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Wed, 17 May 2017 00:11:48 -0700 |
parents | 58ebf50235a5 |
children | f070a4fc033c |
comparison
equal
deleted
inserted
replaced
851:2c7e57d80bba | 852:4850f8c21b6e |
---|---|
1 import re | 1 import re |
2 import os | 2 import os |
3 import os.path | 3 import os.path |
4 import logging | 4 import logging |
5 from piecrust.configuration import ConfigurationError | 5 from piecrust.configuration import ConfigurationError |
6 from piecrust.routing import RouteParameter | 6 from piecrust.sources.base import ContentItem |
7 from piecrust.sources.base import ( | 7 from piecrust.sources.default import DefaultContentSource |
8 PageSource, PageFactory, InvalidFileSystemEndpointError) | |
9 from piecrust.sources.default import ( | |
10 filter_page_dirname, filter_page_filename) | |
11 from piecrust.sources.interfaces import IListableSource | |
12 from piecrust.sources.mixins import SimplePaginationSourceMixin | |
13 | 8 |
14 | 9 |
15 logger = logging.getLogger(__name__) | 10 logger = logging.getLogger(__name__) |
16 | 11 |
17 | 12 |
18 class AutoConfigSourceBase(PageSource, SimplePaginationSourceMixin, | 13 class AutoConfigContentSourceBase(DefaultContentSource): |
19 IListableSource): | 14 """ Base class for content sources that automatically apply configuration |
20 """ Base class for page sources that automatically apply configuration | |
21 settings to their generated pages based on those pages' paths. | 15 settings to their generated pages based on those pages' paths. |
22 """ | 16 """ |
23 def __init__(self, app, name, config): | 17 def __init__(self, app, name, config): |
24 super(AutoConfigSourceBase, self).__init__(app, name, config) | 18 DefaultContentSource.__init__(app, name, config) |
25 self.fs_endpoint = config.get('fs_endpoint', name) | |
26 self.fs_endpoint_path = os.path.join(self.root_dir, self.fs_endpoint) | |
27 self.supported_extensions = list( | |
28 app.config.get('site/auto_formats').keys()) | |
29 self.default_auto_format = app.config.get('site/default_auto_format') | |
30 | 19 |
31 self.capture_mode = config.get('capture_mode', 'path') | 20 self.capture_mode = config.get('capture_mode', 'path') |
32 if self.capture_mode not in ['path', 'dirname', 'filename']: | 21 if self.capture_mode not in ['path', 'dirname', 'filename']: |
33 raise ConfigurationError("Capture mode in source '%s' must be " | 22 raise ConfigurationError("Capture mode in source '%s' must be " |
34 "one of: path, dirname, filename" % | 23 "one of: path, dirname, filename" % |
35 name) | 24 name) |
36 | 25 |
37 def getSupportedRouteParameters(self): | 26 def _finalizeContent(self, parent_group, items, groups): |
38 return [ | 27 DefaultContentSource._finalizeContent(parent_group, items, groups) |
39 RouteParameter('slug', RouteParameter.TYPE_PATH)] | 28 |
40 | 29 # If `capture_mode` is `dirname`, we don't need to recompute it |
41 def buildPageFactories(self): | 30 # for each filename, so we do it here. |
42 logger.debug("Scanning for pages in: %s" % self.fs_endpoint_path) | |
43 if not os.path.isdir(self.fs_endpoint_path): | |
44 raise InvalidFileSystemEndpointError(self.name, | |
45 self.fs_endpoint_path) | |
46 | |
47 for dirpath, dirnames, filenames in os.walk(self.fs_endpoint_path): | |
48 rel_dirpath = os.path.relpath(dirpath, self.fs_endpoint_path) | |
49 dirnames[:] = list(filter(filter_page_dirname, dirnames)) | |
50 | |
51 # If `capture_mode` is `dirname`, we don't need to recompute it | |
52 # for each filename, so we do it here. | |
53 if self.capture_mode == 'dirname': | |
54 config = self._extractConfigFragment(rel_dirpath) | |
55 | |
56 for f in filter(filter_page_filename, filenames): | |
57 if self.capture_mode == 'path': | |
58 path = os.path.join(rel_dirpath, f) | |
59 config = self._extractConfigFragment(path) | |
60 elif self.capture_mode == 'filename': | |
61 config = self._extractConfigFragment(f) | |
62 | |
63 fac_path = f | |
64 if rel_dirpath != '.': | |
65 fac_path = os.path.join(rel_dirpath, f) | |
66 | |
67 slug = self._makeSlug(fac_path) | |
68 | |
69 metadata = { | |
70 'slug': slug, | |
71 'config': config} | |
72 yield PageFactory(self, fac_path, metadata) | |
73 | |
74 def resolveRef(self, ref_path): | |
75 path = os.path.normpath( | |
76 os.path.join(self.fs_endpoint_path, ref_path.lstrip("\\/"))) | |
77 | |
78 config = None | |
79 if self.capture_mode == 'dirname': | 31 if self.capture_mode == 'dirname': |
80 config = self._extractConfigFragment(os.path.dirname(ref_path)) | 32 rel_dirpath = os.path.relpath(parent_group.spec, |
81 elif self.capture_mode == 'path': | 33 self.fs_endpoint_path) |
82 config = self._extractConfigFragment(ref_path) | 34 config = self._extractConfigFragment(rel_dirpath) |
83 elif self.capture_mode == 'filename': | 35 |
84 config = self._extractConfigFragment(os.path.basename(ref_path)) | 36 for i in items: |
85 | 37 # Compute the config for the other capture modes. |
86 slug = self._makeSlug(ref_path) | 38 if self.capture_mode == 'path': |
87 metadata = {'slug': slug, 'config': config} | 39 rel_path = os.path.relpath(i.spec, self.fs_endpoint_path) |
88 return path, metadata | 40 config = self._extractConfigFragment(rel_path) |
89 | 41 elif self.capture_mode == 'filename': |
90 def listPath(self, rel_path): | 42 fname = os.path.basename(i.spec) |
91 raise NotImplementedError() | 43 config = self._extractConfigFragment(fname) |
92 | 44 |
93 def getDirpath(self, rel_path): | 45 # Set the config on the content item's metadata. |
94 return os.path.dirname(rel_path) | 46 i.metadata.setdefault('config', {}).update(config) |
95 | |
96 def getBasename(self, rel_path): | |
97 filename = os.path.basename(rel_path) | |
98 name, _ = os.path.splitext(filename) | |
99 return name | |
100 | |
101 def _makeSlug(self, rel_path): | |
102 slug = rel_path.replace('\\', '/') | |
103 slug = self._cleanSlug(slug) | |
104 slug, ext = os.path.splitext(slug) | |
105 if ext.lstrip('.') not in self.supported_extensions: | |
106 slug += ext | |
107 if slug.startswith('./'): | |
108 slug = slug[2:] | |
109 if slug == '_index': | |
110 slug = '' | |
111 return slug | |
112 | |
113 def _cleanSlug(self, slug): | |
114 return slug | |
115 | 47 |
116 def _extractConfigFragment(self, rel_path): | 48 def _extractConfigFragment(self, rel_path): |
117 raise NotImplementedError() | 49 raise NotImplementedError() |
118 | 50 |
119 | 51 |
120 class AutoConfigSource(AutoConfigSourceBase): | 52 class AutoConfigContentSource(AutoConfigContentSourceBase): |
121 """ Page source that extracts configuration settings from the sub-folders | 53 """ Content source that extracts configuration settings from the sub-folders |
122 each page resides in. This is ideal for setting tags or categories | 54 each page resides in. This is ideal for setting tags or categories |
123 on pages based on the folders they're in. | 55 on pages based on the folders they're in. |
124 """ | 56 """ |
125 SOURCE_NAME = 'autoconfig' | 57 SOURCE_NAME = 'autoconfig' |
126 | 58 |
127 def __init__(self, app, name, config): | 59 def __init__(self, app, name, config): |
128 config['capture_mode'] = 'dirname' | 60 config['capture_mode'] = 'dirname' |
129 super(AutoConfigSource, self).__init__(app, name, config) | 61 AutoConfigContentSourceBase.__init__(app, name, config) |
62 | |
130 self.setting_name = config.get('setting_name', name) | 63 self.setting_name = config.get('setting_name', name) |
131 self.only_single_values = config.get('only_single_values', False) | 64 self.only_single_values = config.get('only_single_values', False) |
132 self.collapse_single_values = config.get('collapse_single_values', | 65 self.collapse_single_values = config.get('collapse_single_values', |
133 False) | 66 False) |
134 self.supported_extensions = list( | |
135 app.config.get('site/auto_formats').keys()) | |
136 | 67 |
137 def _extractConfigFragment(self, rel_path): | 68 def _extractConfigFragment(self, rel_path): |
138 if rel_path == '.': | 69 if rel_path == '.': |
139 values = [] | 70 values = [] |
140 else: | 71 else: |
155 elif len(values) == 0: | 86 elif len(values) == 0: |
156 values = None | 87 values = None |
157 | 88 |
158 return {self.setting_name: values} | 89 return {self.setting_name: values} |
159 | 90 |
160 def findPageFactory(self, metadata, mode): | 91 def findContent(self, route_params): |
161 # Pages from this source are effectively flattened, so we need to | 92 # Pages from this source are effectively flattened, so we need to |
162 # find pages using a brute-force kinda way. | 93 # find pages using a brute-force kinda way. |
94 route_slug = route_params.get('slug', '') | |
95 if not route_slug: | |
96 route_slug = '_index' | |
97 | |
163 for dirpath, dirnames, filenames in os.walk(self.fs_endpoint_path): | 98 for dirpath, dirnames, filenames in os.walk(self.fs_endpoint_path): |
164 for f in filenames: | 99 for f in filenames: |
165 slug, _ = os.path.splitext(f) | 100 slug, _ = os.path.splitext(f) |
166 if slug == metadata['slug']: | 101 if slug == route_slug: |
167 path = os.path.join(dirpath, f) | 102 path = os.path.join(dirpath, f) |
168 rel_path = os.path.relpath(path, self.fs_endpoint_path) | 103 rel_path = os.path.relpath(path, self.fs_endpoint_path) |
169 config = self._extractConfigFragment(rel_path) | 104 config = self._extractConfigFragment(rel_path) |
170 metadata = {'slug': slug, 'config': config} | 105 metadata = {'slug': slug, 'config': config} |
171 return PageFactory(self, rel_path, metadata) | 106 return ContentItem(path, metadata) |
172 return None | 107 return None |
173 | 108 |
174 def listPath(self, rel_path): | 109 |
175 rel_path = rel_path.lstrip('\\/') | 110 class OrderedContentSource(AutoConfigContentSourceBase): |
176 path = os.path.join(self.fs_endpoint_path, rel_path) | 111 """ A content source that assigns an "order" to its pages based on a |
177 names = sorted(os.listdir(path)) | |
178 items = [] | |
179 for name in names: | |
180 if os.path.isdir(os.path.join(path, name)): | |
181 if filter_page_dirname(name): | |
182 rel_subdir = os.path.join(rel_path, name) | |
183 items.append((True, name, rel_subdir)) | |
184 else: | |
185 if filter_page_filename(name): | |
186 cur_rel_path = os.path.join(rel_path, name) | |
187 slug = self._makeSlug(cur_rel_path) | |
188 config = self._extractConfigFragment(cur_rel_path) | |
189 metadata = {'slug': slug, 'config': config} | |
190 fac = PageFactory(self, cur_rel_path, metadata) | |
191 | |
192 name, _ = os.path.splitext(name) | |
193 items.append((False, name, fac)) | |
194 return items | |
195 | |
196 def _cleanSlug(self, slug): | |
197 return os.path.basename(slug) | |
198 | |
199 | |
200 class OrderedPageSource(AutoConfigSourceBase): | |
201 """ A page source that assigns an "order" to its pages based on a | |
202 numerical prefix in their filename. Page iterators will automatically | 112 numerical prefix in their filename. Page iterators will automatically |
203 sort pages using that order. | 113 sort pages using that order. |
204 """ | 114 """ |
205 SOURCE_NAME = 'ordered' | 115 SOURCE_NAME = 'ordered' |
206 | 116 |
207 re_pattern = re.compile(r'(^|[/\\])(?P<num>\d+)_') | 117 re_pattern = re.compile(r'(^|[/\\])(?P<num>\d+)_') |
208 | 118 |
209 def __init__(self, app, name, config): | 119 def __init__(self, app, name, config): |
210 config['capture_mode'] = 'path' | 120 config['capture_mode'] = 'path' |
211 super(OrderedPageSource, self).__init__(app, name, config) | 121 AutoConfigContentSourceBase.__init__(app, name, config) |
122 | |
212 self.setting_name = config.get('setting_name', 'order') | 123 self.setting_name = config.get('setting_name', 'order') |
213 self.default_value = config.get('default_value', 0) | 124 self.default_value = config.get('default_value', 0) |
214 self.supported_extensions = list( | 125 |
215 app.config.get('site/auto_formats').keys()) | 126 def findContent(self, route_params): |
216 | 127 uri_path = route_params.get('slug', '') |
217 def findPageFactory(self, metadata, mode): | |
218 uri_path = metadata.get('slug', '') | |
219 if uri_path == '': | 128 if uri_path == '': |
220 uri_path = '_index' | 129 uri_path = '_index' |
221 | 130 |
222 path = self.fs_endpoint_path | 131 path = self.fs_endpoint_path |
223 uri_parts = uri_path.split('/') | 132 uri_parts = uri_path.split('/') |
251 found = True | 160 found = True |
252 break | 161 break |
253 if not found: | 162 if not found: |
254 return None | 163 return None |
255 | 164 |
256 fac_path = os.path.relpath(path, self.fs_endpoint_path) | 165 rel_path = os.path.relpath(path, self.fs_endpoint_path) |
257 config = self._extractConfigFragment(fac_path) | 166 config = self._extractConfigFragment(rel_path) |
258 metadata = {'slug': uri_path, 'config': config} | 167 metadata = {'slug': uri_path, 'config': config} |
259 | 168 return ContentItem(path, metadata) |
260 return PageFactory(self, fac_path, metadata) | |
261 | 169 |
262 def getSorterIterator(self, it): | 170 def getSorterIterator(self, it): |
263 accessor = self.getSettingAccessor() | 171 accessor = self.getSettingAccessor() |
264 return OrderTrailSortIterator(it, self.setting_name + '_trail', | 172 return OrderTrailSortIterator(it, self.setting_name + '_trail', |
265 value_accessor=accessor) | 173 value_accessor=accessor) |
266 | |
267 def listPath(self, rel_path): | |
268 rel_path = rel_path.lstrip('/') | |
269 path = self.fs_endpoint_path | |
270 if rel_path != '': | |
271 parts = rel_path.split('/') | |
272 for p in parts: | |
273 p_pat = r'(\d+_)?' + re.escape(p) + '$' | |
274 for name in os.listdir(path): | |
275 if re.match(p_pat, name): | |
276 path = os.path.join(path, name) | |
277 break | |
278 else: | |
279 raise Exception("No such path: %s" % rel_path) | |
280 | |
281 items = [] | |
282 names = sorted(os.listdir(path)) | |
283 for name in names: | |
284 clean_name = self.re_pattern.sub('', name) | |
285 clean_name, _ = os.path.splitext(clean_name) | |
286 if os.path.isdir(os.path.join(path, name)): | |
287 if filter_page_dirname(name): | |
288 rel_subdir = os.path.join(rel_path, name) | |
289 items.append((True, clean_name, rel_subdir)) | |
290 else: | |
291 if filter_page_filename(name): | |
292 slug = self._makeSlug(os.path.join(rel_path, name)) | |
293 | |
294 fac_path = name | |
295 if rel_path != '.': | |
296 fac_path = os.path.join(rel_path, name) | |
297 fac_path = fac_path.replace('\\', '/') | |
298 | |
299 config = self._extractConfigFragment(fac_path) | |
300 metadata = {'slug': slug, 'config': config} | |
301 fac = PageFactory(self, fac_path, metadata) | |
302 | |
303 name, _ = os.path.splitext(name) | |
304 items.append((False, clean_name, fac)) | |
305 return items | |
306 | |
307 def _cleanSlug(self, slug): | |
308 return self.re_pattern.sub(r'\1', slug) | |
309 | 174 |
310 def _extractConfigFragment(self, rel_path): | 175 def _extractConfigFragment(self, rel_path): |
311 values = [] | 176 values = [] |
312 for m in self.re_pattern.finditer(rel_path): | 177 for m in self.re_pattern.finditer(rel_path): |
313 val = int(m.group('num')) | 178 val = int(m.group('num')) |
315 | 180 |
316 if len(values) == 0: | 181 if len(values) == 0: |
317 values.append(self.default_value) | 182 values.append(self.default_value) |
318 | 183 |
319 return { | 184 return { |
320 self.setting_name: values[-1], | 185 self.setting_name: values[-1], |
321 self.setting_name + '_trail': values} | 186 self.setting_name + '_trail': values} |
322 | 187 |
323 def _populateMetadata(self, rel_path, metadata, mode=None): | 188 def _makeSlug(self, path): |
324 _, filename = os.path.split(rel_path) | 189 slug = super()._makeSlug(path) |
325 config = self._extractConfigFragment(filename) | 190 return self.re_pattern.sub(r'\1', slug) |
326 metadata['config'] = config | |
327 slug = metadata['slug'] | |
328 metadata['slug'] = self.re_pattern.sub(r'\1', slug) | |
329 | 191 |
330 | 192 |
331 class OrderTrailSortIterator(object): | 193 class OrderTrailSortIterator(object): |
332 def __init__(self, it, trail_name, value_accessor): | 194 def __init__(self, it, trail_name, value_accessor): |
333 self.it = it | 195 self.it = it |