comparison piecrust/sources/autoconfig.py @ 852:4850f8c21b6e

core: Start of the big refactor for PieCrust 3.0. * Everything is a `ContentSource`, including assets directories. * Most content sources are subclasses of the base file-system source. * A source is processed by a "pipeline", and there are 2 built-in pipelines, one for assets and one for pages. The asset pipeline is vaguely functional, but the page pipeline is completely broken right now. * Rewrite the baking process as just running appropriate pipelines on each content item. This should allow for better parallelization.
author Ludovic Chabant <ludovic@chabant.com>
date Wed, 17 May 2017 00:11:48 -0700
parents 58ebf50235a5
children f070a4fc033c
comparison
equal deleted inserted replaced
851:2c7e57d80bba 852:4850f8c21b6e
1 import re 1 import re
2 import os 2 import os
3 import os.path 3 import os.path
4 import logging 4 import logging
5 from piecrust.configuration import ConfigurationError 5 from piecrust.configuration import ConfigurationError
6 from piecrust.routing import RouteParameter 6 from piecrust.sources.base import ContentItem
7 from piecrust.sources.base import ( 7 from piecrust.sources.default import DefaultContentSource
8 PageSource, PageFactory, InvalidFileSystemEndpointError)
9 from piecrust.sources.default import (
10 filter_page_dirname, filter_page_filename)
11 from piecrust.sources.interfaces import IListableSource
12 from piecrust.sources.mixins import SimplePaginationSourceMixin
13 8
14 9
15 logger = logging.getLogger(__name__) 10 logger = logging.getLogger(__name__)
16 11
17 12
18 class AutoConfigSourceBase(PageSource, SimplePaginationSourceMixin, 13 class AutoConfigContentSourceBase(DefaultContentSource):
19 IListableSource): 14 """ Base class for content sources that automatically apply configuration
20 """ Base class for page sources that automatically apply configuration
21 settings to their generated pages based on those pages' paths. 15 settings to their generated pages based on those pages' paths.
22 """ 16 """
23 def __init__(self, app, name, config): 17 def __init__(self, app, name, config):
24 super(AutoConfigSourceBase, self).__init__(app, name, config) 18 DefaultContentSource.__init__(app, name, config)
25 self.fs_endpoint = config.get('fs_endpoint', name)
26 self.fs_endpoint_path = os.path.join(self.root_dir, self.fs_endpoint)
27 self.supported_extensions = list(
28 app.config.get('site/auto_formats').keys())
29 self.default_auto_format = app.config.get('site/default_auto_format')
30 19
31 self.capture_mode = config.get('capture_mode', 'path') 20 self.capture_mode = config.get('capture_mode', 'path')
32 if self.capture_mode not in ['path', 'dirname', 'filename']: 21 if self.capture_mode not in ['path', 'dirname', 'filename']:
33 raise ConfigurationError("Capture mode in source '%s' must be " 22 raise ConfigurationError("Capture mode in source '%s' must be "
34 "one of: path, dirname, filename" % 23 "one of: path, dirname, filename" %
35 name) 24 name)
36 25
37 def getSupportedRouteParameters(self): 26 def _finalizeContent(self, parent_group, items, groups):
38 return [ 27 DefaultContentSource._finalizeContent(parent_group, items, groups)
39 RouteParameter('slug', RouteParameter.TYPE_PATH)] 28
40 29 # If `capture_mode` is `dirname`, we don't need to recompute it
41 def buildPageFactories(self): 30 # for each filename, so we do it here.
42 logger.debug("Scanning for pages in: %s" % self.fs_endpoint_path)
43 if not os.path.isdir(self.fs_endpoint_path):
44 raise InvalidFileSystemEndpointError(self.name,
45 self.fs_endpoint_path)
46
47 for dirpath, dirnames, filenames in os.walk(self.fs_endpoint_path):
48 rel_dirpath = os.path.relpath(dirpath, self.fs_endpoint_path)
49 dirnames[:] = list(filter(filter_page_dirname, dirnames))
50
51 # If `capture_mode` is `dirname`, we don't need to recompute it
52 # for each filename, so we do it here.
53 if self.capture_mode == 'dirname':
54 config = self._extractConfigFragment(rel_dirpath)
55
56 for f in filter(filter_page_filename, filenames):
57 if self.capture_mode == 'path':
58 path = os.path.join(rel_dirpath, f)
59 config = self._extractConfigFragment(path)
60 elif self.capture_mode == 'filename':
61 config = self._extractConfigFragment(f)
62
63 fac_path = f
64 if rel_dirpath != '.':
65 fac_path = os.path.join(rel_dirpath, f)
66
67 slug = self._makeSlug(fac_path)
68
69 metadata = {
70 'slug': slug,
71 'config': config}
72 yield PageFactory(self, fac_path, metadata)
73
74 def resolveRef(self, ref_path):
75 path = os.path.normpath(
76 os.path.join(self.fs_endpoint_path, ref_path.lstrip("\\/")))
77
78 config = None
79 if self.capture_mode == 'dirname': 31 if self.capture_mode == 'dirname':
80 config = self._extractConfigFragment(os.path.dirname(ref_path)) 32 rel_dirpath = os.path.relpath(parent_group.spec,
81 elif self.capture_mode == 'path': 33 self.fs_endpoint_path)
82 config = self._extractConfigFragment(ref_path) 34 config = self._extractConfigFragment(rel_dirpath)
83 elif self.capture_mode == 'filename': 35
84 config = self._extractConfigFragment(os.path.basename(ref_path)) 36 for i in items:
85 37 # Compute the config for the other capture modes.
86 slug = self._makeSlug(ref_path) 38 if self.capture_mode == 'path':
87 metadata = {'slug': slug, 'config': config} 39 rel_path = os.path.relpath(i.spec, self.fs_endpoint_path)
88 return path, metadata 40 config = self._extractConfigFragment(rel_path)
89 41 elif self.capture_mode == 'filename':
90 def listPath(self, rel_path): 42 fname = os.path.basename(i.spec)
91 raise NotImplementedError() 43 config = self._extractConfigFragment(fname)
92 44
93 def getDirpath(self, rel_path): 45 # Set the config on the content item's metadata.
94 return os.path.dirname(rel_path) 46 i.metadata.setdefault('config', {}).update(config)
95
96 def getBasename(self, rel_path):
97 filename = os.path.basename(rel_path)
98 name, _ = os.path.splitext(filename)
99 return name
100
101 def _makeSlug(self, rel_path):
102 slug = rel_path.replace('\\', '/')
103 slug = self._cleanSlug(slug)
104 slug, ext = os.path.splitext(slug)
105 if ext.lstrip('.') not in self.supported_extensions:
106 slug += ext
107 if slug.startswith('./'):
108 slug = slug[2:]
109 if slug == '_index':
110 slug = ''
111 return slug
112
113 def _cleanSlug(self, slug):
114 return slug
115 47
116 def _extractConfigFragment(self, rel_path): 48 def _extractConfigFragment(self, rel_path):
117 raise NotImplementedError() 49 raise NotImplementedError()
118 50
119 51
120 class AutoConfigSource(AutoConfigSourceBase): 52 class AutoConfigContentSource(AutoConfigContentSourceBase):
121 """ Page source that extracts configuration settings from the sub-folders 53 """ Content source that extracts configuration settings from the sub-folders
122 each page resides in. This is ideal for setting tags or categories 54 each page resides in. This is ideal for setting tags or categories
123 on pages based on the folders they're in. 55 on pages based on the folders they're in.
124 """ 56 """
125 SOURCE_NAME = 'autoconfig' 57 SOURCE_NAME = 'autoconfig'
126 58
127 def __init__(self, app, name, config): 59 def __init__(self, app, name, config):
128 config['capture_mode'] = 'dirname' 60 config['capture_mode'] = 'dirname'
129 super(AutoConfigSource, self).__init__(app, name, config) 61 AutoConfigContentSourceBase.__init__(app, name, config)
62
130 self.setting_name = config.get('setting_name', name) 63 self.setting_name = config.get('setting_name', name)
131 self.only_single_values = config.get('only_single_values', False) 64 self.only_single_values = config.get('only_single_values', False)
132 self.collapse_single_values = config.get('collapse_single_values', 65 self.collapse_single_values = config.get('collapse_single_values',
133 False) 66 False)
134 self.supported_extensions = list(
135 app.config.get('site/auto_formats').keys())
136 67
137 def _extractConfigFragment(self, rel_path): 68 def _extractConfigFragment(self, rel_path):
138 if rel_path == '.': 69 if rel_path == '.':
139 values = [] 70 values = []
140 else: 71 else:
155 elif len(values) == 0: 86 elif len(values) == 0:
156 values = None 87 values = None
157 88
158 return {self.setting_name: values} 89 return {self.setting_name: values}
159 90
160 def findPageFactory(self, metadata, mode): 91 def findContent(self, route_params):
161 # Pages from this source are effectively flattened, so we need to 92 # Pages from this source are effectively flattened, so we need to
162 # find pages using a brute-force kinda way. 93 # find pages using a brute-force kinda way.
94 route_slug = route_params.get('slug', '')
95 if not route_slug:
96 route_slug = '_index'
97
163 for dirpath, dirnames, filenames in os.walk(self.fs_endpoint_path): 98 for dirpath, dirnames, filenames in os.walk(self.fs_endpoint_path):
164 for f in filenames: 99 for f in filenames:
165 slug, _ = os.path.splitext(f) 100 slug, _ = os.path.splitext(f)
166 if slug == metadata['slug']: 101 if slug == route_slug:
167 path = os.path.join(dirpath, f) 102 path = os.path.join(dirpath, f)
168 rel_path = os.path.relpath(path, self.fs_endpoint_path) 103 rel_path = os.path.relpath(path, self.fs_endpoint_path)
169 config = self._extractConfigFragment(rel_path) 104 config = self._extractConfigFragment(rel_path)
170 metadata = {'slug': slug, 'config': config} 105 metadata = {'slug': slug, 'config': config}
171 return PageFactory(self, rel_path, metadata) 106 return ContentItem(path, metadata)
172 return None 107 return None
173 108
174 def listPath(self, rel_path): 109
175 rel_path = rel_path.lstrip('\\/') 110 class OrderedContentSource(AutoConfigContentSourceBase):
176 path = os.path.join(self.fs_endpoint_path, rel_path) 111 """ A content source that assigns an "order" to its pages based on a
177 names = sorted(os.listdir(path))
178 items = []
179 for name in names:
180 if os.path.isdir(os.path.join(path, name)):
181 if filter_page_dirname(name):
182 rel_subdir = os.path.join(rel_path, name)
183 items.append((True, name, rel_subdir))
184 else:
185 if filter_page_filename(name):
186 cur_rel_path = os.path.join(rel_path, name)
187 slug = self._makeSlug(cur_rel_path)
188 config = self._extractConfigFragment(cur_rel_path)
189 metadata = {'slug': slug, 'config': config}
190 fac = PageFactory(self, cur_rel_path, metadata)
191
192 name, _ = os.path.splitext(name)
193 items.append((False, name, fac))
194 return items
195
196 def _cleanSlug(self, slug):
197 return os.path.basename(slug)
198
199
200 class OrderedPageSource(AutoConfigSourceBase):
201 """ A page source that assigns an "order" to its pages based on a
202 numerical prefix in their filename. Page iterators will automatically 112 numerical prefix in their filename. Page iterators will automatically
203 sort pages using that order. 113 sort pages using that order.
204 """ 114 """
205 SOURCE_NAME = 'ordered' 115 SOURCE_NAME = 'ordered'
206 116
207 re_pattern = re.compile(r'(^|[/\\])(?P<num>\d+)_') 117 re_pattern = re.compile(r'(^|[/\\])(?P<num>\d+)_')
208 118
209 def __init__(self, app, name, config): 119 def __init__(self, app, name, config):
210 config['capture_mode'] = 'path' 120 config['capture_mode'] = 'path'
211 super(OrderedPageSource, self).__init__(app, name, config) 121 AutoConfigContentSourceBase.__init__(app, name, config)
122
212 self.setting_name = config.get('setting_name', 'order') 123 self.setting_name = config.get('setting_name', 'order')
213 self.default_value = config.get('default_value', 0) 124 self.default_value = config.get('default_value', 0)
214 self.supported_extensions = list( 125
215 app.config.get('site/auto_formats').keys()) 126 def findContent(self, route_params):
216 127 uri_path = route_params.get('slug', '')
217 def findPageFactory(self, metadata, mode):
218 uri_path = metadata.get('slug', '')
219 if uri_path == '': 128 if uri_path == '':
220 uri_path = '_index' 129 uri_path = '_index'
221 130
222 path = self.fs_endpoint_path 131 path = self.fs_endpoint_path
223 uri_parts = uri_path.split('/') 132 uri_parts = uri_path.split('/')
251 found = True 160 found = True
252 break 161 break
253 if not found: 162 if not found:
254 return None 163 return None
255 164
256 fac_path = os.path.relpath(path, self.fs_endpoint_path) 165 rel_path = os.path.relpath(path, self.fs_endpoint_path)
257 config = self._extractConfigFragment(fac_path) 166 config = self._extractConfigFragment(rel_path)
258 metadata = {'slug': uri_path, 'config': config} 167 metadata = {'slug': uri_path, 'config': config}
259 168 return ContentItem(path, metadata)
260 return PageFactory(self, fac_path, metadata)
261 169
262 def getSorterIterator(self, it): 170 def getSorterIterator(self, it):
263 accessor = self.getSettingAccessor() 171 accessor = self.getSettingAccessor()
264 return OrderTrailSortIterator(it, self.setting_name + '_trail', 172 return OrderTrailSortIterator(it, self.setting_name + '_trail',
265 value_accessor=accessor) 173 value_accessor=accessor)
266
267 def listPath(self, rel_path):
268 rel_path = rel_path.lstrip('/')
269 path = self.fs_endpoint_path
270 if rel_path != '':
271 parts = rel_path.split('/')
272 for p in parts:
273 p_pat = r'(\d+_)?' + re.escape(p) + '$'
274 for name in os.listdir(path):
275 if re.match(p_pat, name):
276 path = os.path.join(path, name)
277 break
278 else:
279 raise Exception("No such path: %s" % rel_path)
280
281 items = []
282 names = sorted(os.listdir(path))
283 for name in names:
284 clean_name = self.re_pattern.sub('', name)
285 clean_name, _ = os.path.splitext(clean_name)
286 if os.path.isdir(os.path.join(path, name)):
287 if filter_page_dirname(name):
288 rel_subdir = os.path.join(rel_path, name)
289 items.append((True, clean_name, rel_subdir))
290 else:
291 if filter_page_filename(name):
292 slug = self._makeSlug(os.path.join(rel_path, name))
293
294 fac_path = name
295 if rel_path != '.':
296 fac_path = os.path.join(rel_path, name)
297 fac_path = fac_path.replace('\\', '/')
298
299 config = self._extractConfigFragment(fac_path)
300 metadata = {'slug': slug, 'config': config}
301 fac = PageFactory(self, fac_path, metadata)
302
303 name, _ = os.path.splitext(name)
304 items.append((False, clean_name, fac))
305 return items
306
307 def _cleanSlug(self, slug):
308 return self.re_pattern.sub(r'\1', slug)
309 174
310 def _extractConfigFragment(self, rel_path): 175 def _extractConfigFragment(self, rel_path):
311 values = [] 176 values = []
312 for m in self.re_pattern.finditer(rel_path): 177 for m in self.re_pattern.finditer(rel_path):
313 val = int(m.group('num')) 178 val = int(m.group('num'))
315 180
316 if len(values) == 0: 181 if len(values) == 0:
317 values.append(self.default_value) 182 values.append(self.default_value)
318 183
319 return { 184 return {
320 self.setting_name: values[-1], 185 self.setting_name: values[-1],
321 self.setting_name + '_trail': values} 186 self.setting_name + '_trail': values}
322 187
323 def _populateMetadata(self, rel_path, metadata, mode=None): 188 def _makeSlug(self, path):
324 _, filename = os.path.split(rel_path) 189 slug = super()._makeSlug(path)
325 config = self._extractConfigFragment(filename) 190 return self.re_pattern.sub(r'\1', slug)
326 metadata['config'] = config
327 slug = metadata['slug']
328 metadata['slug'] = self.re_pattern.sub(r'\1', slug)
329 191
330 192
331 class OrderTrailSortIterator(object): 193 class OrderTrailSortIterator(object):
332 def __init__(self, it, trail_name, value_accessor): 194 def __init__(self, it, trail_name, value_accessor):
333 self.it = it 195 self.it = it