comparison piecrust/sources/autoconfig.py @ 242:f130365568ff

internal: Code reorganization to put less stuff in `sources.base`. Interfaces that sources can implement are now in `sources.interfaces`. The default page source is now in `sources.default`. The `SimplePageSource` is gone, since most subclasses only wanted to do *some* things the same way, but *lots* of things slightly differently. I may have to revisit the code later to extract exactly the parts that are common to all of them.
author Ludovic Chabant <ludovic@chabant.com>
date Wed, 18 Feb 2015 18:35:03 -0800
parents f43f19975671
children 3f740928043a
comparison
equal deleted inserted replaced
241:85a6c7ba5e3b 242:f130365568ff
1 import re 1 import re
2 import os 2 import os
3 import os.path 3 import os.path
4 import glob
5 import logging 4 import logging
6 from piecrust.configuration import ConfigurationError 5 from piecrust.configuration import ConfigurationError
7 from piecrust.data.iterators import SettingSortIterator
8 from piecrust.sources.base import ( 6 from piecrust.sources.base import (
9 SimplePageSource, IPreparingSource, SimplePaginationSourceMixin, 7 PageSource, PageFactory, InvalidFileSystemEndpointError)
10 PageNotFoundError, InvalidFileSystemEndpointError, 8 from piecrust.sources.default import (
11 PageFactory, MODE_CREATING, MODE_PARSING) 9 filter_page_dirname, filter_page_filename)
10 from piecrust.sources.interfaces import IListableSource
11 from piecrust.sources.mixins import SimplePaginationSourceMixin
12 12
13 13
14 logger = logging.getLogger(__name__) 14 logger = logging.getLogger(__name__)
15 15
16 16
17 class AutoConfigSourceBase(SimplePageSource, 17 class AutoConfigSourceBase(PageSource, SimplePaginationSourceMixin,
18 SimplePaginationSourceMixin): 18 IListableSource):
19 """ Base class for page sources that automatically apply configuration 19 """ Base class for page sources that automatically apply configuration
20 settings to their generated pages based on those pages' paths. 20 settings to their generated pages based on those pages' paths.
21 """ 21 """
22 def __init__(self, app, name, config): 22 def __init__(self, app, name, config):
23 super(AutoConfigSourceBase, self).__init__(app, name, config) 23 super(AutoConfigSourceBase, self).__init__(app, name, config)
24 self.fs_endpoint = config.get('fs_endpoint', name)
25 self.fs_endpoint_path = os.path.join(self.root_dir, self.fs_endpoint)
26 self.supported_extensions = list(
27 app.config.get('site/auto_formats').keys())
28 self.default_auto_format = app.config.get('site/default_auto_format')
29
24 self.capture_mode = config.get('capture_mode', 'path') 30 self.capture_mode = config.get('capture_mode', 'path')
25 if self.capture_mode not in ['path', 'dirname', 'filename']: 31 if self.capture_mode not in ['path', 'dirname', 'filename']:
26 raise ConfigurationError("Capture mode in source '%s' must be " 32 raise ConfigurationError("Capture mode in source '%s' must be "
27 "one of: path, dirname, filename" % 33 "one of: path, dirname, filename" %
28 name) 34 name)
29 35
30 def buildPageFactories(self): 36 def buildPageFactories(self):
37 logger.debug("Scanning for pages in: %s" % self.fs_endpoint_path)
31 if not os.path.isdir(self.fs_endpoint_path): 38 if not os.path.isdir(self.fs_endpoint_path):
32 raise InvalidFileSystemEndpointError(self.name, 39 raise InvalidFileSystemEndpointError(self.name,
33 self.fs_endpoint_path) 40 self.fs_endpoint_path)
34 41
35 for dirpath, dirnames, filenames in os.walk(self.fs_endpoint_path): 42 for dirpath, dirnames, filenames in os.walk(self.fs_endpoint_path):
36 if not filenames:
37 continue
38
39 rel_dirpath = os.path.relpath(dirpath, self.fs_endpoint_path) 43 rel_dirpath = os.path.relpath(dirpath, self.fs_endpoint_path)
44 dirnames[:] = list(filter(filter_page_dirname, dirnames))
40 45
41 # If `capture_mode` is `dirname`, we don't need to recompute it 46 # If `capture_mode` is `dirname`, we don't need to recompute it
42 # for each filename, so we do it here. 47 # for each filename, so we do it here.
43 if self.capture_mode == 'dirname': 48 if self.capture_mode == 'dirname':
44 config = self.extractConfigFragment(rel_dirpath) 49 config = self._extractConfigFragment(rel_dirpath)
45 50
46 for f in filenames: 51 for f in filter(filter_page_filename, filenames):
47 if self.capture_mode == 'path': 52 if self.capture_mode == 'path':
48 path = os.path.join(rel_dirpath, f) 53 path = os.path.join(rel_dirpath, f)
49 config = self.extractConfigFragment(path) 54 config = self._extractConfigFragment(path)
50 elif self.capture_mode == 'filename': 55 elif self.capture_mode == 'filename':
51 config = self.extractConfigFragment(f) 56 config = self._extractConfigFragment(f)
52 57
53 fac_path = f 58 fac_path = f
54 if rel_dirpath != '.': 59 if rel_dirpath != '.':
55 fac_path = os.path.join(rel_dirpath, f) 60 fac_path = os.path.join(rel_dirpath, f)
56 61
57 slug = self.makeSlug(rel_dirpath, f) 62 slug = self._makeSlug(fac_path)
58 63
59 metadata = { 64 metadata = {
60 'slug': slug, 65 'slug': slug,
61 'config': config} 66 'config': config}
62 yield PageFactory(self, fac_path, metadata) 67 yield PageFactory(self, fac_path, metadata)
63 68
64 def makeSlug(self, rel_dirpath, filename): 69 def resolveRef(self, ref_path):
70 return os.path.normpath(
71 os.path.join(self.fs_endpoint_path, ref_path.lstrip("\\/")))
72
73 def listPath(self, rel_path):
65 raise NotImplementedError() 74 raise NotImplementedError()
66 75
67 def extractConfigFragment(self, rel_path): 76 def getDirpath(self, rel_path):
77 return os.path.dirname(rel_path)
78
79 def getBasename(self, rel_path):
80 filename = os.path.basename(rel_path)
81 name, _ = os.path.splitext(filename)
82 return name
83
84 def _makeSlug(self, rel_path):
68 raise NotImplementedError() 85 raise NotImplementedError()
69 86
70 def findPagePath(self, metadata, mode): 87 def _extractConfigFragment(self, rel_path):
71 raise NotImplementedError() 88 raise NotImplementedError()
72 89
73 90
74 class AutoConfigSource(AutoConfigSourceBase): 91 class AutoConfigSource(AutoConfigSourceBase):
75 """ Page source that extracts configuration settings from the sub-folders 92 """ Page source that extracts configuration settings from the sub-folders
86 self.collapse_single_values = config.get('collapse_single_values', 103 self.collapse_single_values = config.get('collapse_single_values',
87 False) 104 False)
88 self.supported_extensions = list( 105 self.supported_extensions = list(
89 app.config.get('site/auto_formats').keys()) 106 app.config.get('site/auto_formats').keys())
90 107
91 def makeSlug(self, rel_dirpath, filename): 108 def _makeSlug(self, rel_path):
92 slug, ext = os.path.splitext(filename) 109 slug, ext = os.path.splitext(os.path.basename(rel_path))
93 if ext.lstrip('.') not in self.supported_extensions: 110 if ext.lstrip('.') not in self.supported_extensions:
94 slug += ext 111 slug += ext
95 return slug 112 return slug
96 113
97 def extractConfigFragment(self, rel_path): 114 def _extractConfigFragment(self, rel_path):
98 if rel_path == '.': 115 if rel_path == '.':
99 values = [] 116 values = []
100 else: 117 else:
101 values = rel_path.split(os.sep) 118 values = rel_path.split(os.sep)
102 119
124 for f in filenames: 141 for f in filenames:
125 slug, _ = os.path.splitext(f) 142 slug, _ = os.path.splitext(f)
126 if slug == metadata['slug']: 143 if slug == metadata['slug']:
127 path = os.path.join(dirpath, f) 144 path = os.path.join(dirpath, f)
128 rel_path = os.path.relpath(path, self.fs_endpoint_path) 145 rel_path = os.path.relpath(path, self.fs_endpoint_path)
129 config = self.extractConfigFragment(dirpath) 146 config = self._extractConfigFragment(rel_path)
130 metadata = {'slug': slug, 'config': config} 147 metadata = {'slug': slug, 'config': config}
131 return rel_path, metadata 148 return rel_path, metadata
149
150 def listPath(self, rel_path):
151 rel_path = rel_path.lstrip('\\/')
152 path = os.path.join(self.fs_endpoint_path, rel_path)
153 names = sorted(os.listdir(path))
154 items = []
155 for name in names:
156 if os.path.isdir(os.path.join(path, name)):
157 if filter_page_dirname(name):
158 rel_subdir = os.path.join(rel_path, name)
159 items.append((True, name, rel_subdir))
160 else:
161 if filter_page_filename(name):
162 cur_rel_path = os.path.join(rel_path, name)
163 slug = self._makeSlug(cur_rel_path)
164 config = self._extractConfigFragment(cur_rel_path)
165 metadata = {'slug': slug, 'config': config}
166 fac = PageFactory(self, cur_rel_path, metadata)
167
168 name, _ = os.path.splitext(name)
169 items.append((False, name, fac))
170 return items
132 171
133 172
134 class OrderedPageSource(AutoConfigSourceBase): 173 class OrderedPageSource(AutoConfigSourceBase):
135 """ A page source that assigns an "order" to its pages based on a 174 """ A page source that assigns an "order" to its pages based on a
136 numerical prefix in their filename. Page iterators will automatically 175 numerical prefix in their filename. Page iterators will automatically
139 SOURCE_NAME = 'ordered' 178 SOURCE_NAME = 'ordered'
140 179
141 re_pattern = re.compile(r'(^|/)(?P<num>\d+)_') 180 re_pattern = re.compile(r'(^|/)(?P<num>\d+)_')
142 181
143 def __init__(self, app, name, config): 182 def __init__(self, app, name, config):
144 config['capture_mode'] = 'filename' 183 config['capture_mode'] = 'path'
145 super(OrderedPageSource, self).__init__(app, name, config) 184 super(OrderedPageSource, self).__init__(app, name, config)
146 self.setting_name = config.get('setting_name', 'order') 185 self.setting_name = config.get('setting_name', 'order')
147 self.default_value = config.get('default_value', 0) 186 self.default_value = config.get('default_value', 0)
148 self.supported_extensions = list( 187 self.supported_extensions = list(
149 app.config.get('site/auto_formats').keys()) 188 app.config.get('site/auto_formats').keys())
150 189
151 def makeSlug(self, rel_dirpath, filename): 190 def findPagePath(self, metadata, mode):
152 slug, ext = os.path.splitext(filename) 191 uri_path = metadata.get('slug', '')
192 if uri_path == '':
193 uri_path = '_index'
194
195 path = self.fs_endpoint_path
196 uri_parts = uri_path.split('/')
197 for i, p in enumerate(uri_parts):
198 if i == len(uri_parts) - 1:
199 # Last part, this is the filename. We need to check for either
200 # the name, or the name with the prefix, but also handle a
201 # possible extension.
202 p_pat = r'(\d+_)?' + re.escape(p)
203
204 _, ext = os.path.splitext(uri_path)
205 if ext == '':
206 p_pat += r'\.[\w\d]+'
207
208 found = False
209 for name in os.listdir(path):
210 if re.match(p_pat, name):
211 path = os.path.join(path, name)
212 found = True
213 break
214 if not found:
215 return None, None
216 else:
217 # Find each sub-directory. It can either be a directory with
218 # the name itself, or the name with a number prefix.
219 p_pat = r'(\d+_)?' + re.escape(p)
220 found = False
221 for name in os.listdir(path):
222 if re.match(p_pat, name):
223 path = os.path.join(path, name)
224 found = True
225 break
226 if not found:
227 return None, None
228
229 fac_path = os.path.relpath(path, self.fs_endpoint_path)
230 config = self._extractConfigFragment(fac_path)
231 metadata = {'slug': uri_path, 'config': config}
232
233 return fac_path, metadata
234
235 def getSorterIterator(self, it):
236 accessor = self.getSettingAccessor()
237 return OrderTrailSortIterator(it, self.setting_name + '_trail',
238 value_accessor=accessor)
239
240 def listPath(self, rel_path):
241 rel_path = rel_path.lstrip('/')
242 path = self.fs_endpoint_path
243 if rel_path != '':
244 parts = rel_path.split('/')
245 for p in parts:
246 p_pat = r'(\d+_)?' + re.escape(p)
247 for name in os.listdir(path):
248 if re.match(p_pat, name):
249 path = os.path.join(path, name)
250 break
251 else:
252 raise Exception("No such path: %s" % rel_path)
253
254 items = []
255 names = sorted(os.listdir(path))
256 for name in names:
257 if os.path.isdir(os.path.join(path, name)):
258 if filter_page_dirname(name):
259 rel_subdir = os.path.join(rel_path, name)
260 items.append((True, name, rel_subdir))
261 else:
262 if filter_page_filename(name):
263 slug = self._makeSlug(os.path.join(rel_path, name))
264
265 fac_path = name
266 if rel_path != '.':
267 fac_path = os.path.join(rel_path, name)
268 fac_path = fac_path.replace('\\', '/')
269
270 config = self._extractConfigFragment(fac_path)
271 metadata = {'slug': slug, 'config': config}
272 fac = PageFactory(self, fac_path, metadata)
273
274 name, _ = os.path.splitext(name)
275 items.append((False, name, fac))
276 return items
277
278 def _makeSlug(self, rel_path):
279 slug, ext = os.path.splitext(rel_path)
153 if ext.lstrip('.') not in self.supported_extensions: 280 if ext.lstrip('.') not in self.supported_extensions:
154 slug += ext 281 slug += ext
155 slug = self.re_pattern.sub(r'\1', slug) 282 slug = self.re_pattern.sub(r'\1', slug)
156 slug = os.path.join(rel_dirpath, slug).replace('\\', '/')
157 if slug.startswith('./'):
158 slug = slug[2:]
159 return slug 283 return slug
160 284
161 def extractConfigFragment(self, rel_path): 285 def _extractConfigFragment(self, rel_path):
162 m = self.re_pattern.match(rel_path) 286 values = []
163 if m is not None: 287 for m in self.re_pattern.finditer(rel_path):
164 val = int(m.group('num')) 288 val = int(m.group('num'))
165 else: 289 values.append(val)
166 val = self.default_value 290
167 return {self.setting_name: val} 291 if len(values) == 0:
168 292 values.append(self.default_value)
169 def findPagePath(self, metadata, mode): 293
170 uri_path = metadata.get('slug', '') 294 return {
171 if uri_path != '': 295 self.setting_name: values[-1],
172 uri_parts = ['*_%s' % p for p in uri_path.split('/')] 296 self.setting_name + '_trail': values}
173 else:
174 uri_parts = ['*__index']
175 uri_parts.insert(0, self.fs_endpoint_path)
176 path = os.path.join(*uri_parts)
177
178 _, ext = os.path.splitext(uri_path)
179 if ext == '':
180 path += '.*'
181
182 possibles = glob.glob(path)
183
184 if len(possibles) == 0:
185 return None, None
186
187 if len(possibles) > 1:
188 raise Exception("More than one path matching: %s" % uri_path)
189
190 path = possibles[0]
191 fac_path = os.path.relpath(path, self.fs_endpoint_path)
192
193 _, filename = os.path.split(path)
194 config = self.extractConfigFragment(filename)
195 metadata = {'slug': uri_path, 'config': config}
196
197 return fac_path, metadata
198
199 def getSorterIterator(self, it):
200 accessor = self.getSettingAccessor()
201 return SettingSortIterator(it, self.setting_name,
202 value_accessor=accessor)
203 297
204 def _populateMetadata(self, rel_path, metadata, mode=None): 298 def _populateMetadata(self, rel_path, metadata, mode=None):
205 _, filename = os.path.split(rel_path) 299 _, filename = os.path.split(rel_path)
206 config = self.extractConfigFragment(filename) 300 config = self._extractConfigFragment(filename)
207 metadata['config'] = config 301 metadata['config'] = config
208 slug = metadata['slug'] 302 slug = metadata['slug']
209 metadata['slug'] = self.re_pattern.sub(r'\1', slug) 303 metadata['slug'] = self.re_pattern.sub(r'\1', slug)
210 304
305
306 class OrderTrailSortIterator(object):
307 def __init__(self, it, trail_name, value_accessor):
308 self.it = it
309 self.trail_name = trail_name
310 self.value_accessor = value_accessor
311
312 def __iter__(self):
313 return iter(sorted(self.it, key=self._key_getter))
314
315 def _key_getter(self, item):
316 values = self.value_accessor(item, self.trail_name)
317 key = ''.join(values)
318 return key
319