Mercurial > piecrust2
comparison piecrust/sources/autoconfig.py @ 242:f130365568ff
internal: Code reorganization to put less stuff in `sources.base`.
Interfaces that sources can implement are in `sources.interfaces`. The default
page source is in `sources.default`. The `SimplePageSource` is gone since most
subclasses only wanted to do *some* stuff the same, but *lots* of stuff
slightly different. I may have to revisit the code to extract exactly the code
that's in common.
| author | Ludovic Chabant <ludovic@chabant.com> |
|---|---|
| date | Wed, 18 Feb 2015 18:35:03 -0800 |
| parents | f43f19975671 |
| children | 3f740928043a |
comparison
Legend: equal, deleted, inserted, replaced
| 241:85a6c7ba5e3b | 242:f130365568ff |
|---|---|
| 1 import re | 1 import re |
| 2 import os | 2 import os |
| 3 import os.path | 3 import os.path |
| 4 import glob | |
| 5 import logging | 4 import logging |
| 6 from piecrust.configuration import ConfigurationError | 5 from piecrust.configuration import ConfigurationError |
| 7 from piecrust.data.iterators import SettingSortIterator | |
| 8 from piecrust.sources.base import ( | 6 from piecrust.sources.base import ( |
| 9 SimplePageSource, IPreparingSource, SimplePaginationSourceMixin, | 7 PageSource, PageFactory, InvalidFileSystemEndpointError) |
| 10 PageNotFoundError, InvalidFileSystemEndpointError, | 8 from piecrust.sources.default import ( |
| 11 PageFactory, MODE_CREATING, MODE_PARSING) | 9 filter_page_dirname, filter_page_filename) |
| 10 from piecrust.sources.interfaces import IListableSource | |
| 11 from piecrust.sources.mixins import SimplePaginationSourceMixin | |
| 12 | 12 |
| 13 | 13 |
| 14 logger = logging.getLogger(__name__) | 14 logger = logging.getLogger(__name__) |
| 15 | 15 |
| 16 | 16 |
| 17 class AutoConfigSourceBase(SimplePageSource, | 17 class AutoConfigSourceBase(PageSource, SimplePaginationSourceMixin, |
| 18 SimplePaginationSourceMixin): | 18 IListableSource): |
| 19 """ Base class for page sources that automatically apply configuration | 19 """ Base class for page sources that automatically apply configuration |
| 20 settings to their generated pages based on those pages' paths. | 20 settings to their generated pages based on those pages' paths. |
| 21 """ | 21 """ |
| 22 def __init__(self, app, name, config): | 22 def __init__(self, app, name, config): |
| 23 super(AutoConfigSourceBase, self).__init__(app, name, config) | 23 super(AutoConfigSourceBase, self).__init__(app, name, config) |
| 24 self.fs_endpoint = config.get('fs_endpoint', name) | |
| 25 self.fs_endpoint_path = os.path.join(self.root_dir, self.fs_endpoint) | |
| 26 self.supported_extensions = list( | |
| 27 app.config.get('site/auto_formats').keys()) | |
| 28 self.default_auto_format = app.config.get('site/default_auto_format') | |
| 29 | |
| 24 self.capture_mode = config.get('capture_mode', 'path') | 30 self.capture_mode = config.get('capture_mode', 'path') |
| 25 if self.capture_mode not in ['path', 'dirname', 'filename']: | 31 if self.capture_mode not in ['path', 'dirname', 'filename']: |
| 26 raise ConfigurationError("Capture mode in source '%s' must be " | 32 raise ConfigurationError("Capture mode in source '%s' must be " |
| 27 "one of: path, dirname, filename" % | 33 "one of: path, dirname, filename" % |
| 28 name) | 34 name) |
| 29 | 35 |
| 30 def buildPageFactories(self): | 36 def buildPageFactories(self): |
| 37 logger.debug("Scanning for pages in: %s" % self.fs_endpoint_path) | |
| 31 if not os.path.isdir(self.fs_endpoint_path): | 38 if not os.path.isdir(self.fs_endpoint_path): |
| 32 raise InvalidFileSystemEndpointError(self.name, | 39 raise InvalidFileSystemEndpointError(self.name, |
| 33 self.fs_endpoint_path) | 40 self.fs_endpoint_path) |
| 34 | 41 |
| 35 for dirpath, dirnames, filenames in os.walk(self.fs_endpoint_path): | 42 for dirpath, dirnames, filenames in os.walk(self.fs_endpoint_path): |
| 36 if not filenames: | |
| 37 continue | |
| 38 | |
| 39 rel_dirpath = os.path.relpath(dirpath, self.fs_endpoint_path) | 43 rel_dirpath = os.path.relpath(dirpath, self.fs_endpoint_path) |
| 44 dirnames[:] = list(filter(filter_page_dirname, dirnames)) | |
| 40 | 45 |
| 41 # If `capture_mode` is `dirname`, we don't need to recompute it | 46 # If `capture_mode` is `dirname`, we don't need to recompute it |
| 42 # for each filename, so we do it here. | 47 # for each filename, so we do it here. |
| 43 if self.capture_mode == 'dirname': | 48 if self.capture_mode == 'dirname': |
| 44 config = self.extractConfigFragment(rel_dirpath) | 49 config = self._extractConfigFragment(rel_dirpath) |
| 45 | 50 |
| 46 for f in filenames: | 51 for f in filter(filter_page_filename, filenames): |
| 47 if self.capture_mode == 'path': | 52 if self.capture_mode == 'path': |
| 48 path = os.path.join(rel_dirpath, f) | 53 path = os.path.join(rel_dirpath, f) |
| 49 config = self.extractConfigFragment(path) | 54 config = self._extractConfigFragment(path) |
| 50 elif self.capture_mode == 'filename': | 55 elif self.capture_mode == 'filename': |
| 51 config = self.extractConfigFragment(f) | 56 config = self._extractConfigFragment(f) |
| 52 | 57 |
| 53 fac_path = f | 58 fac_path = f |
| 54 if rel_dirpath != '.': | 59 if rel_dirpath != '.': |
| 55 fac_path = os.path.join(rel_dirpath, f) | 60 fac_path = os.path.join(rel_dirpath, f) |
| 56 | 61 |
| 57 slug = self.makeSlug(rel_dirpath, f) | 62 slug = self._makeSlug(fac_path) |
| 58 | 63 |
| 59 metadata = { | 64 metadata = { |
| 60 'slug': slug, | 65 'slug': slug, |
| 61 'config': config} | 66 'config': config} |
| 62 yield PageFactory(self, fac_path, metadata) | 67 yield PageFactory(self, fac_path, metadata) |
| 63 | 68 |
| 64 def makeSlug(self, rel_dirpath, filename): | 69 def resolveRef(self, ref_path): |
| 70 return os.path.normpath( | |
| 71 os.path.join(self.fs_endpoint_path, ref_path.lstrip("\\/"))) | |
| 72 | |
| 73 def listPath(self, rel_path): | |
| 65 raise NotImplementedError() | 74 raise NotImplementedError() |
| 66 | 75 |
| 67 def extractConfigFragment(self, rel_path): | 76 def getDirpath(self, rel_path): |
| 77 return os.path.dirname(rel_path) | |
| 78 | |
| 79 def getBasename(self, rel_path): | |
| 80 filename = os.path.basename(rel_path) | |
| 81 name, _ = os.path.splitext(filename) | |
| 82 return name | |
| 83 | |
| 84 def _makeSlug(self, rel_path): | |
| 68 raise NotImplementedError() | 85 raise NotImplementedError() |
| 69 | 86 |
| 70 def findPagePath(self, metadata, mode): | 87 def _extractConfigFragment(self, rel_path): |
| 71 raise NotImplementedError() | 88 raise NotImplementedError() |
| 72 | 89 |
| 73 | 90 |
| 74 class AutoConfigSource(AutoConfigSourceBase): | 91 class AutoConfigSource(AutoConfigSourceBase): |
| 75 """ Page source that extracts configuration settings from the sub-folders | 92 """ Page source that extracts configuration settings from the sub-folders |
| 86 self.collapse_single_values = config.get('collapse_single_values', | 103 self.collapse_single_values = config.get('collapse_single_values', |
| 87 False) | 104 False) |
| 88 self.supported_extensions = list( | 105 self.supported_extensions = list( |
| 89 app.config.get('site/auto_formats').keys()) | 106 app.config.get('site/auto_formats').keys()) |
| 90 | 107 |
| 91 def makeSlug(self, rel_dirpath, filename): | 108 def _makeSlug(self, rel_path): |
| 92 slug, ext = os.path.splitext(filename) | 109 slug, ext = os.path.splitext(os.path.basename(rel_path)) |
| 93 if ext.lstrip('.') not in self.supported_extensions: | 110 if ext.lstrip('.') not in self.supported_extensions: |
| 94 slug += ext | 111 slug += ext |
| 95 return slug | 112 return slug |
| 96 | 113 |
| 97 def extractConfigFragment(self, rel_path): | 114 def _extractConfigFragment(self, rel_path): |
| 98 if rel_path == '.': | 115 if rel_path == '.': |
| 99 values = [] | 116 values = [] |
| 100 else: | 117 else: |
| 101 values = rel_path.split(os.sep) | 118 values = rel_path.split(os.sep) |
| 102 | 119 |
| 124 for f in filenames: | 141 for f in filenames: |
| 125 slug, _ = os.path.splitext(f) | 142 slug, _ = os.path.splitext(f) |
| 126 if slug == metadata['slug']: | 143 if slug == metadata['slug']: |
| 127 path = os.path.join(dirpath, f) | 144 path = os.path.join(dirpath, f) |
| 128 rel_path = os.path.relpath(path, self.fs_endpoint_path) | 145 rel_path = os.path.relpath(path, self.fs_endpoint_path) |
| 129 config = self.extractConfigFragment(dirpath) | 146 config = self._extractConfigFragment(rel_path) |
| 130 metadata = {'slug': slug, 'config': config} | 147 metadata = {'slug': slug, 'config': config} |
| 131 return rel_path, metadata | 148 return rel_path, metadata |
| 149 | |
| 150 def listPath(self, rel_path): | |
| 151 rel_path = rel_path.lstrip('\\/') | |
| 152 path = os.path.join(self.fs_endpoint_path, rel_path) | |
| 153 names = sorted(os.listdir(path)) | |
| 154 items = [] | |
| 155 for name in names: | |
| 156 if os.path.isdir(os.path.join(path, name)): | |
| 157 if filter_page_dirname(name): | |
| 158 rel_subdir = os.path.join(rel_path, name) | |
| 159 items.append((True, name, rel_subdir)) | |
| 160 else: | |
| 161 if filter_page_filename(name): | |
| 162 cur_rel_path = os.path.join(rel_path, name) | |
| 163 slug = self._makeSlug(cur_rel_path) | |
| 164 config = self._extractConfigFragment(cur_rel_path) | |
| 165 metadata = {'slug': slug, 'config': config} | |
| 166 fac = PageFactory(self, cur_rel_path, metadata) | |
| 167 | |
| 168 name, _ = os.path.splitext(name) | |
| 169 items.append((False, name, fac)) | |
| 170 return items | |
| 132 | 171 |
| 133 | 172 |
| 134 class OrderedPageSource(AutoConfigSourceBase): | 173 class OrderedPageSource(AutoConfigSourceBase): |
| 135 """ A page source that assigns an "order" to its pages based on a | 174 """ A page source that assigns an "order" to its pages based on a |
| 136 numerical prefix in their filename. Page iterators will automatically | 175 numerical prefix in their filename. Page iterators will automatically |
| 139 SOURCE_NAME = 'ordered' | 178 SOURCE_NAME = 'ordered' |
| 140 | 179 |
| 141 re_pattern = re.compile(r'(^|/)(?P<num>\d+)_') | 180 re_pattern = re.compile(r'(^|/)(?P<num>\d+)_') |
| 142 | 181 |
| 143 def __init__(self, app, name, config): | 182 def __init__(self, app, name, config): |
| 144 config['capture_mode'] = 'filename' | 183 config['capture_mode'] = 'path' |
| 145 super(OrderedPageSource, self).__init__(app, name, config) | 184 super(OrderedPageSource, self).__init__(app, name, config) |
| 146 self.setting_name = config.get('setting_name', 'order') | 185 self.setting_name = config.get('setting_name', 'order') |
| 147 self.default_value = config.get('default_value', 0) | 186 self.default_value = config.get('default_value', 0) |
| 148 self.supported_extensions = list( | 187 self.supported_extensions = list( |
| 149 app.config.get('site/auto_formats').keys()) | 188 app.config.get('site/auto_formats').keys()) |
| 150 | 189 |
| 151 def makeSlug(self, rel_dirpath, filename): | 190 def findPagePath(self, metadata, mode): |
| 152 slug, ext = os.path.splitext(filename) | 191 uri_path = metadata.get('slug', '') |
| 192 if uri_path == '': | |
| 193 uri_path = '_index' | |
| 194 | |
| 195 path = self.fs_endpoint_path | |
| 196 uri_parts = uri_path.split('/') | |
| 197 for i, p in enumerate(uri_parts): | |
| 198 if i == len(uri_parts) - 1: | |
| 199 # Last part, this is the filename. We need to check for either | |
| 200 # the name, or the name with the prefix, but also handle a | |
| 201 # possible extension. | |
| 202 p_pat = r'(\d+_)?' + re.escape(p) | |
| 203 | |
| 204 _, ext = os.path.splitext(uri_path) | |
| 205 if ext == '': | |
| 206 p_pat += r'\.[\w\d]+' | |
| 207 | |
| 208 found = False | |
| 209 for name in os.listdir(path): | |
| 210 if re.match(p_pat, name): | |
| 211 path = os.path.join(path, name) | |
| 212 found = True | |
| 213 break | |
| 214 if not found: | |
| 215 return None, None | |
| 216 else: | |
| 217 # Find each sub-directory. It can either be a directory with | |
| 218 # the name itself, or the name with a number prefix. | |
| 219 p_pat = r'(\d+_)?' + re.escape(p) | |
| 220 found = False | |
| 221 for name in os.listdir(path): | |
| 222 if re.match(p_pat, name): | |
| 223 path = os.path.join(path, name) | |
| 224 found = True | |
| 225 break | |
| 226 if not found: | |
| 227 return None, None | |
| 228 | |
| 229 fac_path = os.path.relpath(path, self.fs_endpoint_path) | |
| 230 config = self._extractConfigFragment(fac_path) | |
| 231 metadata = {'slug': uri_path, 'config': config} | |
| 232 | |
| 233 return fac_path, metadata | |
| 234 | |
| 235 def getSorterIterator(self, it): | |
| 236 accessor = self.getSettingAccessor() | |
| 237 return OrderTrailSortIterator(it, self.setting_name + '_trail', | |
| 238 value_accessor=accessor) | |
| 239 | |
| 240 def listPath(self, rel_path): | |
| 241 rel_path = rel_path.lstrip('/') | |
| 242 path = self.fs_endpoint_path | |
| 243 if rel_path != '': | |
| 244 parts = rel_path.split('/') | |
| 245 for p in parts: | |
| 246 p_pat = r'(\d+_)?' + re.escape(p) | |
| 247 for name in os.listdir(path): | |
| 248 if re.match(p_pat, name): | |
| 249 path = os.path.join(path, name) | |
| 250 break | |
| 251 else: | |
| 252 raise Exception("No such path: %s" % rel_path) | |
| 253 | |
| 254 items = [] | |
| 255 names = sorted(os.listdir(path)) | |
| 256 for name in names: | |
| 257 if os.path.isdir(os.path.join(path, name)): | |
| 258 if filter_page_dirname(name): | |
| 259 rel_subdir = os.path.join(rel_path, name) | |
| 260 items.append((True, name, rel_subdir)) | |
| 261 else: | |
| 262 if filter_page_filename(name): | |
| 263 slug = self._makeSlug(os.path.join(rel_path, name)) | |
| 264 | |
| 265 fac_path = name | |
| 266 if rel_path != '.': | |
| 267 fac_path = os.path.join(rel_path, name) | |
| 268 fac_path = fac_path.replace('\\', '/') | |
| 269 | |
| 270 config = self._extractConfigFragment(fac_path) | |
| 271 metadata = {'slug': slug, 'config': config} | |
| 272 fac = PageFactory(self, fac_path, metadata) | |
| 273 | |
| 274 name, _ = os.path.splitext(name) | |
| 275 items.append((False, name, fac)) | |
| 276 return items | |
| 277 | |
| 278 def _makeSlug(self, rel_path): | |
| 279 slug, ext = os.path.splitext(rel_path) | |
| 153 if ext.lstrip('.') not in self.supported_extensions: | 280 if ext.lstrip('.') not in self.supported_extensions: |
| 154 slug += ext | 281 slug += ext |
| 155 slug = self.re_pattern.sub(r'\1', slug) | 282 slug = self.re_pattern.sub(r'\1', slug) |
| 156 slug = os.path.join(rel_dirpath, slug).replace('\\', '/') | |
| 157 if slug.startswith('./'): | |
| 158 slug = slug[2:] | |
| 159 return slug | 283 return slug |
| 160 | 284 |
| 161 def extractConfigFragment(self, rel_path): | 285 def _extractConfigFragment(self, rel_path): |
| 162 m = self.re_pattern.match(rel_path) | 286 values = [] |
| 163 if m is not None: | 287 for m in self.re_pattern.finditer(rel_path): |
| 164 val = int(m.group('num')) | 288 val = int(m.group('num')) |
| 165 else: | 289 values.append(val) |
| 166 val = self.default_value | 290 |
| 167 return {self.setting_name: val} | 291 if len(values) == 0: |
| 168 | 292 values.append(self.default_value) |
| 169 def findPagePath(self, metadata, mode): | 293 |
| 170 uri_path = metadata.get('slug', '') | 294 return { |
| 171 if uri_path != '': | 295 self.setting_name: values[-1], |
| 172 uri_parts = ['*_%s' % p for p in uri_path.split('/')] | 296 self.setting_name + '_trail': values} |
| 173 else: | |
| 174 uri_parts = ['*__index'] | |
| 175 uri_parts.insert(0, self.fs_endpoint_path) | |
| 176 path = os.path.join(*uri_parts) | |
| 177 | |
| 178 _, ext = os.path.splitext(uri_path) | |
| 179 if ext == '': | |
| 180 path += '.*' | |
| 181 | |
| 182 possibles = glob.glob(path) | |
| 183 | |
| 184 if len(possibles) == 0: | |
| 185 return None, None | |
| 186 | |
| 187 if len(possibles) > 1: | |
| 188 raise Exception("More than one path matching: %s" % uri_path) | |
| 189 | |
| 190 path = possibles[0] | |
| 191 fac_path = os.path.relpath(path, self.fs_endpoint_path) | |
| 192 | |
| 193 _, filename = os.path.split(path) | |
| 194 config = self.extractConfigFragment(filename) | |
| 195 metadata = {'slug': uri_path, 'config': config} | |
| 196 | |
| 197 return fac_path, metadata | |
| 198 | |
| 199 def getSorterIterator(self, it): | |
| 200 accessor = self.getSettingAccessor() | |
| 201 return SettingSortIterator(it, self.setting_name, | |
| 202 value_accessor=accessor) | |
| 203 | 297 |
| 204 def _populateMetadata(self, rel_path, metadata, mode=None): | 298 def _populateMetadata(self, rel_path, metadata, mode=None): |
| 205 _, filename = os.path.split(rel_path) | 299 _, filename = os.path.split(rel_path) |
| 206 config = self.extractConfigFragment(filename) | 300 config = self._extractConfigFragment(filename) |
| 207 metadata['config'] = config | 301 metadata['config'] = config |
| 208 slug = metadata['slug'] | 302 slug = metadata['slug'] |
| 209 metadata['slug'] = self.re_pattern.sub(r'\1', slug) | 303 metadata['slug'] = self.re_pattern.sub(r'\1', slug) |
| 210 | 304 |
| 305 | |
| 306 class OrderTrailSortIterator(object): | |
| 307 def __init__(self, it, trail_name, value_accessor): | |
| 308 self.it = it | |
| 309 self.trail_name = trail_name | |
| 310 self.value_accessor = value_accessor | |
| 311 | |
| 312 def __iter__(self): | |
| 313 return iter(sorted(self.it, key=self._key_getter)) | |
| 314 | |
| 315 def _key_getter(self, item): | |
| 316 values = self.value_accessor(item, self.trail_name) | |
| 317 key = ''.join(values) | |
| 318 return key | |
| 319 |
