comparison piecrust/sources/fs.py @ 853:f070a4fc033c

core: Continue PieCrust3 refactor, simplify pages. The asset pipeline is still the only function pipeline at this point. * No more `QualifiedPage`, and several other pieces of code deleted. * Data providers are simpler and more focused. For instance, the page iterator doesn't try to support other types of items. * Route parameters are proper known source metadata to remove the confusion between the two. * Make the baker and pipeline more correctly manage records and record histories. * Add support for record collapsing and deleting stale outputs in the asset pipeline.
author Ludovic Chabant <ludovic@chabant.com>
date Sun, 21 May 2017 00:06:59 -0700
parents 4850f8c21b6e
children 08e02c2a2a1a
comparison
equal deleted inserted replaced
852:4850f8c21b6e 853:f070a4fc033c
1 import os.path 1 import os.path
2 import re
3 import fnmatch
2 import logging 4 import logging
3 from piecrust import osutil 5 from piecrust import osutil
4 from piecrust.routing import RouteParameter 6 from piecrust.routing import RouteParameter
5 from piecrust.sources.base import ContentItem, ContentGroup, ContentSource 7 from piecrust.sources.base import ContentItem, ContentGroup, ContentSource
6 8
26 """ 28 """
27 def __init__(self, app, name, config): 29 def __init__(self, app, name, config):
28 super().__init__(app, name, config) 30 super().__init__(app, name, config)
29 self.fs_endpoint = config.get('fs_endpoint', name) 31 self.fs_endpoint = config.get('fs_endpoint', name)
30 self.fs_endpoint_path = os.path.join(self.root_dir, self.fs_endpoint) 32 self.fs_endpoint_path = os.path.join(self.root_dir, self.fs_endpoint)
31 self._fs_filter = None
32 33
33 def _checkFSEndpoint(self): 34 def _checkFSEndpoint(self):
34 if not os.path.isdir(self.fs_endpoint_path): 35 if not os.path.isdir(self.fs_endpoint_path):
35 if self.config.get('ignore_missing_dir'): 36 if self.config.get('ignore_missing_dir'):
36 return False 37 return False
37 raise InvalidFileSystemEndpointError(self.name, 38 raise InvalidFileSystemEndpointError(self.name,
38 self.fs_endpoint_path) 39 self.fs_endpoint_path)
39 return True 40 return True
40 41
41 def openItem(self, item, mode='r'): 42 def openItem(self, item, mode='r', encoding=None):
42 for m in 'wxa+': 43 for m in 'wxa+':
43 if m in mode: 44 if m in mode:
44 # If opening the file for writing, let's make sure the 45 # If opening the file for writing, let's make sure the
45 # directory exists. 46 # directory exists.
46 dirname = os.path.dirname(item.spec) 47 dirname = os.path.dirname(item.spec)
47 if not os.path.exists(dirname): 48 if not os.path.exists(dirname):
48 os.makedirs(dirname, 0o755) 49 os.makedirs(dirname, 0o755)
49 break 50 break
50 return open(item.spec, mode) 51 return open(item.spec, mode, encoding=encoding)
51 52
52 def getItemMtime(self, item): 53 def getItemMtime(self, item):
53 return os.path.getmtime(item.spec) 54 return os.path.getmtime(item.spec)
55
def describe(self):
    """ Returns a dictionary describing this source, which holds the
        path of its file-system endpoint under the `endpoint_path` key.
    """
    description = dict(endpoint_path=self.fs_endpoint_path)
    return description
54 58
55 59
56 class FSContentSource(FSContentSourceBase): 60 class FSContentSource(FSContentSourceBase):
57 """ Implements a `ContentSource` that simply returns files on disk 61 """ Implements a `ContentSource` that simply returns files on disk
58 under a given root directory. 62 under a given root directory.
59 """ 63 """
60 SOURCE_NAME = 'fs' 64 SOURCE_NAME = 'fs'
65
def __init__(self, app, name, config):
    """ Creates the source, making sure a `data_type` is set in its
        configuration and parsing its `ignore` patterns.
    """
    super().__init__(app, name, config)

    # Unless the configuration says otherwise, this source's data type
    # is `asset_iterator`.
    config.setdefault('data_type', 'asset_iterator')

    # The `ignore` setting is split into glob patterns and compiled
    # regular expressions.
    globs, regexes = _parse_ignores(config.get('ignore'))
    self._ignore_globs = globs
    self._ignore_regexes = regexes
61 74
62 def getContents(self, group): 75 def getContents(self, group):
63 logger.debug("Scanning for content in: %s" % self.fs_endpoint_path) 76 logger.debug("Scanning for content in: %s" % self.fs_endpoint_path)
64 if not self._checkFSEndpoint(): 77 if not self._checkFSEndpoint():
65 return None 78 return None
67 parent_path = self.fs_endpoint_path 80 parent_path = self.fs_endpoint_path
68 if group is not None: 81 if group is not None:
69 parent_path = group.spec 82 parent_path = group.spec
70 83
71 names = filter(_filter_crap_files, osutil.listdir(parent_path)) 84 names = filter(_filter_crap_files, osutil.listdir(parent_path))
72 if self._fs_filter is not None: 85
73 names = filter(self._fs_filter, names) 86 final_names = []
87 for name in names:
88 path = os.path.join(parent_path, name)
89 if not self._filterIgnored(path):
90 final_names.append(name)
74 91
75 items = [] 92 items = []
76 groups = [] 93 groups = []
77 for name in names: 94 for name in final_names:
78 path = os.path.join(parent_path, name) 95 path = os.path.join(parent_path, name)
79 if os.path.isdir(path): 96 if os.path.isdir(path):
80 metadata = self._createGroupMetadata(path) 97 metadata = self._createGroupMetadata(path)
81 groups.append(ContentGroup(path, metadata)) 98 groups.append(ContentGroup(path, metadata))
82 else: 99 else:
83 metadata = self._createItemMetadata(path) 100 metadata = self._createItemMetadata(path)
84 items.append(ContentItem(path, metadata)) 101 items.append(ContentItem(path, metadata))
85 self._finalizeContent(group, items, groups) 102 self._finalizeContent(group, items, groups)
86 return items + groups 103 return items + groups
104
def _filterIgnored(self, path):
    """ Returns `True` if `path` matches one of the source's ignore
        patterns, and `False` otherwise.

        The path is first made relative to the source's endpoint
        directory. It is then tested against each glob pattern (with
        `fnmatch`), and against each compiled regular expression (with
        `search`).
    """
    rel_path = os.path.relpath(path, self.fs_endpoint_path)
    if any(fnmatch.fnmatch(rel_path, g) for g in self._ignore_globs):
        return True
    # Regular expressions must be tested against the relative path
    # itself, never against a glob pattern.
    return any(r.search(rel_path) for r in self._ignore_regexes)
87 114
88 def _createGroupMetadata(self, path): 115 def _createGroupMetadata(self, path):
89 return {} 116 return {}
90 117
91 def _createItemMetadata(self, path): 118 def _createItemMetadata(self, path):
105 132
106 def getSupportedRouteParameters(self): 133 def getSupportedRouteParameters(self):
107 return [ 134 return [
108 RouteParameter('path', RouteParameter.TYPE_PATH)] 135 RouteParameter('path', RouteParameter.TYPE_PATH)]
109 136
110 def describe(self): 137
111 return {'endpoint_path': self.fs_endpoint_path} 138 def _parse_ignores(patterns):
139 globs = []
140 regexes = []
141 if patterns:
142 for pat in patterns:
143 if len(pat) > 2 and pat[0] == '/' and pat[-1] == '/':
144 regexes.append(re.compile(pat[1:-1]))
145 else:
146 globs.append(pat)
147 return globs, regexes