diff piecrust/sources/fs.py @ 853:f070a4fc033c

core: Continue PieCrust3 refactor, simplify pages. The asset pipeline is still the only function pipeline at this point. * No more `QualifiedPage`, and several other pieces of code deleted. * Data providers are simpler and more focused. For instance, the page iterator doesn't try to support other types of items. * Route parameters are proper known source metadata to remove the confusion between the two. * Make the baker and pipeline more correctly manage records and record histories. * Add support for record collapsing and deleting stale outputs in the asset pipeline.
author Ludovic Chabant <ludovic@chabant.com>
date Sun, 21 May 2017 00:06:59 -0700
parents 4850f8c21b6e
children 08e02c2a2a1a
line wrap: on
line diff
--- a/piecrust/sources/fs.py	Wed May 17 00:11:48 2017 -0700
+++ b/piecrust/sources/fs.py	Sun May 21 00:06:59 2017 -0700
@@ -1,4 +1,6 @@
 import os.path
+import re
+import fnmatch
 import logging
 from piecrust import osutil
 from piecrust.routing import RouteParameter
@@ -28,7 +30,6 @@
         super().__init__(app, name, config)
         self.fs_endpoint = config.get('fs_endpoint', name)
         self.fs_endpoint_path = os.path.join(self.root_dir, self.fs_endpoint)
-        self._fs_filter = None
 
     def _checkFSEndpoint(self):
         if not os.path.isdir(self.fs_endpoint_path):
@@ -38,7 +39,7 @@
                                                  self.fs_endpoint_path)
         return True
 
-    def openItem(self, item, mode='r'):
+    def openItem(self, item, mode='r', encoding=None):
         for m in 'wxa+':
             if m in mode:
                 # If opening the file for writing, let's make sure the
@@ -47,11 +48,14 @@
                 if not os.path.exists(dirname):
                     os.makedirs(dirname, 0o755)
                 break
-        return open(item.spec, mode)
+        return open(item.spec, mode, encoding=encoding)
 
     def getItemMtime(self, item):
         return os.path.getmtime(item.spec)
 
+    def describe(self):  # Reports this source's endpoint directory.
+        return dict(endpoint_path=self.fs_endpoint_path)
+
 
 class FSContentSource(FSContentSourceBase):
     """ Implements a `ContentSource` that simply returns files on disk
@@ -59,6 +63,15 @@
     """
     SOURCE_NAME = 'fs'
 
+    def __init__(self, app, name, config):
+        super().__init__(app, name, config)
+        config.setdefault('data_type', 'asset_iterator')
+
+        # Split the `ignore` setting into glob patterns and compiled regexes.
+        globs, regexes = _parse_ignores(config.get('ignore'))
+        self._ignore_globs = globs
+        self._ignore_regexes = regexes
+
     def getContents(self, group):
         logger.debug("Scanning for content in: %s" % self.fs_endpoint_path)
         if not self._checkFSEndpoint():
@@ -69,12 +82,16 @@
             parent_path = group.spec
 
         names = filter(_filter_crap_files, osutil.listdir(parent_path))
-        if self._fs_filter is not None:
-            names = filter(self._fs_filter, names)
+
+        final_names = []
+        for name in names:
+            path = os.path.join(parent_path, name)
+            if not self._filterIgnored(path):
+                final_names.append(name)
 
         items = []
         groups = []
-        for name in names:
+        for name in final_names:
             path = os.path.join(parent_path, name)
             if os.path.isdir(path):
                 metadata = self._createGroupMetadata(path)
@@ -85,6 +102,16 @@
         self._finalizeContent(group, items, groups)
         return items + groups
 
+    def _filterIgnored(self, path):
+        """ Returns `True` if `path`, taken relative to the endpoint
+            directory, matches any of the ignore globs or regexes.
+        """
+        rel_path = os.path.relpath(path, self.fs_endpoint_path)
+        if any(fnmatch.fnmatch(rel_path, g) for g in self._ignore_globs):
+            return True
+        # Regexes must be run on the path too, not on a glob pattern.
+        return any(r.search(rel_path) for r in self._ignore_regexes)
+
     def _createGroupMetadata(self, path):
         return {}
 
@@ -107,5 +134,14 @@
         return [
             RouteParameter('path', RouteParameter.TYPE_PATH)]
 
-    def describe(self):
-        return {'endpoint_path': self.fs_endpoint_path}
+
+def _parse_ignores(patterns):
+    """ Splits ignore patterns into (globs, compiled `/regex/` patterns). """
+    globs, regexes = [], []
+    # A lone string is a single pattern, not a sequence of characters.
+    for pat in ([patterns] if isinstance(patterns, str) else patterns or ()):
+        if len(pat) > 2 and pat[0] == '/' and pat[-1] == '/':
+            regexes.append(re.compile(pat[1:-1]))
+        else:
+            globs.append(pat)
+    return globs, regexes