changeset 62:52e4d9a1f917

Simple importer for PieCrust 1 websites.
author Ludovic Chabant <ludovic@chabant.com>
date Wed, 27 Aug 2014 17:14:44 -0700
parents 64f37c4cce68
children 28958565a17b
files piecrust/importing/base.py piecrust/importing/jekyll.py piecrust/importing/piecrust.py piecrust/pathutil.py piecrust/plugins/builtin.py
diffstat 5 files changed, 142 insertions(+), 33 deletions(-) [+]
line wrap: on
line diff
--- a/piecrust/importing/base.py	Wed Aug 27 10:23:32 2014 -0700
+++ b/piecrust/importing/base.py	Wed Aug 27 17:14:44 2014 -0700
@@ -2,7 +2,7 @@
 import codecs
 import logging
 import yaml
-from piecrust.pathutil import SiteNotFoundError
+from piecrust.pathutil import SiteNotFoundError, multi_fnmatch_filter
 
 
 logger = logging.getLogger(__name__)
@@ -26,12 +26,44 @@
         return 0
 
 
+class FileWalkingImporter(Importer):
+    """Base class for importers that visit every file under a directory.
+
+    Subclasses provide `_importFile(full_fn, rel_fn, ...)`, which
+    `_startWalk` calls once for each file that is not excluded.
+    """
+
+    def setupParser(self, parser, app):
+        """Register the `--exclude` option on `parser`."""
+        parser.add_argument('--exclude', nargs='+',
+                help=("Patterns of files and directories to exclude "
+                      "from the import (always includes `.git*`, "
+                      "`.hg*`, `.svn`, `.bzr`)."))
+
+    def _startWalk(self, root_dir, exclude, *args, **kwargs):
+        """Walk `root_dir` and hand every kept file to `_importFile`.
+
+        `exclude` is a list of `fnmatch` patterns (or `None`); a fixed set
+        of version-control patterns is always added to it. Patterns are
+        matched against paths relative to `root_dir`. Extra positional and
+        keyword arguments are forwarded unchanged to `_importFile`.
+        """
+        # Build a new list instead of using `+=`, which would append the
+        # built-in patterns to the caller's own list (e.g. `args.exclude`).
+        exclude = list(exclude or []) + ['.git*', '.hg*', '.svn', '.bzr']
+
+        for dirpath, dirnames, filenames in os.walk(root_dir):
+            rel_dirpath = os.path.relpath(dirpath, root_dir)
+            if rel_dirpath == '.':
+                rel_dirpath = ''
+
+            # Assigning through the slice prunes `dirnames` in place, so
+            # `os.walk` does not descend into excluded directories.
+            dirnames[:] = multi_fnmatch_filter(
+                    dirnames, exclude,
+                    modifier=lambda d: os.path.join(rel_dirpath, d),
+                    inverse=True)
+            filenames = multi_fnmatch_filter(
+                    filenames, exclude,
+                    modifier=lambda f: os.path.join(rel_dirpath, f),
+                    inverse=True)
+
+            for fn in filenames:
+                full_fn = os.path.join(dirpath, fn)
+                rel_fn = os.path.join(rel_dirpath, fn)
+                self._importFile(full_fn, rel_fn, *args, **kwargs)
+
+
 def create_page(app, endpoint_dir, slug, metadata, content):
     path = os.path.join(app.root_dir, endpoint_dir, slug)
     logging.debug("Creating page: %s" % os.path.relpath(path, app.root_dir))
     header = yaml.dump(metadata)
     os.makedirs(os.path.dirname(path), 0o755, True)
-    with codecs.open(path, 'w', 'utf8') as fp:
+    with codecs.open(path, 'w', encoding='utf8') as fp:
         fp.write("---\n")
         fp.write(header)
         fp.write("---\n")
--- a/piecrust/importing/jekyll.py	Wed Aug 27 10:23:32 2014 -0700
+++ b/piecrust/importing/jekyll.py	Wed Aug 27 17:14:44 2014 -0700
@@ -5,54 +5,46 @@
 import yaml
 import logging
 from piecrust.configuration import parse_config_header
-from piecrust.importing.base import Importer
+from piecrust.importing.base import FileWalkingImporter
 from piecrust.uriutil import multi_replace
 
 
 logger = logging.getLogger(__name__)
 
 
-class JekyllImporter(Importer):
+class JekyllImporter(FileWalkingImporter):
     def __init__(self):
         super(JekyllImporter, self).__init__()
         self.name = 'jekyll'
         self.description = "Imports content from a Jekyll or Octopress blog."
 
     def setupParser(self, parser, app):
+        super(JekyllImporter, self).setupParser(parser, app)
         parser.add_argument('root_dir',
                 help="The root directory of the Jekyll or Octopress website.")
 
     def importWebsite(self, app, args):
         logger.debug("Importing Jekyll site from: %s" % args.root_dir)
-        for dirpath, dirnames, filenames in os.walk(args.root_dir):
-            dirnames[:] = list(filter(lambda i: not i[0] == '.', dirnames))
-            for fn in filenames:
-                if fn[0] == '.':
-                    continue
-
-                full_fn = os.path.join(dirpath, fn)
-                rel_fn = os.path.relpath(full_fn, args.root_dir)
-                if rel_fn.startswith('.' + os.sep):
-                    rel_fn = fn
+        self._startWalk(args.root_dir, args.exclude, app)
+        logger.info("The Jekyll website was successfully imported.")
 
-                logger.debug("- %s" % rel_fn)
-                if rel_fn == '_config.yml':
-                    self.convertConfig(app, full_fn)
-                elif rel_fn.startswith('_layouts'):
-                    self.convertLayout(app, full_fn, rel_fn[len('_layouts/'):])
-                elif rel_fn.startswith('_includes'):
-                    self.convertInclude(app, full_fn, rel_fn[len('_includes/'):])
-                elif rel_fn.startswith('_posts'):
-                    self.convertPost(app, full_fn, rel_fn[len('_posts/'):])
-                else:
-                    with open(full_fn, 'rb') as fp:
-                        firstline = fp.read(3)
-                    if firstline == '---':
-                        self.convertPage(app, full_fn, rel_fn)
-                    else:
-                        self.convertStatic(app, full_fn, rel_fn)
-
-        logger.info("The Jekyll website was successfully imported.")
+    def _importFile(self, full_fn, rel_fn, app):
+        """Dispatch one file of the Jekyll site to the matching converter.
+
+        `full_fn` is the path used to open the file, `rel_fn` is its path
+        relative to the imported site's root, and `app` is passed through
+        to the `convert*` methods.
+        """
+        logger.debug("- %s" % rel_fn)
+        if rel_fn == '_config.yml':
+            self.convertConfig(app, full_fn)
+        elif rel_fn.startswith('_layouts'):
+            self.convertLayout(app, full_fn, rel_fn[len('_layouts/'):])
+        elif rel_fn.startswith('_includes'):
+            self.convertInclude(app, full_fn, rel_fn[len('_includes/'):])
+        elif rel_fn.startswith('_posts'):
+            self.convertPost(app, full_fn, rel_fn[len('_posts/'):])
+        else:
+            # The file is read in binary mode, so the leading `---` marker
+            # must be compared as bytes: on Python 3 a `bytes` value never
+            # equals the `str` '---', which would send every page down the
+            # static-file branch.
+            with open(full_fn, 'rb') as fp:
+                firstline = fp.read(3)
+            if firstline == b'---':
+                self.convertPage(app, full_fn, rel_fn)
+            else:
+                self.convertStatic(app, full_fn, rel_fn)
 
     def convertConfig(self, app, src_path):
         logger.debug("  Converting configuration file.")
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/piecrust/importing/piecrust.py	Wed Aug 27 17:14:44 2014 -0700
@@ -0,0 +1,66 @@
+import os
+import os.path
+import re
+import shutil
+import logging
+from piecrust.importing.base import FileWalkingImporter
+
+
+logger = logging.getLogger(__name__)
+
+
+class PieCrust1Importer(FileWalkingImporter):
+    """Importer that copies a PieCrust 1 website into the current site."""
+
+    def __init__(self):
+        super(PieCrust1Importer, self).__init__()
+        self.name = 'piecrust1'
+        self.description = "Imports content from a PieCrust 1 website."
+
+    def setupParser(self, parser, app):
+        """Add the `root_dir` argument on top of the inherited options."""
+        super(PieCrust1Importer, self).setupParser(parser, app)
+        parser.add_argument('root_dir',
+                help="The root directory of the PieCrust 1 website.")
+
+    def importWebsite(self, app, args):
+        """Import every file under `args.root_dir` into `app.root_dir`."""
+        logger.debug("Importing PieCrust 1 site from: %s" % args.root_dir)
+        # Copy rather than `+=` so the parsed `args.exclude` list is not
+        # modified as a side effect.
+        exclude = list(args.exclude or []) + ['_cache', '_counter']
+        self._startWalk(args.root_dir, exclude, app)
+        logger.info("The PieCrust website was successfully imported.")
+
+    def _importFile(self, full_fn, rel_fn, app):
+        """Copy or convert one source file into the destination site.
+
+        `_content/config.yml` becomes `config.yml`; other files under
+        `_content/` keep their path below that directory and are run
+        through `convertPage` unless their directory name ends with
+        `-assets`; everything else is copied under `assets/`. When a
+        conversion changes the text, the original is also copied next to
+        the result with an `.orig` suffix.
+        """
+        logger.debug("- %s" % rel_fn)
+        # Test against a forward-slash form of the path so the checks
+        # behave the same whatever separator `os.path.join` produced.
+        norm_fn = rel_fn.replace('\\', '/')
+        convert_func = None
+        if norm_fn == '_content/config.yml':
+            dest_path = 'config.yml'
+            convert_func = self.convertConfig
+        elif norm_fn.startswith('_content/'):
+            # Requiring the trailing slash keeps siblings such as
+            # `_content_old/...`, or a root file named `_content`, out of
+            # this branch, where stripping the prefix would mangle them.
+            dest_path = rel_fn[len('_content/'):]
+            fn_dirname = os.path.dirname(rel_fn)
+            if not fn_dirname.endswith('-assets'):
+                convert_func = self.convertPage
+        else:
+            dest_path = 'assets/' + rel_fn
+
+        logger.debug("  %s -> %s" % (rel_fn, dest_path))
+        full_dest_path = os.path.join(app.root_dir, dest_path)
+        os.makedirs(os.path.dirname(full_dest_path), 0o755, exist_ok=True)
+        if convert_func is None:
+            shutil.copy2(full_fn, full_dest_path)
+        else:
+            with open(full_fn, 'r', encoding='utf8') as fp:
+                content = fp.read()
+            converted_content = convert_func(content)
+            with open(full_dest_path, 'w', encoding='utf8') as fp:
+                fp.write(converted_content)
+            if converted_content != content:
+                logger.warning("'%s' has been modified. The original version "
+                               "has been kept for reference." % rel_fn)
+                shutil.copy2(full_fn, full_dest_path + '.orig')
+
+    def convertConfig(self, content):
+        """Return the converted configuration text (currently unchanged)."""
+        return content
+
+    def convertPage(self, content):
+        """Return the converted page text (currently unchanged)."""
+        return content
+
--- a/piecrust/pathutil.py	Wed Aug 27 10:23:32 2014 -0700
+++ b/piecrust/pathutil.py	Wed Aug 27 17:14:44 2014 -0700
@@ -1,6 +1,7 @@
 import re
 import os
 import os.path
+import fnmatch
 
 
 re_terminal_path = re.compile(r'[/\\]|(\w\:)')
@@ -30,3 +31,19 @@
             raise SiteNotFoundError(cwd)
     return cwd
 
+
+def multi_fnmatch_filter(names, patterns, modifier=None, inverse=True):
+    """Filter `names` against several `fnmatch` patterns at once.
+
+    Each name is passed through `modifier` first, when one is given. With
+    `inverse` set (the default) the names matching no pattern are returned;
+    otherwise the names matching at least one pattern are returned.
+    """
+    want_match = not inverse
+    kept = []
+    for name in names:
+        candidate = modifier(name) if modifier else name
+        if any(fnmatch.fnmatch(candidate, p) for p in patterns) == want_match:
+            kept.append(name)
+    return kept
+
--- a/piecrust/plugins/builtin.py	Wed Aug 27 10:23:32 2014 -0700
+++ b/piecrust/plugins/builtin.py	Wed Aug 27 17:14:44 2014 -0700
@@ -8,6 +8,7 @@
 from piecrust.data.provider import (IteratorDataProvider, BlogDataProvider)
 from piecrust.formatting.markdownformatter import MarkdownFormatter
 from piecrust.importing.jekyll import JekyllImporter
+from piecrust.importing.piecrust import PieCrust1Importer
 from piecrust.plugins.base import PieCrustPlugin
 from piecrust.processing.base import CopyFileProcessor
 from piecrust.processing.less import LessProcessor
@@ -70,5 +71,6 @@
 
     def getImporters(self):
         return [
-                JekyllImporter()]
+                JekyllImporter(),
+                PieCrust1Importer()]