changeset 60:6e60e0fef2be

Add `import` command, Jekyll importer.
author Ludovic Chabant <ludovic@chabant.com>
date Tue, 26 Aug 2014 23:20:48 -0700
parents e3e3de44377c
children 64f37c4cce68
files piecrust/commands/builtin/util.py piecrust/importing/__init__.py piecrust/importing/base.py piecrust/importing/jekyll.py piecrust/plugins/builtin.py
diffstat 4 files changed, 350 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/piecrust/commands/builtin/util.py	Tue Aug 26 23:20:19 2014 -0700
+++ b/piecrust/commands/builtin/util.py	Tue Aug 26 23:20:48 2014 -0700
@@ -114,3 +114,20 @@
             f.write('---\n')
             f.write("This is a new page!\n")
 
+
+class ImportCommand(ChefCommand):
+    """Command that imports content from another CMS into PieCrust.
+
+    Each importer returned by `app.plugin_loader.getImporters()` is exposed
+    as a sub-command named after the importer (e.g. `import jekyll ...`).
+    """
+
+    def __init__(self):
+        super(ImportCommand, self).__init__()
+        self.name = 'import'
+        self.description = "Imports content from another CMS into PieCrust."
+
+    def setupParser(self, parser, app):
+        """Add one sub-parser per available importer to `parser`.
+
+        Every sub-parser stores the importer's `checkedImportWebsite` bound
+        method as the `sub_func` default, which `run` later invokes.
+        """
+        # NOTE(review): if the parser lets the user omit the sub-command,
+        # `sub_func` will be missing from the parsed arguments and `run`
+        # will fail with an AttributeError -- confirm how the caller
+        # handles that case.
+        subparsers = parser.add_subparsers()
+        for importer in app.plugin_loader.getImporters():
+            sub_parser = subparsers.add_parser(
+                    importer.name, help=importer.description)
+            importer.setupParser(sub_parser, app)
+            sub_parser.set_defaults(sub_func=importer.checkedImportWebsite)
+
+    def run(self, ctx):
+        """Run the importer selected on the command line.
+
+        Returns whatever the importer's entry point returns
+        (`Importer.checkedImportWebsite` returns 0 on success) instead of
+        silently dropping it.
+        """
+        return ctx.args.sub_func(ctx)
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/piecrust/importing/base.py	Tue Aug 26 23:20:48 2014 -0700
@@ -0,0 +1,39 @@
+import os.path
+import codecs
+import logging
+import yaml
+from piecrust.pathutil import SiteNotFoundError
+
+
+logger = logging.getLogger(__name__)
+
+
+class Importer(object):
+    """Base class for website importers.
+
+    Concrete importers set `name` and `description` (used by
+    `ImportCommand` as the sub-command name and help text) and override
+    `setupParser` and `importWebsite`.
+    """
+
+    def __init__(self):
+        # Both attributes are left empty here; sub-classes fill them in.
+        self.description = None
+        self.name = None
+
+    def setupParser(self, parser, app):
+        """Register the importer's command-line arguments on `parser`."""
+        raise NotImplementedError()
+
+    def importWebsite(self, app, args):
+        """Import a website into `app`, driven by the parsed `args`."""
+        raise NotImplementedError()
+
+    def checkedImportWebsite(self, ctx):
+        """Run `importWebsite` once a website root is known to exist.
+
+        Raises `SiteNotFoundError` when `ctx.app.root_dir` is `None`;
+        otherwise performs the import and returns 0.
+        """
+        has_root = ctx.app.root_dir is not None
+        if not has_root:
+            raise SiteNotFoundError()
+        self.importWebsite(ctx.app, ctx.args)
+        return 0
+
+
+def create_page(app, endpoint_dir, slug, metadata, content):
+    """Write a page made of a YAML header followed by `content`.
+
+    The file is created at `<app.root_dir>/<endpoint_dir>/<slug>`, and any
+    missing parent directory is created first. `metadata` is serialized
+    with `yaml.dump` and enclosed between two `---` lines; `content` is
+    written right after, encoded as UTF-8.
+    """
+    path = os.path.join(app.root_dir, endpoint_dir, slug)
+    # Go through the module-level logger rather than the root logger so the
+    # message is attributed to this module like the rest of its logging.
+    logger.debug("Creating page: %s", os.path.relpath(path, app.root_dir))
+    header = yaml.dump(metadata)
+    os.makedirs(os.path.dirname(path), mode=0o755, exist_ok=True)
+    with codecs.open(path, 'w', 'utf8') as fp:
+        fp.write("---\n")
+        fp.write(header)
+        fp.write("---\n")
+        fp.write(content)
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/piecrust/importing/jekyll.py	Tue Aug 26 23:20:48 2014 -0700
@@ -0,0 +1,285 @@
+import os
+import os.path
+import re
+import shutil
+import yaml
+import logging
+from piecrust.configuration import parse_config_header
+from piecrust.importing.base import Importer
+from piecrust.uriutil import multi_replace
+
+
+logger = logging.getLogger(__name__)
+
+
+class JekyllImporter(Importer):
+    """Importer for Jekyll or Octopress websites.
+
+    Walks the source website and converts its configuration file, layouts,
+    includes, posts and pages; every other file is copied as a static
+    asset. Liquid template syntax is rewritten to Jinja where possible.
+    """
+
+    # Top-level source directories needing a dedicated conversion, mapped
+    # to the name of the method that handles their files.
+    _SPECIAL_DIRS = {
+        '_layouts': 'convertLayout',
+        '_includes': 'convertInclude',
+        '_posts': 'convertPost'}
+
+    # Extensions (without the leading dot) of the files converted as pages.
+    _PAGE_EXTENSIONS = ('htm', 'html', 'textile', 'markdown')
+
+    # Liquid -> Jinja rewrites as `(pattern, replacement)` pairs, applied in
+    # this order to the text of every converted file. Raw strings keep the
+    # regex escapes intact.
+    _TEMPLATE_REWRITES = (
+        # - list of posts
+        (r'(?<=\{%|\{\{)([^\}]*)site\.posts', r'\1blog.posts'),
+        (r'(?<=\{%|\{\{)([^\}]*)paginator\.posts', r'\1pagination.posts'),
+        # - list of categories or tags
+        (r'(?<=\{%|\{\{)([^\}]*)site\.categories', r'\1blog.categories'),
+        (r'(?<=\{%|\{\{)([^\}]*)site\.tags', r'\1blog.tags'),
+        # - list of related posts (the group is required by `\1` below)
+        (r'(?<=\{%|\{\{)([^\}]*)site\.related_posts',
+            r'\1pagination.related_posts'),
+        # - enumeration limits
+        (r'{%\s*for\s+([^}]+)\s+limit\:\s*(\d+)', r'{% for \1[:\2]'),
+        (r'{%\s*for\s+([^}]+)\s+offset\:\s*(\d+)', r'{% for \1[\2:]'),
+        # - code highlighting
+        (r'{%\s*highlight\s+([\w\d]+)\s*%}', r"{% geshi '\1' %}"),
+        (r'{%\s*endhighlight\s*%}', '{% endgeshi %}'),
+        # - unless tag
+        (r'{%\s*unless\s+([^}]+)\s*%}', r'{% if not \1 %}'),
+        (r'{%\s*endunless\s*%}', '{% endif %}'),
+        # - variable assignment
+        (r'\{%\s*assign\s+', '{% set '),
+        # - include tag
+        (r'\{%\s*include\s+([\w\d\.\-_]+)\s*%}', r'{% include "\1" %}'),
+        # - truncate filter
+        (r'\|\s*truncate\:\s*(\d+)', r'|truncate(\1)'),
+        # - date filter
+        (r'\|\s*date\:\s*"([^"]+)"', r'|date("\1")'),
+        # - some filters we don't need
+        (r'\|\s*date_to_string', ''),
+    )
+
+    def __init__(self):
+        super(JekyllImporter, self).__init__()
+        self.name = 'jekyll'
+        self.description = "Imports content from a Jekyll or Octopress blog."
+
+    def setupParser(self, parser, app):
+        """Declare the `root_dir` positional argument of the importer."""
+        parser.add_argument('root_dir',
+                help="The root directory of the Jekyll or Octopress website.")
+
+    def importWebsite(self, app, args):
+        """Convert every file found under `args.root_dir`.
+
+        Files and directories whose name starts with a dot are skipped.
+        """
+        logger.debug("Importing Jekyll site from: %s", args.root_dir)
+        for dirpath, dirnames, filenames in os.walk(args.root_dir):
+            # Prune in place so `os.walk` doesn't descend into hidden
+            # directories.
+            dirnames[:] = [d for d in dirnames if not d.startswith('.')]
+            for fn in filenames:
+                if fn.startswith('.'):
+                    continue
+
+                full_fn = os.path.join(dirpath, fn)
+                rel_fn = os.path.relpath(full_fn, args.root_dir)
+                logger.debug("- %s", rel_fn)
+                self._convertFile(app, full_fn, rel_fn)
+
+        logger.info("The Jekyll website was successfully imported.")
+
+    def _convertFile(self, app, full_fn, rel_fn):
+        """Send one source file to the converter matching its location."""
+        if rel_fn == '_config.yml':
+            self.convertConfig(app, full_fn)
+            return
+
+        # Only a real leading directory selects a special converter, so
+        # that e.g. `_layouts_old/x.html` or a file named `_layouts.txt`
+        # isn't mistaken for a layout.
+        top_dir, sep, sub_path = rel_fn.partition(os.sep)
+        if sep and top_dir in self._SPECIAL_DIRS:
+            converter = getattr(self, self._SPECIAL_DIRS[top_dir])
+            converter(app, full_fn, sub_path)
+            return
+
+        # A file starting with the front matter marker is a page. The file
+        # is read in binary mode, so the marker must be compared as bytes.
+        with open(full_fn, 'rb') as fp:
+            marker = fp.read(3)
+        if marker == b'---':
+            self.convertPage(app, full_fn, rel_fn)
+        else:
+            self.convertStatic(app, full_fn, rel_fn)
+
+    def convertConfig(self, app, src_path):
+        """Translate the source `_config.yml` into `<root_dir>/config.yml`."""
+        logger.debug("  Converting configuration file.")
+        with open(src_path, 'r', encoding='utf8') as fp:
+            # NOTE(review): `yaml.load` can construct arbitrary objects;
+            # consider `yaml.safe_load` if imported sites may be untrusted.
+            # An empty file loads as `None`, hence the fallback.
+            config = yaml.load(fp) or {}
+
+        site = config.setdefault('site', {})
+        site['related_posts'] = []
+        site['posts_fs'] = 'flat'
+        site['templates_dirs'] = ['includes', 'layouts']
+        site['tag_url'] = 'tags/%tag%'
+        if 'permalink' in config:
+            # Expand the named permalink styles into explicit templates.
+            permalink = config['permalink']
+            if permalink == 'date':
+                permalink = '/:categories/:year/:month/:day/:title.html'
+            elif permalink == 'pretty':
+                permalink = '/:categories/:year/:month/:day/:title/'
+            elif permalink == 'none':
+                permalink = '/:categories/:title.html'
+
+            # TODO: handle `:categories` token.
+            post_url = multi_replace(
+                    permalink,
+                    {':year': '%year%', ':month': '%month%', ':day': '%day%',
+                        ':title': '%slug%', ':categories': ''})
+            # Dropping `:categories` can leave doubled or dangling slashes.
+            post_url = post_url.replace('//', '/').strip('/')
+            site['post_url'] = post_url
+        if 'exclude' in config:
+            baker = config.setdefault('baker', {})
+            # `exclude` may be present but empty (`None`) in the YAML.
+            baker['skip_patterns'] = [
+                    '^/_%s/' % re.escape(i)
+                    for i in (config['exclude'] or [])]
+        config.setdefault('jinja', {})['auto_escape'] = False
+        if 'markdown' in config and not isinstance(config['markdown'], dict):
+            # Only a dictionary is kept for this setting.
+            logger.warning("Discarding markdown setting: %s",
+                    config['markdown'])
+            del config['markdown']
+
+        out_path = os.path.join(app.root_dir, 'config.yml')
+        with open(out_path, 'w', encoding='utf8') as fp:
+            yaml.dump(config, stream=fp)
+
+    def convertPage(self, app, path, rel_path):
+        """Convert a file with a front matter into a page under `pages`.
+
+        Files whose extension isn't a known page extension are forwarded to
+        `convertStatic` with their header stripped. A top-level index page
+        becomes `_index` and is also duplicated as `_tag`.
+        """
+        logger.debug("  Converting page: %s", rel_path)
+        # `splitext` keeps the leading dot in `ext`.
+        _, ext = os.path.splitext(rel_path)
+        if ext.lstrip('.') not in self._PAGE_EXTENSIONS:
+            # There could be static files (SCSS or Less files) that look
+            # like pages because they have a YAML front matter.
+            logger.debug("  Actually a static file... forwarding conversion.")
+            self.convertStatic(app, path, rel_path, True)
+            return
+
+        pages_dir = os.path.join(app.root_dir, 'pages')
+        is_index = re.search(
+                r'^index\.(html?|textile|markdown)$', rel_path) is not None
+        if is_index:
+            out_path = os.path.join(pages_dir, '_index' + ext)
+        else:
+            out_path = os.path.join(pages_dir, rel_path)
+
+        self._doConvertPage(app, path, out_path)
+        if is_index:
+            shutil.copy2(out_path, os.path.join(pages_dir, '_tag' + ext))
+
+    def convertPost(self, app, path, rel_path):
+        """Convert a post, renaming `YYYY-MM-DD-title` to `YYYY-MM-DD_title`."""
+        logger.debug("  Converting post: %s", rel_path)
+        out_path = re.sub(
+                r'(\d{4}\-\d{2}\-\d{2})\-(.*)$',
+                r'\1_\2',
+                rel_path)
+        out_path = os.path.join(app.root_dir, 'posts', out_path)
+        self._doConvertPage(app, path, out_path)
+
+    def convertLayout(self, app, path, rel_path):
+        """Convert a layout into a template under `layouts`."""
+        logger.debug("  Converting layout: %s", rel_path)
+        out_path = os.path.join(app.root_dir, 'layouts', rel_path)
+        self._doConvertPage(app, path, out_path, True)
+
+    def convertInclude(self, app, path, rel_path):
+        """Convert an include into a template under `includes`."""
+        logger.debug("  Converting include: %s", rel_path)
+        out_path = os.path.join(app.root_dir, 'includes', rel_path)
+        self._doConvertPage(app, path, out_path, True)
+
+    def convertStatic(self, app, path, rel_path, strip_header=False):
+        """Copy a file under `assets`, optionally without its header.
+
+        When `strip_header` is true the file is read as UTF-8 text and only
+        what follows the configuration header is written; otherwise the
+        file is copied as is.
+        """
+        logger.debug("  Converting static: %s", rel_path)
+        out_path = os.path.join(app.root_dir, 'assets', rel_path)
+        logger.debug("  %s -> %s", path, out_path)
+        os.makedirs(os.path.dirname(out_path), mode=0o755, exist_ok=True)
+
+        if not strip_header:
+            shutil.copy2(path, out_path)
+            return
+
+        with open(path, 'r', encoding='utf8') as fp:
+            content = fp.read()
+        # Only the offset of the end of the header is needed here.
+        _, offset = parse_config_header(content)
+        with open(out_path, 'w', encoding='utf8') as fp:
+            fp.write(content[offset:])
+
+    def _doConvertPage(self, app, path, out_path, is_template=False):
+        """Rewrite one page or template from Liquid to Jinja syntax.
+
+        The header is split from the text with `parse_config_header`. For
+        templates (`is_template` true) the header is dropped and a `layout`
+        entry becomes a Jinja `extends`; for pages the header is written
+        back in front of the text. When the text was changed, the source
+        file is also copied next to the output with an `.orig` suffix.
+        """
+        logger.debug("  %s -> %s", path, out_path)
+        os.makedirs(os.path.dirname(out_path), mode=0o755, exist_ok=True)
+
+        with open(path, 'r', encoding='utf8') as fp:
+            contents = fp.read()
+
+        config, offset = parse_config_header(contents)
+        text = contents[offset:]
+        text_before = text
+
+        wrap_content_tag = True
+
+        if is_template:
+            if 'layout' in config:
+                # Liquid doesn't support template inheritance but
+                # Jinja does.
+                text = ("{%% extends '%s.html' %%}\n\n"
+                        "{%% block jekyllcontent %%}\n"
+                        "%s\n"
+                        "{%% endblock %%}\n" % (config['layout'], text))
+                wrap_content_tag = False
+        elif config.get('layout') == 'nil':
+            config['layout'] = 'none'
+
+        # Convert the template stuff we can:
+        # - content tag may have to be wrapped in a `jekyllcontent`
+        #   because Jekyll uses implicit layout inheritance
+        #   placements.
+        if wrap_content_tag:
+            text = re.sub(
+                    r'{{\s*content\s*}}',
+                    r'{% block jekyllcontent %}{{ content }}{% endblock %}',
+                    text)
+        # - everything listed in the rewrite table, in order.
+        for pattern, replacement in self._TEMPLATE_REWRITES:
+            text = re.sub(pattern, replacement, text)
+
+        if text != text_before:
+            # We changed the text, so create a backup.
+            shutil.copy2(path, '%s.orig' % out_path)
+
+        with open(out_path, 'w', encoding='utf8') as fp:
+            if not is_template:
+                fp.write("---\n")
+                fp.write(yaml.dump(config))
+                fp.write("---\n")
+            fp.write(text)
+
--- a/piecrust/plugins/builtin.py	Tue Aug 26 23:20:19 2014 -0700
+++ b/piecrust/plugins/builtin.py	Tue Aug 26 23:20:48 2014 -0700
@@ -1,11 +1,13 @@
+from piecrust.commands.base import HelpCommand
 from piecrust.commands.builtin.baking import (BakeCommand, ShowRecordCommand)
 from piecrust.commands.builtin.info import (RootCommand, ShowConfigCommand,
         FindCommand, ShowRoutesCommand, ShowPathsCommand)
 from piecrust.commands.builtin.serving import (ServeCommand)
 from piecrust.commands.builtin.util import (InitCommand, PurgeCommand,
-        PrepareCommand)
+        PrepareCommand, ImportCommand)
 from piecrust.data.provider import (IteratorDataProvider, BlogDataProvider)
 from piecrust.formatting.markdownformatter import MarkdownFormatter
+from piecrust.importing.jekyll import JekyllImporter
 from piecrust.plugins.base import PieCrustPlugin
 from piecrust.processing.base import CopyFileProcessor
 from piecrust.processing.less import LessProcessor
@@ -24,6 +26,8 @@
     def getCommands(self):
         return [
                 InitCommand(),
+                ImportCommand(),
+                HelpCommand(),
                 RootCommand(),
                 PurgeCommand(),
                 ShowConfigCommand(),
@@ -64,3 +68,7 @@
                 LessProcessor(),
                 SitemapProcessor()]
 
+    def getImporters(self):
+        """Return the importers provided here: a single `JekyllImporter`."""
+        importers = [JekyllImporter()]
+        return importers
+