Mercurial > piecrust2
changeset 60:6e60e0fef2be
Add `import` command, Jekyll importer.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Tue, 26 Aug 2014 23:20:48 -0700 |
parents | e3e3de44377c |
children | 64f37c4cce68 |
files | piecrust/commands/builtin/util.py piecrust/importing/__init__.py piecrust/importing/base.py piecrust/importing/jekyll.py piecrust/plugins/builtin.py |
diffstat | 4 files changed, 350 insertions(+), 1 deletions(-) [+] |
line wrap: on
line diff
# File: piecrust/commands/builtin/util.py


class ImportCommand(ChefCommand):
    """Chef command that imports content from another CMS into PieCrust.

    Each importer returned by ``app.plugin_loader.getImporters()`` is
    exposed as its own sub-command (e.g. ``import jekyll <args>``).
    """

    def __init__(self):
        super(ImportCommand, self).__init__()
        self.name = 'import'
        self.description = "Imports content from another CMS into PieCrust."

    def setupParser(self, parser, app):
        """Register one argparse sub-parser per available importer.

        Each sub-parser stores the importer's ``checkedImportWebsite``
        bound method as ``sub_func`` so that `run` can dispatch to it.
        """
        subparsers = parser.add_subparsers(dest='importer')
        # Sub-commands are optional by default on Python 3; without this,
        # running the command with no importer name would reach `run`
        # with no `sub_func` attribute and fail with an AttributeError
        # instead of a usage message.
        subparsers.required = True
        for importer in app.plugin_loader.getImporters():
            sub_parser = subparsers.add_parser(
                importer.name, help=importer.description)
            importer.setupParser(sub_parser, app)
            sub_parser.set_defaults(sub_func=importer.checkedImportWebsite)

    def run(self, ctx):
        """Run the importer selected on the command line.

        Returns whatever the importer's entry point returns, so its exit
        code is no longer dropped.
        """
        return ctx.args.sub_func(ctx)
# File: piecrust/importing/base.py
import os.path
import codecs
import logging
import yaml
from piecrust.pathutil import SiteNotFoundError


logger = logging.getLogger(__name__)


class Importer(object):
    """Base class for importers that convert another CMS' website.

    Subclasses set `name` and `description`, and implement `setupParser`
    and `importWebsite`.
    """

    def __init__(self):
        # Sub-command name and help text; both are read by the `import`
        # command when it registers this importer's argument sub-parser.
        self.name = None
        self.description = None

    def setupParser(self, parser, app):
        """Add this importer's command line arguments to `parser`."""
        raise NotImplementedError()

    def importWebsite(self, app, args):
        """Import the website described by the parsed `args` into `app`."""
        raise NotImplementedError()

    def checkedImportWebsite(self, ctx):
        """Run `importWebsite` after checking a target website exists.

        Raises:
            SiteNotFoundError: if ``ctx.app.root_dir`` is None.

        Returns:
            0 once the import has completed.
        """
        if ctx.app.root_dir is None:
            raise SiteNotFoundError()
        self.importWebsite(ctx.app, ctx.args)
        return 0


def create_page(app, endpoint_dir, slug, metadata, content):
    """Write a page file made of a YAML header followed by `content`.

    The file is written to ``<app.root_dir>/<endpoint_dir>/<slug>`` as
    UTF-8; missing parent directories are created. `metadata` is dumped
    as YAML between two ``---`` lines.
    """
    path = os.path.join(app.root_dir, endpoint_dir, slug)
    # Use the module logger (not the root `logging` functions) so the
    # message honours this module's logging configuration.
    logger.debug("Creating page: %s", os.path.relpath(path, app.root_dir))
    header = yaml.dump(metadata)
    os.makedirs(os.path.dirname(path), 0o755, True)
    with codecs.open(path, 'w', 'utf8') as fp:
        fp.write("---\n")
        fp.write(header)
        fp.write("---\n")
        fp.write(content)
# File: piecrust/importing/jekyll.py
import os
import os.path
import re
import shutil
import yaml
import logging
from piecrust.configuration import parse_config_header
from piecrust.importing.base import Importer
from piecrust.uriutil import multi_replace


logger = logging.getLogger(__name__)


# Jekyll's named permalink styles, expanded to their token templates.
_PERMALINK_STYLES = {
    'date': '/:categories/:year/:month/:day/:title.html',
    'pretty': '/:categories/:year/:month/:day/:title/',
    'none': '/:categories/:title.html'}

# Jekyll permalink tokens and what they become in `site/post_url`.
# TODO: handle `:categories` token.
_PERMALINK_TOKENS = {
    ':year': '%year%', ':month': '%month%', ':day': '%day%',
    ':title': '%slug%', ':categories': ''}

# Extensions (as returned by `os.path.splitext`, i.e. with the leading
# dot) of files that are converted as pages rather than static assets.
_PAGE_EXTENSIONS = ('.htm', '.html', '.textile', '.markdown')

# Top-level Jekyll directories that get a dedicated conversion, mapped to
# the name of the `JekyllImporter` method handling their files.
_SPECIAL_DIRS = {
    '_layouts': 'convertLayout',
    '_includes': 'convertInclude',
    '_posts': 'convertPost'}

_INDEX_PAGE_RE = re.compile(r'^index\.(html?|textile|markdown)$')
_POST_NAME_RE = re.compile(r'(\d{4}\-\d{2}\-\d{2})\-(.*)$')
_CONTENT_TAG_RE = re.compile(r'{{\s*content\s*}}')
_CONTENT_TAG_BLOCK = '{% block jekyllcontent %}{{ content }}{% endblock %}'

# Ordered Liquid -> Jinja rewrites, as (pattern, replacement) pairs. They
# are applied one after the other, so the order matters.
_LIQUID_TO_JINJA = [(re.compile(p), r) for p, r in (
    # - list of posts
    (r'(?<=\{%|\{\{)([^\}]*)site\.posts', r'\1blog.posts'),
    (r'(?<=\{%|\{\{)([^\}]*)paginator\.posts', r'\1pagination.posts'),
    # - list of categories or tags
    (r'(?<=\{%|\{\{)([^\}]*)site\.categories', r'\1blog.categories'),
    (r'(?<=\{%|\{\{)([^\}]*)site\.tags', r'\1blog.tags'),
    # - list of related posts (the pattern needs the same capture group
    #   as the ones above, since the replacement refers to group 1)
    (r'(?<=\{%|\{\{)([^\}]*)site\.related_posts',
     r'\1pagination.related_posts'),
    # - enumeration limits
    (r'{%\s*for\s+([^}]+)\s+limit\:\s*(\d+)', r'{% for \1[:\2]'),
    (r'{%\s*for\s+([^}]+)\s+offset\:\s*(\d+)', r'{% for \1[\2:]'),
    # - code highlighting
    (r'{%\s*highlight\s+([\w\d]+)\s*%}', r"{% geshi '\1' %}"),
    (r'{%\s*endhighlight\s*%}', r'{% endgeshi %}'),
    # - unless tag
    (r'{%\s*unless\s+([^}]+)\s*%}', r'{% if not \1 %}'),
    (r'{%\s*endunless\s*%}', r'{% endif %}'),
    # - variable assignment
    (r'\{%\s*assign\s+', r'{% set '),
    # - include tag
    (r'\{%\s*include\s+([\w\d\.\-_]+)\s*%}', r'{% include "\1" %}'),
    # - truncate filter
    (r'\|\s*truncate\:\s*(\d+)', r'|truncate(\1)'),
    # - date filter
    (r'\|\s*date\:\s*"([^"]+)"', r'|date("\1")'),
    # - some filters we don't need
    (r'\|\s*date_to_string', ''),
)]


class JekyllImporter(Importer):
    """Importer that converts a Jekyll or Octopress website.

    The source tree is walked and each file is converted into the target
    website's `config.yml`, `layouts`, `includes`, `posts`, `pages` or
    `assets`, depending on where it lives and whether it starts with a
    `---` front matter marker.
    """

    def __init__(self):
        super(JekyllImporter, self).__init__()
        self.name = 'jekyll'
        self.description = "Imports content from a Jekyll or Octopress blog."

    def setupParser(self, parser, app):
        """Declare the `root_dir` positional argument."""
        parser.add_argument(
            'root_dir',
            help="The root directory of the Jekyll or Octopress website.")

    def importWebsite(self, app, args):
        """Convert every non-hidden file found under `args.root_dir`."""
        logger.debug("Importing Jekyll site from: %s", args.root_dir)
        for dirpath, dirnames, filenames in os.walk(args.root_dir):
            # Prune hidden directories in place so `os.walk` skips them.
            dirnames[:] = [d for d in dirnames if not d.startswith('.')]
            for fn in filenames:
                if fn.startswith('.'):
                    continue

                full_fn = os.path.join(dirpath, fn)
                rel_fn = os.path.relpath(full_fn, args.root_dir)
                logger.debug("- %s", rel_fn)
                self._convertFile(app, full_fn, rel_fn)

        logger.info("The Jekyll website was successfully imported.")

    def _convertFile(self, app, full_fn, rel_fn):
        """Dispatch one source file to the matching `convert*` method."""
        if rel_fn == '_config.yml':
            self.convertConfig(app, full_fn)
            return

        # Match on the exact first path component, so that e.g. a
        # `_layouts_old` directory is not mistaken for `_layouts`.
        top_dir, _, sub_path = rel_fn.partition(os.sep)
        if sub_path and top_dir in _SPECIAL_DIRS:
            getattr(self, _SPECIAL_DIRS[top_dir])(app, full_fn, sub_path)
        elif self._hasFrontMatter(full_fn):
            self.convertPage(app, full_fn, rel_fn)
        else:
            self.convertStatic(app, full_fn, rel_fn)

    def _hasFrontMatter(self, path):
        """Tell whether the file at `path` starts with `---`."""
        # The file is opened in binary mode (it may not be text at all),
        # so the marker must be compared as bytes.
        with open(path, 'rb') as fp:
            return fp.read(3) == b'---'

    def convertConfig(self, app, src_path):
        """Convert Jekyll's `_config.yml` into the website's `config.yml`.

        The loaded settings are kept, and `site`, `baker` and `jinja`
        settings are added or overridden on top of them.
        """
        logger.debug(" Converting configuration file.")
        with open(src_path, 'r', encoding='utf8') as fp:
            # `safe_load` only builds plain YAML types; an empty file
            # loads as None, hence the fallback.
            config = yaml.safe_load(fp) or {}

        site = config.setdefault('site', {})
        site['related_posts'] = []
        site['posts_fs'] = 'flat'
        site['templates_dirs'] = ['includes', 'layouts']
        site['tag_url'] = 'tags/%tag%'
        if 'permalink' in config:
            permalink = config['permalink']
            # Expand named styles; anything else is used as a template.
            permalink = _PERMALINK_STYLES.get(permalink, permalink)
            post_url = multi_replace(permalink, _PERMALINK_TOKENS)
            # Dropping `:categories` can leave doubled slashes behind.
            post_url = post_url.replace('//', '/').strip('/')
            site['post_url'] = post_url
        if 'exclude' in config:
            excluded = config['exclude']
            if isinstance(excluded, str):
                excluded = [excluded]
            baker = config.setdefault('baker', {})
            baker['skip_patterns'] = [
                '^/_%s/' % re.escape(i) for i in excluded]
        config.setdefault('jinja', {})['auto_escape'] = False
        if 'markdown' in config and not isinstance(config['markdown'], dict):
            logger.warning(
                "Discarding markdown setting: %s", config['markdown'])
            del config['markdown']

        out_path = os.path.join(app.root_dir, 'config.yml')
        with open(out_path, 'w', encoding='utf8') as fp:
            yaml.dump(config, stream=fp)

    def convertPage(self, app, path, rel_path):
        """Convert a file with a front matter into a page.

        Files whose extension is not a page extension are forwarded to
        `convertStatic` with their header stripped. A root `index` page
        becomes `_index` and is also copied as the `_tag` page.
        """
        logger.debug(" Converting page: %s", rel_path)
        _, ext = os.path.splitext(rel_path)
        if ext not in _PAGE_EXTENSIONS:
            # There could be static files (SCSS or Less files) that look
            # like pages because they have a YAML front matter.
            logger.debug(" Actually a static file... forwarding conversion.")
            self.convertStatic(app, path, rel_path, True)
            return

        pages_dir = os.path.join(app.root_dir, 'pages')
        is_index = _INDEX_PAGE_RE.search(rel_path) is not None
        if is_index:
            out_path = os.path.join(pages_dir, '_index' + ext)
        else:
            out_path = os.path.join(pages_dir, rel_path)

        self._doConvertPage(app, path, out_path)
        if is_index:
            # `ext` already includes the leading dot.
            shutil.copy2(out_path, os.path.join(pages_dir, '_tag' + ext))

    def convertPost(self, app, path, rel_path):
        """Convert a post, renaming `YYYY-MM-DD-slug` to `YYYY-MM-DD_slug`."""
        logger.debug(" Converting post: %s", rel_path)
        out_name = _POST_NAME_RE.sub(r'\1_\2', rel_path)
        out_path = os.path.join(app.root_dir, 'posts', out_name)
        self._doConvertPage(app, path, out_path)

    def convertLayout(self, app, path, rel_path):
        """Convert a layout into a template under `layouts`."""
        logger.debug(" Converting layout: %s", rel_path)
        out_path = os.path.join(app.root_dir, 'layouts', rel_path)
        self._doConvertPage(app, path, out_path, True)

    def convertInclude(self, app, path, rel_path):
        """Convert an include into a template under `includes`."""
        logger.debug(" Converting include: %s", rel_path)
        out_path = os.path.join(app.root_dir, 'includes', rel_path)
        self._doConvertPage(app, path, out_path, True)

    def convertStatic(self, app, path, rel_path, strip_header=False):
        """Copy a file under `assets`.

        With `strip_header`, the file is read as UTF-8 text and written
        back without its configuration header; otherwise it is copied
        as-is with `shutil.copy2`.
        """
        logger.debug(" Converting static: %s", rel_path)
        out_path = os.path.join(app.root_dir, 'assets', rel_path)
        logger.debug(" %s -> %s", path, out_path)
        os.makedirs(os.path.dirname(out_path), 0o755, True)

        if not strip_header:
            shutil.copy2(path, out_path)
            return

        with open(path, 'r', encoding='utf8') as fp:
            content = fp.read()
        _, offset = parse_config_header(content)
        with open(out_path, 'w', encoding='utf8') as fp:
            fp.write(content[offset:])

    def _doConvertPage(self, app, path, out_path, is_template=False):
        """Convert the Liquid markup of `path` and write it to `out_path`.

        Templates are written without a header; other files get their
        configuration header dumped back as YAML between `---` lines.
        When the text was changed, the source file is also copied to
        `<out_path>.orig` as a backup.
        """
        logger.debug(" %s -> %s", path, out_path)
        os.makedirs(os.path.dirname(out_path), 0o755, True)

        with open(path, 'r', encoding='utf8') as fp:
            contents = fp.read()

        config, offset = parse_config_header(contents)
        text = contents[offset:]
        text_before = text

        wrap_content_tag = True

        if is_template:
            if 'layout' in config:
                # Liquid doesn't support template inheritance but
                # Jinja does.
                text = ("{%% extends '%s.html' %%}\n\n"
                        "{%% block jekyllcontent %%}\n"
                        "%s\n"
                        "{%% endblock %%}\n" % (config['layout'], text))
                wrap_content_tag = False
        elif 'layout' in config and config['layout'] == 'nil':
            config['layout'] = 'none'

        # Convert the template stuff we can:
        # - content tag may have to be wrapped in a `jekyllcontent`
        #   because Jekyll uses implicit layout inheritance
        #   placements.
        if wrap_content_tag:
            text = _CONTENT_TAG_RE.sub(_CONTENT_TAG_BLOCK, text)
        # - everything else is a plain ordered list of substitutions.
        for pattern, replacement in _LIQUID_TO_JINJA:
            text = pattern.sub(replacement, text)

        if text != text_before:
            # We changed the text, so create a backup.
            shutil.copy2(path, '%s.orig' % out_path)

        with open(out_path, 'w', encoding='utf8') as fp:
            if not is_template:
                fp.write("---\n")
                fp.write(yaml.dump(config))
                fp.write("---\n")
            fp.write(text)
--- a/piecrust/plugins/builtin.py Tue Aug 26 23:20:19 2014 -0700 +++ b/piecrust/plugins/builtin.py Tue Aug 26 23:20:48 2014 -0700 @@ -1,11 +1,13 @@ +from piecrust.commands.base import HelpCommand from piecrust.commands.builtin.baking import (BakeCommand, ShowRecordCommand) from piecrust.commands.builtin.info import (RootCommand, ShowConfigCommand, FindCommand, ShowRoutesCommand, ShowPathsCommand) from piecrust.commands.builtin.serving import (ServeCommand) from piecrust.commands.builtin.util import (InitCommand, PurgeCommand, - PrepareCommand) + PrepareCommand, ImportCommand) from piecrust.data.provider import (IteratorDataProvider, BlogDataProvider) from piecrust.formatting.markdownformatter import MarkdownFormatter +from piecrust.importing.jekyll import JekyllImporter from piecrust.plugins.base import PieCrustPlugin from piecrust.processing.base import CopyFileProcessor from piecrust.processing.less import LessProcessor @@ -24,6 +26,8 @@ def getCommands(self): return [ InitCommand(), + ImportCommand(), + HelpCommand(), RootCommand(), PurgeCommand(), ShowConfigCommand(), @@ -64,3 +68,7 @@ LessProcessor(), SitemapProcessor()] + def getImporters(self): + return [ + JekyllImporter()] +