# HG changeset patch
# User Ludovic Chabant
# Date 1409184884 25200
# Node ID 52e4d9a1f917c9366d80272e3a0c9c47bbe6afd0
# Parent  64f37c4cce688df59ab7a051d6b2753fc5f69425
Simple importer for PieCrust 1 websites.

diff -r 64f37c4cce68 -r 52e4d9a1f917 piecrust/importing/base.py
--- a/piecrust/importing/base.py	Wed Aug 27 10:23:32 2014 -0700
+++ b/piecrust/importing/base.py	Wed Aug 27 17:14:44 2014 -0700
@@ -2,7 +2,7 @@
 import codecs
 import logging
 import yaml
-from piecrust.pathutil import SiteNotFoundError
+from piecrust.pathutil import SiteNotFoundError, multi_fnmatch_filter
 
 
 logger = logging.getLogger(__name__)
@@ -26,12 +26,57 @@
         return 0
 
 
+class FileWalkingImporter(Importer):
+    """Base class for importers that walk a source directory tree.
+
+    Subclasses implement `_importFile(full_fn, rel_fn, *args, **kwargs)`,
+    which `_startWalk` calls once for every file that is not excluded.
+    """
+    def setupParser(self, parser, app):
+        parser.add_argument('--exclude', nargs='+',
+                help=("Patterns of files and directories to exclude "
+                      "from the import (always includes `.git*`, "
+                      "`.hg*`, `.svn`, `.bzr`)."))
+
+    def _startWalk(self, root_dir, exclude, *args, **kwargs):
+        """Walk `root_dir` and pass each non-excluded file to `_importFile`.
+
+        `exclude` is a list of fnmatch patterns (or None); extra arguments
+        are forwarded to `_importFile` unchanged.
+        """
+        # Build a new list so the caller's list (typically `args.exclude`
+        # from argparse) is not mutated by appending the default patterns.
+        exclude = list(exclude or [])
+        exclude += ['.git*', '.hg*', '.svn', '.bzr']
+
+        for dirpath, dirnames, filenames in os.walk(root_dir):
+            rel_dirpath = os.path.relpath(dirpath, root_dir)
+            if rel_dirpath == '.':
+                rel_dirpath = ''
+
+            # Patterns are tested against the path relative to `root_dir`.
+            # Assigning to `dirnames[:]` prunes the walk in place.
+            dirnames[:] = multi_fnmatch_filter(
+                    dirnames, exclude,
+                    modifier=lambda d: os.path.join(rel_dirpath, d),
+                    inverse=True)
+            filenames = multi_fnmatch_filter(
+                    filenames, exclude,
+                    modifier=lambda f: os.path.join(rel_dirpath, f),
+                    inverse=True)
+
+            for fn in filenames:
+                full_fn = os.path.join(dirpath, fn)
+                rel_fn = os.path.join(rel_dirpath, fn)
+                self._importFile(full_fn, rel_fn, *args, **kwargs)
+
+
 def create_page(app, endpoint_dir, slug, metadata, content):
     path = os.path.join(app.root_dir, endpoint_dir, slug)
     logging.debug("Creating page: %s" % os.path.relpath(path, app.root_dir))
     header = yaml.dump(metadata)
     os.makedirs(os.path.dirname(path), 0o755, True)
-    with codecs.open(path, 'w', 'utf8') as fp:
+    with codecs.open(path, 'w', encoding='utf8') as fp:
         fp.write("---\n")
         fp.write(header)
         fp.write("---\n")
diff -r 64f37c4cce68 -r 52e4d9a1f917 piecrust/importing/jekyll.py
--- a/piecrust/importing/jekyll.py	Wed Aug 27 10:23:32 2014 -0700
+++ b/piecrust/importing/jekyll.py	Wed Aug 27 17:14:44 2014 -0700
@@ -5,54 +5,49 @@
 import yaml
 import logging
 from piecrust.configuration import parse_config_header
-from piecrust.importing.base import Importer
+from piecrust.importing.base import FileWalkingImporter
 from piecrust.uriutil import multi_replace
 
 
 logger = logging.getLogger(__name__)
 
 
-class JekyllImporter(Importer):
+class JekyllImporter(FileWalkingImporter):
     def __init__(self):
         super(JekyllImporter, self).__init__()
         self.name = 'jekyll'
         self.description = "Imports content from a Jekyll or Octopress blog."
 
     def setupParser(self, parser, app):
+        super(JekyllImporter, self).setupParser(parser, app)
         parser.add_argument('root_dir',
                 help="The root directory of the Jekyll or Octopress website.")
 
     def importWebsite(self, app, args):
         logger.debug("Importing Jekyll site from: %s" % args.root_dir)
-        for dirpath, dirnames, filenames in os.walk(args.root_dir):
-            dirnames[:] = list(filter(lambda i: not i[0] == '.', dirnames))
-            for fn in filenames:
-                if fn[0] == '.':
-                    continue
-
-                full_fn = os.path.join(dirpath, fn)
-                rel_fn = os.path.relpath(full_fn, args.root_dir)
-                if rel_fn.startswith('.' + os.sep):
-                    rel_fn = fn
+        self._startWalk(args.root_dir, args.exclude, app)
+        logger.info("The Jekyll website was successfully imported.")
 
-                logger.debug("- %s" % rel_fn)
-                if rel_fn == '_config.yml':
-                    self.convertConfig(app, full_fn)
-                elif rel_fn.startswith('_layouts'):
-                    self.convertLayout(app, full_fn, rel_fn[len('_layouts/'):])
-                elif rel_fn.startswith('_includes'):
-                    self.convertInclude(app, full_fn, rel_fn[len('_includes/'):])
-                elif rel_fn.startswith('_posts'):
-                    self.convertPost(app, full_fn, rel_fn[len('_posts/'):])
-                else:
-                    with open(full_fn, 'rb') as fp:
-                        firstline = fp.read(3)
-                    if firstline == '---':
-                        self.convertPage(app, full_fn, rel_fn)
-                    else:
-                        self.convertStatic(app, full_fn, rel_fn)
-
-        logger.info("The Jekyll website was successfully imported.")
+    def _importFile(self, full_fn, rel_fn, app):
+        """Dispatch one walked file to the converter matching its path."""
+        logger.debug("- %s" % rel_fn)
+        if rel_fn == '_config.yml':
+            self.convertConfig(app, full_fn)
+        elif rel_fn.startswith('_layouts'):
+            self.convertLayout(app, full_fn, rel_fn[len('_layouts/'):])
+        elif rel_fn.startswith('_includes'):
+            self.convertInclude(app, full_fn, rel_fn[len('_includes/'):])
+        elif rel_fn.startswith('_posts'):
+            self.convertPost(app, full_fn, rel_fn[len('_posts/'):])
+        else:
+            with open(full_fn, 'rb') as fp:
+                firstline = fp.read(3)
+            # The file is read in binary mode, so compare against bytes;
+            # a str literal never equals the bytes returned by read().
+            if firstline == b'---':
+                self.convertPage(app, full_fn, rel_fn)
+            else:
+                self.convertStatic(app, full_fn, rel_fn)
 
     def convertConfig(self, app, src_path):
         logger.debug(" Converting configuration file.")
diff -r 64f37c4cce68 -r 52e4d9a1f917 piecrust/importing/piecrust.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/piecrust/importing/piecrust.py	Wed Aug 27 17:14:44 2014 -0700
@@ -0,0 +1,75 @@
+import os
+import os.path
+import re
+import shutil
+import logging
+from piecrust.importing.base import FileWalkingImporter
+
+
+logger = logging.getLogger(__name__)
+
+
+class PieCrust1Importer(FileWalkingImporter):
+    """Imports the files of a PieCrust 1 website into the current site."""
+    def __init__(self):
+        super(PieCrust1Importer, self).__init__()
+        self.name = 'piecrust1'
+        self.description = "Imports content from a PieCrust 1 website."
+
+    def setupParser(self, parser, app):
+        super(PieCrust1Importer, self).setupParser(parser, app)
+        parser.add_argument('root_dir',
+                help="The root directory of the PieCrust 1 website.")
+
+    def importWebsite(self, app, args):
+        logger.debug("Importing PieCrust 1 site from: %s" % args.root_dir)
+        # Copy the list so the parsed `args.exclude` is left untouched.
+        exclude = list(args.exclude or [])
+        exclude += ['_cache', '_counter']
+        self._startWalk(args.root_dir, exclude, app)
+        logger.info("The PieCrust website was successfully imported.")
+
+    def _importFile(self, full_fn, rel_fn, app):
+        """Copy or convert one source file into `app.root_dir`.
+
+        `_content/config.yml` becomes `config.yml`, other `_content` files
+        drop that prefix, and any other file is placed under `assets/`.
+        """
+        logger.debug("- %s" % rel_fn)
+        dest_path = rel_fn
+        convert_func = None
+        if rel_fn.replace('\\', '/') == '_content/config.yml':
+            dest_path = 'config.yml'
+            convert_func = self.convertConfig
+        elif rel_fn.startswith('_content'):
+            dest_path = rel_fn[len('_content/'):]
+            fn_dirname = os.path.dirname(rel_fn)
+            if not fn_dirname.endswith('-assets'):
+                convert_func = self.convertPage
+        else:
+            dest_path = 'assets/' + rel_fn
+
+        logger.debug(" %s -> %s" % (rel_fn, dest_path))
+        full_dest_path = os.path.join(app.root_dir, dest_path)
+        os.makedirs(os.path.dirname(full_dest_path), 0o755, True)
+        if convert_func is None:
+            shutil.copy2(full_fn, full_dest_path)
+        else:
+            with open(full_fn, 'r', encoding='utf8') as fp:
+                content = fp.read()
+            converted_content = convert_func(content)
+            with open(full_dest_path, 'w', encoding='utf8') as fp:
+                fp.write(converted_content)
+            if converted_content != content:
+                logger.warning("'%s' has been modified. The original version "
+                               "has been kept for reference." % rel_fn)
+                shutil.copy2(full_fn, full_dest_path + '.orig')
+
+    def convertConfig(self, content):
+        """Return the configuration text `content` unchanged."""
+        return content
+
+    def convertPage(self, content):
+        """Return the page text `content` unchanged."""
+        return content
+
diff -r 64f37c4cce68 -r 52e4d9a1f917 piecrust/pathutil.py
--- a/piecrust/pathutil.py	Wed Aug 27 10:23:32 2014 -0700
+++ b/piecrust/pathutil.py	Wed Aug 27 17:14:44 2014 -0700
@@ -1,6 +1,7 @@
 import re
 import os
 import os.path
+import fnmatch
 
 
 re_terminal_path = re.compile(r'[/\\]|(\w\:)')
@@ -30,3 +31,20 @@
             raise SiteNotFoundError(cwd)
     return cwd
 
+
+def multi_fnmatch_filter(names, patterns, modifier=None, inverse=True):
+    """Filter `names` against a list of fnmatch `patterns`.
+
+    Each name is passed through `modifier` (when given) before matching.
+    With `inverse` true (the default) the names matching no pattern are
+    returned; otherwise the names matching at least one pattern are.
+    """
+    res = []
+    for n in names:
+        test_n = modifier(n) if modifier else n
+        matches = any(fnmatch.fnmatch(test_n, p) for p in patterns)
+        # Keep a name when its match state differs from `inverse`.
+        if matches != bool(inverse):
+            res.append(n)
+    return res
+
diff -r 64f37c4cce68 -r 52e4d9a1f917 piecrust/plugins/builtin.py
--- a/piecrust/plugins/builtin.py	Wed Aug 27 10:23:32 2014 -0700
+++ b/piecrust/plugins/builtin.py	Wed Aug 27 17:14:44 2014 -0700
@@ -8,6 +8,7 @@
 from piecrust.data.provider import (IteratorDataProvider, BlogDataProvider)
 from piecrust.formatting.markdownformatter import MarkdownFormatter
 from piecrust.importing.jekyll import JekyllImporter
+from piecrust.importing.piecrust import PieCrust1Importer
 from piecrust.plugins.base import PieCrustPlugin
 from piecrust.processing.base import CopyFileProcessor
 from piecrust.processing.less import LessProcessor
@@ -70,5 +71,6 @@
 
     def getImporters(self):
         return [
-                JekyllImporter()]
+                JekyllImporter(),
+                PieCrust1Importer()]
 