annotate piecrust/importing/base.py @ 411:e7b865f8f335

bake: Enable multiprocess baking. Baking is now done by running a worker per CPU, and sending jobs to them. This changes several things across the codebase: * Ability to not cache things related to pages other than the 'main' page (i.e. the page at the bottom of the execution stack). * Decouple the baking process from the bake records, so only the main process keeps track (and modifies) the bake record. * Remove the need for 'batch page getters' and loading a page directly from the page factories. There are various smaller changes too included here, including support for scope performance timers that are saved with the bake record and can be printed out to the console. Yes I got carried away. For testing, the in-memory 'mock' file-system doesn't work anymore, since we're spawning processes, so this is replaced by a 'tmpfs' file-system which is saved in temporary files on disk and deleted after tests have run.
author Ludovic Chabant <ludovic@chabant.com>
date Fri, 12 Jun 2015 17:09:19 -0700
parents 2daa05a21026
children 9093618aea08
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
60
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
1 import os.path
300
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents: 297
diff changeset
2 import shutil
60
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
3 import codecs
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
4 import logging
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
5 import yaml
300
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents: 297
diff changeset
6 from urllib.parse import urlparse
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents: 297
diff changeset
7 from urllib.request import urlopen
62
52e4d9a1f917 Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents: 60
diff changeset
8 from piecrust.pathutil import SiteNotFoundError, multi_fnmatch_filter
60
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
9
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
10
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
11 logger = logging.getLogger(__name__)
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
12
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
13
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
class Importer:
    """Base class for website importers.

    Concrete importers override `setupParser` and `importWebsite`; both
    raise `NotImplementedError` here.
    """
    # Importer metadata kept on the class; both default to `None`.
    name = None
    description = None
    # When true, `checkedImportWebsite` refuses to run if the application
    # has no root directory.
    requires_website = True

    def setupParser(self, parser, app):
        """Hook for registering command-line arguments on `parser`.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()

    def importWebsite(self, app, args):
        """Hook that performs the actual import.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()

    def checkedImportWebsite(self, ctx):
        """Validate the context, then delegate to `importWebsite`.

        Args:
            ctx: object exposing `app` (with a `root_dir` attribute) and
                `args`.

        Returns:
            Whatever `importWebsite` returns.

        Raises:
            SiteNotFoundError: if `ctx.app.root_dir` is `None` while this
                importer has `requires_website` set.
        """
        app = ctx.app
        no_site = app.root_dir is None
        if no_site and self.requires_website:
            raise SiteNotFoundError()
        return self.importWebsite(app, ctx.args)
60
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
29
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
30
62
52e4d9a1f917 Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents: 60
diff changeset
class FileWalkingImporter(Importer):
    """Importer that walks a directory tree and imports files one by one.

    `_startWalk` calls `self._importFile(full_path, rel_path, *args,
    **kwargs)` for every file it keeps; that method is not defined on this
    class, so it is presumably supplied by subclasses.
    """

    def setupParser(self, parser, app):
        """Add the `--exclude` option to `parser`."""
        parser.add_argument(
            '--exclude', nargs='+',
            help=("Patterns of files and directories to exclude "
                  "from the import (always includes `.git*`, "
                  "`.hg*`, `.svn`, `.bzr`)."))

    def _startWalk(self, root_dir, exclude, *args, **kwargs):
        """Walk `root_dir` and import every file that is not excluded.

        Args:
            root_dir: directory to walk.
            exclude: iterable of patterns to exclude, or `None`. It is
                never modified; the version-control patterns are appended
                to a private copy.
            *args, **kwargs: forwarded unchanged to `self._importFile`.
        """
        # Build a new list instead of `exclude += [...]`: the in-place form
        # mutated the caller's list (so the built-in patterns piled up on
        # every call) and failed outright for tuples.
        patterns = list(exclude) if exclude is not None else []
        patterns += ['.git*', '.hg*', '.svn', '.bzr']

        for dirpath, dirnames, filenames in os.walk(root_dir):
            rel_dirpath = os.path.relpath(dirpath, root_dir)
            if rel_dirpath == '.':
                # Top level: use an empty prefix so joined paths don't
                # start with './'.
                rel_dirpath = ''

            # Slice-assign so `os.walk` itself sees the pruned list and
            # does not descend into excluded directories.
            # NOTE(review): `multi_fnmatch_filter(..., inverse=True)` is
            # assumed to keep the names that match none of the patterns --
            # confirm against piecrust.pathutil.
            dirnames[:] = multi_fnmatch_filter(
                dirnames, patterns,
                modifier=lambda d: os.path.join(rel_dirpath, d),
                inverse=True)
            filenames = multi_fnmatch_filter(
                filenames, patterns,
                modifier=lambda f: os.path.join(rel_dirpath, f),
                inverse=True)

            for fn in filenames:
                full_fn = os.path.join(dirpath, fn)
                rel_fn = os.path.join(rel_dirpath, fn)
                self._importFile(full_fn, rel_fn, *args, **kwargs)
52e4d9a1f917 Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents: 60
diff changeset
61
52e4d9a1f917 Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents: 60
diff changeset
62
300
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents: 297
diff changeset
def create_page(app, rel_path, metadata, content):
    """Write a page file made of a YAML header followed by `content`.

    The file is written UTF-8 encoded as::

        ---
        <yaml.dump(metadata)>
        ---
        <content>

    Args:
        app: object whose `root_dir` is the directory pages are created in.
        rel_path: path of the page, relative to `app.root_dir`.
        metadata: value serialized with `yaml.dump` into the header.
        content: text written after the header.
    """
    path = os.path.join(app.root_dir, rel_path)
    # Log through the module-level logger rather than the root `logging`
    # module, so the record is attributed to this module (this matches
    # `download_asset`). Lazy %-args produce the same message text.
    logger.info("Creating page: %s", rel_path)
    header = yaml.dump(metadata)
    # Create missing parent directories; an existing directory is fine.
    os.makedirs(os.path.dirname(path), mode=0o755, exist_ok=True)
    with codecs.open(path, 'w', encoding='utf8') as fp:
        fp.write("---\n")
        fp.write(header)
        fp.write("---\n")
        fp.write(content)
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
73
300
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents: 297
diff changeset
74
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents: 297
diff changeset
def download_asset(app, url, rel_path=None, skip_if_exists=True):
    """Download `url` into the website directory.

    Args:
        app: object whose `root_dir` is the directory to download into.
        url: URL to fetch with `urlopen`.
        rel_path: destination relative to `app.root_dir`. When `None`, it
            is `'assets/'` followed by the URL's path without its leading
            slashes.
        skip_if_exists: when true, do nothing if the destination already
            exists.

    Returns:
        None.
    """
    if rel_path is None:
        parsed_url = urlparse(url)
        rel_path = 'assets/' + parsed_url.path.lstrip('/')
    path = os.path.join(app.root_dir, rel_path)
    if skip_if_exists and os.path.exists(path):
        return
    logger.info("Downloading %s", rel_path)
    os.makedirs(os.path.dirname(path), mode=0o755, exist_ok=True)

    # Download next to the destination and rename only on success. Writing
    # straight to `path` would leave a truncated file behind when the
    # transfer fails, and `skip_if_exists` would then never retry it.
    tmp_path = path + '.part'
    try:
        with urlopen(url) as resp, open(tmp_path, 'wb') as fp:
            shutil.copyfileobj(resp, fp)
        os.replace(tmp_path, path)
    except BaseException:
        # Best-effort cleanup of the partial download; the original error
        # is what the caller needs to see.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents: 297
diff changeset
86