annotate piecrust/importing/base.py @ 1188:a7c43131d871

bake: Fix file write flushing problem with Python 3.8+ Writing the cache files fails in Python 3.8 because it looks like flushing behaviour has changed. We need to explicitly flush. And even then, in very rare occurrences, it looks like it can still run into racing conditions, so we do a very hacky and ugly "retry" loop when fetching cached data :(
author Ludovic Chabant <ludovic@chabant.com>
date Tue, 15 Jun 2021 22:36:23 -0700
parents 9093618aea08
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
60
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
1 import os.path
300
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents: 297
diff changeset
2 import shutil
60
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
3 import codecs
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
4 import logging
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
5 import yaml
300
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents: 297
diff changeset
6 from urllib.parse import urlparse
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents: 297
diff changeset
7 from urllib.request import urlopen
62
52e4d9a1f917 Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents: 60
diff changeset
8 from piecrust.pathutil import SiteNotFoundError, multi_fnmatch_filter
60
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
9
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
10
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
11 logger = logging.getLogger(__name__)
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
12
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
13
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
14 class Importer(object):
297
2823ea40cfac import: Put importer metadata on the class, and allow return values.
Ludovic Chabant <ludovic@chabant.com>
parents: 63
diff changeset
15 name = None
2823ea40cfac import: Put importer metadata on the class, and allow return values.
Ludovic Chabant <ludovic@chabant.com>
parents: 63
diff changeset
16 description = None
2823ea40cfac import: Put importer metadata on the class, and allow return values.
Ludovic Chabant <ludovic@chabant.com>
parents: 63
diff changeset
17 requires_website = True
60
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
18
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
19 def setupParser(self, parser, app):
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
20 raise NotImplementedError()
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
21
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
22 def importWebsite(self, app, args):
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
23 raise NotImplementedError()
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
24
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
25 def checkedImportWebsite(self, ctx):
63
28958565a17b In-place upgrade for PieCrust 1 sites.
Ludovic Chabant <ludovic@chabant.com>
parents: 62
diff changeset
26 if ctx.app.root_dir is None and self.requires_website:
60
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
27 raise SiteNotFoundError()
297
2823ea40cfac import: Put importer metadata on the class, and allow return values.
Ludovic Chabant <ludovic@chabant.com>
parents: 63
diff changeset
28 return self.importWebsite(ctx.app, ctx.args)
60
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
29
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
30
62
52e4d9a1f917 Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents: 60
diff changeset
31 class FileWalkingImporter(Importer):
52e4d9a1f917 Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents: 60
diff changeset
32 def setupParser(self, parser, app):
52e4d9a1f917 Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents: 60
diff changeset
33 parser.add_argument('--exclude', nargs='+',
52e4d9a1f917 Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents: 60
diff changeset
34 help=("Patterns of files and directories to exclude "
52e4d9a1f917 Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents: 60
diff changeset
35 "from the import (always includes `.git*`, "
52e4d9a1f917 Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents: 60
diff changeset
36 "`.hg*`, `.svn`, `.bzr`)."))
52e4d9a1f917 Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents: 60
diff changeset
37
52e4d9a1f917 Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents: 60
diff changeset
38 def _startWalk(self, root_dir, exclude, *args, **kwargs):
52e4d9a1f917 Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents: 60
diff changeset
39 if exclude is None:
52e4d9a1f917 Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents: 60
diff changeset
40 exclude = []
52e4d9a1f917 Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents: 60
diff changeset
41 exclude += ['.git*', '.hg*', '.svn', '.bzr']
52e4d9a1f917 Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents: 60
diff changeset
42
52e4d9a1f917 Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents: 60
diff changeset
43 for dirpath, dirnames, filenames in os.walk(root_dir):
52e4d9a1f917 Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents: 60
diff changeset
44 rel_dirpath = os.path.relpath(dirpath, root_dir)
52e4d9a1f917 Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents: 60
diff changeset
45 if rel_dirpath == '.':
52e4d9a1f917 Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents: 60
diff changeset
46 rel_dirpath = ''
52e4d9a1f917 Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents: 60
diff changeset
47
52e4d9a1f917 Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents: 60
diff changeset
48 dirnames[:] = multi_fnmatch_filter(
52e4d9a1f917 Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents: 60
diff changeset
49 dirnames, exclude,
52e4d9a1f917 Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents: 60
diff changeset
50 modifier=lambda d: os.path.join(rel_dirpath, d),
52e4d9a1f917 Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents: 60
diff changeset
51 inverse=True)
52e4d9a1f917 Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents: 60
diff changeset
52 filenames = multi_fnmatch_filter(
52e4d9a1f917 Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents: 60
diff changeset
53 filenames, exclude,
52e4d9a1f917 Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents: 60
diff changeset
54 modifier=lambda f: os.path.join(rel_dirpath, f),
52e4d9a1f917 Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents: 60
diff changeset
55 inverse=True)
52e4d9a1f917 Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents: 60
diff changeset
56
52e4d9a1f917 Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents: 60
diff changeset
57 for fn in filenames:
52e4d9a1f917 Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents: 60
diff changeset
58 full_fn = os.path.join(dirpath, fn)
52e4d9a1f917 Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents: 60
diff changeset
59 rel_fn = os.path.join(rel_dirpath, fn)
539
9093618aea08 import: Add some debug logging.
Ludovic Chabant <ludovic@chabant.com>
parents: 300
diff changeset
60 logger.debug("Importing file: %s" % rel_fn)
62
52e4d9a1f917 Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents: 60
diff changeset
61 self._importFile(full_fn, rel_fn, *args, **kwargs)
52e4d9a1f917 Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents: 60
diff changeset
62
52e4d9a1f917 Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents: 60
diff changeset
63
300
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents: 297
diff changeset
64 def create_page(app, rel_path, metadata, content):
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents: 297
diff changeset
65 path = os.path.join(app.root_dir, rel_path)
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents: 297
diff changeset
66 logging.info("Creating page: %s" % rel_path)
60
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
67 header = yaml.dump(metadata)
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
68 os.makedirs(os.path.dirname(path), 0o755, True)
62
52e4d9a1f917 Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents: 60
diff changeset
69 with codecs.open(path, 'w', encoding='utf8') as fp:
60
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
70 fp.write("---\n")
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
71 fp.write(header)
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
72 fp.write("---\n")
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
73 fp.write(content)
6e60e0fef2be Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff changeset
74
300
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents: 297
diff changeset
75
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents: 297
diff changeset
76 def download_asset(app, url, rel_path=None, skip_if_exists=True):
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents: 297
diff changeset
77 if rel_path is None:
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents: 297
diff changeset
78 parsed_url = urlparse(url)
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents: 297
diff changeset
79 rel_path = 'assets/' + parsed_url.path.lstrip('/')
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents: 297
diff changeset
80 path = os.path.join(app.root_dir, rel_path)
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents: 297
diff changeset
81 if skip_if_exists and os.path.exists(path):
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents: 297
diff changeset
82 return
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents: 297
diff changeset
83 logger.info("Downloading %s" % rel_path)
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents: 297
diff changeset
84 os.makedirs(os.path.dirname(path), 0o755, True)
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents: 297
diff changeset
85 with urlopen(url) as resp, open(path, 'wb') as fp:
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents: 297
diff changeset
86 shutil.copyfileobj(resp, fp)
2daa05a21026 import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents: 297
diff changeset
87