Mercurial > piecrust2
annotate piecrust/importing/base.py @ 1188:a7c43131d871
bake: Fix file write flushing problem with Python 3.8+
Writing the cache files fails in Python 3.8 because it looks like flushing
behaviour has changed. We need to explicitly flush. And even then, in very
rare occurrences, it looks like it can still run into race conditions,
so we do a very hacky and ugly "retry" loop when fetching cached data :(
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Tue, 15 Jun 2021 22:36:23 -0700 |
parents | 9093618aea08 |
children |
rev | line source |
---|---|
60
6e60e0fef2be
Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
1 import os.path |
300
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
297
diff
changeset
|
2 import shutil |
60
6e60e0fef2be
Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
3 import codecs |
6e60e0fef2be
Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
4 import logging |
6e60e0fef2be
Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
5 import yaml |
300
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
297
diff
changeset
|
6 from urllib.parse import urlparse |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
297
diff
changeset
|
7 from urllib.request import urlopen |
62
52e4d9a1f917
Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents:
60
diff
changeset
|
8 from piecrust.pathutil import SiteNotFoundError, multi_fnmatch_filter |
60
6e60e0fef2be
Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
9 |
6e60e0fef2be
Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
10 |
6e60e0fef2be
Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
11 logger = logging.getLogger(__name__) |
6e60e0fef2be
Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
12 |
6e60e0fef2be
Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
13 |
6e60e0fef2be
Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
class Importer(object):
    """Base class for website importers.

    Subclasses fill in `setupParser` and `importWebsite`; callers are
    expected to go through `checkedImportWebsite`, which validates the
    application context first.
    """

    # Importer metadata; left unset on the base class.
    name = None
    description = None
    # When true, `checkedImportWebsite` refuses to run if the application
    # has no root directory.
    requires_website = True

    def setupParser(self, parser, app):
        """Hook for adding importer arguments to `parser`.

        The base implementation always raises `NotImplementedError`.
        """
        raise NotImplementedError()

    def importWebsite(self, app, args):
        """Hook performing the actual import.

        The base implementation always raises `NotImplementedError`.
        """
        raise NotImplementedError()

    def checkedImportWebsite(self, ctx):
        """Run `importWebsite` with `ctx.app` and `ctx.args`.

        Raises `SiteNotFoundError` when `ctx.app.root_dir` is `None` and
        this importer has `requires_website` set. Otherwise returns
        whatever `importWebsite` returns.
        """
        site_missing = ctx.app.root_dir is None
        if site_missing and self.requires_website:
            raise SiteNotFoundError()
        outcome = self.importWebsite(ctx.app, ctx.args)
        return outcome
60
6e60e0fef2be
Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
29 |
6e60e0fef2be
Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
30 |
62
52e4d9a1f917
Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents:
60
diff
changeset
|
class FileWalkingImporter(Importer):
    """Importer base that walks a directory tree and imports each file.

    `_startWalk` calls `self._importFile(full_fn, rel_fn, *args, **kwargs)`
    for every file that survives the exclusion filter; that method is not
    defined here and must be supplied by subclasses.
    """

    def setupParser(self, parser, app):
        """Register the `--exclude` option on `parser`."""
        parser.add_argument(
            '--exclude', nargs='+',
            help=("Patterns of files and directories to exclude "
                  "from the import (always includes `.git*`, "
                  "`.hg*`, `.svn`, `.bzr`)."))

    def _startWalk(self, root_dir, exclude, *args, **kwargs):
        """Walk `root_dir` and import every non-excluded file.

        `exclude` is a list of patterns (or `None`); version-control
        patterns are always appended. Extra positional and keyword
        arguments are forwarded to `_importFile`.
        """
        # Build a fresh list instead of `exclude += [...]`: the in-place
        # form mutated the caller's list (e.g. the parsed `--exclude`
        # argument), accumulating the VCS patterns on every call.
        exclude = [] if exclude is None else list(exclude)
        exclude = exclude + ['.git*', '.hg*', '.svn', '.bzr']

        for dirpath, dirnames, filenames in os.walk(root_dir):
            rel_dirpath = os.path.relpath(dirpath, root_dir)
            if rel_dirpath == '.':
                rel_dirpath = ''

            # Slice-assign so `os.walk` sees the pruned list and does not
            # descend into excluded directories.
            # NOTE(review): `inverse=True` presumably keeps the names that
            # do NOT match `exclude` -- confirm against
            # `piecrust.pathutil.multi_fnmatch_filter`.
            dirnames[:] = multi_fnmatch_filter(
                dirnames, exclude,
                modifier=lambda d: os.path.join(rel_dirpath, d),
                inverse=True)
            filenames = multi_fnmatch_filter(
                filenames, exclude,
                modifier=lambda f: os.path.join(rel_dirpath, f),
                inverse=True)

            for fn in filenames:
                full_fn = os.path.join(dirpath, fn)
                rel_fn = os.path.join(rel_dirpath, fn)
                # Lazy %-args: the message is only formatted when debug
                # logging is actually enabled.
                logger.debug("Importing file: %s", rel_fn)
                self._importFile(full_fn, rel_fn, *args, **kwargs)
52e4d9a1f917
Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents:
60
diff
changeset
|
62 |
52e4d9a1f917
Simple importer for PieCrust 1 websites.
Ludovic Chabant <ludovic@chabant.com>
parents:
60
diff
changeset
|
63 |
300
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
297
diff
changeset
|
def create_page(app, rel_path, metadata, content):
    """Write a page file with a YAML header under the site root.

    The file is created at `app.root_dir` joined with `rel_path` (parent
    directories are created as needed) and contains `metadata` dumped as
    YAML between two `---` lines, followed by `content`. The file is
    written as UTF-8 and any existing file is overwritten.
    """
    path = os.path.join(app.root_dir, rel_path)
    # Use the module-level logger (as `download_asset` does) rather than
    # the root logger: `logging.info` bypasses this module's logger and
    # implicitly calls `basicConfig()` when no handler is configured.
    logger.info("Creating page: %s", rel_path)
    header = yaml.dump(metadata)
    os.makedirs(os.path.dirname(path), 0o755, exist_ok=True)
    with codecs.open(path, 'w', encoding='utf8') as fp:
        fp.write("---\n")
        fp.write(header)
        fp.write("---\n")
        fp.write(content)
6e60e0fef2be
Add `import` command, Jekyll importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
74 |
300
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
297
diff
changeset
|
75 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
297
diff
changeset
|
def download_asset(app, url, rel_path=None, skip_if_exists=True):
    """Download `url` into the site directory.

    When `rel_path` is not given, it is derived from the URL's path and
    placed under `assets/`. The destination is `app.root_dir` joined with
    `rel_path`. If `skip_if_exists` is true and the destination already
    exists, nothing is downloaded. Returns `None`.

    Errors from `urlopen` or from writing the file propagate to the
    caller; in that case no file is left at the destination.
    """
    if rel_path is None:
        parsed_url = urlparse(url)
        rel_path = 'assets/' + parsed_url.path.lstrip('/')
    path = os.path.join(app.root_dir, rel_path)
    if skip_if_exists and os.path.exists(path):
        return
    logger.info("Downloading %s", rel_path)
    os.makedirs(os.path.dirname(path), 0o755, exist_ok=True)

    # Download to a sibling temporary file and rename it into place only
    # once the transfer has completed. Writing straight to `path` would
    # leave a truncated file behind on failure, which a later run with
    # `skip_if_exists=True` would mistake for a finished download.
    tmp_path = path + '.part'
    try:
        with urlopen(url) as resp, open(tmp_path, 'wb') as fp:
            shutil.copyfileobj(resp, fp)
        os.replace(tmp_path, path)
    finally:
        # After a successful rename the temporary file is already gone;
        # after a failure this removes the partial data.
        try:
            os.remove(tmp_path)
        except FileNotFoundError:
            pass
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
297
diff
changeset
|
87 |