Mercurial > piecrust2
annotate piecrust/importing/wordpress.py @ 1188:a7c43131d871
bake: Fix file write flushing problem with Python 3.8+
Writing the cache files fails in Python 3.8 because it looks like flushing
behaviour has changed. We need to explicitly flush. And even then, in very
rare occurrences, it looks like it can still run into race conditions,
so we do a very hacky and ugly "retry" loop when fetching cached data :(
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Tue, 15 Jun 2021 22:36:23 -0700 |
parents | 4850f8c21b6e |
children |
rev | line source |
---|---|
300
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
1 import os.path |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
2 import logging |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
3 import datetime |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
4 import yaml |
302
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
5 from collections import OrderedDict |
300
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
6 from piecrust import CONFIG_PATH |
302
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
7 from piecrust.configuration import ( |
852
4850f8c21b6e
core: Start of the big refactor for PieCrust 3.0.
Ludovic Chabant <ludovic@chabant.com>
parents:
363
diff
changeset
|
8 ConfigurationLoader, ConfigurationDumper, merge_dicts) |
300
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
9 from piecrust.importing.base import Importer, create_page, download_asset |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
10 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
11 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
12 logger = logging.getLogger(__name__) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
13 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
14 |
302
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
class WordpressImporterBase(Importer):
    """Common front-end for the Wordpress importers.

    This class declares the command-line options shared by the importers
    and forwards the import itself to an implementation object that
    subclasses supply through `_getImplementation`.
    """
    def setupParser(self, parser, app):
        """Add the shared importer options to `parser`.

        Each entry below pairs an option flag with the keyword arguments
        handed to `parser.add_argument`; options are registered in the
        order they are listed.
        """
        option_specs = (
            ('--pages-source',
             {'default': "pages",
              'help': "The source to store pages in."}),
            ('--posts-source',
             {'default': "posts",
              'help': "The source to store posts in."}),
            ('--default-post-layout',
             {'help': "The default layout to use for posts."}),
            ('--default-post-category',
             {'help': "The default category to use for posts."}),
            ('--default-page-layout',
             {'help': "The default layout to use for pages."}),
            ('--default-page-category',
             {'help': "The default category to use for pages."}),
        )
        for flag, options in option_specs:
            parser.add_argument(flag, **options)

    def importWebsite(self, app, args):
        """Run the import and return whatever the implementation returns."""
        return self._getImplementation(app, args).importWebsite()

    def _getImplementation(self, app, args):
        """Return the object that performs the import.

        Must be overridden; the base version always raises
        `NotImplementedError`.
        """
        raise NotImplementedError()
300
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
44 |
302
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
45 |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
class _ImporterBase(object):
    """Shared logic for the Wordpress import implementations.

    Subclasses provide the data access (`_open`, `_getSiteConfig`,
    `_getPosts`, and optionally `_close`); this class merges the site
    configuration into the website's config file and creates pages and
    assets from the posts the subclass yields.
    """
    def __init__(self, app, args):
        """Store the app and resolve the page/post sources named in `args`.

        `args` must expose `pages_source` and `posts_source`, which are
        passed to `app.getSource`.
        """
        self.app = app
        self._cat_map = {}
        self._author_map = {}
        self._pages_source = app.getSource(args.pages_source)
        self._posts_source = app.getSource(args.posts_source)

    def importWebsite(self):
        """Import the site configuration and then every post.

        The context returned by `_open` is always handed back to `_close`,
        even when generating the configuration or importing a post fails.
        """
        ctx = self._open()
        try:
            # Site configuration.
            logger.info("Generating site configuration...")
            self._updateSiteConfig(ctx)

            # Content
            for p in self._getPosts(ctx):
                if p['type'] == 'attachment':
                    self._createAsset(p)
                else:
                    self._createPost(p)
        finally:
            self._close(ctx)

    def _updateSiteConfig(self, ctx):
        """Merge the imported site settings into the website config file."""
        site_config = self._getSiteConfig(ctx)
        site_config.setdefault('site', {})
        site_config['site'].update({
            'post_url': '%year%/%month%/%slug%',
            'category_url': 'category/%category%'})

        site_config_path = os.path.join(self.app.root_dir, CONFIG_PATH)
        # The file is written back with `allow_unicode=True`, so read and
        # write it as UTF-8 instead of relying on the platform default.
        with open(site_config_path, 'r', encoding='utf8') as fp:
            cfg_data = yaml.load(fp, Loader=ConfigurationLoader)

        # An empty config file loads as `None`.
        cfg_data = cfg_data or {}
        # `cfg_data` is what gets dumped below, so `merge_dicts` is
        # expected to update it in place with `site_config`.
        merge_dicts(cfg_data, site_config)

        with open(site_config_path, 'w', encoding='utf8') as fp:
            yaml.dump(cfg_data, fp, default_flow_style=False,
                      allow_unicode=True,
                      Dumper=ConfigurationDumper)

    def _open(self):
        """Open the import data and return a context object.

        Must be overridden by subclasses.
        """
        raise NotImplementedError()

    def _close(self, ctx):
        """Release the context returned by `_open`; no-op by default."""
        pass

    def _getSiteConfig(self, ctx):
        """Return the site configuration as a dictionary-like object.

        Must be overridden by subclasses.
        """
        raise NotImplementedError()

    def _getPosts(self, ctx):
        """Return an iterable of post information dictionaries.

        Must be overridden by subclasses.
        """
        raise NotImplementedError()

    def _createAsset(self, asset_info):
        """Download the asset found at `asset_info['url']`."""
        download_asset(self.app, asset_info['url'])

    def _createPost(self, post_info):
        """Create the page file for a post or page entry.

        Published entries are created at the path given by the source's
        page factory, drafts go in a `drafts` folder under the website
        root, and entries with any other status are skipped with a
        warning.

        Raises:
            Exception: if `post_info['type']` is neither `post` nor `page`.
        """
        post_dt = post_info['datetime']
        finder = {
            'year': post_dt.year,
            'month': post_dt.month,
            'day': post_dt.day,
            'slug': post_info['slug']}
        if post_info['type'] == 'post':
            source = self._posts_source
        elif post_info['type'] == 'page':
            source = self._pages_source
        else:
            raise Exception("Unknown post type: %s" % post_info['type'])
        # NOTE(review): `MODE_CREATING` is not among the imports visible at
        # the top of this file -- confirm where it should come from.
        factory = source.findPageFactory(finder, MODE_CREATING)

        # Explicit post fields take precedence over the raw metadata.
        metadata = post_info['metadata'].copy()
        for name in ['title', 'author', 'status', 'post_id', 'post_guid',
                     'description', 'categories']:
            val = post_info.get(name)
            if val is not None:
                metadata[name] = val

        content = post_info['content']
        excerpt = post_info['excerpt']
        text = content
        if excerpt is not None and excerpt.strip() != '':
            text = "%s\n\n---excerpt---\n\n%s" % (content, excerpt)

        status = metadata.get('status')
        if status == 'publish':
            path = factory.path
            create_page(self.app, path, metadata, text)
        elif status == 'draft':
            # Drafts may have no title yet; fall back to the slug. Path
            # separators are replaced so that the file always ends up
            # directly inside the `drafts` folder.
            title = metadata.get('title') or post_info['slug']
            title = title.replace('/', '-').replace('\\', '-')
            filename = '-'.join(title.split(' ')) + '.html'
            path = os.path.join(self.app.root_dir, 'drafts', filename)
            create_page(self.app, path, metadata, text)
        else:
            logger.warning("Ignoring post with status: %s", status)
302
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
139 |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
140 |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
141 class _XmlImporter(_ImporterBase): |
300
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
142 ns_wp = {'wp': 'http://wordpress.org/export/1.2/'} |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
143 ns_dc = {'dc': "http://purl.org/dc/elements/1.1/"} |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
144 ns_excerpt = {'excerpt': "http://wordpress.org/export/1.2/excerpt/"} |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
145 ns_content = {'content': "http://purl.org/rss/1.0/modules/content/"} |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
146 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
147 def __init__(self, app, args): |
302
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
148 super(_XmlImporter, self).__init__(app, args) |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
149 self.path = args.xml_path |
300
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
150 |
302
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
151 def _open(self): |
300
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
152 if not os.path.exists(self.path): |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
153 raise Exception("No such file: %s" % self.path) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
154 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
155 try: |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
156 import xml.etree.ElementTree as ET |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
157 except ImportError: |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
158 logger.error("You don't seem to have any support for ElementTree " |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
159 "XML parsing.") |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
160 return 1 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
161 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
162 with open(self.path, 'r', encoding='utf8') as fp: |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
163 xml = fp.read() |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
164 xml = xml.replace(chr(0x1e), '') |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
165 xml = xml.replace(chr(0x10), '') |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
166 tree = ET.fromstring(xml) |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
167 channel = tree.find('channel') |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
168 |
302
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
169 return channel |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
170 |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
def _getSiteConfig(self, channel):
    """Build the site configuration from the export's `channel` element.

    Reads the channel's `title` and `description`, then gathers every
    `wp:author` child into a mapping keyed by the author's login.

    Returns an `OrderedDict` with a single `site` entry holding the
    `title`, `description` and `authors` values.
    """
    # Basic site information.
    site_section = {
        'title': find_text(channel, 'title'),
        'description': find_text(channel, 'description')}

    # Authors' details: (config key, XML tag) pairs, looked up in this
    # order for each author.
    ns = self.ns_wp
    author_fields = (
        ('email', 'wp:author_email'),
        ('display_name', 'wp:author_display_name'),
        ('first_name', 'wp:author_first_name'),
        ('last_name', 'wp:author_last_name'),
        ('author_id', 'wp:author_id'))
    authors = {}
    for author_node in channel.findall('wp:author', ns):
        login = find_text(author_node, 'wp:author_login', ns)
        authors[login] = {
            key: find_text(author_node, tag, ns)
            for key, tag in author_fields}
    site_section['authors'] = authors

    return OrderedDict({'site': site_section})
300
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
198 |
302
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
def _getPosts(self, channel):
    """Yield one info dictionary per `item` element of `channel`.

    Items whose `wp:post_type` is `attachment` are described with
    `_getAssetInfo`; every other item goes through `_getPostInfo`.
    """
    for item in channel.findall('item'):
        is_attachment = (
            find_text(item, 'wp:post_type', self.ns_wp) == 'attachment')
        describe = self._getAssetInfo if is_attachment else self._getPostInfo
        yield describe(item)
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
206 |
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
def _getAssetInfo(self, node):
    """Describe an attachment item.

    Returns a dictionary with `type` set to `attachment` and `url` set to
    the text of the item's `wp:attachment_url` element.
    """
    asset_info = {'type': 'attachment'}
    asset_info['url'] = find_text(node, 'wp:attachment_url', self.ns_wp)
    return asset_info
300
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
210 |
302
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
def _getPostInfo(self, node):
    """Collect the information of a single (non-attachment) item.

    The returned dictionary contains, in this order: `type`, `slug`,
    `datetime` (parsed from `wp:post_date` with the format
    `%Y-%m-%d %H:%M:%S`), `title`, `author`, `status`, `post_id`,
    `post_guid`, `description`, `categories` (list of `nicename`
    attributes), `metadata` (dictionary of `wp:postmeta` key/value
    pairs), `content` and `excerpt`.
    """
    ns_wp = self.ns_wp

    raw_date = find_text(node, 'wp:post_date', ns_wp)
    published = datetime.datetime.strptime(raw_date, '%Y-%m-%d %H:%M:%S')
    slug = find_text(node, 'wp:post_name', ns_wp)
    kind = find_text(node, 'wp:post_type', ns_wp)
    info = {'type': kind, 'slug': slug, 'datetime': published}

    # TODO: menu order, parent, password, sticky
    info['title'] = find_text(node, 'title')
    info['author'] = find_text(node, 'dc:creator', self.ns_dc)
    info['status'] = find_text(node, 'wp:status', ns_wp)
    info['post_id'] = find_text(node, 'wp:post_id', ns_wp)
    info['post_guid'] = find_text(node, 'guid')
    info['description'] = find_text(node, 'description')

    # NOTE: a `category` element without a `nicename` attribute ends up
    # as the string 'None' here, because the attribute value is passed
    # through `str()`.
    info['categories'] = [
        str(cat.attrib.get('nicename'))
        for cat in node.findall('category')]

    # Later `wp:postmeta` entries overwrite earlier ones with the same key.
    metadata = {}
    for meta in node.findall('wp:postmeta', ns_wp):
        meta_key = find_text(meta, 'wp:meta_key', ns_wp)
        metadata[meta_key] = find_text(meta, 'wp:meta_value', ns_wp)
    info['metadata'] = metadata

    info['content'] = find_text(node, 'content:encoded', self.ns_content)
    info['excerpt'] = find_text(node, 'excerpt:encoded', self.ns_excerpt)

    return info
300
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
256 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
257 |
302
103abb08755e
import: Make the Wordpress importer extendable, rename it to `wordpressxml`.
Ludovic Chabant <ludovic@chabant.com>
parents:
300
diff
changeset
|
class WordpressXmlImporter(WordpressImporterBase):
    """Importer that reads a Wordpress blog from an exported XML archive."""

    name = 'wordpress-xml'
    description = "Imports a Wordpress blog from an exported XML archive."

    def setupParser(self, parser, app):
        """Let the base class set up `parser`, then add `xml_path`."""
        super().setupParser(parser, app)
        xml_path_help = "The path to the exported XML archive file."
        parser.add_argument('xml_path', help=xml_path_help)

    def _getImplementation(self, app, args):
        """Return the `_XmlImporter` built from `app` and `args`."""
        return _XmlImporter(app, args)
300
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
270 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
271 |
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
def find_text(parent, child_name, namespaces=None):
    """Return the text of the first `child_name` element under `parent`.

    `child_name` and `namespaces` are passed straight to
    `parent.find()`, so prefixed names such as `wp:post_id` work when the
    matching prefix map is supplied.

    Always returns a string. An empty string is returned when the child
    element is missing, or when it exists but has no text (e.g. an empty
    `<description></description>`). Without this guard a missing child
    raises `AttributeError` and an empty one yields the literal string
    'None'.
    """
    child = parent.find(child_name, namespaces)
    if child is None or child.text is None:
        return ''
    return str(child.text)
2daa05a21026
import: Add an XML-based Wordpress importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
274 |