Mercurial > wikked
annotate tools/import_mediawiki.py @ 500:d3cd7d8d6b25 default tip
web: Breaking changes in flask-login API.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Sun, 07 Jun 2020 00:56:00 -0700 |
parents | bf65fba2854c |
children |
rev | line source |
---|---|
426
bf65fba2854c
tools: Add some simple MediaWiki importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
1 import os |
bf65fba2854c
tools: Add some simple MediaWiki importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
2 import os.path |
bf65fba2854c
tools: Add some simple MediaWiki importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
3 import argparse |
bf65fba2854c
tools: Add some simple MediaWiki importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
4 from sqlalchemy import create_engine |
bf65fba2854c
tools: Add some simple MediaWiki importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
5 |
bf65fba2854c
tools: Add some simple MediaWiki importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
6 |
bf65fba2854c
tools: Add some simple MediaWiki importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
def _as_text(value):
    """Return *value* as ``str``, decoding UTF-8 if it is a bytes object."""
    if isinstance(value, (bytes, bytearray)):
        return value.decode('utf8')
    return value


def _find_free_path(path_noext, ext):
    """Return the first ``<path_noext>_<n><ext>`` (n = 2..100) not on disk.

    Raises:
        Exception: if all 99 candidates already exist.
    """
    for suffnum in range(2, 101):
        candidate = '%s_%d%s' % (path_noext, suffnum, ext)
        if not os.path.exists(candidate):
            return candidate
    raise Exception(
        "Can't find available path for: %s" % (path_noext + ext))


def main():
    """Dump the latest text of every MediaWiki page into a directory.

    Command line:
        url            database URL handed to ``create_engine``
        -o / --out     output directory (default ``wikked_import``);
                       must not exist yet
        --prefix       table prefix; tables read are ``<prefix>_page``,
                       ``<prefix>_revision`` and ``<prefix>_text``
                       (default ``wiki``)
        --ext          extension of the written files (default ``.md``)
        -v / --verbose passed to ``create_engine`` as ``echo``

    Each page is written to ``<out>/<page_title><ext>``; when that file
    already exists a ``_<n>`` suffix is appended instead of overwriting.

    Returns:
        1 if the output directory is empty or already exists, otherwise
        ``None``.

    Raises:
        Exception: if no free file name can be found for a page.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('url')
    parser.add_argument('-o', '--out', default='wikked_import')
    parser.add_argument('--prefix', default='wiki')
    parser.add_argument('-v', '--verbose', action='store_true')
    parser.add_argument('--ext', default='.md')
    args = parser.parse_args()

    prefix = args.prefix
    out_dir = args.out
    # Accept both "md" and ".md" on the command line.
    ext = '.' + args.ext.lstrip('.')

    if not out_dir:
        parser.print_help()
        return 1

    if os.path.isdir(out_dir):
        print("The output directory already exists!")
        return 1

    engine = create_engine(args.url, echo=args.verbose)

    # Table names cannot be bound as SQL parameters, hence the string
    # interpolation of the (operator-supplied) prefix.
    # NOTE(review): SQLAlchemy 2.x rejects plain-string statements and
    # string-keyed row access; wrap in text() and adapt if upgrading.
    query = (
        'SELECT '
        'p.page_id,p.page_title,p.page_latest,'
        'r.rev_id,r.rev_text_id,t.old_id,t.old_text '
        'from %(prefix)s_page p '
        'INNER JOIN %(prefix)s_revision r ON p.page_latest = r.rev_id '
        'INNER JOIN %(prefix)s_text t ON r.rev_text_id = t.old_id;' %
        {'prefix': prefix})

    # The context manager closes the connection even if a page fails.
    with engine.connect() as conn:
        for p in conn.execute(query):
            title = _as_text(p['page_title'])
            text = _as_text(p['old_text'])

            # NOTE(review): the title is used verbatim as a relative path,
            # so "/" creates sub-directories and ".." could leave out_dir.
            path_noext = os.path.join(out_dir, title)
            path = path_noext + ext
            os.makedirs(os.path.dirname(path), exist_ok=True)

            if os.path.exists(path):
                new_path = _find_free_path(path_noext, ext)
                print("WARNING: %s exists" % path)
                print("WARNING: creating %s instead" % new_path)
                path = new_path

            print(p['page_id'], title)
            with open(path, 'w', encoding='utf8') as fp:
                fp.write(text)
bf65fba2854c
tools: Add some simple MediaWiki importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
70 |
bf65fba2854c
tools: Add some simple MediaWiki importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
71 |
bf65fba2854c
tools: Add some simple MediaWiki importer.
Ludovic Chabant <ludovic@chabant.com>
parents:
diff
changeset
|
if __name__ == '__main__':
    # Use main()'s return value as the process exit status: 1 on a usage
    # error, None (exit status 0) on success.
    raise SystemExit(main())