Mercurial > wikked
comparison tools/import_mediawiki.py @ 426:bf65fba2854c
tools: Add some simple MediaWiki importer.
author | Ludovic Chabant <ludovic@chabant.com> |
---|---|
date | Tue, 28 Mar 2017 21:25:00 -0700 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
425:e28f2c76691c | 426:bf65fba2854c |
---|---|
1 import os | |
2 import os.path | |
3 import argparse | |
4 from sqlalchemy import create_engine | |
5 | |
6 | |
def _find_available_path(path_noext, ext, max_suffix=100):
    """Return the first ``<path_noext>_<N><ext>`` path that does not exist.

    Suffix numbers are tried from 2 up to ``max_suffix`` inclusive.

    Raises:
        Exception: if every candidate path already exists.
    """
    for suffnum in range(2, max_suffix + 1):
        candidate = '%s_%d%s' % (path_noext, suffnum, ext)
        if not os.path.exists(candidate):
            return candidate
    raise Exception("Can't find available path for: %s" %
                    (path_noext + ext))


def main():
    """Export the latest revision of every MediaWiki page to text files.

    Command-line arguments:
        url: database URL passed to SQLAlchemy's ``create_engine``.
        -o/--out: output directory (must not already exist).
        --prefix: table name prefix of the MediaWiki tables.
        -v/--verbose: echo SQL statements issued by the engine.
        --ext: extension of the generated files (leading dot optional).

    Returns:
        1 when the output directory argument is empty or already exists,
        ``None`` after a successful export.

    Raises:
        Exception: when no free alternate file name can be found for a page
            whose target path already exists.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('url')
    parser.add_argument('-o', '--out', default='wikked_import')
    parser.add_argument('--prefix', default='wiki')
    parser.add_argument('-v', '--verbose', action='store_true')
    parser.add_argument('--ext', default='.md')
    args = parser.parse_args()

    prefix = args.prefix
    out_dir = args.out
    # Normalize so both "md" and ".md" yield ".md".
    ext = '.' + args.ext.lstrip('.')

    if not out_dir:
        parser.print_help()
        return 1

    if os.path.isdir(out_dir):
        print("The output directory already exists!")
        return 1

    engine = create_engine(args.url, echo=args.verbose)
    conn = engine.connect()
    try:
        # Join each page to its latest revision and that revision's text.
        # NOTE(review): the table prefix is interpolated into the SQL because
        # table names cannot be bound as parameters; it comes straight from
        # the command line, so only run this with a trusted --prefix.
        query = (
            'SELECT '
            'p.page_id,p.page_title,p.page_latest,'
            'r.rev_id,r.rev_text_id,t.old_id,t.old_text '
            'from %(prefix)s_page p '
            'INNER JOIN %(prefix)s_revision r ON p.page_latest = r.rev_id '
            'INNER JOIN %(prefix)s_text t ON r.rev_text_id = t.old_id;' %
            {'prefix': prefix})
        for p in conn.execute(query):
            title = p['page_title'].decode('utf8')
            text = p['old_text'].decode('utf8')

            path_noext = os.path.join(out_dir, title)
            path = path_noext + ext
            # The title may contain path separators, so the parent directory
            # can be deeper than out_dir itself.
            os.makedirs(os.path.dirname(path), exist_ok=True)

            if os.path.exists(path):
                new_path = _find_available_path(path_noext, ext)
                print("WARNING: %s exists" % path)
                print("WARNING: creating %s instead" % new_path)
                path = new_path

            print(p['page_id'], title)
            with open(path, 'w', encoding='utf8') as fp:
                fp.write(text)
    finally:
        # Release the connection even if a page fails to decode or write.
        conn.close()
70 | |
71 | |
if __name__ == '__main__':
    # Propagate main()'s return value (1 on failure, None on success) as the
    # process exit status instead of discarding it.
    import sys
    sys.exit(main())