-
Notifications
You must be signed in to change notification settings - Fork 11
Expand file tree
/
Copy pathconvert_json_to_sqlite.py
More file actions
90 lines (73 loc) · 2.45 KB
/
convert_json_to_sqlite.py
File metadata and controls
90 lines (73 loc) · 2.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import sqlite3
import json
import logging
# Value stored in the ``dict_id`` column of every row inserted into ``entries``.
dict_id = 1 # moedict
# Maps each entry's ``title`` to the row id returned when that entry was inserted.
word_to_id = {}
class DB:
    """Thin convenience wrapper around a single SQLite connection.

    Nothing is committed until :meth:`close` is called.
    """

    def __init__(self, path='dict-revised.sqlite3'):
        """Open a connection to the SQLite database at *path*.

        *path* defaults to the file name this script has always written to;
        anything accepted by ``sqlite3.connect`` may be passed instead.
        """
        self.conn = sqlite3.connect(path)

    def insert(self, sql, *bind):
        """Execute *sql* with the positional *bind* values.

        Returns the cursor's ``lastrowid`` after the statement has run.
        On ``sqlite3.InterfaceError`` the statement and its bound values are
        printed to stdout before the exception is re-raised.
        """
        cursor = self.conn.cursor()
        try:
            cursor.execute(sql, bind)
            # Read the row id while the cursor is still open.
            return cursor.lastrowid
        except sqlite3.InterfaceError:
            # Single pre-formatted argument: same output whether ``print`` is
            # the Python 2 statement or the Python 3 function.
            print('%s %s' % (sql, bind))
            raise
        finally:
            # Release the cursor on both the success and the failure path.
            cursor.close()

    def insert_dict(self, table, dct):
        """Insert one row into *table* using *dct* as a column -> value map.

        Column names and the table name are interpolated into the statement
        as-is; only the values are bound as parameters. Returns the value
        returned by :meth:`insert`.
        """
        # Freeze the key order once so columns and values line up.
        keys = list(dct)
        sql = 'INSERT INTO %s (%s) VALUES(%s)' % (
            table,
            ','.join(keys),
            ','.join('?' * len(keys)))
        return self.insert(sql, *[dct[k] for k in keys])

    def query(self, sql, *bind):
        """Execute *sql* with *bind* values and return all rows as a list."""
        cursor = self.conn.cursor()
        try:
            cursor.execute(sql, bind)
            return cursor.fetchall()
        finally:
            cursor.close()

    def close(self):
        """Commit pending changes and close the connection."""
        self.conn.commit()
        self.conn.close()
def dict_filter(dct, excludes=(), **argd):
    """Return a shallow copy of *dct* with some keys dropped and others set.

    dct      -- mapping to copy; it is not modified.
    excludes -- iterable of keys to remove from the copy; keys that are not
                present are ignored.
    argd     -- extra key/value pairs applied after the removal, overriding
                any same-named key from *dct*.
    """
    filtered = dict(dct)
    for key in excludes:
        # pop() with a default tolerates keys that are absent.
        filtered.pop(key, None)
    filtered.update(argd)
    return filtered
def insert_db(entry, db):
    """Insert one dictionary *entry* and its child rows through *db*.

    entry -- dict with at least ``title`` and ``heteronyms``; each heteronym
             must carry a ``definitions`` list. An optional ``translation``
             dict maps a language name to a list of translated strings.
    db    -- object providing ``insert_dict(table, dct)`` that returns the
             new row id.

    Rows are written to ``entries``, ``heteronyms``, ``definitions`` and
    ``translations``. The entry's row id is also recorded in the module-level
    ``word_to_id`` under the entry's title. *entry* itself is left unmodified.
    """
    entry_id = db.insert_dict(
        'entries',
        dict_filter(
            entry,
            excludes=['heteronyms', 'translation', 'English', 'francais', 'Deutsch'],
            dict_id=dict_id))
    word_to_id[entry['title']] = entry_id
    logging.debug('entry_id=%d', entry_id)

    for h_idx, heteronym in enumerate(entry['heteronyms']):
        heteronym_id = db.insert_dict(
            'heteronyms',
            dict_filter(heteronym, excludes=['definitions'],
                        entry_id=entry_id, idx='%d' % h_idx))
        logging.debug('heteronym_id=%d', heteronym_id)

        for d_idx, definition in enumerate(heteronym['definitions']):
            # Work on a copy so the caller's data is not rewritten in place.
            row = dict(definition)
            # These fields are lists in the source data; store them as one
            # comma-separated string each.
            for key in ('quote', 'example', 'link'):
                if key in row:
                    row[key] = ','.join(row[key])
            row['idx'] = '%d' % d_idx
            row['heteronym_id'] = heteronym_id
            db.insert_dict('definitions', row)

    if 'translation' in entry:
        for lang in entry['translation']:
            for t_idx, text in enumerate(entry['translation'][lang]):
                # idx is the position of this translation within its language,
                # mirroring how heteronyms and definitions are numbered within
                # their parent. (Previously the language's own position was
                # stored, so all translations of a language shared one idx.)
                db.insert_dict('translations', {
                    'lang': lang,
                    'def': text,
                    'idx': '%d' % t_idx,
                    'entry_id': entry_id,
                })
def main():
    """Load ``dict-revised.json`` and insert every entry into the database.

    The database is always closed via ``db.close()``, even when loading or
    inserting fails; because ``close`` commits first, rows inserted before a
    failure are still written.
    """
    db = DB()
    try:
        # Binary mode: json.load decodes the bytes itself, which works on
        # Python 2 and on Python 3.6+, and the ``with`` block guarantees the
        # file handle is closed once parsing is done.
        with open('dict-revised.json', 'rb') as source:
            entries = json.load(source)
        for entry in entries:
            insert_db(entry, db)
    finally:
        db.close()


# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
    main()