|
| 1 | +import collections |
| 2 | +import glob |
| 3 | +import sqlite3 |
| 4 | + |
| 5 | +import yaml |
| 6 | + |
| 7 | + |
def flatten(d, parent_key='', sep='_'):
    """Collapse a nested mapping into a single-level dict

    Keys of nested mappings are joined to the key of their parent with
    ``sep``, e.g. ``{'a': {'b': 1}}`` becomes ``{'a_b': 1}``. Values that
    are not mutable mappings (scalars, lists, ...) are passed through
    unchanged. An empty nested mapping contributes no keys.

    Args
    ====
    d: mapping
      mapping to flatten
    parent_key: str
      prefix joined in front of every key of ``d``; empty means no prefix
    sep: str
      separator placed between a parent key and a child key

    Returns
    =======
    dict
      new flat dictionary; when two paths produce the same joined key the
      one visited last wins
    """
    # The ABCs live in collections.abc; the old collections.MutableMapping
    # alias was removed in Python 3.10.
    from collections.abc import MutableMapping

    flat = {}
    for k, v in d.items():
        new_key = parent_key + sep + k if parent_key else k
        if isinstance(v, MutableMapping):
            flat.update(flatten(v, new_key, sep=sep))
        else:
            flat[new_key] = v
    return flat
| 17 | + |
| 18 | + |
def generate_schema(table_name, column_names):
    """Build a ``CREATE TABLE IF NOT EXISTS`` statement with TEXT columns

    Args
    ====
    table_name: str
      name of the table to create
    column_names: iterable
      column names; every column is declared as TEXT, in iteration order

    Returns
    =======
    str
      the SQL statement, with the table and column names written as
      double-quoted identifiers
    """
    def quote(identifier):
        # Inside a double-quoted SQL identifier a literal double quote is
        # written by doubling it.
        return '"{}"'.format(str(identifier).replace('"', '""'))

    columns = ', '.join(f'{quote(name)} TEXT' for name in column_names)
    return f'CREATE TABLE IF NOT EXISTS {quote(table_name)} ({columns})'
| 22 | + |
| 23 | + |
def insert_metadata(connection, table_name, metadata):
    """Insert one flat record as a row of ``table_name``

    The values are passed as bound parameters. The table and column names
    cannot be bound, so they are written as double-quoted identifiers.

    Args
    ====
    connection: sqlite3.Connection
      open database connection; the statement is executed on it and not
      committed here
    table_name: str
      name of an existing table
    metadata: dict
      maps column name to the value stored in that column
    """
    def quote(identifier):
        # Inside a double-quoted SQL identifier a literal double quote is
        # written by doubling it.
        return '"{}"'.format(str(identifier).replace('"', '""'))

    column_names = ', '.join(quote(key) for key in metadata)
    # one "?" placeholder per value
    question_marks = ', '.join('?' * len(metadata))
    connection.execute(
        f'INSERT INTO {quote(table_name)} ({column_names}) VALUES ({question_marks})',
        tuple(metadata.values()))
| 31 | + |
| 32 | + |
def generate_database(filename, sections, skipped_keys):
    """Generate sqlite database from yaml metadata files

    One table is created per section. Its columns are the union of the
    flattened keys found in the ``*.yml`` files of that section, and every
    file is inserted as one row. A file that cannot be inserted is reported
    on stdout and skipped; the remaining files are still processed.

    Args
    ====
    filename: str
      sqlite database filename
    sections: set
      set of directories to parse for *.yml files and flatten into sqlite tables
    skipped_keys: dict
      maps a section name to the set of top-level metadata keys that are
      left out of the inserted rows for that section (mainly a hack to
      handle tags and groups)
    """
    connection = sqlite3.connect(filename)
    try:
        # The connection context manager commits on success and rolls back
        # on an exception; it does not close the connection.
        with connection:
            for section in sections:
                skipped = skipped_keys.get(section, set())

                # Parse every file once: the documents are needed both to
                # derive the columns and to insert the rows.
                documents = []
                for path in sorted(glob.glob(f'{section}/*.yml')):
                    with open(path, encoding='utf-8') as f:
                        metadata = yaml.safe_load(f)
                    if not isinstance(metadata, dict):
                        # e.g. an empty file loads as None
                        print('failed to insert', path)
                        continue
                    documents.append((path, metadata))

                # Columns are collected before the skipped keys are removed,
                # so skipped keys still get a (never populated) column.
                keys = set()
                for _, metadata in documents:
                    keys.update(flatten(metadata))

                if not keys:
                    # CREATE TABLE with an empty column list is a syntax error
                    print('no metadata found for', section)
                    continue

                # sorted for a reproducible column order; key=str tolerates
                # keys that are not strings
                section_schema = generate_schema(section, sorted(keys, key=str))
                connection.execute(section_schema)

                for path, metadata in documents:
                    kept = {k: v for k, v in metadata.items() if k not in skipped}
                    try:
                        insert_metadata(connection, section, flatten(kept))
                    except Exception as exc:
                        # best effort: one bad file must not abort the run,
                        # but interrupts are no longer swallowed
                        print('failed to insert', path, exc)
    finally:
        connection.close()
| 68 | + |
| 69 | + |
def main():
    """Build ``db.sqlite`` from the metadata directories listed below"""
    # Keys holding lists are skipped for now: storing them will require a
    # many to many relationship table.
    keys_to_skip = {'groups': {'tags'}, 'organizers': {'group'}}

    directories = {
        'bloggers',
        'conferences',
        'coworking_spaces',
        'event_spaces',
        'groups',
        'organizations',
        'organizers',
    }

    generate_database(
        filename='db.sqlite',
        sections=directories,
        skipped_keys=keys_to_skip,
    )


if __name__ == "__main__":
    main()
0 commit comments