-
Notifications
You must be signed in to change notification settings - Fork 0
/
schema_update.py
executable file
·62 lines (51 loc) · 1.75 KB
/
schema_update.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import json
import csv
import os
# Manual changes before run: point these three settings at your own data.
# Keep the trailing '/' on the two directory names so that a plain
# directory + filename concatenation yields a valid path.
#
# Path of the crosswalk CSV (despite the `dir_` prefix this is a file, it is
# passed to open()). Column 1 holds the old field name, column 2 the new
# field name; the first row is skipped as a header.
dir_crosswalk = 'btaa_crosswalk.csv'
# Directory that is scanned for the input '.json' files (old schema).
dir_old_schema = 'btaa_1.0/'
# Directory the converted JSON files are written to (new schema).
dir_new_schema = 'btaa_aardvark/'
# Load the crosswalk CSV and turn it into a dictionary.
# Each key is a field name in the old schema; its value is the field name
# that replaces it in the new schema.
crosswalk = {}
# newline='' is what the csv module asks for, so that line endings inside
# quoted cells are parsed correctly on every platform.
with open(dir_crosswalk, newline='') as f:
    reader = csv.reader(f)
    # Skip the header row. The default argument keeps a completely empty
    # file from raising StopIteration.
    next(reader, None)
    for record in reader:
        # csv.reader yields [] for a blank line, and a malformed row may
        # hold a single cell. Neither can be mapped, so skip them instead
        # of crashing with IndexError.
        if len(record) < 2:
            continue
        old, new = record[0], record[1]
        crosswalk[old] = new
# Function to update the metadata schema
def schema_update(filepath):
    """Convert one JSON record from the old schema to the new schema.

    Reads the JSON object stored at ``filepath``, renames every top-level
    key that appears in the module-level ``crosswalk`` mapping, passes the
    result through ``string2array`` and writes it, indented by two spaces,
    to a file with the same base name inside ``dir_new_schema``.

    Args:
        filepath: Path of the JSON file to convert.

    Returns:
        None. The converted record is written to disk.
    """
    # Open the JSON file with schema GBL 1.0 and load it as a dictionary.
    # JSON text is UTF-8, so do not rely on the platform default encoding.
    with open(filepath, encoding='utf-8') as fr:
        data = json.load(fr)

    # Loop over the crosswalk to change dictionary keys.
    for old_schema, new_schema in crosswalk.items():
        if old_schema in data:
            data[new_schema] = data.pop(old_schema)

    # Check for multi-valued fields and convert their values to arrays.
    data = string2array(data)

    # Write the updated JSON to the new folder. The output name is derived
    # from the argument itself rather than from a loop variable of the
    # calling code, so the function works no matter who calls it.
    filepath_updated = os.path.join(dir_new_schema, os.path.basename(filepath))
    with open(filepath_updated, 'w', encoding='utf-8') as fw:
        fw.write(json.dumps(data, indent=2))
# Function to convert fields that end with '_sm' or '_im' to an array
def string2array(dict):
    """Wrap the values of multi-valued fields in a list.

    A field counts as multi-valued when its key ends with ``_sm`` or
    ``_im``. If the value of such a field is not already a list, it is
    replaced by a one-element list holding that value. All other fields
    are left untouched.

    Args:
        dict: Mapping of field names to values. It is modified in place.
            The parameter name shadows the builtin; it is kept unchanged
            so that existing callers keep working.

    Returns:
        The same mapping object that was passed in.
    """
    for key, val in dict.items():
        # endswith() with a tuple requires the underscore, so a key that is
        # literally 'sm' or 'im' is not mistaken for a suffixed field.
        if key.endswith(('_sm', '_im')) and not isinstance(val, list):
            # Assigning to an existing key does not resize the mapping, so
            # this is safe while iterating over it.
            dict[key] = [val]
    return dict
# Collect all JSON files in a list, then iterate the list to update the
# metadata schema of each one.
#
# Make sure the output folder exists first; otherwise the first write would
# fail with FileNotFoundError.
os.makedirs(dir_new_schema, exist_ok=True)
# os.listdir returns entries in arbitrary order; sorting makes the
# processing order (and the printed log) reproducible.
files = sorted(x for x in os.listdir(dir_old_schema) if x.endswith('.json'))
# NOTE: the loop variable keeps the name `file` at module level because
# other code in this script may read it as a global.
for file in files:
    print(f'Executing {file} ...')
    filepath = os.path.join(dir_old_schema, file)
    schema_update(filepath)