Skip to content

Commit

Permalink
chore(json2tsv): remove columns which all None
Browse files Browse the repository at this point in the history
  • Loading branch information
giangbui committed Apr 4, 2018
1 parent 3779f8e commit 7ffe37c
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 15 deletions.
35 changes: 21 additions & 14 deletions peregrine/utils/json2csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,9 @@ def dicts2tsv(dict_list):
header_set = set()

for dict_row in dict_list:
header_set.update(dict_row.keys())
for key in dict_row.keys():
if (dict_row[key] is not None and dict_row[key] != []):
header_set.update([key])

for h in header_set:
words = h.split('-')
Expand All @@ -92,23 +94,28 @@ def join(tsv_list, L, index, row):
Join list of sub tsv to generate a big tsv
Args:
tsv_list(list): list of tables or tvs. Each element is represented by a list of dictionary
tsv_list(list): list of tables or tsv. Each element is represented by a list of dictionary
L(list): joined table that is iteratively updated
index(int): the index of the table to be joined
row(dict): the current joining row
Returns: None
'''
print('joining')
print(L)
print(tsv_list)
print('\n')
if index == len(tsv_list):
L.append(row)
else:
for item in tsv_list[index]:
newrow = row.copy()
newrow.update(item)
join(tsv_list, L, index + 1, newrow)
print(L)
print('joined\n\n')


def json2tsv(json, prefix, delem):
def json2tsv(L, json, prefix, delem):
'''
Convert json file to tsv format
Expand All @@ -121,29 +128,29 @@ def json2tsv(json, prefix, delem):
list of dictionaries representing a tsv file. Each item in the list represents one row of data;
each row is a dictionary mapping a column name to the value at that position
'''

L = []

if isinstance(json, list) and json != []:
for l in json:
L += (json2tsv(l, prefix, delem))
return L
json2tsv(L, l, prefix, delem)
if isinstance(json, dict):
# handle dictionary
tsv_list = []
for k in json.keys():
tsv = json2tsv(json[k], prefix + delem + k, delem)
tsv_list.append(tsv)
tsv = []
json2tsv(tsv, json[k], prefix + delem + k, delem)
if tsv != []:
tsv_list.append(tsv)

join(tsv_list, L, 0, {})
else:
elif not isinstance(json, list) and json != None:
L.append({prefix: json})
return L


def flatten_json(json, prefix, delem):
data = json['data']
res = {}
for key, val in data.iteritems():
res[key] = json2tsv({key: val}, prefix, delem)
L = []
json2tsv(L, {key: val}, prefix, delem)
res[key] = L

return res
4 changes: 3 additions & 1 deletion peregrine/utils/pybdbag.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ def create_bdbag(bag_info, payload, max_row=1000):
header_set = set()

for dict_row in json_data:
header_set.update(dict_row.keys())
for key in dict_row.keys():
if (dict_row[key] is not None and dict_row[key] != []):
header_set.update([key])

with open(bag_path + '/data/' + node_name + '.tsv', 'w') as tsvfile:
writer = csv.writer(tsvfile, delimiter='\t')
Expand Down

0 comments on commit 7ffe37c

Please sign in to comment.