Skip to content

Commit f998ed3

Browse files
committed
change 'gene' to 'gene_name' and add 'gene_symbol'
1 parent 257d230 commit f998ed3

File tree

1 file changed

+5
-3
lines changed

1 file changed

+5
-3
lines changed

bin/gvf2indexandlog.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,7 @@ def parse_args():
4949

5050
# create index from GVF
5151
# make empty index df
52-
index_cols=['pos', 'mutation', 'hgvs_aa_mutation', 'hgvs_nt_mutation', 'gene', 'protein_name', 'alias', 'hgvs_alias', 'alias_protein', 'Pokay_annotation', 'lineages']
52+
index_cols=['pos', 'mutation', 'hgvs_aa_mutation', 'hgvs_nt_mutation', 'gene_name', 'gene_symbol', 'protein_name', 'protein_symbol', 'alias', 'hgvs_alias', 'alias_protein', 'Pokay_annotation', 'lineages']
5353
index = pd.DataFrame(np.empty((gvf.shape[0], len(index_cols))), columns=index_cols)
5454
# populate index df with gvf info
5555
index['pos'] = gvf['#start']
@@ -60,8 +60,10 @@ def parse_args():
6060
index['hgvs_alias'] = gvf['hgvs_alias']
6161
index['alias_protein'] = 'n/a'
6262
index.loc[index['alias']!='n/a', 'alias_protein'] = gvf['mat_pep']
63-
index['gene'] = gvf['gene']
63+
index['gene_name'] = gvf['gene_name']
64+
index['gene_symbol'] = gvf['gene_symbol']
6465
index['protein_name'] = gvf['protein_name']
66+
index['protein_symbol'] = gvf['protein_symbol']
6567
index['Pokay_annotation'] = gvf["function_description"].notna()
6668
index['lineages'] = gvf['viral_lineage']
6769
# tidying
@@ -73,7 +75,7 @@ def parse_args():
7375
# create log from index
7476
log = index.copy()
7577
# fill in 'new_mutations' column like: "gene:mutation"
76-
log['new_mutations'] = log["gene"] + ":" + log["mutation"]
78+
log['new_mutations'] = log["gene_symbol"] + ":" + log["mutation"]
7779
# for orf1ab mutations, fill in 'new_mutations' column like: "gene:mutation / nsp:alias"
7880
log.loc[log['alias']!='n/a', 'new_mutations'] = log['new_mutations'] + " / " + log["alias_protein"] + ":" + log["alias"]
7981
# drop duplicates (there shouldn't be any)

0 commit comments

Comments
 (0)