Skip to content

Commit 437954a

Browse files
committed
Refactored and resolved PR comments
1 parent 4d3575a commit 437954a

File tree

2 files changed

+256
-53
lines changed

2 files changed

+256
-53
lines changed

api/data/refactoring.py

Lines changed: 29 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -316,37 +316,37 @@ def request_gnomad_api_data(gene_name):
316316
}}
317317
"""
318318

319-
response = requests.post(url, json={'query': query}, timeout=300)# timeout set to 5 minutes
319+
response = requests.post(url, json={'query': query}, timeout=300) # timeout set to 5 minutes
320320

321321
if response.status_code != 200:
322322
print('Error:', response.status_code)
323-
return None
324323

325324
data = response.json()['data']['gene']['variants']
326325

327326
df = pd.json_normalize(data)
328327

329-
df['total_ac'] = df['exome.ac'].fillna(0) + df['genome.ac'].fillna(0)
330-
df['total_an'] = df['exome.an'].fillna(0) + df['genome.an'].fillna(0)
328+
df.loc[:, 'total_ac'] = df.loc[:, 'exome.ac'].fillna(0) + df.loc[:, 'genome.ac'].fillna(0)
329+
df.loc[:, 'total_an'] = df.loc[:, 'exome.an'].fillna(0) + df.loc[:, 'genome.an'].fillna(0)
331330

332-
df['HGVS Consequence'] = df['hgvsc'].fillna(0) # cDNA change
333-
df['Protein Consequence'] = df['hgvsp'].fillna(0) # Protein change
331+
df.loc[:, 'HGVS Consequence'] = df.loc[:, 'hgvsc'].fillna(0) # cDNA change
332+
df.loc[:, 'Protein Consequence'] = df.loc[:, 'hgvsp'].fillna(0) # Protein change
334333

335-
df['Allele Frequency'] = df['total_ac'] / df['total_an']
336-
df['Homozygote Count'] = df['exome.ac_hom'].fillna(0) + df['genome.ac_hom'].fillna(0)
337-
exome_populations = df['exome.populations']
338-
genome_populations = df['genome.populations']
339-
ids = ['afr', 'eas', 'asj', 'sas', 'nfe', 'fin', 'mid', 'amr', 'ami', 'remaining']
334+
df.loc[:, 'Allele Frequency'] = df.loc[:, 'total_ac'] / df.loc[:, 'total_an']
335+
df.loc[:, 'Homozygote Count'] = df.loc[:, 'exome.ac_hom'].fillna(0) + df.loc[:, 'genome.ac_hom'].fillna(0)
336+
exome_populations = df.loc[:, 'exome.populations']
337+
genome_populations = df.loc[:, 'genome.populations']
338+
population_ids = ['afr', 'eas', 'asj', 'sas', 'nfe', 'fin', 'mid', 'amr', 'ami', 'remaining']
340339

341340
for i in range(len(exome_populations)):
342341
exome_pop = exome_populations[i]
343-
process_population_data(df, exome_pop, 'exome', ids, i)
342+
process_population_data(df, exome_pop, 'exome', population_ids, i)
344343
genome_pop = genome_populations[i]
345-
process_population_data(df, genome_pop, 'genome', ids, i)
344+
process_population_data(df, genome_pop, 'genome', population_ids, i)
346345

347-
for variant_id in ids:
348-
df[f'Allele_Frequency_{variant_id}'] = (df[f'exome_ac_{variant_id}'].fillna(0) + df[f'genome_ac_{variant_id}'].fillna(0)) / (
349-
df[f'exome_an_{variant_id}'].fillna(0) + df[f'genome_an_{variant_id}'].fillna(0))
346+
for population_id in population_ids:
347+
df.loc[:, f'Allele_Frequency_{population_id}'] = (
348+
(df.loc[:, f'exome_ac_{population_id}'].fillna(0) + df.loc[:, f'genome_ac_{population_id}'].fillna(0)) / (
349+
df.loc[:, f'exome_an_{population_id}'].fillna(0) + df.loc[:, f'genome_an_{population_id}'].fillna(0)))
350350
population_mapping = {
351351
'afr': 'African/African American',
352352
'eas': 'East Asian',
@@ -360,19 +360,21 @@ def request_gnomad_api_data(gene_name):
360360
'remaining': 'Remaining',
361361
'': ''
362362
}
363-
for i in range(len(df)):
363+
364+
for i in range(df.shape[0]):
364365
max_pop = 0
365-
maxid = ''
366-
for variant_id in ids:
367-
if df.loc[i, f'Allele_Frequency_{variant_id}'] > max_pop:
368-
max_pop = df.loc[i, f'Allele_Frequency_{variant_id}']
369-
maxid = variant_id
366+
max_id = ''
367+
for population_id in population_ids:
368+
if df.loc[i, f'Allele_Frequency_{population_id}'] > max_pop:
369+
max_pop = df.loc[i, f'Allele_Frequency_{population_id}']
370+
max_id = population_id
370371
df.loc[i, 'Popmax'] = max_pop
371-
df.loc[i, 'Popmax population'] = population_mapping[maxid]
372-
not_to_drop = ['Popmax', 'Popmax population', 'Homozygote Count', 'Allele Frequency', 'variant_id',
373-
'cDNA change', 'Protein change']
374-
df = df.drop([col for col in df.columns if col not in not_to_drop], axis=1)
372+
df.loc[i, 'Popmax population'] = population_mapping[max_id]
373+
not_to_drop = ['Popmax', 'Popmax population', 'Homozygote Count', 'Allele Frequency',
374+
'variant_id', 'cDNA change', 'Protein change']
375+
376+
df = df.filter(not_to_drop, axis="columns")
375377

376-
df.rename(columns={'variant_id': 'gnomAD ID'}, inplace=True)
378+
df.rename(columns={'variant_id': 'gnomAD ID'})
377379

378380
return df

0 commit comments

Comments
 (0)