@@ -316,37 +316,37 @@ def request_gnomad_api_data(gene_name):
316
316
}}
317
317
"""
318
318
319
- response = requests .post (url , json = {'query' : query }, timeout = 300 )# timeout set to 5 minutes
319
+ response = requests .post (url , json = {'query' : query }, timeout = 300 ) # timeout set to 5 minutes
320
320
321
321
if response .status_code != 200 :
322
322
print ('Error:' , response .status_code )
323
- return None
324
323
325
324
data = response .json ()['data' ]['gene' ]['variants' ]
326
325
327
326
df = pd .json_normalize (data )
328
327
329
- df [ 'total_ac' ] = df [ 'exome.ac' ].fillna (0 ) + df [ 'genome.ac' ].fillna (0 )
330
- df [ 'total_an' ] = df [ 'exome.an' ].fillna (0 ) + df [ 'genome.an' ].fillna (0 )
328
+ df . loc [:, 'total_ac' ] = df . loc [:, 'exome.ac' ].fillna (0 ) + df . loc [:, 'genome.ac' ].fillna (0 )
329
+ df . loc [:, 'total_an' ] = df . loc [:, 'exome.an' ].fillna (0 ) + df . loc [:, 'genome.an' ].fillna (0 )
331
330
332
- df [ 'HGVS Consequence' ] = df [ 'hgvsc' ].fillna (0 ) # cDNA change
333
- df [ 'Protein Consequence' ] = df [ 'hgvsp' ].fillna (0 ) # Protein change
331
+ df . loc [:, 'HGVS Consequence' ] = df . loc [:, 'hgvsc' ].fillna (0 ) # cDNA change
332
+ df . loc [:, 'Protein Consequence' ] = df . loc [:, 'hgvsp' ].fillna (0 ) # Protein change
334
333
335
- df [ 'Allele Frequency' ] = df [ 'total_ac' ] / df [ 'total_an' ]
336
- df [ 'Homozygote Count' ] = df [ 'exome.ac_hom' ].fillna (0 ) + df [ 'genome.ac_hom' ].fillna (0 )
337
- exome_populations = df [ 'exome.populations' ]
338
- genome_populations = df [ 'genome.populations' ]
339
- ids = ['afr' , 'eas' , 'asj' , 'sas' , 'nfe' , 'fin' , 'mid' , 'amr' , 'ami' , 'remaining' ]
334
+ df . loc [:, 'Allele Frequency' ] = df . loc [:, 'total_ac' ] / df . loc [:, 'total_an' ]
335
+ df . loc [:, 'Homozygote Count' ] = df . loc [:, 'exome.ac_hom' ].fillna (0 ) + df . loc [:, 'genome.ac_hom' ].fillna (0 )
336
+ exome_populations = df . loc [:, 'exome.populations' ]
337
+ genome_populations = df . loc [:, 'genome.populations' ]
338
+ population_ids = ['afr' , 'eas' , 'asj' , 'sas' , 'nfe' , 'fin' , 'mid' , 'amr' , 'ami' , 'remaining' ]
340
339
341
340
for i in range (len (exome_populations )):
342
341
exome_pop = exome_populations [i ]
343
- process_population_data (df , exome_pop , 'exome' , ids , i )
342
+ process_population_data (df , exome_pop , 'exome' , population_ids , i )
344
343
genome_pop = genome_populations [i ]
345
- process_population_data (df , genome_pop , 'genome' , ids , i )
344
+ process_population_data (df , genome_pop , 'genome' , population_ids , i )
346
345
347
- for variant_id in ids :
348
- df [f'Allele_Frequency_{ variant_id } ' ] = (df [f'exome_ac_{ variant_id } ' ].fillna (0 ) + df [f'genome_ac_{ variant_id } ' ].fillna (0 )) / (
349
- df [f'exome_an_{ variant_id } ' ].fillna (0 ) + df [f'genome_an_{ variant_id } ' ].fillna (0 ))
346
+ for population_id in population_ids :
347
+ df .loc [:, f'Allele_Frequency_{ population_id } ' ] = (
348
+ (df .loc [:, f'exome_ac_{ population_id } ' ].fillna (0 ) + df .loc [:, f'genome_ac_{ population_id } ' ].fillna (0 )) / (
349
+ df .loc [:, f'exome_an_{ population_id } ' ].fillna (0 ) + df .loc [:, f'genome_an_{ population_id } ' ].fillna (0 )))
350
350
population_mapping = {
351
351
'afr' : 'African/African American' ,
352
352
'eas' : 'East Asian' ,
@@ -360,19 +360,21 @@ def request_gnomad_api_data(gene_name):
360
360
'remaining' : 'Remaining' ,
361
361
'' : ''
362
362
}
363
- for i in range (len (df )):
363
+
364
+ for i in range (df .shape [0 ]):
364
365
max_pop = 0
365
- maxid = ''
366
- for variant_id in ids :
367
- if df .loc [i , f'Allele_Frequency_{ variant_id } ' ] > max_pop :
368
- max_pop = df .loc [i , f'Allele_Frequency_{ variant_id } ' ]
369
- maxid = variant_id
366
+ max_id = ''
367
+ for population_id in population_ids :
368
+ if df .loc [i , f'Allele_Frequency_{ population_id } ' ] > max_pop :
369
+ max_pop = df .loc [i , f'Allele_Frequency_{ population_id } ' ]
370
+ max_id = population_id
370
371
df .loc [i , 'Popmax' ] = max_pop
371
- df .loc [i , 'Popmax population' ] = population_mapping [maxid ]
372
- not_to_drop = ['Popmax' , 'Popmax population' , 'Homozygote Count' , 'Allele Frequency' , 'variant_id' ,
373
- 'cDNA change' , 'Protein change' ]
374
- df = df .drop ([col for col in df .columns if col not in not_to_drop ], axis = 1 )
372
+ df .loc [i , 'Popmax population' ] = population_mapping [max_id ]
373
+ not_to_drop = ['Popmax' , 'Popmax population' , 'Homozygote Count' , 'Allele Frequency' ,
374
+ 'variant_id' , 'cDNA change' , 'Protein change' ]
375
+
376
+ df = df .filter (not_to_drop , axis = "columns" )
375
377
376
- df .rename (columns = {'variant_id' : 'gnomAD ID' }, inplace = True )
378
+ df .rename (columns = {'variant_id' : 'gnomAD ID' })
377
379
378
380
return df
0 commit comments