Skip to content

Commit 522cf70

Browse files
committed
Extra PR refactoring
1 parent 437954a commit 522cf70

File tree

2 files changed

+32
-231
lines changed

2 files changed

+32
-231
lines changed

api/data/refactoring.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,18 @@ def save_lovd_as_vcf(data, save_to="./lovd.vcf"):
247247
f.write("\n")
248248

249249

250-
def process_population_data(df, pop_data, name, pop_ids, index):
250+
def prepare_popmax_calculation(df, pop_data, name, pop_ids, index):
251+
"""
252+
Prepares the calculation of popmax and popmax population for a variant.
253+
Works with the ac and an values of the genome and exome data.
254+
255+
:param DataFrame df: DataFrame containing gnomAD data
256+
:param dict pop_data: dictionary containing population data
257+
:param str name: name of the population
258+
:param list[str] pop_ids: list of population ids
259+
:param int index: index of the variant
260+
"""
261+
251262
for pop_id in pop_ids:
252263
df.loc[index, f'{name}_ac_{pop_id}'] = 0
253264
df.loc[index, f'{name}_an_{pop_id}'] = 0
@@ -339,9 +350,9 @@ def request_gnomad_api_data(gene_name):
339350

340351
for i in range(len(exome_populations)):
341352
exome_pop = exome_populations[i]
342-
process_population_data(df, exome_pop, 'exome', population_ids, i)
353+
prepare_popmax_calculation(df, exome_pop, 'exome', population_ids, i)
343354
genome_pop = genome_populations[i]
344-
process_population_data(df, genome_pop, 'genome', population_ids, i)
355+
prepare_popmax_calculation(df, genome_pop, 'genome', population_ids, i)
345356

346357
for population_id in population_ids:
347358
df.loc[:, f'Allele_Frequency_{population_id}'] = (

tests/pipeline.ipynb

Lines changed: 18 additions & 228 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,6 @@
77
"collapsed": true,
88
"jupyter": {
99
"outputs_hidden": true
10-
},
11-
"ExecuteTime": {
12-
"end_time": "2024-09-02T18:45:02.492330Z",
13-
"start_time": "2024-09-02T18:45:02.488185Z"
1410
}
1511
},
1612
"source": [
@@ -33,7 +29,7 @@
3329
"pd.options.display.max_columns = 0"
3430
],
3531
"outputs": [],
36-
"execution_count": 11
32+
"execution_count": null
3733
},
3834
{
3935
"cell_type": "code",
@@ -63,11 +59,7 @@
6359
"execution_count": null
6460
},
6561
{
66-
"metadata": {
67-
"ExecuteTime": {
68-
"start_time": "2024-09-02T18:42:20.091398Z"
69-
}
70-
},
62+
"metadata": {},
7163
"cell_type": "code",
7264
"source": [
7365
"gnomad_data = request_gnomad_api_data(\"EYS\")\n",
@@ -79,11 +71,7 @@
7971
"execution_count": null
8072
},
8173
{
82-
"metadata": {
83-
"ExecuteTime": {
84-
"start_time": "2024-09-02T18:44:44.422287Z"
85-
}
86-
},
74+
"metadata": {},
8775
"cell_type": "code",
8876
"source": [
8977
"store_database_for_eys_gene('gnomad', False)\n",
@@ -95,23 +83,15 @@
9583
"execution_count": null
9684
},
9785
{
98-
"metadata": {
99-
"ExecuteTime": {
100-
"start_time": "2024-09-02T18:44:44.497881Z"
101-
}
102-
},
86+
"metadata": {},
10387
"cell_type": "code",
10488
"source": "display(gnomad_data_2)",
10589
"id": "9d3e4d6b5f7be127",
10690
"outputs": [],
10791
"execution_count": null
10892
},
10993
{
110-
"metadata": {
111-
"ExecuteTime": {
112-
"start_time": "2024-09-02T18:44:44.546361Z"
113-
}
114-
},
94+
"metadata": {},
11595
"cell_type": "code",
11696
"source": [
11797
"gnomad_data_2.to_csv('C:\\\\Users\\\\Kajus\\\\Desktop\\\\gnomad_data_downloaded.csv', index=False)\n",
@@ -122,11 +102,7 @@
122102
"execution_count": null
123103
},
124104
{
125-
"metadata": {
126-
"ExecuteTime": {
127-
"start_time": "2024-09-02T18:44:44.806484Z"
128-
}
129-
},
105+
"metadata": {},
130106
"cell_type": "code",
131107
"source": [
132108
"len(gnomad_data_2), len(gnomad_data)\n",
@@ -138,168 +114,15 @@
138114
"execution_count": null
139115
},
140116
{
141-
"metadata": {
142-
"ExecuteTime": {
143-
"end_time": "2024-09-02T18:45:06.035450Z",
144-
"start_time": "2024-09-02T18:45:06.022832Z"
145-
}
146-
},
117+
"metadata": {},
147118
"cell_type": "code",
148119
"source": "gnomad_data",
149120
"id": "96283480cccf641",
150-
"outputs": [
151-
{
152-
"data": {
153-
"text/plain": [
154-
" Popmax Popmax population ... Allele Frequency variant_id\n",
155-
"0 0.000016 African/African American ... 1.807419e-06 6-63720525-A-G\n",
156-
"1 0.000192 East Asian ... 6.573844e-06 6-63720525-A-T\n",
157-
"2 0.000000 ... 0.000000e+00 6-63720525-A-C\n",
158-
"3 0.000020 South Asian ... 1.045299e-06 6-63720526-T-A\n",
159-
"4 0.000000 ... 0.000000e+00 6-63720527-G-T\n",
160-
"... ... ... ... ... ...\n",
161-
"14295 0.000000 ... 0.000000e+00 6-65495479-G-T\n",
162-
"14296 0.000031 African/African American ... 1.446349e-06 6-65495479-G-A\n",
163-
"14297 0.000070 Admixed American ... 2.629510e-06 6-65495482-A-G\n",
164-
"14298 0.000060 South Asian ... 3.645085e-06 6-65495484-T-G\n",
165-
"14299 0.000012 South Asian ... 7.310070e-07 6-65495485-T-C\n",
166-
"\n",
167-
"[14300 rows x 5 columns]"
168-
],
169-
"text/html": [
170-
"<div>\n",
171-
"<style scoped>\n",
172-
" .dataframe tbody tr th:only-of-type {\n",
173-
" vertical-align: middle;\n",
174-
" }\n",
175-
"\n",
176-
" .dataframe tbody tr th {\n",
177-
" vertical-align: top;\n",
178-
" }\n",
179-
"\n",
180-
" .dataframe thead th {\n",
181-
" text-align: right;\n",
182-
" }\n",
183-
"</style>\n",
184-
"<table border=\"1\" class=\"dataframe\">\n",
185-
" <thead>\n",
186-
" <tr style=\"text-align: right;\">\n",
187-
" <th></th>\n",
188-
" <th>Popmax</th>\n",
189-
" <th>Popmax population</th>\n",
190-
" <th>Homozygote Count</th>\n",
191-
" <th>Allele Frequency</th>\n",
192-
" <th>variant_id</th>\n",
193-
" </tr>\n",
194-
" </thead>\n",
195-
" <tbody>\n",
196-
" <tr>\n",
197-
" <th>0</th>\n",
198-
" <td>0.000016</td>\n",
199-
" <td>African/African American</td>\n",
200-
" <td>0.0</td>\n",
201-
" <td>1.807419e-06</td>\n",
202-
" <td>6-63720525-A-G</td>\n",
203-
" </tr>\n",
204-
" <tr>\n",
205-
" <th>1</th>\n",
206-
" <td>0.000192</td>\n",
207-
" <td>East Asian</td>\n",
208-
" <td>0.0</td>\n",
209-
" <td>6.573844e-06</td>\n",
210-
" <td>6-63720525-A-T</td>\n",
211-
" </tr>\n",
212-
" <tr>\n",
213-
" <th>2</th>\n",
214-
" <td>0.000000</td>\n",
215-
" <td></td>\n",
216-
" <td>0.0</td>\n",
217-
" <td>0.000000e+00</td>\n",
218-
" <td>6-63720525-A-C</td>\n",
219-
" </tr>\n",
220-
" <tr>\n",
221-
" <th>3</th>\n",
222-
" <td>0.000020</td>\n",
223-
" <td>South Asian</td>\n",
224-
" <td>0.0</td>\n",
225-
" <td>1.045299e-06</td>\n",
226-
" <td>6-63720526-T-A</td>\n",
227-
" </tr>\n",
228-
" <tr>\n",
229-
" <th>4</th>\n",
230-
" <td>0.000000</td>\n",
231-
" <td></td>\n",
232-
" <td>0.0</td>\n",
233-
" <td>0.000000e+00</td>\n",
234-
" <td>6-63720527-G-T</td>\n",
235-
" </tr>\n",
236-
" <tr>\n",
237-
" <th>...</th>\n",
238-
" <td>...</td>\n",
239-
" <td>...</td>\n",
240-
" <td>...</td>\n",
241-
" <td>...</td>\n",
242-
" <td>...</td>\n",
243-
" </tr>\n",
244-
" <tr>\n",
245-
" <th>14295</th>\n",
246-
" <td>0.000000</td>\n",
247-
" <td></td>\n",
248-
" <td>0.0</td>\n",
249-
" <td>0.000000e+00</td>\n",
250-
" <td>6-65495479-G-T</td>\n",
251-
" </tr>\n",
252-
" <tr>\n",
253-
" <th>14296</th>\n",
254-
" <td>0.000031</td>\n",
255-
" <td>African/African American</td>\n",
256-
" <td>0.0</td>\n",
257-
" <td>1.446349e-06</td>\n",
258-
" <td>6-65495479-G-A</td>\n",
259-
" </tr>\n",
260-
" <tr>\n",
261-
" <th>14297</th>\n",
262-
" <td>0.000070</td>\n",
263-
" <td>Admixed American</td>\n",
264-
" <td>0.0</td>\n",
265-
" <td>2.629510e-06</td>\n",
266-
" <td>6-65495482-A-G</td>\n",
267-
" </tr>\n",
268-
" <tr>\n",
269-
" <th>14298</th>\n",
270-
" <td>0.000060</td>\n",
271-
" <td>South Asian</td>\n",
272-
" <td>0.0</td>\n",
273-
" <td>3.645085e-06</td>\n",
274-
" <td>6-65495484-T-G</td>\n",
275-
" </tr>\n",
276-
" <tr>\n",
277-
" <th>14299</th>\n",
278-
" <td>0.000012</td>\n",
279-
" <td>South Asian</td>\n",
280-
" <td>0.0</td>\n",
281-
" <td>7.310070e-07</td>\n",
282-
" <td>6-65495485-T-C</td>\n",
283-
" </tr>\n",
284-
" </tbody>\n",
285-
"</table>\n",
286-
"<p>14300 rows × 5 columns</p>\n",
287-
"</div>"
288-
]
289-
},
290-
"execution_count": 12,
291-
"metadata": {},
292-
"output_type": "execute_result"
293-
}
294-
],
295-
"execution_count": 12
121+
"outputs": [],
122+
"execution_count": null
296123
},
297124
{
298-
"metadata": {
299-
"ExecuteTime": {
300-
"start_time": "2024-09-02T18:44:44.827926Z"
301-
}
302-
},
125+
"metadata": {},
303126
"cell_type": "code",
304127
"source": [
305128
"missing_from_api = []\n",
@@ -320,23 +143,15 @@
320143
"execution_count": null
321144
},
322145
{
323-
"metadata": {
324-
"ExecuteTime": {
325-
"start_time": "2024-09-02T18:44:45.626358Z"
326-
}
327-
},
146+
"metadata": {},
328147
"cell_type": "code",
329148
"source": "missing_data.to_csv('C:\\\\Users\\\\Kajus\\\\Desktop\\\\gnomad_data_missing.csv', index=False)",
330149
"id": "388120b03b094511",
331150
"outputs": [],
332151
"execution_count": null
333152
},
334153
{
335-
"metadata": {
336-
"ExecuteTime": {
337-
"start_time": "2024-09-02T18:44:45.626358Z"
338-
}
339-
},
154+
"metadata": {},
340155
"cell_type": "code",
341156
"source": [
342157
"set_lovd_dtypes(data)\n",
@@ -358,11 +173,7 @@
358173
"execution_count": null
359174
},
360175
{
361-
"metadata": {
362-
"ExecuteTime": {
363-
"start_time": "2024-09-02T18:44:45.627863Z"
364-
}
365-
},
176+
"metadata": {},
366177
"cell_type": "code",
367178
"source": [
368179
"for i in data:\n",
@@ -374,11 +185,7 @@
374185
"execution_count": null
375186
},
376187
{
377-
"metadata": {
378-
"ExecuteTime": {
379-
"start_time": "2024-09-02T18:44:45.628871Z"
380-
}
381-
},
188+
"metadata": {},
382189
"cell_type": "code",
383190
"source": [
384191
"set_lovd_dtypes(data)\n",
@@ -391,24 +198,15 @@
391198
"execution_count": null
392199
},
393200
{
394-
"metadata": {
395-
"ExecuteTime": {
396-
"end_time": "2024-09-02T18:44:45.646110Z",
397-
"start_time": "2024-09-02T18:44:45.629871Z"
398-
}
399-
},
201+
"metadata": {},
400202
"cell_type": "code",
401203
"source": "save_lovd_as_vcf(data[\"Variants_On_Genome\"], \"./lovd.vcf\")",
402204
"id": "c968af1617be40db",
403205
"outputs": [],
404206
"execution_count": null
405207
},
406208
{
407-
"metadata": {
408-
"ExecuteTime": {
409-
"start_time": "2024-09-02T18:44:45.630870Z"
410-
}
411-
},
209+
"metadata": {},
412210
"cell_type": "code",
413211
"source": [
414212
"from subprocess import Popen\n",
@@ -421,11 +219,7 @@
421219
"execution_count": null
422220
},
423221
{
424-
"metadata": {
425-
"ExecuteTime": {
426-
"start_time": "2024-09-02T18:44:45.631870Z"
427-
}
428-
},
222+
"metadata": {},
429223
"cell_type": "code",
430224
"source": [
431225
"from api.tools import get_revel_scores\n",
@@ -442,11 +236,7 @@
442236
"execution_count": null
443237
},
444238
{
445-
"metadata": {
446-
"ExecuteTime": {
447-
"start_time": "2024-09-02T18:44:45.631870Z"
448-
}
449-
},
239+
"metadata": {},
450240
"cell_type": "code",
451241
"source": "",
452242
"id": "6f0abfb50bd211a0",

0 commit comments

Comments
 (0)