|
7 | 7 | "collapsed": true,
|
8 | 8 | "jupyter": {
|
9 | 9 | "outputs_hidden": true
|
10 |
| - }, |
11 |
| - "ExecuteTime": { |
12 |
| - "end_time": "2024-09-02T18:45:02.492330Z", |
13 |
| - "start_time": "2024-09-02T18:45:02.488185Z" |
14 | 10 | }
|
15 | 11 | },
|
16 | 12 | "source": [
|
|
33 | 29 | "pd.options.display.max_columns = 0"
|
34 | 30 | ],
|
35 | 31 | "outputs": [],
|
36 |
| - "execution_count": 11 |
| 32 | + "execution_count": null |
37 | 33 | },
|
38 | 34 | {
|
39 | 35 | "cell_type": "code",
|
|
63 | 59 | "execution_count": null
|
64 | 60 | },
|
65 | 61 | {
|
66 |
| - "metadata": { |
67 |
| - "ExecuteTime": { |
68 |
| - "start_time": "2024-09-02T18:42:20.091398Z" |
69 |
| - } |
70 |
| - }, |
| 62 | + "metadata": {}, |
71 | 63 | "cell_type": "code",
|
72 | 64 | "source": [
|
73 | 65 | "gnomad_data = request_gnomad_api_data(\"EYS\")\n",
|
|
79 | 71 | "execution_count": null
|
80 | 72 | },
|
81 | 73 | {
|
82 |
| - "metadata": { |
83 |
| - "ExecuteTime": { |
84 |
| - "start_time": "2024-09-02T18:44:44.422287Z" |
85 |
| - } |
86 |
| - }, |
| 74 | + "metadata": {}, |
87 | 75 | "cell_type": "code",
|
88 | 76 | "source": [
|
89 | 77 | "store_database_for_eys_gene('gnomad', False)\n",
|
|
95 | 83 | "execution_count": null
|
96 | 84 | },
|
97 | 85 | {
|
98 |
| - "metadata": { |
99 |
| - "ExecuteTime": { |
100 |
| - "start_time": "2024-09-02T18:44:44.497881Z" |
101 |
| - } |
102 |
| - }, |
| 86 | + "metadata": {}, |
103 | 87 | "cell_type": "code",
|
104 | 88 | "source": "display(gnomad_data_2)",
|
105 | 89 | "id": "9d3e4d6b5f7be127",
|
106 | 90 | "outputs": [],
|
107 | 91 | "execution_count": null
|
108 | 92 | },
|
109 | 93 | {
|
110 |
| - "metadata": { |
111 |
| - "ExecuteTime": { |
112 |
| - "start_time": "2024-09-02T18:44:44.546361Z" |
113 |
| - } |
114 |
| - }, |
| 94 | + "metadata": {}, |
115 | 95 | "cell_type": "code",
|
116 | 96 | "source": [
|
117 | 97 | "gnomad_data_2.to_csv('C:\\\\Users\\\\Kajus\\\\Desktop\\\\gnomad_data_downloaded.csv', index=False)\n",
|
|
122 | 102 | "execution_count": null
|
123 | 103 | },
|
124 | 104 | {
|
125 |
| - "metadata": { |
126 |
| - "ExecuteTime": { |
127 |
| - "start_time": "2024-09-02T18:44:44.806484Z" |
128 |
| - } |
129 |
| - }, |
| 105 | + "metadata": {}, |
130 | 106 | "cell_type": "code",
|
131 | 107 | "source": [
|
132 | 108 | "len(gnomad_data_2), len(gnomad_data)\n",
|
|
138 | 114 | "execution_count": null
|
139 | 115 | },
|
140 | 116 | {
|
141 |
| - "metadata": { |
142 |
| - "ExecuteTime": { |
143 |
| - "end_time": "2024-09-02T18:45:06.035450Z", |
144 |
| - "start_time": "2024-09-02T18:45:06.022832Z" |
145 |
| - } |
146 |
| - }, |
| 117 | + "metadata": {}, |
147 | 118 | "cell_type": "code",
|
148 | 119 | "source": "gnomad_data",
|
149 | 120 | "id": "96283480cccf641",
|
150 |
| - "outputs": [ |
151 |
| - { |
152 |
| - "data": { |
153 |
| - "text/plain": [ |
154 |
| - " Popmax Popmax population ... Allele Frequency variant_id\n", |
155 |
| - "0 0.000016 African/African American ... 1.807419e-06 6-63720525-A-G\n", |
156 |
| - "1 0.000192 East Asian ... 6.573844e-06 6-63720525-A-T\n", |
157 |
| - "2 0.000000 ... 0.000000e+00 6-63720525-A-C\n", |
158 |
| - "3 0.000020 South Asian ... 1.045299e-06 6-63720526-T-A\n", |
159 |
| - "4 0.000000 ... 0.000000e+00 6-63720527-G-T\n", |
160 |
| - "... ... ... ... ... ...\n", |
161 |
| - "14295 0.000000 ... 0.000000e+00 6-65495479-G-T\n", |
162 |
| - "14296 0.000031 African/African American ... 1.446349e-06 6-65495479-G-A\n", |
163 |
| - "14297 0.000070 Admixed American ... 2.629510e-06 6-65495482-A-G\n", |
164 |
| - "14298 0.000060 South Asian ... 3.645085e-06 6-65495484-T-G\n", |
165 |
| - "14299 0.000012 South Asian ... 7.310070e-07 6-65495485-T-C\n", |
166 |
| - "\n", |
167 |
| - "[14300 rows x 5 columns]" |
168 |
| - ], |
169 |
| - "text/html": [ |
170 |
| - "<div>\n", |
171 |
| - "<style scoped>\n", |
172 |
| - " .dataframe tbody tr th:only-of-type {\n", |
173 |
| - " vertical-align: middle;\n", |
174 |
| - " }\n", |
175 |
| - "\n", |
176 |
| - " .dataframe tbody tr th {\n", |
177 |
| - " vertical-align: top;\n", |
178 |
| - " }\n", |
179 |
| - "\n", |
180 |
| - " .dataframe thead th {\n", |
181 |
| - " text-align: right;\n", |
182 |
| - " }\n", |
183 |
| - "</style>\n", |
184 |
| - "<table border=\"1\" class=\"dataframe\">\n", |
185 |
| - " <thead>\n", |
186 |
| - " <tr style=\"text-align: right;\">\n", |
187 |
| - " <th></th>\n", |
188 |
| - " <th>Popmax</th>\n", |
189 |
| - " <th>Popmax population</th>\n", |
190 |
| - " <th>Homozygote Count</th>\n", |
191 |
| - " <th>Allele Frequency</th>\n", |
192 |
| - " <th>variant_id</th>\n", |
193 |
| - " </tr>\n", |
194 |
| - " </thead>\n", |
195 |
| - " <tbody>\n", |
196 |
| - " <tr>\n", |
197 |
| - " <th>0</th>\n", |
198 |
| - " <td>0.000016</td>\n", |
199 |
| - " <td>African/African American</td>\n", |
200 |
| - " <td>0.0</td>\n", |
201 |
| - " <td>1.807419e-06</td>\n", |
202 |
| - " <td>6-63720525-A-G</td>\n", |
203 |
| - " </tr>\n", |
204 |
| - " <tr>\n", |
205 |
| - " <th>1</th>\n", |
206 |
| - " <td>0.000192</td>\n", |
207 |
| - " <td>East Asian</td>\n", |
208 |
| - " <td>0.0</td>\n", |
209 |
| - " <td>6.573844e-06</td>\n", |
210 |
| - " <td>6-63720525-A-T</td>\n", |
211 |
| - " </tr>\n", |
212 |
| - " <tr>\n", |
213 |
| - " <th>2</th>\n", |
214 |
| - " <td>0.000000</td>\n", |
215 |
| - " <td></td>\n", |
216 |
| - " <td>0.0</td>\n", |
217 |
| - " <td>0.000000e+00</td>\n", |
218 |
| - " <td>6-63720525-A-C</td>\n", |
219 |
| - " </tr>\n", |
220 |
| - " <tr>\n", |
221 |
| - " <th>3</th>\n", |
222 |
| - " <td>0.000020</td>\n", |
223 |
| - " <td>South Asian</td>\n", |
224 |
| - " <td>0.0</td>\n", |
225 |
| - " <td>1.045299e-06</td>\n", |
226 |
| - " <td>6-63720526-T-A</td>\n", |
227 |
| - " </tr>\n", |
228 |
| - " <tr>\n", |
229 |
| - " <th>4</th>\n", |
230 |
| - " <td>0.000000</td>\n", |
231 |
| - " <td></td>\n", |
232 |
| - " <td>0.0</td>\n", |
233 |
| - " <td>0.000000e+00</td>\n", |
234 |
| - " <td>6-63720527-G-T</td>\n", |
235 |
| - " </tr>\n", |
236 |
| - " <tr>\n", |
237 |
| - " <th>...</th>\n", |
238 |
| - " <td>...</td>\n", |
239 |
| - " <td>...</td>\n", |
240 |
| - " <td>...</td>\n", |
241 |
| - " <td>...</td>\n", |
242 |
| - " <td>...</td>\n", |
243 |
| - " </tr>\n", |
244 |
| - " <tr>\n", |
245 |
| - " <th>14295</th>\n", |
246 |
| - " <td>0.000000</td>\n", |
247 |
| - " <td></td>\n", |
248 |
| - " <td>0.0</td>\n", |
249 |
| - " <td>0.000000e+00</td>\n", |
250 |
| - " <td>6-65495479-G-T</td>\n", |
251 |
| - " </tr>\n", |
252 |
| - " <tr>\n", |
253 |
| - " <th>14296</th>\n", |
254 |
| - " <td>0.000031</td>\n", |
255 |
| - " <td>African/African American</td>\n", |
256 |
| - " <td>0.0</td>\n", |
257 |
| - " <td>1.446349e-06</td>\n", |
258 |
| - " <td>6-65495479-G-A</td>\n", |
259 |
| - " </tr>\n", |
260 |
| - " <tr>\n", |
261 |
| - " <th>14297</th>\n", |
262 |
| - " <td>0.000070</td>\n", |
263 |
| - " <td>Admixed American</td>\n", |
264 |
| - " <td>0.0</td>\n", |
265 |
| - " <td>2.629510e-06</td>\n", |
266 |
| - " <td>6-65495482-A-G</td>\n", |
267 |
| - " </tr>\n", |
268 |
| - " <tr>\n", |
269 |
| - " <th>14298</th>\n", |
270 |
| - " <td>0.000060</td>\n", |
271 |
| - " <td>South Asian</td>\n", |
272 |
| - " <td>0.0</td>\n", |
273 |
| - " <td>3.645085e-06</td>\n", |
274 |
| - " <td>6-65495484-T-G</td>\n", |
275 |
| - " </tr>\n", |
276 |
| - " <tr>\n", |
277 |
| - " <th>14299</th>\n", |
278 |
| - " <td>0.000012</td>\n", |
279 |
| - " <td>South Asian</td>\n", |
280 |
| - " <td>0.0</td>\n", |
281 |
| - " <td>7.310070e-07</td>\n", |
282 |
| - " <td>6-65495485-T-C</td>\n", |
283 |
| - " </tr>\n", |
284 |
| - " </tbody>\n", |
285 |
| - "</table>\n", |
286 |
| - "<p>14300 rows × 5 columns</p>\n", |
287 |
| - "</div>" |
288 |
| - ] |
289 |
| - }, |
290 |
| - "execution_count": 12, |
291 |
| - "metadata": {}, |
292 |
| - "output_type": "execute_result" |
293 |
| - } |
294 |
| - ], |
295 |
| - "execution_count": 12 |
| 121 | + "outputs": [], |
| 122 | + "execution_count": null |
296 | 123 | },
|
297 | 124 | {
|
298 |
| - "metadata": { |
299 |
| - "ExecuteTime": { |
300 |
| - "start_time": "2024-09-02T18:44:44.827926Z" |
301 |
| - } |
302 |
| - }, |
| 125 | + "metadata": {}, |
303 | 126 | "cell_type": "code",
|
304 | 127 | "source": [
|
305 | 128 | "missing_from_api = []\n",
|
|
320 | 143 | "execution_count": null
|
321 | 144 | },
|
322 | 145 | {
|
323 |
| - "metadata": { |
324 |
| - "ExecuteTime": { |
325 |
| - "start_time": "2024-09-02T18:44:45.626358Z" |
326 |
| - } |
327 |
| - }, |
| 146 | + "metadata": {}, |
328 | 147 | "cell_type": "code",
|
329 | 148 | "source": "missing_data.to_csv('C:\\\\Users\\\\Kajus\\\\Desktop\\\\gnomad_data_missing.csv', index=False)",
|
330 | 149 | "id": "388120b03b094511",
|
331 | 150 | "outputs": [],
|
332 | 151 | "execution_count": null
|
333 | 152 | },
|
334 | 153 | {
|
335 |
| - "metadata": { |
336 |
| - "ExecuteTime": { |
337 |
| - "start_time": "2024-09-02T18:44:45.626358Z" |
338 |
| - } |
339 |
| - }, |
| 154 | + "metadata": {}, |
340 | 155 | "cell_type": "code",
|
341 | 156 | "source": [
|
342 | 157 | "set_lovd_dtypes(data)\n",
|
|
358 | 173 | "execution_count": null
|
359 | 174 | },
|
360 | 175 | {
|
361 |
| - "metadata": { |
362 |
| - "ExecuteTime": { |
363 |
| - "start_time": "2024-09-02T18:44:45.627863Z" |
364 |
| - } |
365 |
| - }, |
| 176 | + "metadata": {}, |
366 | 177 | "cell_type": "code",
|
367 | 178 | "source": [
|
368 | 179 | "for i in data:\n",
|
|
374 | 185 | "execution_count": null
|
375 | 186 | },
|
376 | 187 | {
|
377 |
| - "metadata": { |
378 |
| - "ExecuteTime": { |
379 |
| - "start_time": "2024-09-02T18:44:45.628871Z" |
380 |
| - } |
381 |
| - }, |
| 188 | + "metadata": {}, |
382 | 189 | "cell_type": "code",
|
383 | 190 | "source": [
|
384 | 191 | "set_lovd_dtypes(data)\n",
|
|
391 | 198 | "execution_count": null
|
392 | 199 | },
|
393 | 200 | {
|
394 |
| - "metadata": { |
395 |
| - "ExecuteTime": { |
396 |
| - "end_time": "2024-09-02T18:44:45.646110Z", |
397 |
| - "start_time": "2024-09-02T18:44:45.629871Z" |
398 |
| - } |
399 |
| - }, |
| 201 | + "metadata": {}, |
400 | 202 | "cell_type": "code",
|
401 | 203 | "source": "save_lovd_as_vcf(data[\"Variants_On_Genome\"], \"./lovd.vcf\")",
|
402 | 204 | "id": "c968af1617be40db",
|
403 | 205 | "outputs": [],
|
404 | 206 | "execution_count": null
|
405 | 207 | },
|
406 | 208 | {
|
407 |
| - "metadata": { |
408 |
| - "ExecuteTime": { |
409 |
| - "start_time": "2024-09-02T18:44:45.630870Z" |
410 |
| - } |
411 |
| - }, |
| 209 | + "metadata": {}, |
412 | 210 | "cell_type": "code",
|
413 | 211 | "source": [
|
414 | 212 | "from subprocess import Popen\n",
|
|
421 | 219 | "execution_count": null
|
422 | 220 | },
|
423 | 221 | {
|
424 |
| - "metadata": { |
425 |
| - "ExecuteTime": { |
426 |
| - "start_time": "2024-09-02T18:44:45.631870Z" |
427 |
| - } |
428 |
| - }, |
| 222 | + "metadata": {}, |
429 | 223 | "cell_type": "code",
|
430 | 224 | "source": [
|
431 | 225 | "from api.tools import get_revel_scores\n",
|
|
442 | 236 | "execution_count": null
|
443 | 237 | },
|
444 | 238 | {
|
445 |
| - "metadata": { |
446 |
| - "ExecuteTime": { |
447 |
| - "start_time": "2024-09-02T18:44:45.631870Z" |
448 |
| - } |
449 |
| - }, |
| 239 | + "metadata": {}, |
450 | 240 | "cell_type": "code",
|
451 | 241 | "source": "",
|
452 | 242 | "id": "6f0abfb50bd211a0",
|
|
0 commit comments