From c184c960e8ce6d0dbacebdb72dec65c1f413ee41 Mon Sep 17 00:00:00 2001 From: Sheeba Samuel Date: Fri, 27 Sep 2024 01:56:50 +0200 Subject: [PATCH] Update the notebooks for SPARQL queries and evaluation with benchmark and performance metrics. --- notebooks/FAIRJupyter_SPARQL.ipynb | 754 +++++++++++++++++++++----- notebooks/FAIRJupyter_benchmark.ipynb | 654 ++++++++++++++++++++++ 2 files changed, 1267 insertions(+), 141 deletions(-) create mode 100644 notebooks/FAIRJupyter_benchmark.ipynb diff --git a/notebooks/FAIRJupyter_SPARQL.ipynb b/notebooks/FAIRJupyter_SPARQL.ipynb index 98c5fa5..6ec22ae 100644 --- a/notebooks/FAIRJupyter_SPARQL.ipynb +++ b/notebooks/FAIRJupyter_SPARQL.ipynb @@ -18,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 1, "id": "93739f34-2a55-4882-baa8-1085998fadf6", "metadata": {}, "outputs": [ @@ -40,7 +40,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 2, "id": "13e5348b-58ee-4c63-98f6-c16a79fd2c34", "metadata": {}, "outputs": [ @@ -63,7 +63,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 3, "id": "bb9f2020-8098-4da9-804b-62b0c9ef9120", "metadata": {}, "outputs": [], @@ -86,7 +86,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 4, "id": "10ba0428-5784-47c6-8a78-5dfcaad886cc", "metadata": {}, "outputs": [], @@ -102,7 +102,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 5, "id": "70c896be-d1b3-4bdc-9276-00afb61585ba", "metadata": {}, "outputs": [], @@ -163,7 +163,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 6, "id": "256ab636-f768-4448-a2a1-4fcd6010f548", "metadata": {}, "outputs": [ @@ -268,7 +268,7 @@ "9 Health Care Quality, Access, and Evaluation 361" ] }, - "execution_count": 21, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -303,7 +303,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 7, "id": 
"1875525d-ea87-40d1-b540-69a8ea1c58af", "metadata": {}, "outputs": [ @@ -431,7 +431,7 @@ "9 343 " ] }, - "execution_count": 22, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -476,7 +476,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 8, "id": "533365a8-9baf-4e03-a5c4-ff8fc1a186e7", "metadata": {}, "outputs": [ @@ -581,7 +581,7 @@ "9 Sensors (Basel, Switzerland) 51" ] }, - "execution_count": 23, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -612,7 +612,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 9, "id": "14559a81-bf60-4884-b28f-5cfcfe619456", "metadata": {}, "outputs": [ @@ -740,7 +740,7 @@ "9 42 " ] }, - "execution_count": 24, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -781,7 +781,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 10, "id": "a39a85ea-e908-484d-b0a2-2c09bc4fe35a", "metadata": {}, "outputs": [ @@ -909,7 +909,7 @@ "9 9 " ] }, - "execution_count": 25, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -956,7 +956,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 11, "id": "fb266223-0e09-414a-ada5-fdff0a9bfea1", "metadata": {}, "outputs": [ @@ -1061,7 +1061,7 @@ "9 Java 8" ] }, - "execution_count": 26, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -1092,26 +1092,152 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "7b9e3806-f28b-473d-8e77-ca8bb029311b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Results written to results/fig_10_top_programming_languages_per_year.csv\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
created_yearlanguagenotebook_count
02010python58
12010unknown3
22011python40
32011unknown6
42012python182
............
782022sqlite31
792022unknown22
802022wren1
812023R1
822023python21
\n", + "

83 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " created_year language notebook_count\n", + "0 2010 python 58\n", + "1 2010 unknown 3\n", + "2 2011 python 40\n", + "3 2011 unknown 6\n", + "4 2012 python 182\n", + ".. ... ... ...\n", + "78 2022 sqlite3 1\n", + "79 2022 unknown 22\n", + "80 2022 wren 1\n", + "81 2023 R 1\n", + "82 2023 python 21\n", + "\n", + "[83 rows x 3 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "query_string = \"\"\"\n", - "SELECT ?created_year ?language (COUNT(?notebook) as ?notebook_count)\n", + "SELECT ?created_year ?language (COUNT(?notebook) AS ?notebook_count)\n", "WHERE {\n", " ?notebook a ;\n", - " ?repository ;\n", + " ?repository ;\n", " ?language ;\n", " ?version .\n", " ?repository ?created_at .\n", - " BIND(REPLACE(str(?created_at), \"(\\\\d*)-.*\", \"$1\") AS ?created_year) \n", + " BIND(REPLACE(str(?created_at), \"^([0-9]{4})-.*\", \"$1\") AS ?created_year) \n", "}\n", "GROUP BY ?created_year ?language\n", "ORDER BY ?created_year ?language\n", - "\n", - "\n", "\"\"\"\n", + "\n", "csv_filename = 'fig_10_top_programming_languages_per_year'\n", "results = query_and_display_results(query_string, csv_filename)\n", "results" @@ -1127,10 +1253,137 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "936d93ba-9e7f-4f94-82cf-84f7f644f7a2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Results written to results/fig_11_python_notebooks_by_minor_version_last_commit.csv\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
created_yearminor_versioncount_minor_version
020103.427
120103.627
220103.71
320103.82
420103.91
............
1082022unk42
10920233.61
11020233.85
11120233.914
1122023unk1
\n", + "

113 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " created_year minor_version count_minor_version\n", + "0 2010 3.4 27\n", + "1 2010 3.6 27\n", + "2 2010 3.7 1\n", + "3 2010 3.8 2\n", + "4 2010 3.9 1\n", + ".. ... ... ...\n", + "108 2022 unk 42\n", + "109 2023 3.6 1\n", + "110 2023 3.8 5\n", + "111 2023 3.9 14\n", + "112 2023 unk 1\n", + "\n", + "[113 rows x 3 columns]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "query_string = \"\"\"\n", "SELECT ?created_year ?minor_version (COUNT(?notebook) as ?count_minor_version)\n", @@ -1140,7 +1393,7 @@ " \"python\" ;\n", " ?version .\n", " ?repository ?created_at .\n", - " BIND(REPLACE(str(?created_at), \"(\\\\d*)-.*\", \"$1\") AS ?created_year) \n", + " BIND(REPLACE(str(?created_at), \"^([0-9]{4})-.*\", \"$1\") AS ?created_year) \n", " BIND(SUBSTR(?version, 1, 3) AS ?minor_version)\n", " FILTER(?version != \"3\" && ?version != \"1\" && ?version != \"ES2015\")\n", "}\n", @@ -1164,10 +1417,316 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "68c9782b-f2f8-44a8-82c4-7447edb564f5", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Results written to results/fig_12_python_notebooks_by_major_version_first_commit.csv\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
created_yearmajor_versioncount_major_version
02010358
1201122
22011338
3201224
420123178
52013241
620133282
72013u40
820142436
920143195
102014u294
1120152303
1220153705
132015u28
1420162374
1520163920
162016u53
1720172323
18201731368
192017u9
2020182256
21201832371
222018u295
2320192164
24201933915
252019u45
262020292
27202035707
282020u29
292021280
30202132604
312021u118
32202222
33202231146
342022u42
352023320
362023u1
\n", + "
" + ], + "text/plain": [ + " created_year major_version count_major_version\n", + "0 2010 3 58\n", + "1 2011 2 2\n", + "2 2011 3 38\n", + "3 2012 2 4\n", + "4 2012 3 178\n", + "5 2013 2 41\n", + "6 2013 3 282\n", + "7 2013 u 40\n", + "8 2014 2 436\n", + "9 2014 3 195\n", + "10 2014 u 294\n", + "11 2015 2 303\n", + "12 2015 3 705\n", + "13 2015 u 28\n", + "14 2016 2 374\n", + "15 2016 3 920\n", + "16 2016 u 53\n", + "17 2017 2 323\n", + "18 2017 3 1368\n", + "19 2017 u 9\n", + "20 2018 2 256\n", + "21 2018 3 2371\n", + "22 2018 u 295\n", + "23 2019 2 164\n", + "24 2019 3 3915\n", + "25 2019 u 45\n", + "26 2020 2 92\n", + "27 2020 3 5707\n", + "28 2020 u 29\n", + "29 2021 2 80\n", + "30 2021 3 2604\n", + "31 2021 u 118\n", + "32 2022 2 2\n", + "33 2022 3 1146\n", + "34 2022 u 42\n", + "35 2023 3 20\n", + "36 2023 u 1" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "query_string = \"\"\"\n", "SELECT ?created_year ?major_version (COUNT(?notebook) as ?count_major_version)\n", @@ -1177,7 +1736,7 @@ " \"python\" ;\n", " ?version .\n", " ?repository ?created_at .\n", - " BIND(REPLACE(str(?created_at), \"(\\\\d*)-.*\", \"$1\") AS ?created_year) \n", + " BIND(REPLACE(str(?created_at), \"^([0-9]{4})-.*\", \"$1\") AS ?created_year) \n", " BIND(SUBSTR(?version, 1, 1) AS ?major_version)\n", " FILTER(?version != \"3\" && ?version != \"1\" && ?version != \"ES2015\")\n", "}\n", @@ -1201,7 +1760,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 15, "id": "1d5e330c-890a-46aa-898a-50613100ffd3", "metadata": {}, "outputs": [ @@ -1306,7 +1865,7 @@ "9 CalledProcessError 68" ] }, - "execution_count": 31, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -1338,7 +1897,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 16, "id": "0a397aca-e6f5-4f6d-b8c5-6cff90719e5b", "metadata": {}, "outputs": [ @@ -1443,7 +2002,7 @@ "9 Natural Science Disciplines 
3663" ] }, - "execution_count": 32, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -1485,7 +2044,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 17, "id": "0b92d735-d8b2-4ebe-81be-0cd3d974cb36", "metadata": {}, "outputs": [ @@ -1538,7 +2097,7 @@ "0 324 879 1203" ] }, - "execution_count": 33, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -1577,7 +2136,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 18, "id": "32a2f2fb-d951-4b4f-9c30-e01484f485f7", "metadata": {}, "outputs": [ @@ -1717,7 +2276,7 @@ "9 comparison to None should be 'if cond is None:' " ] }, - "execution_count": 34, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -1745,93 +2304,6 @@ "# Other queries over the FAIR Jupyter graph" ] }, - { - "cell_type": "markdown", - "id": "027b66fc-8279-4d14-840b-f1b284048bfd", - "metadata": {}, - "source": [ - "## Notebooks by search term: 'immun' AND ('stem' OR 'differentiation')" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "id": "8cddcac4-1898-470d-b12f-a960727b6943", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Results written to results/notebooks:by_search_term.csv\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
notebook_urlarticle_labelkeywords
\n", - "
" - ], - "text/plain": [ - "Empty DataFrame\n", - "Columns: [notebook_url, article_label, keywords]\n", - "Index: []" - ] - }, - "execution_count": 35, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "query_string = \"\"\"\n", - "SELECT DISTINCT ?notebook_url ?article_label ?keywords WHERE { \n", - " ?article ?keywords .\n", - " ?article ?article_label . \n", - " ?article ?journal .\n", - " ?journal ?journal_label . \n", - " FILTER (REGEX(LCASE(CONCAT(?keywords, \" \", ?article_label, \" \", ?journal_label)), \"immun\"))\n", - " FILTER (REGEX(LCASE(CONCAT(?keywords, \" \", ?article_label, \" \", ?journal_label)), \"\\\\b(stem|differentiation)\"))\n", - " ?article ^ ?repository .\n", - " ?notebook ?repository .\n", - " ?notebook .\n", - " ?notebook ?notebook_label . # filename\n", - " ?repository ?repo_url_base . # find repo on GitHub\n", - " BIND(URI(CONCAT( ?repo_url_base, \"/blob/master/\", ?notebook_label)) AS ?notebook_url) # create clickable link to notebook on GitHub\n", - " FILTER (?notebook_url != \"\")\n", - "}\n", - "\n", - "\"\"\"\n", - "csv_filename = 'notebooks:by_search_term'\n", - "results = query_and_display_results(query_string, csv_filename)\n", - "results" - ] - }, { "cell_type": "markdown", "id": "35153af4-ae10-4515-a642-1ac9645902ff", @@ -1842,7 +2314,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 19, "id": "bc348244-0855-4627-b397-7a209b7439f3", "metadata": {}, "outputs": [ @@ -2267,7 +2739,7 @@ "53 African cities;Cote d’Ivoire;diarrhoea;landsca... 
" ] }, - "execution_count": 36, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -2295,7 +2767,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 20, "id": "7ecf1873-71ed-4d02-b775-c199b40ecd56", "metadata": {}, "outputs": [ @@ -2382,7 +2854,7 @@ "6 RuntimeError 1" ] }, - "execution_count": 37, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -2417,7 +2889,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 21, "id": "46e9655b-96f7-4c5b-9bd5-e40a1a6c968a", "metadata": {}, "outputs": [ @@ -2534,7 +3006,7 @@ "11 TypeError 1" ] }, - "execution_count": 38, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -2571,7 +3043,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 22, "id": "f0071524-0106-4fd5-b0c7-9214a604befa", "metadata": {}, "outputs": [ @@ -2685,7 +3157,7 @@ "[106 rows x 2 columns]" ] }, - "execution_count": 39, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -2725,7 +3197,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 23, "id": "161d8410-b2c9-497b-8d61-3b2ac5b3e9fe", "metadata": {}, "outputs": [ @@ -2839,7 +3311,7 @@ "[4162 rows x 2 columns]" ] }, - "execution_count": 40, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -2877,7 +3349,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 24, "id": "82e41f9c-e992-4c4c-bc11-00a9044d97d5", "metadata": {}, "outputs": [ @@ -3054,7 +3526,7 @@ "[100 rows x 4 columns]" ] }, - "execution_count": 41, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -3099,7 +3571,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 25, "id": "ed701131-6ff3-45d7-ad1e-fa7635e8ebab", "metadata": {}, "outputs": [ @@ -3263,7 +3735,7 @@ "[100 rows x 4 columns]" ] }, - "execution_count": 42, + "execution_count": 25, 
"metadata": {}, "output_type": "execute_result" } @@ -3307,7 +3779,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 26, "id": "fe8e4dbc-006a-4721-8dae-132bc6cc89e6", "metadata": {}, "outputs": [ @@ -3458,7 +3930,7 @@ "[100 rows x 4 columns]" ] }, - "execution_count": 43, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } diff --git a/notebooks/FAIRJupyter_benchmark.ipynb b/notebooks/FAIRJupyter_benchmark.ipynb new file mode 100644 index 0000000..e03f346 --- /dev/null +++ b/notebooks/FAIRJupyter_benchmark.ipynb @@ -0,0 +1,654 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "809c0a99-64fd-4f7f-87ca-3398dc6b1646", + "metadata": {}, + "source": [ + "# FAIR Jupyter Evaluation: Benchmark and Performance Metrics\n", + "FAIR Jupyter is a knowledge graph for semantic sharing and granular exploration of a computational notebook reproducibility dataset. This notebook provides some SPARQL queries to query the FAIR Jupyter SPARQL Endpoint.\n", + "More Information on FAIR Jupyter Ontology and Knowledge Graph: https://w3id.org/fairjupyter\n", + "\n", + "This notebook runs ten SPARQL queries on FAIR Jupyter and federated queries over Wikidata. It runs the notebook two times with different LIMITs. 
It logs the performance metrics when the queries are run with limit (e.g., 10, 100, 1000) and also logs the memory usage for each query execution.\n", + "\n", + "## Prerequisites\n", + "The notebook is written in Python and besides Jupyter and Wikidata, it has the following direct dependencies:\n", + "* sparqlwrapper to run SPARQL queries on the FAIR Jupyter SPARQL endpoint (including federated queries over Wikidata)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "fe8b7723-9822-4ed6-aa0e-3323c07855ca", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: sparqlwrapper in /home/shsam/miniconda3/lib/python3.12/site-packages (2.0.0)\n", + "Requirement already satisfied: rdflib>=6.1.1 in /home/shsam/miniconda3/lib/python3.12/site-packages (from sparqlwrapper) (7.0.0)\n", + "Requirement already satisfied: isodate<0.7.0,>=0.6.0 in /home/shsam/miniconda3/lib/python3.12/site-packages (from rdflib>=6.1.1->sparqlwrapper) (0.6.1)\n", + "Requirement already satisfied: pyparsing<4,>=2.1.0 in /home/shsam/miniconda3/lib/python3.12/site-packages (from rdflib>=6.1.1->sparqlwrapper) (3.1.2)\n", + "Requirement already satisfied: six in /home/shsam/miniconda3/lib/python3.12/site-packages (from isodate<0.7.0,>=0.6.0->rdflib>=6.1.1->sparqlwrapper) (1.16.0)\n" + ] + } + ], + "source": [ + "!pip install sparqlwrapper" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "bae6f238-2313-48cd-ae82-55c2ff01b63e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: memory-profiler in /home/shsam/miniconda3/lib/python3.12/site-packages (0.61.0)\n", + "Requirement already satisfied: psutil in /home/shsam/miniconda3/lib/python3.12/site-packages (from memory-profiler) (5.9.8)\n" + ] + } + ], + "source": [ + "!pip install memory-profiler" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": 
"196d57d4-d0e1-4b3d-963a-1cb55bcc0ab2", + "metadata": {}, + 
"outputs": [], + "source": [ + "import time\n", + "import pandas as pd\n", + "from SPARQLWrapper import SPARQLWrapper, JSON\n", + "import psutil\n" + ] + }, + { + "cell_type": "markdown", + "id": "27d8c428-12af-4990-839b-6b8b1492292b", + "metadata": {}, + "source": [ + "# SPARQL Endpoint" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "b078a8b6-e755-4d15-8f5c-4bdfcb0af9ba", + "metadata": {}, + "outputs": [], + "source": [ + "# Define the SPARQL endpoint\n", + "sparql = SPARQLWrapper(\"https://reproduceme.uni-jena.de/fairjupyter/sparql\")" + ] + }, + { + "cell_type": "markdown", + "id": "e99839aa-3f6b-434e-a35b-0ef1e1404eda", + "metadata": {}, + "source": [ + "# Selected SPARQL queries and federated queries over Wikidata" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "0324f264-da4b-43a4-afdf-f245464bfab2", + "metadata": {}, + "outputs": [], + "source": [ + "# List of queries with placeholders for LIMIT\n", + "queries = [\n", + " \"\"\"\n", + " SELECT ?research_field (COUNT(DISTINCT ?article) AS ?number_of_articles)\n", + " WHERE {{ \n", + " ?repository ?article .\n", + " ?article ?mesh .\n", + " ?mesh ?top_mesh .\n", + " ?top_mesh ?research_field\n", + " \n", + " }}\n", + " GROUP BY ?research_field\n", + " ORDER BY DESC(?number_of_articles)\n", + " LIMIT {}\n", + " \"\"\",\n", + " \"\"\"\n", + " SELECT ?journal_name (COUNT(?article) as ?article_count)\n", + " WHERE {{\n", + " ?article ?journal .\n", + " ?journal ?journal_name .\n", + " }}\n", + " GROUP BY ?journal_name\n", + " ORDER BY DESC(?article_count)\n", + " LIMIT {}\n", + " \"\"\",\n", + " \"\"\"\n", + " SELECT ?language (COUNT(?notebook) as ?notebook_count)\n", + " WHERE {{\n", + " ?notebook a ;\n", + " ?language .\n", + " }}\n", + " GROUP BY ?language\n", + " ORDER BY DESC(?notebook_count)\n", + " LIMIT {}\n", + " \"\"\",\n", + " \"\"\"\n", + " SELECT ?exception (COUNT(?exception) AS ?count)\n", + " WHERE {{\n", + " ?execution a ;\n", + " ?exception .\n", + " 
}}\n", + " GROUP BY ?exception\n", + " ORDER BY DESC(?count)\n", + " LIMIT {}\n", + " \"\"\",\n", + " \"\"\"\n", + " PREFIX xsd: \n", + " SELECT DISTINCT ?research_field (COUNT(?exception) AS ?exception_count)\n", + " WHERE {{ \n", + " ?execution a ;\n", + " ?exception ;\n", + " ?repository .\n", + " ?repository a ;\n", + " \t\t\t ?article ;\n", + " \t\t\t ?notebooks_count .\n", + " ?article a ; \n", + " \t\t ?mesh .\n", + " ?mesh ?top_mesh .\n", + " ?top_mesh ?research_field . \n", + " FILTER (xsd:integer(?notebooks_count)>0)\n", + " }}\n", + " GROUP BY ?research_field\n", + " ORDER BY DESC(?exception_count)\n", + " LIMIT {}\n", + " \"\"\",\n", + " \"\"\"\n", + " SELECT ?notebook ?error ?description\n", + " WHERE {{\n", + " ?error a ;\n", + " ?description ;\n", + " ?notebook .\n", + " }}\n", + " LIMIT {}\n", + " \"\"\",\n", + " \"\"\"\n", + " SELECT DISTINCT ?article ?keywords WHERE {{ \n", + " ?article ?keywords .\n", + " FILTER (REGEX(LCASE(?keywords), \"open(.)source\"))\n", + " }}\n", + " LIMIT {}\n", + " \"\"\",\n", + " \"\"\"\n", + " PREFIX xsd: \n", + " SELECT DISTINCT ?repo ?stargazers_count WHERE {{\n", + " ?repo ?count. 
\n", + " BIND(xsd:float(?count) AS ?stargazers_count)\n", + " FILTER ((?stargazers_count) > 0)\n", + " }}\n", + " ORDER BY DESC(?stargazers_count)\n", + " LIMIT {}\n", + " \"\"\",\n", + " \"\"\"\n", + " PREFIX rdfs: \n", + "\n", + " PREFIX wikidata_wd: \n", + " PREFIX wikidata_wdt: \n", + " \n", + " SELECT DISTINCT\n", + " \n", + " ?fj_article\n", + " ?wikidata\n", + " ?wikidata_label\n", + " ?DOI\n", + " \n", + " WHERE {{\n", + " ?fj_article ?doi .\n", + " BIND(UCASE(?doi) AS ?DOI)\n", + " service {{\n", + " ?wikidata wikidata_wdt:P356 ?DOI .\n", + " ?wikidata rdfs:label ?wikidata_label .\n", + " FILTER (LANG(?wikidata_label) = \"en\")\n", + " }}\n", + " }}\n", + " LIMIT {}\n", + " \"\"\",\n", + " \"\"\"\n", + " PREFIX rdfs: \n", + "\n", + " PREFIX wikidata_wd: \n", + " PREFIX wikidata_wdt: \n", + " \n", + " SELECT DISTINCT\n", + " \n", + " ?fj_article\n", + " ?wikidata\n", + " ?wikidata_label\n", + " ?pmc\n", + " \n", + " WHERE {{\n", + " ?fj_article ?pmc .\n", + " service {{\n", + " ?wikidata wikidata_wdt:P932 ?pmc .\n", + " ?wikidata rdfs:label ?wikidata_label .\n", + " FILTER (LANG(?wikidata_label) = \"en\")\n", + " }}\n", + " }}\n", + " LIMIT {}\n", + " \"\"\",\n", + "]\n" + ] + }, + { + "cell_type": "markdown", + "id": "cb7135c1-ca3f-42e2-834c-530edb9b8dfb", + "metadata": {}, + "source": [ + "# Functions" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "3f78a669-65f6-4ba1-953e-4470b39ff200", + "metadata": {}, + "outputs": [], + "source": [ + "# Function to execute the SPARQL query and manually measure memory\n", + "def execute_query(query, limit):\n", + " query_with_limit = query.format(limit)\n", + " \n", + " # Measure memory and time\n", + " start_time = time.time()\n", + " process = psutil.Process() # Get current process\n", + " mem_before = process.memory_info().rss / (1024 * 1024) # Memory in MB\n", + "\n", + " sparql.setQuery(query_with_limit)\n", + " sparql.setReturnFormat(JSON)\n", + " sparql.query().convert()\n", + " \n", + " 
mem_after = process.memory_info().rss / (1024 * 1024) # Memory in MB\n", + " end_time = time.time()\n", + " \n", + " execution_time = end_time - start_time\n", + " memory_used = mem_after - mem_before\n", + " \n", + " return execution_time, memory_used" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "821cb71d-8e36-49ee-b669-2dfe838e318e", + "metadata": {}, + "outputs": [], + "source": [ + "# Function to run all queries for a specific LIMIT and log time/memory\n", + "def run_queries_with_limit(limit):\n", + " results = []\n", + " total_start_time = time.time()\n", + " total_mem_usage = []\n", + "\n", + " for i, query in enumerate(queries):\n", + " print(f\"Running Query {i+1} with LIMIT {limit}...\")\n", + " \n", + " # Run the query and log time/memory\n", + " execution_time, mem_usage = execute_query(query, limit) \n", + " print(f\"execution_time: {execution_time}\")\n", + " print(f\"mem_usage: {mem_usage}\")\n", + " time.sleep(2) # Delay for 2 seconds between queries\n", + " \n", + " if execution_time is not None and mem_usage is not None:\n", + " total_mem_usage.append(mem_usage)\n", + " results.append({\n", + " 'Query': i + 1,\n", + " 'LIMIT': limit,\n", + " 'Execution Time (s)': execution_time,\n", + " 'Memory Usage (MB)': mem_usage\n", + " })\n", + " else:\n", + " print(f\"Skipping Query {i+1} due to an error.\")\n", + " \n", + " total_end_time = time.time()\n", + " total_execution_time = total_end_time - total_start_time\n", + " total_memory_usage = max(total_mem_usage) if total_mem_usage else None # Overall peak memory usage\n", + " \n", + " # Log the total memory/time for the whole notebook\n", + " results.append({\n", + " 'Query': 'Total',\n", + " 'LIMIT': limit,\n", + " 'Execution Time (s)': total_execution_time,\n", + " 'Memory Usage (MB)': total_memory_usage\n", + " })\n", + " \n", + " return results" + ] + }, + { + "cell_type": "markdown", + "id": "4c035a27-1ccc-47b8-8b6c-903360435835", + "metadata": {}, + "source": [ + "# Results" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "2cb8f825-3974-41b5-b10d-ab2ba7ac8a91", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Running Query 1 with LIMIT 10...\n", + "execution_time: 4.4106605052948\n", + "mem_usage: 1.5\n", + "Running Query 2 with LIMIT 10...\n", + "execution_time: 0.16930508613586426\n", + "mem_usage: 0.0\n", + "Running Query 3 with LIMIT 10...\n", + "execution_time: 0.3337666988372803\n", + "mem_usage: 0.0\n", + "Running Query 4 with LIMIT 10...\n", + "execution_time: 0.19837260246276855\n", + "mem_usage: 0.0\n", + "Running Query 5 with LIMIT 10...\n", + "execution_time: 1.2284893989562988\n", + "mem_usage: 0.0\n", + "Running Query 6 with LIMIT 10...\n", + "execution_time: 0.12428760528564453\n", + "mem_usage: 0.0\n", + "Running Query 7 with LIMIT 10...\n", + "execution_time: 0.12464475631713867\n", + "mem_usage: 0.0\n", + "Running Query 8 with LIMIT 10...\n", + "execution_time: 0.1638171672821045\n", + "mem_usage: 0.0\n", + "Running Query 9 with LIMIT 10...\n", + "execution_time: 2.3390748500823975\n", + "mem_usage: 0.0\n", + "Running Query 10 with LIMIT 10...\n", + "execution_time: 2.6051535606384277\n", + "mem_usage: 0.0\n", + "Running Query 1 with LIMIT 100...\n", + "execution_time: 4.086849927902222\n", + "mem_usage: 0.125\n", + "Running Query 2 with LIMIT 100...\n", + "execution_time: 0.22158551216125488\n", + "mem_usage: 0.0\n", + "Running Query 3 with LIMIT 100...\n", + "execution_time: 0.3401978015899658\n", + "mem_usage: 0.0\n", + "Running Query 4 with LIMIT 100...\n", + "execution_time: 0.23847579956054688\n", + "mem_usage: 0.0\n", + "Running Query 5 with LIMIT 100...\n", + "execution_time: 1.41654634475708\n", + "mem_usage: 0.0\n", + "Running Query 6 with LIMIT 100...\n", + "execution_time: 0.18763065338134766\n", + "mem_usage: 0.0\n", + "Running Query 7 with LIMIT 100...\n", + "execution_time: 0.16681241989135742\n", + "mem_usage: 0.0\n", + "Running 
Query 8 with LIMIT 100...\n", + "execution_time: 0.4047880172729492\n", + "mem_usage: 0.0\n", + "Running Query 9 with LIMIT 100...\n", + "execution_time: 15.250993251800537\n", + "mem_usage: 0.25\n", + "Running Query 10 with LIMIT 100...\n", + "execution_time: 23.499578714370728\n", + "mem_usage: 0.0\n", + "Running Query 1 with LIMIT 1000...\n", + "execution_time: 4.16658878326416\n", + "mem_usage: 0.0\n", + "Running Query 2 with LIMIT 1000...\n", + "execution_time: 0.4732682704925537\n", + "mem_usage: 0.875\n", + "Running Query 3 with LIMIT 1000...\n", + "execution_time: 0.3411281108856201\n", + "mem_usage: 0.0\n", + "Running Query 4 with LIMIT 1000...\n", + "execution_time: 0.27576780319213867\n", + "mem_usage: 0.0\n", + "Running Query 5 with LIMIT 1000...\n", + "execution_time: 1.2777421474456787\n", + "mem_usage: 0.0\n", + "Running Query 6 with LIMIT 1000...\n", + "execution_time: 1.1799492835998535\n", + "mem_usage: 0.625\n", + "Running Query 7 with LIMIT 1000...\n", + "execution_time: 0.16412019729614258\n", + "mem_usage: 0.0\n", + "Running Query 8 with LIMIT 1000...\n", + "execution_time: 0.6738555431365967\n", + "mem_usage: 0.0\n", + "Running Query 9 with LIMIT 1000...\n", + "execution_time: 127.23007845878601\n", + "mem_usage: -0.05078125\n", + "Running Query 10 with LIMIT 1000...\n", + "execution_time: 223.4653778076172\n", + "mem_usage: 0.0859375\n", + "Benchmarking complete. Results saved to 'sparql_benchmark_results.csv'.\n" + ] + } + ], + "source": [ + "\n", + "# Run all queries once per LIMIT value (10, 100 and 1000)\n", + "all_results = []\n", + "for limit in [10, 100, 1000]:\n", + " run_results = run_queries_with_limit(limit)\n", + " all_results.extend(run_results)\n", + "\n", + "# Save results to a DataFrame and export to CSV\n", + "df_results = pd.DataFrame(all_results)\n", + "df_results.to_csv('sparql_benchmark_results.csv', index=False)\n", + "\n", + "print(\"Benchmarking complete. 
Results saved to 'sparql_benchmark_results.csv'.\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "83eb843a-4ab3-422a-9f45-579843968581", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting seaborn\n", + " Downloading seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)\n", + "Requirement already satisfied: numpy!=1.24.0,>=1.20 in /home/shsam/miniconda3/lib/python3.12/site-packages (from seaborn) (1.26.4)\n", + "Requirement already satisfied: pandas>=1.2 in /home/shsam/miniconda3/lib/python3.12/site-packages (from seaborn) (2.2.2)\n", + "Requirement already satisfied: matplotlib!=3.6.1,>=3.4 in /home/shsam/miniconda3/lib/python3.12/site-packages (from seaborn) (3.9.2)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /home/shsam/miniconda3/lib/python3.12/site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (1.3.0)\n", + "Requirement already satisfied: cycler>=0.10 in /home/shsam/miniconda3/lib/python3.12/site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (0.12.1)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /home/shsam/miniconda3/lib/python3.12/site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (4.53.1)\n", + "Requirement already satisfied: kiwisolver>=1.3.1 in /home/shsam/miniconda3/lib/python3.12/site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (1.4.7)\n", + "Requirement already satisfied: packaging>=20.0 in /home/shsam/miniconda3/lib/python3.12/site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (23.2)\n", + "Requirement already satisfied: pillow>=8 in /home/shsam/miniconda3/lib/python3.12/site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (10.4.0)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /home/shsam/miniconda3/lib/python3.12/site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (3.1.2)\n", + "Requirement already satisfied: python-dateutil>=2.7 in /home/shsam/miniconda3/lib/python3.12/site-packages (from 
matplotlib!=3.6.1,>=3.4->seaborn) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /home/shsam/miniconda3/lib/python3.12/site-packages (from pandas>=1.2->seaborn) (2024.1)\n", + "Requirement already satisfied: tzdata>=2022.7 in /home/shsam/miniconda3/lib/python3.12/site-packages (from pandas>=1.2->seaborn) (2024.1)\n", + "Requirement already satisfied: six>=1.5 in /home/shsam/miniconda3/lib/python3.12/site-packages (from python-dateutil>=2.7->matplotlib!=3.6.1,>=3.4->seaborn) (1.16.0)\n", + "Downloading seaborn-0.13.2-py3-none-any.whl (294 kB)\n", + "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m294.9/294.9 kB\u001b[0m \u001b[31m1.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m:01\u001b[0m\n", + "\u001b[?25hInstalling collected packages: seaborn\n", + "Successfully installed seaborn-0.13.2\n" + ] + } + ], + "source": [ + "!pip install seaborn" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "81175f37-5a6f-427c-b3ec-ab9d1b195d06", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "# Load the benchmark results from the CSV file\n", + "df_results = pd.read_csv('sparql_benchmark_results.csv')\n", + "\n", + "# Set seaborn style for better aesthetics\n", + "sns.set(style=\"whitegrid\")\n", + "\n", + "# Create a figure with two subplots: one for execution time, one for memory usage\n", + "fig, axs = plt.subplots(2, 1, figsize=(10, 8))\n", + "\n", + "# Plot Execution Time\n", + "sns.lineplot(\n", + " data=df_results, \n", + " x='LIMIT', \n", + " y='Execution Time (s)', \n", + " hue='Query', \n", + " marker=\"o\", \n", + " ax=axs[0]\n", + ")\n", + "axs[0].set_title('SPARQL Query Execution Time by LIMIT')\n", + "axs[0].set_ylabel('Execution Time (s)')\n", + "axs[0].set_xlabel('LIMIT')\n", + "axs[0].legend(title='Query', bbox_to_anchor=(1.05, 1), loc='upper left')\n", + "\n", + "# Plot Memory Usage\n", + "sns.lineplot(\n", + " data=df_results, \n", + " x='LIMIT', \n", + " y='Memory Usage (MB)', \n", + " hue='Query', \n", + " marker=\"o\", \n", + " ax=axs[1]\n", + ")\n", + "axs[1].set_title('SPARQL Query Memory Usage by LIMIT')\n", + "axs[1].set_ylabel('Memory Usage (MB)')\n", + "axs[1].set_xlabel('LIMIT')\n", + "axs[1].legend(title='Query', bbox_to_anchor=(1.05, 1), loc='upper left')\n", + "\n", + "# Adjust layout\n", + "plt.tight_layout()\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "74382bce-26a7-482c-b040-6ef768de32c8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Plot has been saved as 'sparql_benchmark_results.pdf'\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "# Load the benchmark results from the CSV file (replace with your actual file path)\n", + "df_results = pd.read_csv('sparql_benchmark_results.csv')\n", + "\n", + "# Set seaborn style for better aesthetics\n", + "sns.set(style=\"whitegrid\")\n", + "\n", + "# Create a figure with two subplots: one for execution time, one for memory usage\n", + "fig, axs = plt.subplots(1, 1, figsize=(8, 4))\n", + "\n", + "# Plot Execution Time\n", + "sns.lineplot(\n", + " data=df_results, \n", + " x='LIMIT', \n", + " y='Execution Time (s)', \n", + " hue='Query', \n", + " marker=\"o\", \n", + " ax=axs\n", + ")\n", + "axs.set_title('SPARQL Query Execution Time by LIMIT')\n", + "axs.set_ylabel('Execution Time (s)')\n", + "axs.set_xlabel('LIMIT')\n", + "axs.legend(title='Query', bbox_to_anchor=(1.05, 1), loc='upper left')\n", + "\n", + "\n", + "\n", + "# Adjust layout\n", + "plt.tight_layout()\n", + "\n", + "# Save the figure as a PDF\n", + "fig.savefig('sparql_benchmark_results.pdf')\n", + "\n", + "print(\"Plot has been saved as 'sparql_benchmark_results.pdf'\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "45ac8db6-2b8d-421d-8152-42d179259feb", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}