-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
changes to pipeline include scaling and enumerating glm variants
- Loading branch information
1 parent
1c10522
commit 9856531
Showing
10 changed files
with
5,056 additions
and
18,221 deletions.
There are no files selected for viewing
189 changes: 189 additions & 0 deletions
189
feature_importance/subgroup/current/openml-classification.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,189 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 24, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import openml\n", | ||
"import matplotlib.pyplot as plt\n", | ||
"import warnings\n", | ||
"import pandas as pd\n", | ||
"import numpy as np\n", | ||
"warnings.filterwarnings(action='ignore', category=FutureWarning,\n", | ||
" module='openml')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 25, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"dataids = [31, 10101, 3913, 3, 3917, 9957, 9946, 3918, 3903, 37, 9971, 9952,\n", | ||
" 3902, 49, 43, 9978, 10093, 219, 9976, 14965, 6, 9977, 53, 11, 15, 16,\n", | ||
" 14, 32, 3549, 12, 9981, 18, 28, 2074, 29, 45, 125922, 9960, 9964, 22,\n", | ||
" 2079, 14969, 3560, 14952, 125920, 23, 3904, 3022, 9985, 9910, 14970,\n", | ||
" 3021, 3481, 7592, 3573, 146824, 146820, 146822, 146195, 146800, 146817,\n", | ||
" 146819, 146821, 167119, 14954, 167141, 167140, 167120, 167125, 146825,\n", | ||
" 167124, 167121]" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 26, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"72" | ||
] | ||
}, | ||
"execution_count": 26, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"len(dataids)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 27, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"3917 0 False\n", | ||
"1 True\n", | ||
"2 True\n", | ||
"3 True\n", | ||
"4 True\n", | ||
" ... \n", | ||
"2104 False\n", | ||
"2105 False\n", | ||
"2106 False\n", | ||
"2107 False\n", | ||
"2108 False\n", | ||
"Name: defects, Length: 2109, dtype: bool\n", | ||
"3918 0 False\n", | ||
"1 True\n", | ||
"2 True\n", | ||
"3 True\n", | ||
"4 True\n", | ||
" ... \n", | ||
"1104 False\n", | ||
"1105 False\n", | ||
"1106 False\n", | ||
"1107 False\n", | ||
"1108 False\n", | ||
"Name: defects, Length: 1109, dtype: bool\n", | ||
"3903 0 False\n", | ||
"1 False\n", | ||
"2 False\n", | ||
"3 False\n", | ||
"4 False\n", | ||
" ... \n", | ||
"1558 False\n", | ||
"1559 False\n", | ||
"1560 False\n", | ||
"1561 False\n", | ||
"1562 False\n", | ||
"Name: c, Length: 1563, dtype: bool\n", | ||
"3902 0 False\n", | ||
"1 False\n", | ||
"2 False\n", | ||
"3 False\n", | ||
"4 False\n", | ||
" ... \n", | ||
"1453 False\n", | ||
"1454 False\n", | ||
"1455 False\n", | ||
"1456 False\n", | ||
"1457 False\n", | ||
"Name: c, Length: 1458, dtype: bool\n", | ||
"3904 0 False\n", | ||
"1 True\n", | ||
"2 True\n", | ||
"3 True\n", | ||
"4 True\n", | ||
" ... \n", | ||
"10880 False\n", | ||
"10881 False\n", | ||
"10882 False\n", | ||
"10883 False\n", | ||
"10884 False\n", | ||
"Name: defects, Length: 10885, dtype: bool\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"categorical_counter = 0\n", | ||
"binary_ids = []\n", | ||
"for id in dataids:\n", | ||
" task = openml.tasks.get_task(id, download_splits=False, download_data=False,\n", | ||
" download_features_meta_data=False,\n", | ||
" download_qualities=False)\n", | ||
" dataset = task.get_dataset()\n", | ||
" X, y, _, _ = dataset.get_data(target=dataset.default_target_attribute)\n", | ||
" # get # of unique values in y\n", | ||
" if len(y.unique()) > 2:\n", | ||
" # print(id, 'multiclass')\n", | ||
" pass\n", | ||
" if isinstance(y.dtype, pd.CategoricalDtype):\n", | ||
" categorical_counter += 1\n", | ||
" # print(id, 'categorical')\n", | ||
" # print(y)\n", | ||
" y = pd.get_dummies(y, drop_first=True, dtype=float)\n", | ||
" else:\n", | ||
" print(id, y)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 28, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"[]" | ||
] | ||
}, | ||
"execution_count": 28, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"binary_ids" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "mdi", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.10.14" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Oops, something went wrong.