Skip to content

Commit

Permalink
changes to pipeline include scaling and enumerating glm variants
Browse files Browse the repository at this point in the history
  • Loading branch information
zachrewolinski committed Jan 7, 2025
1 parent 1c10522 commit 9856531
Show file tree
Hide file tree
Showing 10 changed files with 5,056 additions and 18,221 deletions.
189 changes: 189 additions & 0 deletions feature_importance/subgroup/current/openml-classification.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"import openml\n",
"import matplotlib.pyplot as plt\n",
"import warnings\n",
"import pandas as pd\n",
"import numpy as np\n",
"warnings.filterwarnings(action='ignore', category=FutureWarning,\n",
" module='openml')"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"# Ids handed to openml.tasks.get_task() by the scanning loop later in this\n",
"# notebook, i.e. OpenML *task* ids rather than dataset ids.\n",
"# NOTE(review): looks like the OpenML-CC18 benchmark suite - confirm the source.\n",
"dataids = [31, 10101, 3913, 3, 3917, 9957, 9946, 3918, 3903, 37, 9971, 9952,\n",
" 3902, 49, 43, 9978, 10093, 219, 9976, 14965, 6, 9977, 53, 11, 15, 16,\n",
" 14, 32, 3549, 12, 9981, 18, 28, 2074, 29, 45, 125922, 9960, 9964, 22,\n",
" 2079, 14969, 3560, 14952, 125920, 23, 3904, 3022, 9985, 9910, 14970,\n",
" 3021, 3481, 7592, 3573, 146824, 146820, 146822, 146195, 146800, 146817,\n",
" 146819, 146821, 167119, 14954, 167141, 167140, 167120, 167125, 146825,\n",
" 167124, 167121]"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"72"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sanity check: how many task ids are in the list.\n",
"len(dataids)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Scan every OpenML task listed in `dataids`, count how many targets have a\n",
"# categorical dtype, and collect the ids of the tasks whose default target\n",
"# takes exactly two distinct values.\n",
"categorical_counter = 0  # tasks whose target has a pandas categorical dtype\n",
"binary_ids = []  # task ids with a two-class target\n",
"# The loop variable is `task_id` rather than `id` so the builtin id() is not\n",
"# shadowed for the rest of the kernel session.\n",
"for task_id in dataids:\n",
"    task = openml.tasks.get_task(task_id, download_splits=False,\n",
"                                 download_data=False,\n",
"                                 download_features_meta_data=False,\n",
"                                 download_qualities=False)\n",
"    dataset = task.get_dataset()\n",
"    X, y, _, _ = dataset.get_data(target=dataset.default_target_attribute)\n",
"    # Number of distinct target values; unique() also counts a missing value\n",
"    # as its own entry, so targets with NaNs are not reported as binary.\n",
"    n_classes = len(y.unique())\n",
"    if n_classes == 2:\n",
"        binary_ids.append(task_id)\n",
"    if isinstance(y.dtype, pd.CategoricalDtype):\n",
"        categorical_counter += 1\n",
"    else:\n",
"        # Non-categorical target: print a one-line summary (id, column name,\n",
"        # dtype, class count) instead of dumping the whole Series.\n",
"        print(task_id, y.name, y.dtype, n_classes)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"binary_ids"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "mdi",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.14"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading

0 comments on commit 9856531

Please sign in to comment.