Skip to content

Comments

Kaggle Notebook | Physical Activity Prediction | Version 3 #1

Open
ManviNagdev wants to merge 1 commit into main from
KNN
Open

Kaggle Notebook | Physical Activity Prediction | Version 3 #1
ManviNagdev wants to merge 1 commit into main from
KNN

Conversation

@ManviNagdev
Copy link
Owner

No description provided.

@ManviNagdev ManviNagdev requested a review from coderpotter August 6, 2023 23:00
@ManviNagdev ManviNagdev self-assigned this Aug 6, 2023
@@ -0,0 +1 @@
{"cells":[{"source":"<a href=\"https://www.kaggle.com/code/manvinagdev/physical-activity-prediction?scriptVersionId=138941879\" target=\"_blank\"><img align=\"left\" alt=\"Kaggle\" title=\"Open in Kaggle\" src=\"https://kaggle.com/static/images/open-in-kaggle.svg\"></a>","metadata":{},"cell_type":"markdown"},{"cell_type":"markdown","id":"fea86c76","metadata":{"papermill":{"duration":0.008893,"end_time":"2023-08-05T00:06:53.536089","exception":false,"start_time":"2023-08-05T00:06:53.527196","status":"completed"},"tags":[]},"source":["## Importing libraries"]},{"cell_type":"code","execution_count":1,"id":"89d8bcdb","metadata":{"_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","execution":{"iopub.execute_input":"2023-08-05T00:06:53.554723Z","iopub.status.busy":"2023-08-05T00:06:53.554321Z","iopub.status.idle":"2023-08-05T00:07:02.412402Z","shell.execute_reply":"2023-08-05T00:07:02.410962Z"},"papermill":{"duration":8.870272,"end_time":"2023-08-05T00:07:02.414899","exception":false,"start_time":"2023-08-05T00:06:53.544627","status":"completed"},"tags":[]},"outputs":[{"name":"stderr","output_type":"stream","text":["/opt/conda/lib/python3.10/site-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.23.5\n"," warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n","/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/__init__.py:98: UserWarning: unable to load libtensorflow_io_plugins.so: unable to open file: libtensorflow_io_plugins.so, from paths: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so']\n","caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so: undefined symbol: _ZN3tsl6StatusC1EN10tensorflow5error4CodeESt17basic_string_viewIcSt11char_traitsIcEENS_14SourceLocationE']\n"," 
warnings.warn(f\"unable to load libtensorflow_io_plugins.so: {e}\")\n","/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/__init__.py:104: UserWarning: file system plugins are not loaded: unable to open file: libtensorflow_io.so, from paths: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so']\n","caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so: undefined symbol: _ZTVN10tensorflow13GcsFileSystemE']\n"," warnings.warn(f\"file system plugins are not loaded: {e}\")\n"]},{"name":"stdout","output_type":"stream","text":["/kaggle/input/fisical-activity-dataset/dataset2.csv\n"]}],"source":["import numpy as np\n","import pandas as pd\n","import tensorflow as tf\n","\n","import os\n","for dirname, _, filenames in os.walk('/kaggle/input'):\n"," for filename in filenames:\n"," print(os.path.join(dirname, filename))"]},{"cell_type":"markdown","id":"e1dee26f","metadata":{"papermill":{"duration":0.008233,"end_time":"2023-08-05T00:07:02.431643","exception":false,"start_time":"2023-08-05T00:07:02.42341","status":"completed"},"tags":[]},"source":["## Loading the dataset"]},{"cell_type":"code","execution_count":2,"id":"ce6fe559","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:02.452779Z","iopub.status.busy":"2023-08-05T00:07:02.451213Z","iopub.status.idle":"2023-08-05T00:07:24.443285Z","shell.execute_reply":"2023-08-05T00:07:24.442116Z"},"papermill":{"duration":22.004325,"end_time":"2023-08-05T00:07:24.445861","exception":false,"start_time":"2023-08-05T00:07:02.441536","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: 
right;\">\n"," <th></th>\n"," <th>activityID</th>\n"," <th>heart_rate</th>\n"," <th>hand temperature (°C)</th>\n"," <th>hand acceleration X ±16g</th>\n"," <th>hand acceleration Y ±16g</th>\n"," <th>hand acceleration Z ±16g</th>\n"," <th>hand gyroscope X</th>\n"," <th>hand gyroscope Y</th>\n"," <th>hand gyroscope Z</th>\n"," <th>hand magnetometer X</th>\n"," <th>...</th>\n"," <th>ankle acceleration X ±16g</th>\n"," <th>ankle acceleration Y ±16g</th>\n"," <th>ankle acceleration Z ±16g</th>\n"," <th>ankle gyroscope X</th>\n"," <th>ankle gyroscope Y</th>\n"," <th>ankle gyroscope Z</th>\n"," <th>ankle magnetometer X</th>\n"," <th>ankle magnetometer Y</th>\n"," <th>ankle magnetometer Z</th>\n"," <th>PeopleId</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.37223</td>\n"," <td>8.60074</td>\n"," <td>3.51048</td>\n"," <td>-0.092217</td>\n"," <td>0.056812</td>\n"," <td>-0.015845</td>\n"," <td>14.6806</td>\n"," <td>...</td>\n"," <td>9.65918</td>\n"," <td>-1.65569</td>\n"," <td>-0.099797</td>\n"," <td>0.008300</td>\n"," <td>0.009250</td>\n"," <td>-0.017580</td>\n"," <td>-61.1888</td>\n"," <td>-38.9599</td>\n"," <td>-58.1438</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.18837</td>\n"," <td>8.56560</td>\n"," <td>3.66179</td>\n"," <td>-0.024413</td>\n"," <td>0.047759</td>\n"," <td>0.006474</td>\n"," <td>14.8991</td>\n"," <td>...</td>\n"," <td>9.69370</td>\n"," <td>-1.57902</td>\n"," <td>-0.215687</td>\n"," <td>-0.006577</td>\n"," <td>-0.004638</td>\n"," <td>0.000368</td>\n"," <td>-59.8479</td>\n"," <td>-38.8919</td>\n"," <td>-58.5253</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.37357</td>\n"," <td>8.60107</td>\n"," <td>3.54898</td>\n"," <td>-0.057976</td>\n"," <td>0.032574</td>\n"," 
<td>-0.006988</td>\n"," <td>14.2420</td>\n"," <td>...</td>\n"," <td>9.58944</td>\n"," <td>-1.73276</td>\n"," <td>0.092914</td>\n"," <td>0.003014</td>\n"," <td>0.000148</td>\n"," <td>0.022495</td>\n"," <td>-60.7361</td>\n"," <td>-39.4138</td>\n"," <td>-58.3999</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.07473</td>\n"," <td>8.52853</td>\n"," <td>3.66021</td>\n"," <td>-0.002352</td>\n"," <td>0.032810</td>\n"," <td>-0.003747</td>\n"," <td>14.8908</td>\n"," <td>...</td>\n"," <td>9.58814</td>\n"," <td>-1.77040</td>\n"," <td>0.054545</td>\n"," <td>0.003175</td>\n"," <td>-0.020301</td>\n"," <td>0.011275</td>\n"," <td>-60.4091</td>\n"," <td>-38.7635</td>\n"," <td>-58.3956</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.22936</td>\n"," <td>8.83122</td>\n"," <td>3.70000</td>\n"," <td>0.012269</td>\n"," <td>0.018305</td>\n"," <td>-0.053325</td>\n"," <td>15.5612</td>\n"," <td>...</td>\n"," <td>9.69771</td>\n"," <td>-1.65625</td>\n"," <td>-0.060809</td>\n"," <td>0.012698</td>\n"," <td>-0.014303</td>\n"," <td>-0.002823</td>\n"," <td>-61.5199</td>\n"," <td>-39.3879</td>\n"," <td>-58.2694</td>\n"," <td>1</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>5 rows × 33 columns</p>\n","</div>"],"text/plain":[" activityID heart_rate hand temperature (°C) \\\n","0 transient activities 104.0 30.0 \n","1 transient activities 104.0 30.0 \n","2 transient activities 104.0 30.0 \n","3 transient activities 104.0 30.0 \n","4 transient activities 104.0 30.0 \n","\n"," hand acceleration X ±16g hand acceleration Y ±16g \\\n","0 2.37223 8.60074 \n","1 2.18837 8.56560 \n","2 2.37357 8.60107 \n","3 2.07473 8.52853 \n","4 2.22936 8.83122 \n","\n"," hand acceleration Z ±16g hand gyroscope X hand gyroscope Y \\\n","0 3.51048 -0.092217 0.056812 \n","1 3.66179 -0.024413 0.047759 \n","2 3.54898 -0.057976 0.032574 
\n","3 3.66021 -0.002352 0.032810 \n","4 3.70000 0.012269 0.018305 \n","\n"," hand gyroscope Z hand magnetometer X ... ankle acceleration X ±16g \\\n","0 -0.015845 14.6806 ... 9.65918 \n","1 0.006474 14.8991 ... 9.69370 \n","2 -0.006988 14.2420 ... 9.58944 \n","3 -0.003747 14.8908 ... 9.58814 \n","4 -0.053325 15.5612 ... 9.69771 \n","\n"," ankle acceleration Y ±16g ankle acceleration Z ±16g ankle gyroscope X \\\n","0 -1.65569 -0.099797 0.008300 \n","1 -1.57902 -0.215687 -0.006577 \n","2 -1.73276 0.092914 0.003014 \n","3 -1.77040 0.054545 0.003175 \n","4 -1.65625 -0.060809 0.012698 \n","\n"," ankle gyroscope Y ankle gyroscope Z ankle magnetometer X \\\n","0 0.009250 -0.017580 -61.1888 \n","1 -0.004638 0.000368 -59.8479 \n","2 0.000148 0.022495 -60.7361 \n","3 -0.020301 0.011275 -60.4091 \n","4 -0.014303 -0.002823 -61.5199 \n","\n"," ankle magnetometer Y ankle magnetometer Z PeopleId \n","0 -38.9599 -58.1438 1 \n","1 -38.8919 -58.5253 1 \n","2 -39.4138 -58.3999 1 \n","3 -38.7635 -58.3956 1 \n","4 -39.3879 -58.2694 1 \n","\n","[5 rows x 33 columns]"]},"execution_count":2,"metadata":{},"output_type":"execute_result"}],"source":["dataset = pd.read_csv('/kaggle/input/fisical-activity-dataset/dataset2.csv')\n","dataset.head()"]},{"cell_type":"code","execution_count":3,"id":"4c7d3445","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:24.466165Z","iopub.status.busy":"2023-08-05T00:07:24.465496Z","iopub.status.idle":"2023-08-05T00:07:24.70062Z","shell.execute_reply":"2023-08-05T00:07:24.699179Z"},"papermill":{"duration":0.24804,"end_time":"2023-08-05T00:07:24.703053","exception":false,"start_time":"2023-08-05T00:07:24.455013","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[104. 30. 2.37223 ... -61.1888 -38.9599 -58.1438 ]\n"," [104. 30. 2.18837 ... -59.8479 -38.8919 -58.5253 ]\n"," [104. 30. 2.37357 ... -60.7361 -39.4138 -58.3999 ]\n"," ...\n"," [140. 30.8125 -9.42745 ... -38.5541 -16.0535 24.6936 ]\n"," [140. 
30.8125 -9.47246 ... -38.8064 -16.04 24.9763 ]\n"," [140. 30.8125 -9.66621 ... -38.6814 -15.9175 24.9766 ]]\n","['transient activities' 'transient activities' 'transient activities' ...\n"," 'transient activities' 'transient activities' 'transient activities']\n"]}],"source":["X = dataset.iloc[:, 1:-1].values\n","y = dataset.iloc[:, 0].values\n","print(X)\n","print(y)"]},{"cell_type":"markdown","id":"e73af995","metadata":{"papermill":{"duration":0.00873,"end_time":"2023-08-05T00:07:24.720797","exception":false,"start_time":"2023-08-05T00:07:24.712067","status":"completed"},"tags":[]},"source":["## Encoding categorical data"]},{"cell_type":"code","execution_count":4,"id":"b2055003","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:24.740481Z","iopub.status.busy":"2023-08-05T00:07:24.740069Z","iopub.status.idle":"2023-08-05T00:07:26.930985Z","shell.execute_reply":"2023-08-05T00:07:26.929671Z"},"papermill":{"duration":2.203646,"end_time":"2023-08-05T00:07:26.933552","exception":false,"start_time":"2023-08-05T00:07:24.729906","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["['Nordic walking', 'ascending stairs', 'cycling', 'descending stairs', 'ironing', 'lying', 'rope jumping', 'running', 'sitting', 'standing', 'transient activities', 'vacuum cleaning', 'walking']\n","Counter({10: 927575, 12: 238761, 4: 238690, 5: 192523, 9: 189931, 0: 188107, 8: 185188, 11: 175353, 2: 164600, 1: 117216, 3: 104944, 7: 98199, 6: 42969})\n","Class=10, n=927575 (32.387%)\n","Class=5, n=192523 (6.722%)\n","Class=8, n=185188 (6.466%)\n","Class=9, n=189931 (6.632%)\n","Class=4, n=238690 (8.334%)\n","Class=11, n=175353 (6.123%)\n","Class=1, n=117216 (4.093%)\n","Class=3, n=104944 (3.664%)\n","Class=12, n=238761 (8.336%)\n","Class=0, n=188107 (6.568%)\n","Class=2, n=164600 (5.747%)\n","Class=7, n=98199 (3.429%)\n","Class=6, n=42969 (1.500%)\n"]},{"data":{"image/png":"","text/plain":["<Figure size 640x480 with 1 
Axes>"]},"metadata":{},"output_type":"display_data"}],"source":["from collections import Counter\n","from sklearn.preprocessing import LabelEncoder\n","from matplotlib import pyplot\n","\n","le = LabelEncoder()\n","y_temp = le.fit_transform(y)\n","print(list(le.classes_))\n","counter = Counter(y_temp)\n","\n","print(counter)\n","for k,v in counter.items():\n"," per = v / len(y_temp) * 100\n"," print('Class=%d, n=%d (%.3f%%)' % (k, v, per))\n","\n","# plot the distribution\n","pyplot.bar(counter.keys(), counter.values())\n","pyplot.show()"]},{"cell_type":"markdown","id":"2d62ec16","metadata":{"papermill":{"duration":0.009288,"end_time":"2023-08-05T00:07:26.952484","exception":false,"start_time":"2023-08-05T00:07:26.943196","status":"completed"},"tags":[]},"source":["## Method 1: Using Random Forest Classifier "]},{"cell_type":"markdown","id":"ec9d9fce","metadata":{"papermill":{"duration":0.009112,"end_time":"2023-08-05T00:07:26.971127","exception":false,"start_time":"2023-08-05T00:07:26.962015","status":"completed"},"tags":[]},"source":["## Handling missing values"]},{"cell_type":"code","execution_count":5,"id":"68d72f52","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:26.992642Z","iopub.status.busy":"2023-08-05T00:07:26.992214Z","iopub.status.idle":"2023-08-05T00:07:29.167243Z","shell.execute_reply":"2023-08-05T00:07:29.166002Z"},"papermill":{"duration":2.188981,"end_time":"2023-08-05T00:07:29.169898","exception":false,"start_time":"2023-08-05T00:07:26.980917","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.impute import SimpleImputer\n","imputer = SimpleImputer(missing_values=np.nan, strategy='mean')\n","imputer.fit(X)\n","X = imputer.transform(X)"]},{"cell_type":"markdown","id":"b99c6d79","metadata":{"papermill":{"duration":0.009124,"end_time":"2023-08-05T00:07:29.188693","exception":false,"start_time":"2023-08-05T00:07:29.179569","status":"completed"},"tags":[]},"source":["## 
Encoding"]},{"cell_type":"code","execution_count":6,"id":"57a332ea","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:29.209213Z","iopub.status.busy":"2023-08-05T00:07:29.20881Z","iopub.status.idle":"2023-08-05T00:07:29.792137Z","shell.execute_reply":"2023-08-05T00:07:29.791044Z"},"papermill":{"duration":0.596644,"end_time":"2023-08-05T00:07:29.794779","exception":false,"start_time":"2023-08-05T00:07:29.198135","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.preprocessing import LabelEncoder\n","le = LabelEncoder()\n","y = le.fit_transform(y)"]},{"cell_type":"code","execution_count":7,"id":"974612a4","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:29.81627Z","iopub.status.busy":"2023-08-05T00:07:29.815829Z","iopub.status.idle":"2023-08-05T00:07:31.145986Z","shell.execute_reply":"2023-08-05T00:07:31.144799Z"},"papermill":{"duration":1.343317,"end_time":"2023-08-05T00:07:31.148501","exception":false,"start_time":"2023-08-05T00:07:29.805184","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.model_selection import train_test_split\n","X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=0)"]},{"cell_type":"markdown","id":"8edfbb5a","metadata":{"papermill":{"duration":0.008929,"end_time":"2023-08-05T00:07:31.166893","exception":false,"start_time":"2023-08-05T00:07:31.157964","status":"completed"},"tags":[]},"source":["## Feature Scaling"]},{"cell_type":"code","execution_count":8,"id":"ba643853","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:31.187287Z","iopub.status.busy":"2023-08-05T00:07:31.186875Z","iopub.status.idle":"2023-08-05T00:07:32.475029Z","shell.execute_reply":"2023-08-05T00:07:32.474185Z"},"papermill":{"duration":1.301347,"end_time":"2023-08-05T00:07:32.477471","exception":false,"start_time":"2023-08-05T00:07:31.176124","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.preprocessing import StandardScaler\n","sc = 
StandardScaler()\n","X_train = sc.fit_transform(X_train)\n","X_test = sc.transform(X_test)"]},{"cell_type":"markdown","id":"14ac9a68","metadata":{"papermill":{"duration":0.009663,"end_time":"2023-08-05T00:07:32.496576","exception":false,"start_time":"2023-08-05T00:07:32.486913","status":"completed"},"tags":[]},"source":["## Training RandomForestClassifier"]},{"cell_type":"code","execution_count":9,"id":"1dfae9c0","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:32.517561Z","iopub.status.busy":"2023-08-05T00:07:32.517142Z","iopub.status.idle":"2023-08-05T00:15:35.089903Z","shell.execute_reply":"2023-08-05T00:15:35.088663Z"},"papermill":{"duration":482.594606,"end_time":"2023-08-05T00:15:35.101103","exception":false,"start_time":"2023-08-05T00:07:32.506497","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: 
\"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 
50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>RandomForestClassifier(criterion=&#x27;entropy&#x27;, n_estimators=10, random_state=0)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. 
<br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">RandomForestClassifier</label><div class=\"sk-toggleable__content\"><pre>RandomForestClassifier(criterion=&#x27;entropy&#x27;, n_estimators=10, random_state=0)</pre></div></div></div></div></div>"],"text/plain":["RandomForestClassifier(criterion='entropy', n_estimators=10, random_state=0)"]},"execution_count":9,"metadata":{},"output_type":"execute_result"}],"source":["from sklearn.ensemble import RandomForestClassifier\n","classifier = RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0)\n","classifier.fit(X_train, y_train)"]},{"cell_type":"markdown","id":"a6207e57","metadata":{"papermill":{"duration":0.009341,"end_time":"2023-08-05T00:15:35.120003","exception":false,"start_time":"2023-08-05T00:15:35.110662","status":"completed"},"tags":[]},"source":["## Predicting Results"]},{"cell_type":"code","execution_count":10,"id":"a963263d","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:35.140981Z","iopub.status.busy":"2023-08-05T00:15:35.140561Z","iopub.status.idle":"2023-08-05T00:15:37.22477Z","shell.execute_reply":"2023-08-05T00:15:37.223543Z"},"papermill":{"duration":2.097825,"end_time":"2023-08-05T00:15:37.227329","exception":false,"start_time":"2023-08-05T00:15:35.129504","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[ 4 4]\n"," [10 10]\n"," [ 8 8]\n"," ...\n"," [ 7 7]\n"," [ 8 8]\n"," [ 3 3]]\n"]}],"source":["y_pred = classifier.predict(X_test)\n","print(np.concatenate((y_pred.reshape(len(y_pred),1), 
y_test.reshape(len(y_test),1)),1))"]},{"cell_type":"markdown","id":"7846df5c","metadata":{"papermill":{"duration":0.009748,"end_time":"2023-08-05T00:15:37.247066","exception":false,"start_time":"2023-08-05T00:15:37.237318","status":"completed"},"tags":[]},"source":["## Calculating Accuracy"]},{"cell_type":"code","execution_count":11,"id":"ed6e3e8a","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:37.268284Z","iopub.status.busy":"2023-08-05T00:15:37.267859Z","iopub.status.idle":"2023-08-05T00:15:37.436445Z","shell.execute_reply":"2023-08-05T00:15:37.435065Z"},"papermill":{"duration":0.182461,"end_time":"2023-08-05T00:15:37.439113","exception":false,"start_time":"2023-08-05T00:15:37.256652","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[ 37634 0 0 0 0 0 0 0 0 0\n"," 8 0 0]\n"," [ 0 23179 0 23 0 0 0 0 0 0\n"," 81 0 0]\n"," [ 0 0 32839 0 0 0 0 0 0 0\n"," 21 0 0]\n"," [ 0 14 0 20904 0 0 0 0 0 0\n"," 43 0 0]\n"," [ 0 0 0 0 47928 0 0 0 0 14\n"," 6 0 0]\n"," [ 0 0 0 0 0 38287 0 0 3 0\n"," 17 0 0]\n"," [ 0 0 1 0 0 0 8591 2 0 0\n"," 5 0 0]\n"," [ 0 0 0 0 0 0 0 19492 0 0\n"," 8 1 0]\n"," [ 0 0 0 0 0 0 0 0 36812 2\n"," 32 0 0]\n"," [ 0 0 0 0 22 0 0 0 4 38082\n"," 57 0 0]\n"," [ 28 61 12 57 25 32 8 24 31 22\n"," 185589 24 20]\n"," [ 0 0 0 0 0 0 0 0 0 0\n"," 41 35025 0]\n"," [ 1 0 0 0 0 0 0 0 0 0\n"," 41 0 47659]]\n"]},{"data":{"text/plain":["0.9986190931754223"]},"execution_count":11,"metadata":{},"output_type":"execute_result"}],"source":["from sklearn.metrics import confusion_matrix, accuracy_score\n","cm = confusion_matrix(y_test, y_pred)\n","print(cm)\n","accuracy_score(y_test, 
y_pred)"]},{"cell_type":"code","execution_count":12,"id":"e609b003","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:37.462377Z","iopub.status.busy":"2023-08-05T00:15:37.461702Z","iopub.status.idle":"2023-08-05T00:15:39.654083Z","shell.execute_reply":"2023-08-05T00:15:39.652958Z"},"papermill":{"duration":2.206834,"end_time":"2023-08-05T00:15:39.656344","exception":false,"start_time":"2023-08-05T00:15:37.44951","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":[" <script type=\"text/javascript\">\n"," window.PlotlyConfig = {MathJaxConfig: 'local'};\n"," if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n"," if (typeof require !== 'undefined') {\n"," require.undef(\"plotly\");\n"," requirejs.config({\n"," paths: {\n"," 'plotly': ['https://cdn.plot.ly/plotly-2.24.1.min']\n"," }\n"," });\n"," require(['plotly'], function(Plotly) {\n"," window._Plotly = Plotly;\n"," });\n"," }\n"," </script>\n"," "]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["<div> <div id=\"be7930db-6c4e-497b-b832-416da626298c\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"be7930db-6c4e-497b-b832-416da626298c\")) { Plotly.newPlot( \"be7930db-6c4e-497b-b832-416da626298c\", 
[{\"coloraxis\":\"coloraxis\",\"name\":\"0\",\"texttemplate\":\"%{z}\",\"z\":[[37634,0,0,0,0,0,0,0,0,0,8,0,0],[0,23179,0,23,0,0,0,0,0,0,81,0,0],[0,0,32839,0,0,0,0,0,0,0,21,0,0],[0,14,0,20904,0,0,0,0,0,0,43,0,0],[0,0,0,0,47928,0,0,0,0,14,6,0,0],[0,0,0,0,0,38287,0,0,3,0,17,0,0],[0,0,1,0,0,0,8591,2,0,0,5,0,0],[0,0,0,0,0,0,0,19492,0,0,8,1,0],[0,0,0,0,0,0,0,0,36812,2,32,0,0],[0,0,0,0,22,0,0,0,4,38082,57,0,0],[28,61,12,57,25,32,8,24,31,22,185589,24,20],[0,0,0,0,0,0,0,0,0,0,41,35025,0],[1,0,0,0,0,0,0,0,0,0,41,0,47659]],\"type\":\"heatmap\",\"xaxis\":\"x\",\"yaxis\":\"y\",\"hovertemplate\":\"x: %{x}\\u003cbr\\u003ey: %{y}\\u003cbr\\u003ecolor: %{z}\\u003cextra\\u003e\\u003c\\u002fextra\\u003e\"}], {\"template\":{\"data\":{\"histogram2dcontour\":[{\"type\":\"histogram2dcontour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"choropleth\":[{\"type\":\"choropleth\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"histogram2d\":[{\"type\":\"histogram2d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmap\":[{\"type\":\"heatmap\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"]
,[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmapgl\":[{\"type\":\"heatmapgl\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"contourcarpet\":[{\"type\":\"contourcarpet\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"contour\":[{\"type\":\"contour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"surface\":[{\"type\":\"surface\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"mesh3d\":[{\"type\":\"mesh3d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"scatter\":[{\"fillpattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2},\"type\":\"scatter\"}],\"parcoords\":[{\"type\":\"parcoords\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolargl\":[{\"type\":\"scatterpolargl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"bar\":[{\"error_x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}
],\"scattergeo\":[{\"type\":\"scattergeo\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolar\":[{\"type\":\"scatterpolar\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"histogram\":[{\"marker\":{\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"histogram\"}],\"scattergl\":[{\"type\":\"scattergl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatter3d\":[{\"type\":\"scatter3d\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattermapbox\":[{\"type\":\"scattermapbox\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterternary\":[{\"type\":\"scatterternary\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattercarpet\":[{\"type\":\"scattercarpet\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"table\":[{\"cells\":{\"fill\":{\"color\":\"#EBF0F8\"},\"line\":{\"color\":\"white\"}},\"header\":{\"fill\":{\"color\":\"#C8D4E3\"},\"line\":{\"color\":\"white\"}},\"type\":\"table\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"pie\":[{\"automargin\":true,\"type\":\"pie\"}]},\"layout\":{\"autotypenumbers\":\"strict\",\"colorway\":[\"#636efa\",\"#EF553B\",\"#00cc96\",\"#ab63fa\",\"#FFA15A\",\"#19d3f3\",\"#FF6692\",\"#B6E880\",\"#FF97FF\",\"#FECB52\"],\"font\":{\"color\":\"#2a3f5f\"},\"hovermode\":\"closest\",\"hoverlabel\":{\"align\":\"left\"},\"paper_bgcolor\":\"white\",\"plot_bgcolor\":\"#E5EC
F6\",\"polar\":{\"bgcolor\":\"#E5ECF6\",\"angularaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"radialaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"ternary\":{\"bgcolor\":\"#E5ECF6\",\"aaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"baxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"caxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"coloraxis\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"colorscale\":{\"sequential\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"sequentialminus\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"diverging\":[[0,\"#8e0152\"],[0.1,\"#c51b7d\"],[0.2,\"#de77ae\"],[0.3,\"#f1b6da\"],[0.4,\"#fde0ef\"],[0.5,\"#f7f7f7\"],[0.6,\"#e6f5d0\"],[0.7,\"#b8e186\"],[0.8,\"#7fbc41\"],[0.9,\"#4d9221\"],[1,\"#276419\"]]},\"xaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"yaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"scene\":{\"xaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"yaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecol
or\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"zaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2}},\"shapedefaults\":{\"line\":{\"color\":\"#2a3f5f\"}},\"annotationdefaults\":{\"arrowcolor\":\"#2a3f5f\",\"arrowhead\":0,\"arrowwidth\":1},\"geo\":{\"bgcolor\":\"white\",\"landcolor\":\"#E5ECF6\",\"subunitcolor\":\"white\",\"showland\":true,\"showlakes\":true,\"lakecolor\":\"white\"},\"title\":{\"x\":0.05},\"mapbox\":{\"style\":\"light\"}}},\"xaxis\":{\"anchor\":\"y\",\"domain\":[0.0,1.0]},\"yaxis\":{\"anchor\":\"x\",\"domain\":[0.0,1.0],\"autorange\":\"reversed\"},\"coloraxis\":{\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]},\"margin\":{\"t\":60}}, {\"responsive\": true} ).then(function(){\n"," \n","var gd = document.getElementById('be7930db-6c4e-497b-b832-416da626298c');\n","var x = new MutationObserver(function (mutations, observer) {{\n"," var display = window.getComputedStyle(gd).display;\n"," if (!display || display === 'none') {{\n"," console.log([gd, 'removed!']);\n"," Plotly.purge(gd);\n"," observer.disconnect();\n"," }}\n","}});\n","\n","// Listen for the removal of the full notebook cells\n","var notebookContainer = gd.closest('#notebook-container');\n","if (notebookContainer) {{\n"," x.observe(notebookContainer, {childList: true});\n","}}\n","\n","// Listen for the clearing of the current output cell\n","var outputEl = gd.closest('.output');\n","if (outputEl) {{\n"," x.observe(outputEl, {childList: true});\n","}}\n","\n"," }) }; }); </script> </div>"]},"metadata":{},"output_type":"display_data"}],"source":["# import 
matplotlib.pyplot as plt\n","# from sklearn.metrics import ConfusionMatrixDisplay\n","# disp = ConfusionMatrixDisplay(confusion_matrix=cm,\n","# display_labels=classifier.classes_)\n","# disp.plot()\n","# plt.show()\n","import plotly.express as px\n","fig = px.imshow(cm, text_auto=True, aspect=\"auto\")\n","fig.show()"]},{"cell_type":"markdown","id":"6b68fad1","metadata":{"papermill":{"duration":0.010207,"end_time":"2023-08-05T00:15:39.677156","exception":false,"start_time":"2023-08-05T00:15:39.666949","status":"completed"},"tags":[]},"source":["## Method 2: Using KNN"]},{"cell_type":"code","execution_count":13,"id":"219416d1","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:39.699914Z","iopub.status.busy":"2023-08-05T00:15:39.699066Z","iopub.status.idle":"2023-08-05T00:15:40.015603Z","shell.execute_reply":"2023-08-05T00:15:40.014559Z"},"papermill":{"duration":0.330691,"end_time":"2023-08-05T00:15:40.0181","exception":false,"start_time":"2023-08-05T00:15:39.687409","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<style>#sk-container-id-2 {color: black;background-color: white;}#sk-container-id-2 pre{padding: 0;}#sk-container-id-2 div.sk-toggleable {background-color: white;}#sk-container-id-2 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-2 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-2 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-2 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-2 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-2 
input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-2 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-2 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-2 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-2 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-2 div.sk-item {position: relative;z-index: 1;}#sk-container-id-2 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-2 div.sk-item::before, #sk-container-id-2 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-2 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: 
relative;background-color: white;}#sk-container-id-2 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-2 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-2 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-2 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-2 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-2 div.sk-label-container {text-align: center;}#sk-container-id-2 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-2 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>KNeighborsClassifier()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. 
<br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" checked><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">KNeighborsClassifier</label><div class=\"sk-toggleable__content\"><pre>KNeighborsClassifier()</pre></div></div></div></div></div>"],"text/plain":["KNeighborsClassifier()"]},"execution_count":13,"metadata":{},"output_type":"execute_result"}],"source":["from sklearn.neighbors import KNeighborsClassifier\n","knn_clf = KNeighborsClassifier()\n","knn_clf.fit(X_train,y_train)"]},{"cell_type":"code","execution_count":14,"id":"fe6052b4","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:40.041026Z","iopub.status.busy":"2023-08-05T00:15:40.040612Z","iopub.status.idle":"2023-08-05T01:09:53.645091Z","shell.execute_reply":"2023-08-05T01:09:53.643871Z"},"papermill":{"duration":3253.619328,"end_time":"2023-08-05T01:09:53.648066","exception":false,"start_time":"2023-08-05T00:15:40.028738","status":"completed"},"tags":[]},"outputs":[],"source":["y_pred=knn_clf.predict(X_test)"]},{"cell_type":"code","execution_count":15,"id":"80d0ea82","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:09:53.674186Z","iopub.status.busy":"2023-08-05T01:09:53.673791Z","iopub.status.idle":"2023-08-05T01:09:53.682771Z","shell.execute_reply":"2023-08-05T01:09:53.681553Z"},"papermill":{"duration":0.023497,"end_time":"2023-08-05T01:09:53.685168","exception":false,"start_time":"2023-08-05T01:09:53.661671","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[ 4 4]\n"," [10 10]\n"," [ 8 8]\n"," ...\n"," [ 7 7]\n"," [ 8 8]\n"," [ 3 3]]\n"]}],"source":["print(np.concatenate((y_pred.reshape(len(y_pred),1), 
y_test.reshape(len(y_test),1)),1))"]},{"cell_type":"markdown","id":"30642482","metadata":{"papermill":{"duration":0.01112,"end_time":"2023-08-05T01:09:53.711764","exception":false,"start_time":"2023-08-05T01:09:53.700644","status":"completed"},"tags":[]},"source":["## Method 3: Using ANN"]},{"cell_type":"code","execution_count":16,"id":"9fe82be2","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:09:53.739787Z","iopub.status.busy":"2023-08-05T01:09:53.738634Z","iopub.status.idle":"2023-08-05T01:09:55.132546Z","shell.execute_reply":"2023-08-05T01:09:55.131479Z"},"papermill":{"duration":1.410752,"end_time":"2023-08-05T01:09:55.135242","exception":false,"start_time":"2023-08-05T01:09:53.72449","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.model_selection import train_test_split\n","X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=0)"]},{"cell_type":"markdown","id":"571dcd4c","metadata":{"papermill":{"duration":0.010439,"end_time":"2023-08-05T01:09:55.156709","exception":false,"start_time":"2023-08-05T01:09:55.14627","status":"completed"},"tags":[]},"source":["## Oversampling the minority classes"]},{"cell_type":"code","execution_count":17,"id":"32271f2b","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:09:55.181016Z","iopub.status.busy":"2023-08-05T01:09:55.180057Z","iopub.status.idle":"2023-08-05T01:19:23.222784Z","shell.execute_reply":"2023-08-05T01:19:23.221544Z"},"papermill":{"duration":568.057647,"end_time":"2023-08-05T01:19:23.225424","exception":false,"start_time":"2023-08-05T01:09:55.167777","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["Class=4, n=741642 (7.692%)\n","Class=12, n=741642 (7.692%)\n","Class=10, n=741642 (7.692%)\n","Class=2, n=741642 (7.692%)\n","Class=9, n=741642 (7.692%)\n","Class=8, n=741642 (7.692%)\n","Class=0, n=741642 (7.692%)\n","Class=5, n=741642 (7.692%)\n","Class=1, n=741642 (7.692%)\n","Class=11, 
n=741642 (7.692%)\n","Class=6, n=741642 (7.692%)\n","Class=3, n=741642 (7.692%)\n","Class=7, n=741642 (7.692%)\n"]},{"data":{"image/png":"","text/plain":["<Figure size 640x480 with 1 Axes>"]},"metadata":{},"output_type":"display_data"}],"source":["from imblearn.over_sampling import SMOTE\n","oversample = SMOTE()\n","y_train = le.fit_transform(y_train)\n","X_train, y_train = oversample.fit_resample(X_train, y_train)\n","# summarize distribution\n","counter = Counter(y_train)\n","for k,v in counter.items():\n"," per = v / len(y_train) * 100\n"," print('Class=%d, n=%d (%.3f%%)' % (k, v, per))\n","# plot the distribution\n","pyplot.bar(counter.keys(), counter.values())\n","pyplot.show()"]},{"cell_type":"code","execution_count":18,"id":"aa3846f3","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:23.252818Z","iopub.status.busy":"2023-08-05T01:19:23.251956Z","iopub.status.idle":"2023-08-05T01:19:23.740771Z","shell.execute_reply":"2023-08-05T01:19:23.73973Z"},"papermill":{"duration":0.505699,"end_time":"2023-08-05T01:19:23.743348","exception":false,"start_time":"2023-08-05T01:19:23.237649","status":"completed"},"tags":[]},"outputs":[],"source":["from tensorflow.keras.utils import to_categorical\n","y_train = to_categorical(y_train)"]},{"cell_type":"code","execution_count":19,"id":"0a0017d2","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:23.767819Z","iopub.status.busy":"2023-08-05T01:19:23.767429Z","iopub.status.idle":"2023-08-05T01:19:29.065742Z","shell.execute_reply":"2023-08-05T01:19:29.064536Z"},"papermill":{"duration":5.313708,"end_time":"2023-08-05T01:19:29.068475","exception":false,"start_time":"2023-08-05T01:19:23.754767","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.preprocessing import StandardScaler\n","sc = StandardScaler()\n","X_train = sc.fit_transform(X_train)\n","X_test = 
sc.transform(X_test)"]},{"cell_type":"code","execution_count":20,"id":"c344eb49","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:29.092749Z","iopub.status.busy":"2023-08-05T01:19:29.092331Z","iopub.status.idle":"2023-08-05T01:19:29.209741Z","shell.execute_reply":"2023-08-05T01:19:29.20847Z"},"papermill":{"duration":0.132568,"end_time":"2023-08-05T01:19:29.21231","exception":false,"start_time":"2023-08-05T01:19:29.079742","status":"completed"},"tags":[]},"outputs":[],"source":["ann = tf.keras.models.Sequential()\n","ann.add(tf.keras.layers.Dense(units=26, activation='tanh'))\n","ann.add(tf.keras.layers.Dense(units=52, activation='tanh'))\n","ann.add(tf.keras.layers.Dense(units=52, activation='tanh'))\n","ann.add(tf.keras.layers.Dense(units=13, activation='softmax'))"]},{"cell_type":"code","execution_count":21,"id":"c1441ca6","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:29.236225Z","iopub.status.busy":"2023-08-05T01:19:29.23586Z","iopub.status.idle":"2023-08-05T01:19:29.268014Z","shell.execute_reply":"2023-08-05T01:19:29.266637Z"},"papermill":{"duration":0.046985,"end_time":"2023-08-05T01:19:29.270549","exception":false,"start_time":"2023-08-05T01:19:29.223564","status":"completed"},"tags":[]},"outputs":[],"source":["ann.compile(optimizer='adam' , loss='CategoricalCrossentropy' , metrics=['accuracy'])"]},{"cell_type":"code","execution_count":22,"id":"82e960ec","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:29.294423Z","iopub.status.busy":"2023-08-05T01:19:29.294016Z","iopub.status.idle":"2023-08-05T01:43:40.990731Z","shell.execute_reply":"2023-08-05T01:43:40.989437Z"},"papermill":{"duration":1451.711594,"end_time":"2023-08-05T01:43:40.99336","exception":false,"start_time":"2023-08-05T01:19:29.281766","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["Epoch 1/10\n","75324/75324 [==============================] - 145s 2ms/step - loss: 0.1724 - accuracy: 0.9415\n","Epoch 
2/10\n","75324/75324 [==============================] - 142s 2ms/step - loss: 0.0763 - accuracy: 0.9731\n","Epoch 3/10\n","75324/75324 [==============================] - 144s 2ms/step - loss: 0.0601 - accuracy: 0.9788\n","Epoch 4/10\n","75324/75324 [==============================] - 151s 2ms/step - loss: 0.0527 - accuracy: 0.9815\n","Epoch 5/10\n","75324/75324 [==============================] - 142s 2ms/step - loss: 0.0480 - accuracy: 0.9831\n","Epoch 6/10\n","75324/75324 [==============================] - 143s 2ms/step - loss: 0.0452 - accuracy: 0.9840\n","Epoch 7/10\n","75324/75324 [==============================] - 146s 2ms/step - loss: 0.0432 - accuracy: 0.9848\n","Epoch 8/10\n","75324/75324 [==============================] - 141s 2ms/step - loss: 0.0417 - accuracy: 0.9854\n","Epoch 9/10\n","75324/75324 [==============================] - 148s 2ms/step - loss: 0.0405 - accuracy: 0.9857\n","Epoch 10/10\n","75324/75324 [==============================] - 146s 2ms/step - loss: 0.0397 - accuracy: 0.9860\n"]}],"source":["history = ann.fit(X_train, y_train, batch_size=128, epochs=10)"]},{"cell_type":"code","execution_count":23,"id":"c66f3d9a","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:43:44.865958Z","iopub.status.busy":"2023-08-05T01:43:44.86558Z","iopub.status.idle":"2023-08-05T01:44:26.055791Z","shell.execute_reply":"2023-08-05T01:44:26.054616Z"},"papermill":{"duration":43.125175,"end_time":"2023-08-05T01:44:26.058358","exception":false,"start_time":"2023-08-05T01:43:42.933183","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["17901/17901 [==============================] - 22s 1ms/step\n"]}],"source":["y_pred = 
ann.predict(X_test)"]},{"cell_type":"code","execution_count":24,"id":"ca3d6088","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:29.962501Z","iopub.status.busy":"2023-08-05T01:44:29.962091Z","iopub.status.idle":"2023-08-05T01:44:29.991499Z","shell.execute_reply":"2023-08-05T01:44:29.990294Z"},"papermill":{"duration":1.967301,"end_time":"2023-08-05T01:44:29.994142","exception":false,"start_time":"2023-08-05T01:44:28.026841","status":"completed"},"tags":[]},"outputs":[],"source":["result = np.argmax(y_pred, axis=-1)"]},{"cell_type":"code","execution_count":25,"id":"4211d197","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:33.86521Z","iopub.status.busy":"2023-08-05T01:44:33.864484Z","iopub.status.idle":"2023-08-05T01:44:33.869618Z","shell.execute_reply":"2023-08-05T01:44:33.868861Z"},"papermill":{"duration":1.942499,"end_time":"2023-08-05T01:44:33.871763","exception":false,"start_time":"2023-08-05T01:44:31.929264","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[ 4 10 8 ... 
7 8 3]\n"]}],"source":["print(result)"]},{"cell_type":"code","execution_count":26,"id":"e36e98e0","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:37.612404Z","iopub.status.busy":"2023-08-05T01:44:37.61165Z","iopub.status.idle":"2023-08-05T01:44:37.616428Z","shell.execute_reply":"2023-08-05T01:44:37.615631Z"},"papermill":{"duration":1.837241,"end_time":"2023-08-05T01:44:37.618458","exception":false,"start_time":"2023-08-05T01:44:35.781217","status":"completed"},"tags":[]},"outputs":[],"source":["y_test_rs = np.argmax(y_test, axis=-1)"]},{"cell_type":"code","execution_count":27,"id":"49227522","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:41.477349Z","iopub.status.busy":"2023-08-05T01:44:41.476614Z","iopub.status.idle":"2023-08-05T01:44:41.481313Z","shell.execute_reply":"2023-08-05T01:44:41.48054Z"},"papermill":{"duration":1.939073,"end_time":"2023-08-05T01:44:41.484075","exception":false,"start_time":"2023-08-05T01:44:39.545002","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["9\n"]}],"source":["print(y_test_rs)"]},{"cell_type":"code","execution_count":28,"id":"72b1f910","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:45.446971Z","iopub.status.busy":"2023-08-05T01:44:45.446273Z","iopub.status.idle":"2023-08-05T01:44:45.569371Z","shell.execute_reply":"2023-08-05T01:44:45.568125Z"},"papermill":{"duration":2.161727,"end_time":"2023-08-05T01:44:45.571926","exception":false,"start_time":"2023-08-05T01:44:43.410199","status":"completed"},"tags":[]},"outputs":[],"source":["y_pred_enc = pd.get_dummies(result)\n","y_test_enc = 
pd.get_dummies(y_test)"]},{"cell_type":"code","execution_count":29,"id":"6b54f780","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:49.311088Z","iopub.status.busy":"2023-08-05T01:44:49.310688Z","iopub.status.idle":"2023-08-05T01:44:49.698079Z","shell.execute_reply":"2023-08-05T01:44:49.696602Z"},"papermill":{"duration":2.293735,"end_time":"2023-08-05T01:44:49.700483","exception":false,"start_time":"2023-08-05T01:44:47.406748","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["0.9624047680565351\n"]}],"source":["from sklearn.metrics import accuracy_score\n","print(accuracy_score(y_test_enc, y_pred_enc))"]}],"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.12"},"papermill":{"default_parameters":{},"duration":5890.466662,"end_time":"2023-08-05T01:44:54.111787","environment_variables":{},"exception":null,"input_path":"__notebook__.ipynb","output_path":"__notebook__.ipynb","parameters":{},"start_time":"2023-08-05T00:06:43.645125","version":"2.4.0"}},"nbformat":4,"nbformat_minor":5} No newline at end of file
Copy link
Collaborator

@coderpotter coderpotter Aug 6, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Line #2.    y = dataset.iloc[:, 0].values

activity_id = dataset.activityID

Convert it to a list if you need to, but pandas data structures are generally more efficient.


Reply via ReviewNB

@@ -0,0 +1 @@
{"cells":[{"source":"<a href=\"https://www.kaggle.com/code/manvinagdev/physical-activity-prediction?scriptVersionId=138941879\" target=\"_blank\"><img align=\"left\" alt=\"Kaggle\" title=\"Open in Kaggle\" src=\"https://kaggle.com/static/images/open-in-kaggle.svg\"></a>","metadata":{},"cell_type":"markdown"},{"cell_type":"markdown","id":"fea86c76","metadata":{"papermill":{"duration":0.008893,"end_time":"2023-08-05T00:06:53.536089","exception":false,"start_time":"2023-08-05T00:06:53.527196","status":"completed"},"tags":[]},"source":["## Importing libraries"]},{"cell_type":"code","execution_count":1,"id":"89d8bcdb","metadata":{"_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","execution":{"iopub.execute_input":"2023-08-05T00:06:53.554723Z","iopub.status.busy":"2023-08-05T00:06:53.554321Z","iopub.status.idle":"2023-08-05T00:07:02.412402Z","shell.execute_reply":"2023-08-05T00:07:02.410962Z"},"papermill":{"duration":8.870272,"end_time":"2023-08-05T00:07:02.414899","exception":false,"start_time":"2023-08-05T00:06:53.544627","status":"completed"},"tags":[]},"outputs":[{"name":"stderr","output_type":"stream","text":["/opt/conda/lib/python3.10/site-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.23.5\n"," warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n","/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/__init__.py:98: UserWarning: unable to load libtensorflow_io_plugins.so: unable to open file: libtensorflow_io_plugins.so, from paths: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so']\n","caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so: undefined symbol: _ZN3tsl6StatusC1EN10tensorflow5error4CodeESt17basic_string_viewIcSt11char_traitsIcEENS_14SourceLocationE']\n"," 
warnings.warn(f\"unable to load libtensorflow_io_plugins.so: {e}\")\n","/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/__init__.py:104: UserWarning: file system plugins are not loaded: unable to open file: libtensorflow_io.so, from paths: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so']\n","caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so: undefined symbol: _ZTVN10tensorflow13GcsFileSystemE']\n"," warnings.warn(f\"file system plugins are not loaded: {e}\")\n"]},{"name":"stdout","output_type":"stream","text":["/kaggle/input/fisical-activity-dataset/dataset2.csv\n"]}],"source":["import numpy as np\n","import pandas as pd\n","import tensorflow as tf\n","\n","import os\n","for dirname, _, filenames in os.walk('/kaggle/input'):\n"," for filename in filenames:\n"," print(os.path.join(dirname, filename))"]},{"cell_type":"markdown","id":"e1dee26f","metadata":{"papermill":{"duration":0.008233,"end_time":"2023-08-05T00:07:02.431643","exception":false,"start_time":"2023-08-05T00:07:02.42341","status":"completed"},"tags":[]},"source":["## Loading the dataset"]},{"cell_type":"code","execution_count":2,"id":"ce6fe559","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:02.452779Z","iopub.status.busy":"2023-08-05T00:07:02.451213Z","iopub.status.idle":"2023-08-05T00:07:24.443285Z","shell.execute_reply":"2023-08-05T00:07:24.442116Z"},"papermill":{"duration":22.004325,"end_time":"2023-08-05T00:07:24.445861","exception":false,"start_time":"2023-08-05T00:07:02.441536","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: 
right;\">\n"," <th></th>\n"," <th>activityID</th>\n"," <th>heart_rate</th>\n"," <th>hand temperature (°C)</th>\n"," <th>hand acceleration X ±16g</th>\n"," <th>hand acceleration Y ±16g</th>\n"," <th>hand acceleration Z ±16g</th>\n"," <th>hand gyroscope X</th>\n"," <th>hand gyroscope Y</th>\n"," <th>hand gyroscope Z</th>\n"," <th>hand magnetometer X</th>\n"," <th>...</th>\n"," <th>ankle acceleration X ±16g</th>\n"," <th>ankle acceleration Y ±16g</th>\n"," <th>ankle acceleration Z ±16g</th>\n"," <th>ankle gyroscope X</th>\n"," <th>ankle gyroscope Y</th>\n"," <th>ankle gyroscope Z</th>\n"," <th>ankle magnetometer X</th>\n"," <th>ankle magnetometer Y</th>\n"," <th>ankle magnetometer Z</th>\n"," <th>PeopleId</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.37223</td>\n"," <td>8.60074</td>\n"," <td>3.51048</td>\n"," <td>-0.092217</td>\n"," <td>0.056812</td>\n"," <td>-0.015845</td>\n"," <td>14.6806</td>\n"," <td>...</td>\n"," <td>9.65918</td>\n"," <td>-1.65569</td>\n"," <td>-0.099797</td>\n"," <td>0.008300</td>\n"," <td>0.009250</td>\n"," <td>-0.017580</td>\n"," <td>-61.1888</td>\n"," <td>-38.9599</td>\n"," <td>-58.1438</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.18837</td>\n"," <td>8.56560</td>\n"," <td>3.66179</td>\n"," <td>-0.024413</td>\n"," <td>0.047759</td>\n"," <td>0.006474</td>\n"," <td>14.8991</td>\n"," <td>...</td>\n"," <td>9.69370</td>\n"," <td>-1.57902</td>\n"," <td>-0.215687</td>\n"," <td>-0.006577</td>\n"," <td>-0.004638</td>\n"," <td>0.000368</td>\n"," <td>-59.8479</td>\n"," <td>-38.8919</td>\n"," <td>-58.5253</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.37357</td>\n"," <td>8.60107</td>\n"," <td>3.54898</td>\n"," <td>-0.057976</td>\n"," <td>0.032574</td>\n"," 
<td>-0.006988</td>\n"," <td>14.2420</td>\n"," <td>...</td>\n"," <td>9.58944</td>\n"," <td>-1.73276</td>\n"," <td>0.092914</td>\n"," <td>0.003014</td>\n"," <td>0.000148</td>\n"," <td>0.022495</td>\n"," <td>-60.7361</td>\n"," <td>-39.4138</td>\n"," <td>-58.3999</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.07473</td>\n"," <td>8.52853</td>\n"," <td>3.66021</td>\n"," <td>-0.002352</td>\n"," <td>0.032810</td>\n"," <td>-0.003747</td>\n"," <td>14.8908</td>\n"," <td>...</td>\n"," <td>9.58814</td>\n"," <td>-1.77040</td>\n"," <td>0.054545</td>\n"," <td>0.003175</td>\n"," <td>-0.020301</td>\n"," <td>0.011275</td>\n"," <td>-60.4091</td>\n"," <td>-38.7635</td>\n"," <td>-58.3956</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.22936</td>\n"," <td>8.83122</td>\n"," <td>3.70000</td>\n"," <td>0.012269</td>\n"," <td>0.018305</td>\n"," <td>-0.053325</td>\n"," <td>15.5612</td>\n"," <td>...</td>\n"," <td>9.69771</td>\n"," <td>-1.65625</td>\n"," <td>-0.060809</td>\n"," <td>0.012698</td>\n"," <td>-0.014303</td>\n"," <td>-0.002823</td>\n"," <td>-61.5199</td>\n"," <td>-39.3879</td>\n"," <td>-58.2694</td>\n"," <td>1</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>5 rows × 33 columns</p>\n","</div>"],"text/plain":[" activityID heart_rate hand temperature (°C) \\\n","0 transient activities 104.0 30.0 \n","1 transient activities 104.0 30.0 \n","2 transient activities 104.0 30.0 \n","3 transient activities 104.0 30.0 \n","4 transient activities 104.0 30.0 \n","\n"," hand acceleration X ±16g hand acceleration Y ±16g \\\n","0 2.37223 8.60074 \n","1 2.18837 8.56560 \n","2 2.37357 8.60107 \n","3 2.07473 8.52853 \n","4 2.22936 8.83122 \n","\n"," hand acceleration Z ±16g hand gyroscope X hand gyroscope Y \\\n","0 3.51048 -0.092217 0.056812 \n","1 3.66179 -0.024413 0.047759 \n","2 3.54898 -0.057976 0.032574 
\n","3 3.66021 -0.002352 0.032810 \n","4 3.70000 0.012269 0.018305 \n","\n"," hand gyroscope Z hand magnetometer X ... ankle acceleration X ±16g \\\n","0 -0.015845 14.6806 ... 9.65918 \n","1 0.006474 14.8991 ... 9.69370 \n","2 -0.006988 14.2420 ... 9.58944 \n","3 -0.003747 14.8908 ... 9.58814 \n","4 -0.053325 15.5612 ... 9.69771 \n","\n"," ankle acceleration Y ±16g ankle acceleration Z ±16g ankle gyroscope X \\\n","0 -1.65569 -0.099797 0.008300 \n","1 -1.57902 -0.215687 -0.006577 \n","2 -1.73276 0.092914 0.003014 \n","3 -1.77040 0.054545 0.003175 \n","4 -1.65625 -0.060809 0.012698 \n","\n"," ankle gyroscope Y ankle gyroscope Z ankle magnetometer X \\\n","0 0.009250 -0.017580 -61.1888 \n","1 -0.004638 0.000368 -59.8479 \n","2 0.000148 0.022495 -60.7361 \n","3 -0.020301 0.011275 -60.4091 \n","4 -0.014303 -0.002823 -61.5199 \n","\n"," ankle magnetometer Y ankle magnetometer Z PeopleId \n","0 -38.9599 -58.1438 1 \n","1 -38.8919 -58.5253 1 \n","2 -39.4138 -58.3999 1 \n","3 -38.7635 -58.3956 1 \n","4 -39.3879 -58.2694 1 \n","\n","[5 rows x 33 columns]"]},"execution_count":2,"metadata":{},"output_type":"execute_result"}],"source":["dataset = pd.read_csv('/kaggle/input/fisical-activity-dataset/dataset2.csv')\n","dataset.head()"]},{"cell_type":"code","execution_count":3,"id":"4c7d3445","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:24.466165Z","iopub.status.busy":"2023-08-05T00:07:24.465496Z","iopub.status.idle":"2023-08-05T00:07:24.70062Z","shell.execute_reply":"2023-08-05T00:07:24.699179Z"},"papermill":{"duration":0.24804,"end_time":"2023-08-05T00:07:24.703053","exception":false,"start_time":"2023-08-05T00:07:24.455013","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[104. 30. 2.37223 ... -61.1888 -38.9599 -58.1438 ]\n"," [104. 30. 2.18837 ... -59.8479 -38.8919 -58.5253 ]\n"," [104. 30. 2.37357 ... -60.7361 -39.4138 -58.3999 ]\n"," ...\n"," [140. 30.8125 -9.42745 ... -38.5541 -16.0535 24.6936 ]\n"," [140. 
30.8125 -9.47246 ... -38.8064 -16.04 24.9763 ]\n"," [140. 30.8125 -9.66621 ... -38.6814 -15.9175 24.9766 ]]\n","['transient activities' 'transient activities' 'transient activities' ...\n"," 'transient activities' 'transient activities' 'transient activities']\n"]}],"source":["X = dataset.iloc[:, 1:-1].values\n","y = dataset.iloc[:, 0].values\n","print(X)\n","print(y)"]},{"cell_type":"markdown","id":"e73af995","metadata":{"papermill":{"duration":0.00873,"end_time":"2023-08-05T00:07:24.720797","exception":false,"start_time":"2023-08-05T00:07:24.712067","status":"completed"},"tags":[]},"source":["## Encoding categorical data"]},{"cell_type":"code","execution_count":4,"id":"b2055003","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:24.740481Z","iopub.status.busy":"2023-08-05T00:07:24.740069Z","iopub.status.idle":"2023-08-05T00:07:26.930985Z","shell.execute_reply":"2023-08-05T00:07:26.929671Z"},"papermill":{"duration":2.203646,"end_time":"2023-08-05T00:07:26.933552","exception":false,"start_time":"2023-08-05T00:07:24.729906","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["['Nordic walking', 'ascending stairs', 'cycling', 'descending stairs', 'ironing', 'lying', 'rope jumping', 'running', 'sitting', 'standing', 'transient activities', 'vacuum cleaning', 'walking']\n","Counter({10: 927575, 12: 238761, 4: 238690, 5: 192523, 9: 189931, 0: 188107, 8: 185188, 11: 175353, 2: 164600, 1: 117216, 3: 104944, 7: 98199, 6: 42969})\n","Class=10, n=927575 (32.387%)\n","Class=5, n=192523 (6.722%)\n","Class=8, n=185188 (6.466%)\n","Class=9, n=189931 (6.632%)\n","Class=4, n=238690 (8.334%)\n","Class=11, n=175353 (6.123%)\n","Class=1, n=117216 (4.093%)\n","Class=3, n=104944 (3.664%)\n","Class=12, n=238761 (8.336%)\n","Class=0, n=188107 (6.568%)\n","Class=2, n=164600 (5.747%)\n","Class=7, n=98199 (3.429%)\n","Class=6, n=42969 (1.500%)\n"]},{"data":{"image/png":"","text/plain":["<Figure size 640x480 with 1 
Axes>"]},"metadata":{},"output_type":"display_data"}],"source":["from collections import Counter\n","from sklearn.preprocessing import LabelEncoder\n","from matplotlib import pyplot\n","\n","le = LabelEncoder()\n","y_temp = le.fit_transform(y)\n","print(list(le.classes_))\n","counter = Counter(y_temp)\n","\n","print(counter)\n","for k,v in counter.items():\n"," per = v / len(y_temp) * 100\n"," print('Class=%d, n=%d (%.3f%%)' % (k, v, per))\n","\n","# plot the distribution\n","pyplot.bar(counter.keys(), counter.values())\n","pyplot.show()"]},{"cell_type":"markdown","id":"2d62ec16","metadata":{"papermill":{"duration":0.009288,"end_time":"2023-08-05T00:07:26.952484","exception":false,"start_time":"2023-08-05T00:07:26.943196","status":"completed"},"tags":[]},"source":["## Method 1: Using Random Forest Classifier "]},{"cell_type":"markdown","id":"ec9d9fce","metadata":{"papermill":{"duration":0.009112,"end_time":"2023-08-05T00:07:26.971127","exception":false,"start_time":"2023-08-05T00:07:26.962015","status":"completed"},"tags":[]},"source":["## Handling missing values"]},{"cell_type":"code","execution_count":5,"id":"68d72f52","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:26.992642Z","iopub.status.busy":"2023-08-05T00:07:26.992214Z","iopub.status.idle":"2023-08-05T00:07:29.167243Z","shell.execute_reply":"2023-08-05T00:07:29.166002Z"},"papermill":{"duration":2.188981,"end_time":"2023-08-05T00:07:29.169898","exception":false,"start_time":"2023-08-05T00:07:26.980917","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.impute import SimpleImputer\n","imputer = SimpleImputer(missing_values=np.nan, strategy='mean')\n","imputer.fit(X)\n","X = imputer.transform(X)"]},{"cell_type":"markdown","id":"b99c6d79","metadata":{"papermill":{"duration":0.009124,"end_time":"2023-08-05T00:07:29.188693","exception":false,"start_time":"2023-08-05T00:07:29.179569","status":"completed"},"tags":[]},"source":["## 
Encoding"]},{"cell_type":"code","execution_count":6,"id":"57a332ea","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:29.209213Z","iopub.status.busy":"2023-08-05T00:07:29.20881Z","iopub.status.idle":"2023-08-05T00:07:29.792137Z","shell.execute_reply":"2023-08-05T00:07:29.791044Z"},"papermill":{"duration":0.596644,"end_time":"2023-08-05T00:07:29.794779","exception":false,"start_time":"2023-08-05T00:07:29.198135","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.preprocessing import LabelEncoder\n","le = LabelEncoder()\n","y = le.fit_transform(y)"]},{"cell_type":"code","execution_count":7,"id":"974612a4","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:29.81627Z","iopub.status.busy":"2023-08-05T00:07:29.815829Z","iopub.status.idle":"2023-08-05T00:07:31.145986Z","shell.execute_reply":"2023-08-05T00:07:31.144799Z"},"papermill":{"duration":1.343317,"end_time":"2023-08-05T00:07:31.148501","exception":false,"start_time":"2023-08-05T00:07:29.805184","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.model_selection import train_test_split\n","X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=0)"]},{"cell_type":"markdown","id":"8edfbb5a","metadata":{"papermill":{"duration":0.008929,"end_time":"2023-08-05T00:07:31.166893","exception":false,"start_time":"2023-08-05T00:07:31.157964","status":"completed"},"tags":[]},"source":["## Feature Scaling"]},{"cell_type":"code","execution_count":8,"id":"ba643853","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:31.187287Z","iopub.status.busy":"2023-08-05T00:07:31.186875Z","iopub.status.idle":"2023-08-05T00:07:32.475029Z","shell.execute_reply":"2023-08-05T00:07:32.474185Z"},"papermill":{"duration":1.301347,"end_time":"2023-08-05T00:07:32.477471","exception":false,"start_time":"2023-08-05T00:07:31.176124","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.preprocessing import StandardScaler\n","sc = 
StandardScaler()\n","X_train = sc.fit_transform(X_train)\n","X_test = sc.transform(X_test)"]},{"cell_type":"markdown","id":"14ac9a68","metadata":{"papermill":{"duration":0.009663,"end_time":"2023-08-05T00:07:32.496576","exception":false,"start_time":"2023-08-05T00:07:32.486913","status":"completed"},"tags":[]},"source":["## Training RandomForestClassifier"]},{"cell_type":"code","execution_count":9,"id":"1dfae9c0","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:32.517561Z","iopub.status.busy":"2023-08-05T00:07:32.517142Z","iopub.status.idle":"2023-08-05T00:15:35.089903Z","shell.execute_reply":"2023-08-05T00:15:35.088663Z"},"papermill":{"duration":482.594606,"end_time":"2023-08-05T00:15:35.101103","exception":false,"start_time":"2023-08-05T00:07:32.506497","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: 
\"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 
50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>RandomForestClassifier(criterion=&#x27;entropy&#x27;, n_estimators=10, random_state=0)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. 
<br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">RandomForestClassifier</label><div class=\"sk-toggleable__content\"><pre>RandomForestClassifier(criterion=&#x27;entropy&#x27;, n_estimators=10, random_state=0)</pre></div></div></div></div></div>"],"text/plain":["RandomForestClassifier(criterion='entropy', n_estimators=10, random_state=0)"]},"execution_count":9,"metadata":{},"output_type":"execute_result"}],"source":["from sklearn.ensemble import RandomForestClassifier\n","classifier = RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0)\n","classifier.fit(X_train, y_train)"]},{"cell_type":"markdown","id":"a6207e57","metadata":{"papermill":{"duration":0.009341,"end_time":"2023-08-05T00:15:35.120003","exception":false,"start_time":"2023-08-05T00:15:35.110662","status":"completed"},"tags":[]},"source":["## Predicting Results"]},{"cell_type":"code","execution_count":10,"id":"a963263d","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:35.140981Z","iopub.status.busy":"2023-08-05T00:15:35.140561Z","iopub.status.idle":"2023-08-05T00:15:37.22477Z","shell.execute_reply":"2023-08-05T00:15:37.223543Z"},"papermill":{"duration":2.097825,"end_time":"2023-08-05T00:15:37.227329","exception":false,"start_time":"2023-08-05T00:15:35.129504","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[ 4 4]\n"," [10 10]\n"," [ 8 8]\n"," ...\n"," [ 7 7]\n"," [ 8 8]\n"," [ 3 3]]\n"]}],"source":["y_pred = classifier.predict(X_test)\n","print(np.concatenate((y_pred.reshape(len(y_pred),1), 
y_test.reshape(len(y_test),1)),1))"]},{"cell_type":"markdown","id":"7846df5c","metadata":{"papermill":{"duration":0.009748,"end_time":"2023-08-05T00:15:37.247066","exception":false,"start_time":"2023-08-05T00:15:37.237318","status":"completed"},"tags":[]},"source":["## Calculating Accuracy"]},{"cell_type":"code","execution_count":11,"id":"ed6e3e8a","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:37.268284Z","iopub.status.busy":"2023-08-05T00:15:37.267859Z","iopub.status.idle":"2023-08-05T00:15:37.436445Z","shell.execute_reply":"2023-08-05T00:15:37.435065Z"},"papermill":{"duration":0.182461,"end_time":"2023-08-05T00:15:37.439113","exception":false,"start_time":"2023-08-05T00:15:37.256652","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[ 37634 0 0 0 0 0 0 0 0 0\n"," 8 0 0]\n"," [ 0 23179 0 23 0 0 0 0 0 0\n"," 81 0 0]\n"," [ 0 0 32839 0 0 0 0 0 0 0\n"," 21 0 0]\n"," [ 0 14 0 20904 0 0 0 0 0 0\n"," 43 0 0]\n"," [ 0 0 0 0 47928 0 0 0 0 14\n"," 6 0 0]\n"," [ 0 0 0 0 0 38287 0 0 3 0\n"," 17 0 0]\n"," [ 0 0 1 0 0 0 8591 2 0 0\n"," 5 0 0]\n"," [ 0 0 0 0 0 0 0 19492 0 0\n"," 8 1 0]\n"," [ 0 0 0 0 0 0 0 0 36812 2\n"," 32 0 0]\n"," [ 0 0 0 0 22 0 0 0 4 38082\n"," 57 0 0]\n"," [ 28 61 12 57 25 32 8 24 31 22\n"," 185589 24 20]\n"," [ 0 0 0 0 0 0 0 0 0 0\n"," 41 35025 0]\n"," [ 1 0 0 0 0 0 0 0 0 0\n"," 41 0 47659]]\n"]},{"data":{"text/plain":["0.9986190931754223"]},"execution_count":11,"metadata":{},"output_type":"execute_result"}],"source":["from sklearn.metrics import confusion_matrix, accuracy_score\n","cm = confusion_matrix(y_test, y_pred)\n","print(cm)\n","accuracy_score(y_test, 
y_pred)"]},{"cell_type":"code","execution_count":12,"id":"e609b003","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:37.462377Z","iopub.status.busy":"2023-08-05T00:15:37.461702Z","iopub.status.idle":"2023-08-05T00:15:39.654083Z","shell.execute_reply":"2023-08-05T00:15:39.652958Z"},"papermill":{"duration":2.206834,"end_time":"2023-08-05T00:15:39.656344","exception":false,"start_time":"2023-08-05T00:15:37.44951","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":[" <script type=\"text/javascript\">\n"," window.PlotlyConfig = {MathJaxConfig: 'local'};\n"," if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n"," if (typeof require !== 'undefined') {\n"," require.undef(\"plotly\");\n"," requirejs.config({\n"," paths: {\n"," 'plotly': ['https://cdn.plot.ly/plotly-2.24.1.min']\n"," }\n"," });\n"," require(['plotly'], function(Plotly) {\n"," window._Plotly = Plotly;\n"," });\n"," }\n"," </script>\n"," "]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["<div> <div id=\"be7930db-6c4e-497b-b832-416da626298c\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"be7930db-6c4e-497b-b832-416da626298c\")) { Plotly.newPlot( \"be7930db-6c4e-497b-b832-416da626298c\", 
[{\"coloraxis\":\"coloraxis\",\"name\":\"0\",\"texttemplate\":\"%{z}\",\"z\":[[37634,0,0,0,0,0,0,0,0,0,8,0,0],[0,23179,0,23,0,0,0,0,0,0,81,0,0],[0,0,32839,0,0,0,0,0,0,0,21,0,0],[0,14,0,20904,0,0,0,0,0,0,43,0,0],[0,0,0,0,47928,0,0,0,0,14,6,0,0],[0,0,0,0,0,38287,0,0,3,0,17,0,0],[0,0,1,0,0,0,8591,2,0,0,5,0,0],[0,0,0,0,0,0,0,19492,0,0,8,1,0],[0,0,0,0,0,0,0,0,36812,2,32,0,0],[0,0,0,0,22,0,0,0,4,38082,57,0,0],[28,61,12,57,25,32,8,24,31,22,185589,24,20],[0,0,0,0,0,0,0,0,0,0,41,35025,0],[1,0,0,0,0,0,0,0,0,0,41,0,47659]],\"type\":\"heatmap\",\"xaxis\":\"x\",\"yaxis\":\"y\",\"hovertemplate\":\"x: %{x}\\u003cbr\\u003ey: %{y}\\u003cbr\\u003ecolor: %{z}\\u003cextra\\u003e\\u003c\\u002fextra\\u003e\"}], {\"template\":{\"data\":{\"histogram2dcontour\":[{\"type\":\"histogram2dcontour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"choropleth\":[{\"type\":\"choropleth\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"histogram2d\":[{\"type\":\"histogram2d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmap\":[{\"type\":\"heatmap\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"]
,[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmapgl\":[{\"type\":\"heatmapgl\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"contourcarpet\":[{\"type\":\"contourcarpet\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"contour\":[{\"type\":\"contour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"surface\":[{\"type\":\"surface\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"mesh3d\":[{\"type\":\"mesh3d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"scatter\":[{\"fillpattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2},\"type\":\"scatter\"}],\"parcoords\":[{\"type\":\"parcoords\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolargl\":[{\"type\":\"scatterpolargl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"bar\":[{\"error_x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}
],\"scattergeo\":[{\"type\":\"scattergeo\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolar\":[{\"type\":\"scatterpolar\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"histogram\":[{\"marker\":{\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"histogram\"}],\"scattergl\":[{\"type\":\"scattergl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatter3d\":[{\"type\":\"scatter3d\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattermapbox\":[{\"type\":\"scattermapbox\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterternary\":[{\"type\":\"scatterternary\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattercarpet\":[{\"type\":\"scattercarpet\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"table\":[{\"cells\":{\"fill\":{\"color\":\"#EBF0F8\"},\"line\":{\"color\":\"white\"}},\"header\":{\"fill\":{\"color\":\"#C8D4E3\"},\"line\":{\"color\":\"white\"}},\"type\":\"table\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"pie\":[{\"automargin\":true,\"type\":\"pie\"}]},\"layout\":{\"autotypenumbers\":\"strict\",\"colorway\":[\"#636efa\",\"#EF553B\",\"#00cc96\",\"#ab63fa\",\"#FFA15A\",\"#19d3f3\",\"#FF6692\",\"#B6E880\",\"#FF97FF\",\"#FECB52\"],\"font\":{\"color\":\"#2a3f5f\"},\"hovermode\":\"closest\",\"hoverlabel\":{\"align\":\"left\"},\"paper_bgcolor\":\"white\",\"plot_bgcolor\":\"#E5EC
F6\",\"polar\":{\"bgcolor\":\"#E5ECF6\",\"angularaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"radialaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"ternary\":{\"bgcolor\":\"#E5ECF6\",\"aaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"baxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"caxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"coloraxis\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"colorscale\":{\"sequential\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"sequentialminus\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"diverging\":[[0,\"#8e0152\"],[0.1,\"#c51b7d\"],[0.2,\"#de77ae\"],[0.3,\"#f1b6da\"],[0.4,\"#fde0ef\"],[0.5,\"#f7f7f7\"],[0.6,\"#e6f5d0\"],[0.7,\"#b8e186\"],[0.8,\"#7fbc41\"],[0.9,\"#4d9221\"],[1,\"#276419\"]]},\"xaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"yaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"scene\":{\"xaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"yaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecol
or\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"zaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2}},\"shapedefaults\":{\"line\":{\"color\":\"#2a3f5f\"}},\"annotationdefaults\":{\"arrowcolor\":\"#2a3f5f\",\"arrowhead\":0,\"arrowwidth\":1},\"geo\":{\"bgcolor\":\"white\",\"landcolor\":\"#E5ECF6\",\"subunitcolor\":\"white\",\"showland\":true,\"showlakes\":true,\"lakecolor\":\"white\"},\"title\":{\"x\":0.05},\"mapbox\":{\"style\":\"light\"}}},\"xaxis\":{\"anchor\":\"y\",\"domain\":[0.0,1.0]},\"yaxis\":{\"anchor\":\"x\",\"domain\":[0.0,1.0],\"autorange\":\"reversed\"},\"coloraxis\":{\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]},\"margin\":{\"t\":60}}, {\"responsive\": true} ).then(function(){\n"," \n","var gd = document.getElementById('be7930db-6c4e-497b-b832-416da626298c');\n","var x = new MutationObserver(function (mutations, observer) {{\n"," var display = window.getComputedStyle(gd).display;\n"," if (!display || display === 'none') {{\n"," console.log([gd, 'removed!']);\n"," Plotly.purge(gd);\n"," observer.disconnect();\n"," }}\n","}});\n","\n","// Listen for the removal of the full notebook cells\n","var notebookContainer = gd.closest('#notebook-container');\n","if (notebookContainer) {{\n"," x.observe(notebookContainer, {childList: true});\n","}}\n","\n","// Listen for the clearing of the current output cell\n","var outputEl = gd.closest('.output');\n","if (outputEl) {{\n"," x.observe(outputEl, {childList: true});\n","}}\n","\n"," }) }; }); </script> </div>"]},"metadata":{},"output_type":"display_data"}],"source":["# import 
matplotlib.pyplot as plt\n","# from sklearn.metrics import ConfusionMatrixDisplay\n","# disp = ConfusionMatrixDisplay(confusion_matrix=cm,\n","# display_labels=classifier.classes_)\n","# disp.plot()\n","# plt.show()\n","import plotly.express as px\n","fig = px.imshow(cm, text_auto=True, aspect=\"auto\")\n","fig.show()"]},{"cell_type":"markdown","id":"6b68fad1","metadata":{"papermill":{"duration":0.010207,"end_time":"2023-08-05T00:15:39.677156","exception":false,"start_time":"2023-08-05T00:15:39.666949","status":"completed"},"tags":[]},"source":["## Method 2: Using KNN"]},{"cell_type":"code","execution_count":13,"id":"219416d1","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:39.699914Z","iopub.status.busy":"2023-08-05T00:15:39.699066Z","iopub.status.idle":"2023-08-05T00:15:40.015603Z","shell.execute_reply":"2023-08-05T00:15:40.014559Z"},"papermill":{"duration":0.330691,"end_time":"2023-08-05T00:15:40.0181","exception":false,"start_time":"2023-08-05T00:15:39.687409","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<style>#sk-container-id-2 {color: black;background-color: white;}#sk-container-id-2 pre{padding: 0;}#sk-container-id-2 div.sk-toggleable {background-color: white;}#sk-container-id-2 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-2 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-2 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-2 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-2 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-2 
input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-2 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-2 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-2 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-2 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-2 div.sk-item {position: relative;z-index: 1;}#sk-container-id-2 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-2 div.sk-item::before, #sk-container-id-2 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-2 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: 
relative;background-color: white;}#sk-container-id-2 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-2 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-2 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-2 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-2 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-2 div.sk-label-container {text-align: center;}#sk-container-id-2 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-2 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>KNeighborsClassifier()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. 
<br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" checked><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">KNeighborsClassifier</label><div class=\"sk-toggleable__content\"><pre>KNeighborsClassifier()</pre></div></div></div></div></div>"],"text/plain":["KNeighborsClassifier()"]},"execution_count":13,"metadata":{},"output_type":"execute_result"}],"source":["from sklearn.neighbors import KNeighborsClassifier\n","knn_clf = KNeighborsClassifier()\n","knn_clf.fit(X_train,y_train)"]},{"cell_type":"code","execution_count":14,"id":"fe6052b4","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:40.041026Z","iopub.status.busy":"2023-08-05T00:15:40.040612Z","iopub.status.idle":"2023-08-05T01:09:53.645091Z","shell.execute_reply":"2023-08-05T01:09:53.643871Z"},"papermill":{"duration":3253.619328,"end_time":"2023-08-05T01:09:53.648066","exception":false,"start_time":"2023-08-05T00:15:40.028738","status":"completed"},"tags":[]},"outputs":[],"source":["y_pred=knn_clf.predict(X_test)"]},{"cell_type":"code","execution_count":15,"id":"80d0ea82","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:09:53.674186Z","iopub.status.busy":"2023-08-05T01:09:53.673791Z","iopub.status.idle":"2023-08-05T01:09:53.682771Z","shell.execute_reply":"2023-08-05T01:09:53.681553Z"},"papermill":{"duration":0.023497,"end_time":"2023-08-05T01:09:53.685168","exception":false,"start_time":"2023-08-05T01:09:53.661671","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[ 4 4]\n"," [10 10]\n"," [ 8 8]\n"," ...\n"," [ 7 7]\n"," [ 8 8]\n"," [ 3 3]]\n"]}],"source":["print(np.concatenate((y_pred.reshape(len(y_pred),1), 
y_test.reshape(len(y_test),1)),1))"]},{"cell_type":"markdown","id":"30642482","metadata":{"papermill":{"duration":0.01112,"end_time":"2023-08-05T01:09:53.711764","exception":false,"start_time":"2023-08-05T01:09:53.700644","status":"completed"},"tags":[]},"source":["## Method 2: Using ANN"]},{"cell_type":"code","execution_count":16,"id":"9fe82be2","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:09:53.739787Z","iopub.status.busy":"2023-08-05T01:09:53.738634Z","iopub.status.idle":"2023-08-05T01:09:55.132546Z","shell.execute_reply":"2023-08-05T01:09:55.131479Z"},"papermill":{"duration":1.410752,"end_time":"2023-08-05T01:09:55.135242","exception":false,"start_time":"2023-08-05T01:09:53.72449","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.model_selection import train_test_split\n","X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=0)"]},{"cell_type":"markdown","id":"571dcd4c","metadata":{"papermill":{"duration":0.010439,"end_time":"2023-08-05T01:09:55.156709","exception":false,"start_time":"2023-08-05T01:09:55.14627","status":"completed"},"tags":[]},"source":["## Oversampling the minority classes"]},{"cell_type":"code","execution_count":17,"id":"32271f2b","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:09:55.181016Z","iopub.status.busy":"2023-08-05T01:09:55.180057Z","iopub.status.idle":"2023-08-05T01:19:23.222784Z","shell.execute_reply":"2023-08-05T01:19:23.221544Z"},"papermill":{"duration":568.057647,"end_time":"2023-08-05T01:19:23.225424","exception":false,"start_time":"2023-08-05T01:09:55.167777","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["Class=4, n=741642 (7.692%)\n","Class=12, n=741642 (7.692%)\n","Class=10, n=741642 (7.692%)\n","Class=2, n=741642 (7.692%)\n","Class=9, n=741642 (7.692%)\n","Class=8, n=741642 (7.692%)\n","Class=0, n=741642 (7.692%)\n","Class=5, n=741642 (7.692%)\n","Class=1, n=741642 (7.692%)\n","Class=11, 
n=741642 (7.692%)\n","Class=6, n=741642 (7.692%)\n","Class=3, n=741642 (7.692%)\n","Class=7, n=741642 (7.692%)\n"]},{"data":{"image/png":"","text/plain":["<Figure size 640x480 with 1 Axes>"]},"metadata":{},"output_type":"display_data"}],"source":["from imblearn.over_sampling import SMOTE\n","oversample = SMOTE()\n","y_train = le.fit_transform(y_train)\n","X_train, y_train = oversample.fit_resample(X_train, y_train)\n","# summarize distribution\n","counter = Counter(y_train)\n","for k,v in counter.items():\n"," per = v / len(y_train) * 100\n"," print('Class=%d, n=%d (%.3f%%)' % (k, v, per))\n","# plot the distribution\n","pyplot.bar(counter.keys(), counter.values())\n","pyplot.show()"]},{"cell_type":"code","execution_count":18,"id":"aa3846f3","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:23.252818Z","iopub.status.busy":"2023-08-05T01:19:23.251956Z","iopub.status.idle":"2023-08-05T01:19:23.740771Z","shell.execute_reply":"2023-08-05T01:19:23.73973Z"},"papermill":{"duration":0.505699,"end_time":"2023-08-05T01:19:23.743348","exception":false,"start_time":"2023-08-05T01:19:23.237649","status":"completed"},"tags":[]},"outputs":[],"source":["from tensorflow.keras.utils import to_categorical\n","y_train = to_categorical(y_train)"]},{"cell_type":"code","execution_count":19,"id":"0a0017d2","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:23.767819Z","iopub.status.busy":"2023-08-05T01:19:23.767429Z","iopub.status.idle":"2023-08-05T01:19:29.065742Z","shell.execute_reply":"2023-08-05T01:19:29.064536Z"},"papermill":{"duration":5.313708,"end_time":"2023-08-05T01:19:29.068475","exception":false,"start_time":"2023-08-05T01:19:23.754767","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.preprocessing import StandardScaler\n","sc = StandardScaler()\n","X_train = sc.fit_transform(X_train)\n","X_test = 
sc.transform(X_test)"]},{"cell_type":"code","execution_count":20,"id":"c344eb49","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:29.092749Z","iopub.status.busy":"2023-08-05T01:19:29.092331Z","iopub.status.idle":"2023-08-05T01:19:29.209741Z","shell.execute_reply":"2023-08-05T01:19:29.20847Z"},"papermill":{"duration":0.132568,"end_time":"2023-08-05T01:19:29.21231","exception":false,"start_time":"2023-08-05T01:19:29.079742","status":"completed"},"tags":[]},"outputs":[],"source":["ann = tf.keras.models.Sequential()\n","ann.add(tf.keras.layers.Dense(units=26, activation='tanh'))\n","ann.add(tf.keras.layers.Dense(units=52, activation='tanh'))\n","ann.add(tf.keras.layers.Dense(units=52, activation='tanh'))\n","ann.add(tf.keras.layers.Dense(units=13, activation='softmax'))"]},{"cell_type":"code","execution_count":21,"id":"c1441ca6","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:29.236225Z","iopub.status.busy":"2023-08-05T01:19:29.23586Z","iopub.status.idle":"2023-08-05T01:19:29.268014Z","shell.execute_reply":"2023-08-05T01:19:29.266637Z"},"papermill":{"duration":0.046985,"end_time":"2023-08-05T01:19:29.270549","exception":false,"start_time":"2023-08-05T01:19:29.223564","status":"completed"},"tags":[]},"outputs":[],"source":["ann.compile(optimizer='adam' , loss='CategoricalCrossentropy' , metrics=['accuracy'])"]},{"cell_type":"code","execution_count":22,"id":"82e960ec","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:29.294423Z","iopub.status.busy":"2023-08-05T01:19:29.294016Z","iopub.status.idle":"2023-08-05T01:43:40.990731Z","shell.execute_reply":"2023-08-05T01:43:40.989437Z"},"papermill":{"duration":1451.711594,"end_time":"2023-08-05T01:43:40.99336","exception":false,"start_time":"2023-08-05T01:19:29.281766","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["Epoch 1/10\n","75324/75324 [==============================] - 145s 2ms/step - loss: 0.1724 - accuracy: 0.9415\n","Epoch 
2/10\n","75324/75324 [==============================] - 142s 2ms/step - loss: 0.0763 - accuracy: 0.9731\n","Epoch 3/10\n","75324/75324 [==============================] - 144s 2ms/step - loss: 0.0601 - accuracy: 0.9788\n","Epoch 4/10\n","75324/75324 [==============================] - 151s 2ms/step - loss: 0.0527 - accuracy: 0.9815\n","Epoch 5/10\n","75324/75324 [==============================] - 142s 2ms/step - loss: 0.0480 - accuracy: 0.9831\n","Epoch 6/10\n","75324/75324 [==============================] - 143s 2ms/step - loss: 0.0452 - accuracy: 0.9840\n","Epoch 7/10\n","75324/75324 [==============================] - 146s 2ms/step - loss: 0.0432 - accuracy: 0.9848\n","Epoch 8/10\n","75324/75324 [==============================] - 141s 2ms/step - loss: 0.0417 - accuracy: 0.9854\n","Epoch 9/10\n","75324/75324 [==============================] - 148s 2ms/step - loss: 0.0405 - accuracy: 0.9857\n","Epoch 10/10\n","75324/75324 [==============================] - 146s 2ms/step - loss: 0.0397 - accuracy: 0.9860\n"]}],"source":["history = ann.fit(X_train, y_train, batch_size=128, epochs=10)"]},{"cell_type":"code","execution_count":23,"id":"c66f3d9a","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:43:44.865958Z","iopub.status.busy":"2023-08-05T01:43:44.86558Z","iopub.status.idle":"2023-08-05T01:44:26.055791Z","shell.execute_reply":"2023-08-05T01:44:26.054616Z"},"papermill":{"duration":43.125175,"end_time":"2023-08-05T01:44:26.058358","exception":false,"start_time":"2023-08-05T01:43:42.933183","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["17901/17901 [==============================] - 22s 1ms/step\n"]}],"source":["y_pred = 
ann.predict(X_test)"]},{"cell_type":"code","execution_count":24,"id":"ca3d6088","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:29.962501Z","iopub.status.busy":"2023-08-05T01:44:29.962091Z","iopub.status.idle":"2023-08-05T01:44:29.991499Z","shell.execute_reply":"2023-08-05T01:44:29.990294Z"},"papermill":{"duration":1.967301,"end_time":"2023-08-05T01:44:29.994142","exception":false,"start_time":"2023-08-05T01:44:28.026841","status":"completed"},"tags":[]},"outputs":[],"source":["result = np.argmax(y_pred, axis=-1)"]},{"cell_type":"code","execution_count":25,"id":"4211d197","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:33.86521Z","iopub.status.busy":"2023-08-05T01:44:33.864484Z","iopub.status.idle":"2023-08-05T01:44:33.869618Z","shell.execute_reply":"2023-08-05T01:44:33.868861Z"},"papermill":{"duration":1.942499,"end_time":"2023-08-05T01:44:33.871763","exception":false,"start_time":"2023-08-05T01:44:31.929264","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[ 4 10 8 ... 
7 8 3]\n"]}],"source":["print(result)"]},{"cell_type":"code","execution_count":26,"id":"e36e98e0","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:37.612404Z","iopub.status.busy":"2023-08-05T01:44:37.61165Z","iopub.status.idle":"2023-08-05T01:44:37.616428Z","shell.execute_reply":"2023-08-05T01:44:37.615631Z"},"papermill":{"duration":1.837241,"end_time":"2023-08-05T01:44:37.618458","exception":false,"start_time":"2023-08-05T01:44:35.781217","status":"completed"},"tags":[]},"outputs":[],"source":["y_test_rs = np.argmax(y_test, axis=-1)"]},{"cell_type":"code","execution_count":27,"id":"49227522","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:41.477349Z","iopub.status.busy":"2023-08-05T01:44:41.476614Z","iopub.status.idle":"2023-08-05T01:44:41.481313Z","shell.execute_reply":"2023-08-05T01:44:41.48054Z"},"papermill":{"duration":1.939073,"end_time":"2023-08-05T01:44:41.484075","exception":false,"start_time":"2023-08-05T01:44:39.545002","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["9\n"]}],"source":["print(y_test_rs)"]},{"cell_type":"code","execution_count":28,"id":"72b1f910","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:45.446971Z","iopub.status.busy":"2023-08-05T01:44:45.446273Z","iopub.status.idle":"2023-08-05T01:44:45.569371Z","shell.execute_reply":"2023-08-05T01:44:45.568125Z"},"papermill":{"duration":2.161727,"end_time":"2023-08-05T01:44:45.571926","exception":false,"start_time":"2023-08-05T01:44:43.410199","status":"completed"},"tags":[]},"outputs":[],"source":["y_pred_enc = pd.get_dummies(result)\n","y_test_enc = 
pd.get_dummies(y_test)"]},{"cell_type":"code","execution_count":29,"id":"6b54f780","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:49.311088Z","iopub.status.busy":"2023-08-05T01:44:49.310688Z","iopub.status.idle":"2023-08-05T01:44:49.698079Z","shell.execute_reply":"2023-08-05T01:44:49.696602Z"},"papermill":{"duration":2.293735,"end_time":"2023-08-05T01:44:49.700483","exception":false,"start_time":"2023-08-05T01:44:47.406748","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["0.9624047680565351\n"]}],"source":["from sklearn.metrics import accuracy_score\n","print(accuracy_score(y_test_enc, y_pred_enc))"]}],"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.12"},"papermill":{"default_parameters":{},"duration":5890.466662,"end_time":"2023-08-05T01:44:54.111787","environment_variables":{},"exception":null,"input_path":"__notebook__.ipynb","output_path":"__notebook__.ipynb","parameters":{},"start_time":"2023-08-05T00:06:43.645125","version":"2.4.0"}},"nbformat":4,"nbformat_minor":5} No newline at end of file
Copy link
Collaborator

@coderpotter coderpotter Aug 6, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Line #4.    print(y)

Don't use `print` statements in Jupyter unless you need specially formatted output; the value of the last expression in a cell is displayed automatically.


Reply via ReviewNB

@@ -0,0 +1 @@
{"cells":[{"source":"<a href=\"https://www.kaggle.com/code/manvinagdev/physical-activity-prediction?scriptVersionId=138941879\" target=\"_blank\"><img align=\"left\" alt=\"Kaggle\" title=\"Open in Kaggle\" src=\"https://kaggle.com/static/images/open-in-kaggle.svg\"></a>","metadata":{},"cell_type":"markdown"},{"cell_type":"markdown","id":"fea86c76","metadata":{"papermill":{"duration":0.008893,"end_time":"2023-08-05T00:06:53.536089","exception":false,"start_time":"2023-08-05T00:06:53.527196","status":"completed"},"tags":[]},"source":["## Importing libraries"]},{"cell_type":"code","execution_count":1,"id":"89d8bcdb","metadata":{"_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","execution":{"iopub.execute_input":"2023-08-05T00:06:53.554723Z","iopub.status.busy":"2023-08-05T00:06:53.554321Z","iopub.status.idle":"2023-08-05T00:07:02.412402Z","shell.execute_reply":"2023-08-05T00:07:02.410962Z"},"papermill":{"duration":8.870272,"end_time":"2023-08-05T00:07:02.414899","exception":false,"start_time":"2023-08-05T00:06:53.544627","status":"completed"},"tags":[]},"outputs":[{"name":"stderr","output_type":"stream","text":["/opt/conda/lib/python3.10/site-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.23.5\n"," warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n","/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/__init__.py:98: UserWarning: unable to load libtensorflow_io_plugins.so: unable to open file: libtensorflow_io_plugins.so, from paths: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so']\n","caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so: undefined symbol: _ZN3tsl6StatusC1EN10tensorflow5error4CodeESt17basic_string_viewIcSt11char_traitsIcEENS_14SourceLocationE']\n"," 
warnings.warn(f\"unable to load libtensorflow_io_plugins.so: {e}\")\n","/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/__init__.py:104: UserWarning: file system plugins are not loaded: unable to open file: libtensorflow_io.so, from paths: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so']\n","caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so: undefined symbol: _ZTVN10tensorflow13GcsFileSystemE']\n"," warnings.warn(f\"file system plugins are not loaded: {e}\")\n"]},{"name":"stdout","output_type":"stream","text":["/kaggle/input/fisical-activity-dataset/dataset2.csv\n"]}],"source":["import numpy as np\n","import pandas as pd\n","import tensorflow as tf\n","\n","import os\n","for dirname, _, filenames in os.walk('/kaggle/input'):\n"," for filename in filenames:\n"," print(os.path.join(dirname, filename))"]},{"cell_type":"markdown","id":"e1dee26f","metadata":{"papermill":{"duration":0.008233,"end_time":"2023-08-05T00:07:02.431643","exception":false,"start_time":"2023-08-05T00:07:02.42341","status":"completed"},"tags":[]},"source":["## Loading the dataset"]},{"cell_type":"code","execution_count":2,"id":"ce6fe559","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:02.452779Z","iopub.status.busy":"2023-08-05T00:07:02.451213Z","iopub.status.idle":"2023-08-05T00:07:24.443285Z","shell.execute_reply":"2023-08-05T00:07:24.442116Z"},"papermill":{"duration":22.004325,"end_time":"2023-08-05T00:07:24.445861","exception":false,"start_time":"2023-08-05T00:07:02.441536","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: 
right;\">\n"," <th></th>\n"," <th>activityID</th>\n"," <th>heart_rate</th>\n"," <th>hand temperature (°C)</th>\n"," <th>hand acceleration X ±16g</th>\n"," <th>hand acceleration Y ±16g</th>\n"," <th>hand acceleration Z ±16g</th>\n"," <th>hand gyroscope X</th>\n"," <th>hand gyroscope Y</th>\n"," <th>hand gyroscope Z</th>\n"," <th>hand magnetometer X</th>\n"," <th>...</th>\n"," <th>ankle acceleration X ±16g</th>\n"," <th>ankle acceleration Y ±16g</th>\n"," <th>ankle acceleration Z ±16g</th>\n"," <th>ankle gyroscope X</th>\n"," <th>ankle gyroscope Y</th>\n"," <th>ankle gyroscope Z</th>\n"," <th>ankle magnetometer X</th>\n"," <th>ankle magnetometer Y</th>\n"," <th>ankle magnetometer Z</th>\n"," <th>PeopleId</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.37223</td>\n"," <td>8.60074</td>\n"," <td>3.51048</td>\n"," <td>-0.092217</td>\n"," <td>0.056812</td>\n"," <td>-0.015845</td>\n"," <td>14.6806</td>\n"," <td>...</td>\n"," <td>9.65918</td>\n"," <td>-1.65569</td>\n"," <td>-0.099797</td>\n"," <td>0.008300</td>\n"," <td>0.009250</td>\n"," <td>-0.017580</td>\n"," <td>-61.1888</td>\n"," <td>-38.9599</td>\n"," <td>-58.1438</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.18837</td>\n"," <td>8.56560</td>\n"," <td>3.66179</td>\n"," <td>-0.024413</td>\n"," <td>0.047759</td>\n"," <td>0.006474</td>\n"," <td>14.8991</td>\n"," <td>...</td>\n"," <td>9.69370</td>\n"," <td>-1.57902</td>\n"," <td>-0.215687</td>\n"," <td>-0.006577</td>\n"," <td>-0.004638</td>\n"," <td>0.000368</td>\n"," <td>-59.8479</td>\n"," <td>-38.8919</td>\n"," <td>-58.5253</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.37357</td>\n"," <td>8.60107</td>\n"," <td>3.54898</td>\n"," <td>-0.057976</td>\n"," <td>0.032574</td>\n"," 
<td>-0.006988</td>\n"," <td>14.2420</td>\n"," <td>...</td>\n"," <td>9.58944</td>\n"," <td>-1.73276</td>\n"," <td>0.092914</td>\n"," <td>0.003014</td>\n"," <td>0.000148</td>\n"," <td>0.022495</td>\n"," <td>-60.7361</td>\n"," <td>-39.4138</td>\n"," <td>-58.3999</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.07473</td>\n"," <td>8.52853</td>\n"," <td>3.66021</td>\n"," <td>-0.002352</td>\n"," <td>0.032810</td>\n"," <td>-0.003747</td>\n"," <td>14.8908</td>\n"," <td>...</td>\n"," <td>9.58814</td>\n"," <td>-1.77040</td>\n"," <td>0.054545</td>\n"," <td>0.003175</td>\n"," <td>-0.020301</td>\n"," <td>0.011275</td>\n"," <td>-60.4091</td>\n"," <td>-38.7635</td>\n"," <td>-58.3956</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.22936</td>\n"," <td>8.83122</td>\n"," <td>3.70000</td>\n"," <td>0.012269</td>\n"," <td>0.018305</td>\n"," <td>-0.053325</td>\n"," <td>15.5612</td>\n"," <td>...</td>\n"," <td>9.69771</td>\n"," <td>-1.65625</td>\n"," <td>-0.060809</td>\n"," <td>0.012698</td>\n"," <td>-0.014303</td>\n"," <td>-0.002823</td>\n"," <td>-61.5199</td>\n"," <td>-39.3879</td>\n"," <td>-58.2694</td>\n"," <td>1</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>5 rows × 33 columns</p>\n","</div>"],"text/plain":[" activityID heart_rate hand temperature (°C) \\\n","0 transient activities 104.0 30.0 \n","1 transient activities 104.0 30.0 \n","2 transient activities 104.0 30.0 \n","3 transient activities 104.0 30.0 \n","4 transient activities 104.0 30.0 \n","\n"," hand acceleration X ±16g hand acceleration Y ±16g \\\n","0 2.37223 8.60074 \n","1 2.18837 8.56560 \n","2 2.37357 8.60107 \n","3 2.07473 8.52853 \n","4 2.22936 8.83122 \n","\n"," hand acceleration Z ±16g hand gyroscope X hand gyroscope Y \\\n","0 3.51048 -0.092217 0.056812 \n","1 3.66179 -0.024413 0.047759 \n","2 3.54898 -0.057976 0.032574 
\n","3 3.66021 -0.002352 0.032810 \n","4 3.70000 0.012269 0.018305 \n","\n"," hand gyroscope Z hand magnetometer X ... ankle acceleration X ±16g \\\n","0 -0.015845 14.6806 ... 9.65918 \n","1 0.006474 14.8991 ... 9.69370 \n","2 -0.006988 14.2420 ... 9.58944 \n","3 -0.003747 14.8908 ... 9.58814 \n","4 -0.053325 15.5612 ... 9.69771 \n","\n"," ankle acceleration Y ±16g ankle acceleration Z ±16g ankle gyroscope X \\\n","0 -1.65569 -0.099797 0.008300 \n","1 -1.57902 -0.215687 -0.006577 \n","2 -1.73276 0.092914 0.003014 \n","3 -1.77040 0.054545 0.003175 \n","4 -1.65625 -0.060809 0.012698 \n","\n"," ankle gyroscope Y ankle gyroscope Z ankle magnetometer X \\\n","0 0.009250 -0.017580 -61.1888 \n","1 -0.004638 0.000368 -59.8479 \n","2 0.000148 0.022495 -60.7361 \n","3 -0.020301 0.011275 -60.4091 \n","4 -0.014303 -0.002823 -61.5199 \n","\n"," ankle magnetometer Y ankle magnetometer Z PeopleId \n","0 -38.9599 -58.1438 1 \n","1 -38.8919 -58.5253 1 \n","2 -39.4138 -58.3999 1 \n","3 -38.7635 -58.3956 1 \n","4 -39.3879 -58.2694 1 \n","\n","[5 rows x 33 columns]"]},"execution_count":2,"metadata":{},"output_type":"execute_result"}],"source":["dataset = pd.read_csv('/kaggle/input/fisical-activity-dataset/dataset2.csv')\n","dataset.head()"]},{"cell_type":"code","execution_count":3,"id":"4c7d3445","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:24.466165Z","iopub.status.busy":"2023-08-05T00:07:24.465496Z","iopub.status.idle":"2023-08-05T00:07:24.70062Z","shell.execute_reply":"2023-08-05T00:07:24.699179Z"},"papermill":{"duration":0.24804,"end_time":"2023-08-05T00:07:24.703053","exception":false,"start_time":"2023-08-05T00:07:24.455013","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[104. 30. 2.37223 ... -61.1888 -38.9599 -58.1438 ]\n"," [104. 30. 2.18837 ... -59.8479 -38.8919 -58.5253 ]\n"," [104. 30. 2.37357 ... -60.7361 -39.4138 -58.3999 ]\n"," ...\n"," [140. 30.8125 -9.42745 ... -38.5541 -16.0535 24.6936 ]\n"," [140. 
30.8125 -9.47246 ... -38.8064 -16.04 24.9763 ]\n"," [140. 30.8125 -9.66621 ... -38.6814 -15.9175 24.9766 ]]\n","['transient activities' 'transient activities' 'transient activities' ...\n"," 'transient activities' 'transient activities' 'transient activities']\n"]}],"source":["X = dataset.iloc[:, 1:-1].values\n","y = dataset.iloc[:, 0].values\n","print(X)\n","print(y)"]},{"cell_type":"markdown","id":"e73af995","metadata":{"papermill":{"duration":0.00873,"end_time":"2023-08-05T00:07:24.720797","exception":false,"start_time":"2023-08-05T00:07:24.712067","status":"completed"},"tags":[]},"source":["## Encoding categorical data"]},{"cell_type":"code","execution_count":4,"id":"b2055003","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:24.740481Z","iopub.status.busy":"2023-08-05T00:07:24.740069Z","iopub.status.idle":"2023-08-05T00:07:26.930985Z","shell.execute_reply":"2023-08-05T00:07:26.929671Z"},"papermill":{"duration":2.203646,"end_time":"2023-08-05T00:07:26.933552","exception":false,"start_time":"2023-08-05T00:07:24.729906","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["['Nordic walking', 'ascending stairs', 'cycling', 'descending stairs', 'ironing', 'lying', 'rope jumping', 'running', 'sitting', 'standing', 'transient activities', 'vacuum cleaning', 'walking']\n","Counter({10: 927575, 12: 238761, 4: 238690, 5: 192523, 9: 189931, 0: 188107, 8: 185188, 11: 175353, 2: 164600, 1: 117216, 3: 104944, 7: 98199, 6: 42969})\n","Class=10, n=927575 (32.387%)\n","Class=5, n=192523 (6.722%)\n","Class=8, n=185188 (6.466%)\n","Class=9, n=189931 (6.632%)\n","Class=4, n=238690 (8.334%)\n","Class=11, n=175353 (6.123%)\n","Class=1, n=117216 (4.093%)\n","Class=3, n=104944 (3.664%)\n","Class=12, n=238761 (8.336%)\n","Class=0, n=188107 (6.568%)\n","Class=2, n=164600 (5.747%)\n","Class=7, n=98199 (3.429%)\n","Class=6, n=42969 (1.500%)\n"]},{"data":{"image/png":"","text/plain":["<Figure size 640x480 with 1 
Axes>"]},"metadata":{},"output_type":"display_data"}],"source":["from collections import Counter\n","from sklearn.preprocessing import LabelEncoder\n","from matplotlib import pyplot\n","\n","le = LabelEncoder()\n","y_temp = le.fit_transform(y)\n","print(list(le.classes_))\n","counter = Counter(y_temp)\n","\n","print(counter)\n","for k,v in counter.items():\n"," per = v / len(y_temp) * 100\n"," print('Class=%d, n=%d (%.3f%%)' % (k, v, per))\n","\n","# plot the distribution\n","pyplot.bar(counter.keys(), counter.values())\n","pyplot.show()"]},{"cell_type":"markdown","id":"2d62ec16","metadata":{"papermill":{"duration":0.009288,"end_time":"2023-08-05T00:07:26.952484","exception":false,"start_time":"2023-08-05T00:07:26.943196","status":"completed"},"tags":[]},"source":["## Method 1: Using Random Forest Classifier "]},{"cell_type":"markdown","id":"ec9d9fce","metadata":{"papermill":{"duration":0.009112,"end_time":"2023-08-05T00:07:26.971127","exception":false,"start_time":"2023-08-05T00:07:26.962015","status":"completed"},"tags":[]},"source":["## Handling missing values"]},{"cell_type":"code","execution_count":5,"id":"68d72f52","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:26.992642Z","iopub.status.busy":"2023-08-05T00:07:26.992214Z","iopub.status.idle":"2023-08-05T00:07:29.167243Z","shell.execute_reply":"2023-08-05T00:07:29.166002Z"},"papermill":{"duration":2.188981,"end_time":"2023-08-05T00:07:29.169898","exception":false,"start_time":"2023-08-05T00:07:26.980917","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.impute import SimpleImputer\n","imputer = SimpleImputer(missing_values=np.nan, strategy='mean')\n","imputer.fit(X)\n","X = imputer.transform(X)"]},{"cell_type":"markdown","id":"b99c6d79","metadata":{"papermill":{"duration":0.009124,"end_time":"2023-08-05T00:07:29.188693","exception":false,"start_time":"2023-08-05T00:07:29.179569","status":"completed"},"tags":[]},"source":["## 
Encoding"]},{"cell_type":"code","execution_count":6,"id":"57a332ea","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:29.209213Z","iopub.status.busy":"2023-08-05T00:07:29.20881Z","iopub.status.idle":"2023-08-05T00:07:29.792137Z","shell.execute_reply":"2023-08-05T00:07:29.791044Z"},"papermill":{"duration":0.596644,"end_time":"2023-08-05T00:07:29.794779","exception":false,"start_time":"2023-08-05T00:07:29.198135","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.preprocessing import LabelEncoder\n","le = LabelEncoder()\n","y = le.fit_transform(y)"]},{"cell_type":"code","execution_count":7,"id":"974612a4","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:29.81627Z","iopub.status.busy":"2023-08-05T00:07:29.815829Z","iopub.status.idle":"2023-08-05T00:07:31.145986Z","shell.execute_reply":"2023-08-05T00:07:31.144799Z"},"papermill":{"duration":1.343317,"end_time":"2023-08-05T00:07:31.148501","exception":false,"start_time":"2023-08-05T00:07:29.805184","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.model_selection import train_test_split\n","X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=0)"]},{"cell_type":"markdown","id":"8edfbb5a","metadata":{"papermill":{"duration":0.008929,"end_time":"2023-08-05T00:07:31.166893","exception":false,"start_time":"2023-08-05T00:07:31.157964","status":"completed"},"tags":[]},"source":["## Feature Scaling"]},{"cell_type":"code","execution_count":8,"id":"ba643853","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:31.187287Z","iopub.status.busy":"2023-08-05T00:07:31.186875Z","iopub.status.idle":"2023-08-05T00:07:32.475029Z","shell.execute_reply":"2023-08-05T00:07:32.474185Z"},"papermill":{"duration":1.301347,"end_time":"2023-08-05T00:07:32.477471","exception":false,"start_time":"2023-08-05T00:07:31.176124","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.preprocessing import StandardScaler\n","sc = 
StandardScaler()\n","X_train = sc.fit_transform(X_train)\n","X_test = sc.transform(X_test)"]},{"cell_type":"markdown","id":"14ac9a68","metadata":{"papermill":{"duration":0.009663,"end_time":"2023-08-05T00:07:32.496576","exception":false,"start_time":"2023-08-05T00:07:32.486913","status":"completed"},"tags":[]},"source":["## Training RandomForestClassifier"]},{"cell_type":"code","execution_count":9,"id":"1dfae9c0","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:32.517561Z","iopub.status.busy":"2023-08-05T00:07:32.517142Z","iopub.status.idle":"2023-08-05T00:15:35.089903Z","shell.execute_reply":"2023-08-05T00:15:35.088663Z"},"papermill":{"duration":482.594606,"end_time":"2023-08-05T00:15:35.101103","exception":false,"start_time":"2023-08-05T00:07:32.506497","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: 
\"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 
50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>RandomForestClassifier(criterion=&#x27;entropy&#x27;, n_estimators=10, random_state=0)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. 
<br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">RandomForestClassifier</label><div class=\"sk-toggleable__content\"><pre>RandomForestClassifier(criterion=&#x27;entropy&#x27;, n_estimators=10, random_state=0)</pre></div></div></div></div></div>"],"text/plain":["RandomForestClassifier(criterion='entropy', n_estimators=10, random_state=0)"]},"execution_count":9,"metadata":{},"output_type":"execute_result"}],"source":["from sklearn.ensemble import RandomForestClassifier\n","classifier = RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0)\n","classifier.fit(X_train, y_train)"]},{"cell_type":"markdown","id":"a6207e57","metadata":{"papermill":{"duration":0.009341,"end_time":"2023-08-05T00:15:35.120003","exception":false,"start_time":"2023-08-05T00:15:35.110662","status":"completed"},"tags":[]},"source":["## Predicting Results"]},{"cell_type":"code","execution_count":10,"id":"a963263d","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:35.140981Z","iopub.status.busy":"2023-08-05T00:15:35.140561Z","iopub.status.idle":"2023-08-05T00:15:37.22477Z","shell.execute_reply":"2023-08-05T00:15:37.223543Z"},"papermill":{"duration":2.097825,"end_time":"2023-08-05T00:15:37.227329","exception":false,"start_time":"2023-08-05T00:15:35.129504","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[ 4 4]\n"," [10 10]\n"," [ 8 8]\n"," ...\n"," [ 7 7]\n"," [ 8 8]\n"," [ 3 3]]\n"]}],"source":["y_pred = classifier.predict(X_test)\n","print(np.concatenate((y_pred.reshape(len(y_pred),1), 
y_test.reshape(len(y_test),1)),1))"]},{"cell_type":"markdown","id":"7846df5c","metadata":{"papermill":{"duration":0.009748,"end_time":"2023-08-05T00:15:37.247066","exception":false,"start_time":"2023-08-05T00:15:37.237318","status":"completed"},"tags":[]},"source":["## Calculating Accuracy"]},{"cell_type":"code","execution_count":11,"id":"ed6e3e8a","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:37.268284Z","iopub.status.busy":"2023-08-05T00:15:37.267859Z","iopub.status.idle":"2023-08-05T00:15:37.436445Z","shell.execute_reply":"2023-08-05T00:15:37.435065Z"},"papermill":{"duration":0.182461,"end_time":"2023-08-05T00:15:37.439113","exception":false,"start_time":"2023-08-05T00:15:37.256652","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[ 37634 0 0 0 0 0 0 0 0 0\n"," 8 0 0]\n"," [ 0 23179 0 23 0 0 0 0 0 0\n"," 81 0 0]\n"," [ 0 0 32839 0 0 0 0 0 0 0\n"," 21 0 0]\n"," [ 0 14 0 20904 0 0 0 0 0 0\n"," 43 0 0]\n"," [ 0 0 0 0 47928 0 0 0 0 14\n"," 6 0 0]\n"," [ 0 0 0 0 0 38287 0 0 3 0\n"," 17 0 0]\n"," [ 0 0 1 0 0 0 8591 2 0 0\n"," 5 0 0]\n"," [ 0 0 0 0 0 0 0 19492 0 0\n"," 8 1 0]\n"," [ 0 0 0 0 0 0 0 0 36812 2\n"," 32 0 0]\n"," [ 0 0 0 0 22 0 0 0 4 38082\n"," 57 0 0]\n"," [ 28 61 12 57 25 32 8 24 31 22\n"," 185589 24 20]\n"," [ 0 0 0 0 0 0 0 0 0 0\n"," 41 35025 0]\n"," [ 1 0 0 0 0 0 0 0 0 0\n"," 41 0 47659]]\n"]},{"data":{"text/plain":["0.9986190931754223"]},"execution_count":11,"metadata":{},"output_type":"execute_result"}],"source":["from sklearn.metrics import confusion_matrix, accuracy_score\n","cm = confusion_matrix(y_test, y_pred)\n","print(cm)\n","accuracy_score(y_test, 
y_pred)"]},{"cell_type":"code","execution_count":12,"id":"e609b003","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:37.462377Z","iopub.status.busy":"2023-08-05T00:15:37.461702Z","iopub.status.idle":"2023-08-05T00:15:39.654083Z","shell.execute_reply":"2023-08-05T00:15:39.652958Z"},"papermill":{"duration":2.206834,"end_time":"2023-08-05T00:15:39.656344","exception":false,"start_time":"2023-08-05T00:15:37.44951","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":[" <script type=\"text/javascript\">\n"," window.PlotlyConfig = {MathJaxConfig: 'local'};\n"," if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n"," if (typeof require !== 'undefined') {\n"," require.undef(\"plotly\");\n"," requirejs.config({\n"," paths: {\n"," 'plotly': ['https://cdn.plot.ly/plotly-2.24.1.min']\n"," }\n"," });\n"," require(['plotly'], function(Plotly) {\n"," window._Plotly = Plotly;\n"," });\n"," }\n"," </script>\n"," "]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["<div> <div id=\"be7930db-6c4e-497b-b832-416da626298c\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"be7930db-6c4e-497b-b832-416da626298c\")) { Plotly.newPlot( \"be7930db-6c4e-497b-b832-416da626298c\", 
[{\"coloraxis\":\"coloraxis\",\"name\":\"0\",\"texttemplate\":\"%{z}\",\"z\":[[37634,0,0,0,0,0,0,0,0,0,8,0,0],[0,23179,0,23,0,0,0,0,0,0,81,0,0],[0,0,32839,0,0,0,0,0,0,0,21,0,0],[0,14,0,20904,0,0,0,0,0,0,43,0,0],[0,0,0,0,47928,0,0,0,0,14,6,0,0],[0,0,0,0,0,38287,0,0,3,0,17,0,0],[0,0,1,0,0,0,8591,2,0,0,5,0,0],[0,0,0,0,0,0,0,19492,0,0,8,1,0],[0,0,0,0,0,0,0,0,36812,2,32,0,0],[0,0,0,0,22,0,0,0,4,38082,57,0,0],[28,61,12,57,25,32,8,24,31,22,185589,24,20],[0,0,0,0,0,0,0,0,0,0,41,35025,0],[1,0,0,0,0,0,0,0,0,0,41,0,47659]],\"type\":\"heatmap\",\"xaxis\":\"x\",\"yaxis\":\"y\",\"hovertemplate\":\"x: %{x}\\u003cbr\\u003ey: %{y}\\u003cbr\\u003ecolor: %{z}\\u003cextra\\u003e\\u003c\\u002fextra\\u003e\"}], {\"template\":{\"data\":{\"histogram2dcontour\":[{\"type\":\"histogram2dcontour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"choropleth\":[{\"type\":\"choropleth\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"histogram2d\":[{\"type\":\"histogram2d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmap\":[{\"type\":\"heatmap\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"]
,[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmapgl\":[{\"type\":\"heatmapgl\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"contourcarpet\":[{\"type\":\"contourcarpet\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"contour\":[{\"type\":\"contour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"surface\":[{\"type\":\"surface\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"mesh3d\":[{\"type\":\"mesh3d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"scatter\":[{\"fillpattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2},\"type\":\"scatter\"}],\"parcoords\":[{\"type\":\"parcoords\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolargl\":[{\"type\":\"scatterpolargl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"bar\":[{\"error_x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}
],\"scattergeo\":[{\"type\":\"scattergeo\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolar\":[{\"type\":\"scatterpolar\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"histogram\":[{\"marker\":{\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"histogram\"}],\"scattergl\":[{\"type\":\"scattergl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatter3d\":[{\"type\":\"scatter3d\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattermapbox\":[{\"type\":\"scattermapbox\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterternary\":[{\"type\":\"scatterternary\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattercarpet\":[{\"type\":\"scattercarpet\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"table\":[{\"cells\":{\"fill\":{\"color\":\"#EBF0F8\"},\"line\":{\"color\":\"white\"}},\"header\":{\"fill\":{\"color\":\"#C8D4E3\"},\"line\":{\"color\":\"white\"}},\"type\":\"table\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"pie\":[{\"automargin\":true,\"type\":\"pie\"}]},\"layout\":{\"autotypenumbers\":\"strict\",\"colorway\":[\"#636efa\",\"#EF553B\",\"#00cc96\",\"#ab63fa\",\"#FFA15A\",\"#19d3f3\",\"#FF6692\",\"#B6E880\",\"#FF97FF\",\"#FECB52\"],\"font\":{\"color\":\"#2a3f5f\"},\"hovermode\":\"closest\",\"hoverlabel\":{\"align\":\"left\"},\"paper_bgcolor\":\"white\",\"plot_bgcolor\":\"#E5EC
F6\",\"polar\":{\"bgcolor\":\"#E5ECF6\",\"angularaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"radialaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"ternary\":{\"bgcolor\":\"#E5ECF6\",\"aaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"baxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"caxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"coloraxis\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"colorscale\":{\"sequential\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"sequentialminus\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"diverging\":[[0,\"#8e0152\"],[0.1,\"#c51b7d\"],[0.2,\"#de77ae\"],[0.3,\"#f1b6da\"],[0.4,\"#fde0ef\"],[0.5,\"#f7f7f7\"],[0.6,\"#e6f5d0\"],[0.7,\"#b8e186\"],[0.8,\"#7fbc41\"],[0.9,\"#4d9221\"],[1,\"#276419\"]]},\"xaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"yaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"scene\":{\"xaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"yaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecol
or\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"zaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2}},\"shapedefaults\":{\"line\":{\"color\":\"#2a3f5f\"}},\"annotationdefaults\":{\"arrowcolor\":\"#2a3f5f\",\"arrowhead\":0,\"arrowwidth\":1},\"geo\":{\"bgcolor\":\"white\",\"landcolor\":\"#E5ECF6\",\"subunitcolor\":\"white\",\"showland\":true,\"showlakes\":true,\"lakecolor\":\"white\"},\"title\":{\"x\":0.05},\"mapbox\":{\"style\":\"light\"}}},\"xaxis\":{\"anchor\":\"y\",\"domain\":[0.0,1.0]},\"yaxis\":{\"anchor\":\"x\",\"domain\":[0.0,1.0],\"autorange\":\"reversed\"},\"coloraxis\":{\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]},\"margin\":{\"t\":60}}, {\"responsive\": true} ).then(function(){\n"," \n","var gd = document.getElementById('be7930db-6c4e-497b-b832-416da626298c');\n","var x = new MutationObserver(function (mutations, observer) {{\n"," var display = window.getComputedStyle(gd).display;\n"," if (!display || display === 'none') {{\n"," console.log([gd, 'removed!']);\n"," Plotly.purge(gd);\n"," observer.disconnect();\n"," }}\n","}});\n","\n","// Listen for the removal of the full notebook cells\n","var notebookContainer = gd.closest('#notebook-container');\n","if (notebookContainer) {{\n"," x.observe(notebookContainer, {childList: true});\n","}}\n","\n","// Listen for the clearing of the current output cell\n","var outputEl = gd.closest('.output');\n","if (outputEl) {{\n"," x.observe(outputEl, {childList: true});\n","}}\n","\n"," }) }; }); </script> </div>"]},"metadata":{},"output_type":"display_data"}],"source":["# import 
matplotlib.pyplot as plt\n","# from sklearn.metrics import ConfusionMatrixDisplay\n","# disp = ConfusionMatrixDisplay(confusion_matrix=cm,\n","# display_labels=classifier.classes_)\n","# disp.plot()\n","# plt.show()\n","import plotly.express as px\n","fig = px.imshow(cm, text_auto=True, aspect=\"auto\")\n","fig.show()"]},{"cell_type":"markdown","id":"6b68fad1","metadata":{"papermill":{"duration":0.010207,"end_time":"2023-08-05T00:15:39.677156","exception":false,"start_time":"2023-08-05T00:15:39.666949","status":"completed"},"tags":[]},"source":["## Method 2: KNN"]},{"cell_type":"code","execution_count":13,"id":"219416d1","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:39.699914Z","iopub.status.busy":"2023-08-05T00:15:39.699066Z","iopub.status.idle":"2023-08-05T00:15:40.015603Z","shell.execute_reply":"2023-08-05T00:15:40.014559Z"},"papermill":{"duration":0.330691,"end_time":"2023-08-05T00:15:40.0181","exception":false,"start_time":"2023-08-05T00:15:39.687409","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<style>#sk-container-id-2 {color: black;background-color: white;}#sk-container-id-2 pre{padding: 0;}#sk-container-id-2 div.sk-toggleable {background-color: white;}#sk-container-id-2 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-2 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-2 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-2 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-2 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-2 
input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-2 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-2 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-2 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-2 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-2 div.sk-item {position: relative;z-index: 1;}#sk-container-id-2 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-2 div.sk-item::before, #sk-container-id-2 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-2 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: 
relative;background-color: white;}#sk-container-id-2 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-2 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-2 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-2 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-2 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-2 div.sk-label-container {text-align: center;}#sk-container-id-2 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-2 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>KNeighborsClassifier()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. 
<br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" checked><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">KNeighborsClassifier</label><div class=\"sk-toggleable__content\"><pre>KNeighborsClassifier()</pre></div></div></div></div></div>"],"text/plain":["KNeighborsClassifier()"]},"execution_count":13,"metadata":{},"output_type":"execute_result"}],"source":["from sklearn.neighbors import KNeighborsClassifier\n","knn_clf = KNeighborsClassifier()\n","knn_clf.fit(X_train,y_train)"]},{"cell_type":"code","execution_count":14,"id":"fe6052b4","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:40.041026Z","iopub.status.busy":"2023-08-05T00:15:40.040612Z","iopub.status.idle":"2023-08-05T01:09:53.645091Z","shell.execute_reply":"2023-08-05T01:09:53.643871Z"},"papermill":{"duration":3253.619328,"end_time":"2023-08-05T01:09:53.648066","exception":false,"start_time":"2023-08-05T00:15:40.028738","status":"completed"},"tags":[]},"outputs":[],"source":["y_pred=knn_clf.predict(X_test)"]},{"cell_type":"code","execution_count":15,"id":"80d0ea82","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:09:53.674186Z","iopub.status.busy":"2023-08-05T01:09:53.673791Z","iopub.status.idle":"2023-08-05T01:09:53.682771Z","shell.execute_reply":"2023-08-05T01:09:53.681553Z"},"papermill":{"duration":0.023497,"end_time":"2023-08-05T01:09:53.685168","exception":false,"start_time":"2023-08-05T01:09:53.661671","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[ 4 4]\n"," [10 10]\n"," [ 8 8]\n"," ...\n"," [ 7 7]\n"," [ 8 8]\n"," [ 3 3]]\n"]}],"source":["print(np.concatenate((y_pred.reshape(len(y_pred),1), 
y_test.reshape(len(y_test),1)),1))"]},{"cell_type":"markdown","id":"30642482","metadata":{"papermill":{"duration":0.01112,"end_time":"2023-08-05T01:09:53.711764","exception":false,"start_time":"2023-08-05T01:09:53.700644","status":"completed"},"tags":[]},"source":["## Method 3: Using ANN"]},{"cell_type":"code","execution_count":16,"id":"9fe82be2","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:09:53.739787Z","iopub.status.busy":"2023-08-05T01:09:53.738634Z","iopub.status.idle":"2023-08-05T01:09:55.132546Z","shell.execute_reply":"2023-08-05T01:09:55.131479Z"},"papermill":{"duration":1.410752,"end_time":"2023-08-05T01:09:55.135242","exception":false,"start_time":"2023-08-05T01:09:53.72449","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.model_selection import train_test_split\n","X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=0)"]},{"cell_type":"markdown","id":"571dcd4c","metadata":{"papermill":{"duration":0.010439,"end_time":"2023-08-05T01:09:55.156709","exception":false,"start_time":"2023-08-05T01:09:55.14627","status":"completed"},"tags":[]},"source":["## Oversampling the minority classes"]},{"cell_type":"code","execution_count":17,"id":"32271f2b","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:09:55.181016Z","iopub.status.busy":"2023-08-05T01:09:55.180057Z","iopub.status.idle":"2023-08-05T01:19:23.222784Z","shell.execute_reply":"2023-08-05T01:19:23.221544Z"},"papermill":{"duration":568.057647,"end_time":"2023-08-05T01:19:23.225424","exception":false,"start_time":"2023-08-05T01:09:55.167777","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["Class=4, n=741642 (7.692%)\n","Class=12, n=741642 (7.692%)\n","Class=10, n=741642 (7.692%)\n","Class=2, n=741642 (7.692%)\n","Class=9, n=741642 (7.692%)\n","Class=8, n=741642 (7.692%)\n","Class=0, n=741642 (7.692%)\n","Class=5, n=741642 (7.692%)\n","Class=1, n=741642 (7.692%)\n","Class=11, 
n=741642 (7.692%)\n","Class=6, n=741642 (7.692%)\n","Class=3, n=741642 (7.692%)\n","Class=7, n=741642 (7.692%)\n"]},{"data":{"image/png":"","text/plain":["<Figure size 640x480 with 1 Axes>"]},"metadata":{},"output_type":"display_data"}],"source":["from imblearn.over_sampling import SMOTE\n","oversample = SMOTE()\n","y_train = le.fit_transform(y_train)\n","X_train, y_train = oversample.fit_resample(X_train, y_train)\n","# summarize distribution\n","counter = Counter(y_train)\n","for k,v in counter.items():\n"," per = v / len(y_train) * 100\n"," print('Class=%d, n=%d (%.3f%%)' % (k, v, per))\n","# plot the distribution\n","pyplot.bar(counter.keys(), counter.values())\n","pyplot.show()"]},{"cell_type":"code","execution_count":18,"id":"aa3846f3","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:23.252818Z","iopub.status.busy":"2023-08-05T01:19:23.251956Z","iopub.status.idle":"2023-08-05T01:19:23.740771Z","shell.execute_reply":"2023-08-05T01:19:23.73973Z"},"papermill":{"duration":0.505699,"end_time":"2023-08-05T01:19:23.743348","exception":false,"start_time":"2023-08-05T01:19:23.237649","status":"completed"},"tags":[]},"outputs":[],"source":["from tensorflow.keras.utils import to_categorical\n","y_train = to_categorical(y_train)"]},{"cell_type":"code","execution_count":19,"id":"0a0017d2","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:23.767819Z","iopub.status.busy":"2023-08-05T01:19:23.767429Z","iopub.status.idle":"2023-08-05T01:19:29.065742Z","shell.execute_reply":"2023-08-05T01:19:29.064536Z"},"papermill":{"duration":5.313708,"end_time":"2023-08-05T01:19:29.068475","exception":false,"start_time":"2023-08-05T01:19:23.754767","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.preprocessing import StandardScaler\n","sc = StandardScaler()\n","X_train = sc.fit_transform(X_train)\n","X_test = 
sc.transform(X_test)"]},{"cell_type":"code","execution_count":20,"id":"c344eb49","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:29.092749Z","iopub.status.busy":"2023-08-05T01:19:29.092331Z","iopub.status.idle":"2023-08-05T01:19:29.209741Z","shell.execute_reply":"2023-08-05T01:19:29.20847Z"},"papermill":{"duration":0.132568,"end_time":"2023-08-05T01:19:29.21231","exception":false,"start_time":"2023-08-05T01:19:29.079742","status":"completed"},"tags":[]},"outputs":[],"source":["ann = tf.keras.models.Sequential()\n","ann.add(tf.keras.layers.Dense(units=26, activation='tanh'))\n","ann.add(tf.keras.layers.Dense(units=52, activation='tanh'))\n","ann.add(tf.keras.layers.Dense(units=52, activation='tanh'))\n","ann.add(tf.keras.layers.Dense(units=13, activation='softmax'))"]},{"cell_type":"code","execution_count":21,"id":"c1441ca6","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:29.236225Z","iopub.status.busy":"2023-08-05T01:19:29.23586Z","iopub.status.idle":"2023-08-05T01:19:29.268014Z","shell.execute_reply":"2023-08-05T01:19:29.266637Z"},"papermill":{"duration":0.046985,"end_time":"2023-08-05T01:19:29.270549","exception":false,"start_time":"2023-08-05T01:19:29.223564","status":"completed"},"tags":[]},"outputs":[],"source":["ann.compile(optimizer='adam' , loss='CategoricalCrossentropy' , metrics=['accuracy'])"]},{"cell_type":"code","execution_count":22,"id":"82e960ec","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:29.294423Z","iopub.status.busy":"2023-08-05T01:19:29.294016Z","iopub.status.idle":"2023-08-05T01:43:40.990731Z","shell.execute_reply":"2023-08-05T01:43:40.989437Z"},"papermill":{"duration":1451.711594,"end_time":"2023-08-05T01:43:40.99336","exception":false,"start_time":"2023-08-05T01:19:29.281766","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["Epoch 1/10\n","75324/75324 [==============================] - 145s 2ms/step - loss: 0.1724 - accuracy: 0.9415\n","Epoch 
2/10\n","75324/75324 [==============================] - 142s 2ms/step - loss: 0.0763 - accuracy: 0.9731\n","Epoch 3/10\n","75324/75324 [==============================] - 144s 2ms/step - loss: 0.0601 - accuracy: 0.9788\n","Epoch 4/10\n","75324/75324 [==============================] - 151s 2ms/step - loss: 0.0527 - accuracy: 0.9815\n","Epoch 5/10\n","75324/75324 [==============================] - 142s 2ms/step - loss: 0.0480 - accuracy: 0.9831\n","Epoch 6/10\n","75324/75324 [==============================] - 143s 2ms/step - loss: 0.0452 - accuracy: 0.9840\n","Epoch 7/10\n","75324/75324 [==============================] - 146s 2ms/step - loss: 0.0432 - accuracy: 0.9848\n","Epoch 8/10\n","75324/75324 [==============================] - 141s 2ms/step - loss: 0.0417 - accuracy: 0.9854\n","Epoch 9/10\n","75324/75324 [==============================] - 148s 2ms/step - loss: 0.0405 - accuracy: 0.9857\n","Epoch 10/10\n","75324/75324 [==============================] - 146s 2ms/step - loss: 0.0397 - accuracy: 0.9860\n"]}],"source":["history = ann.fit(X_train, y_train, batch_size=128, epochs=10)"]},{"cell_type":"code","execution_count":23,"id":"c66f3d9a","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:43:44.865958Z","iopub.status.busy":"2023-08-05T01:43:44.86558Z","iopub.status.idle":"2023-08-05T01:44:26.055791Z","shell.execute_reply":"2023-08-05T01:44:26.054616Z"},"papermill":{"duration":43.125175,"end_time":"2023-08-05T01:44:26.058358","exception":false,"start_time":"2023-08-05T01:43:42.933183","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["17901/17901 [==============================] - 22s 1ms/step\n"]}],"source":["y_pred = 
ann.predict(X_test)"]},{"cell_type":"code","execution_count":24,"id":"ca3d6088","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:29.962501Z","iopub.status.busy":"2023-08-05T01:44:29.962091Z","iopub.status.idle":"2023-08-05T01:44:29.991499Z","shell.execute_reply":"2023-08-05T01:44:29.990294Z"},"papermill":{"duration":1.967301,"end_time":"2023-08-05T01:44:29.994142","exception":false,"start_time":"2023-08-05T01:44:28.026841","status":"completed"},"tags":[]},"outputs":[],"source":["result = np.argmax(y_pred, axis=-1)"]},{"cell_type":"code","execution_count":25,"id":"4211d197","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:33.86521Z","iopub.status.busy":"2023-08-05T01:44:33.864484Z","iopub.status.idle":"2023-08-05T01:44:33.869618Z","shell.execute_reply":"2023-08-05T01:44:33.868861Z"},"papermill":{"duration":1.942499,"end_time":"2023-08-05T01:44:33.871763","exception":false,"start_time":"2023-08-05T01:44:31.929264","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[ 4 10 8 ... 
7 8 3]\n"]}],"source":["print(result)"]},{"cell_type":"code","execution_count":26,"id":"e36e98e0","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:37.612404Z","iopub.status.busy":"2023-08-05T01:44:37.61165Z","iopub.status.idle":"2023-08-05T01:44:37.616428Z","shell.execute_reply":"2023-08-05T01:44:37.615631Z"},"papermill":{"duration":1.837241,"end_time":"2023-08-05T01:44:37.618458","exception":false,"start_time":"2023-08-05T01:44:35.781217","status":"completed"},"tags":[]},"outputs":[],"source":["y_test_rs = np.argmax(y_test, axis=-1)"]},{"cell_type":"code","execution_count":27,"id":"49227522","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:41.477349Z","iopub.status.busy":"2023-08-05T01:44:41.476614Z","iopub.status.idle":"2023-08-05T01:44:41.481313Z","shell.execute_reply":"2023-08-05T01:44:41.48054Z"},"papermill":{"duration":1.939073,"end_time":"2023-08-05T01:44:41.484075","exception":false,"start_time":"2023-08-05T01:44:39.545002","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["9\n"]}],"source":["print(y_test_rs)"]},{"cell_type":"code","execution_count":28,"id":"72b1f910","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:45.446971Z","iopub.status.busy":"2023-08-05T01:44:45.446273Z","iopub.status.idle":"2023-08-05T01:44:45.569371Z","shell.execute_reply":"2023-08-05T01:44:45.568125Z"},"papermill":{"duration":2.161727,"end_time":"2023-08-05T01:44:45.571926","exception":false,"start_time":"2023-08-05T01:44:43.410199","status":"completed"},"tags":[]},"outputs":[],"source":["y_pred_enc = pd.get_dummies(result)\n","y_test_enc = 
pd.get_dummies(y_test)"]},{"cell_type":"code","execution_count":29,"id":"6b54f780","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:49.311088Z","iopub.status.busy":"2023-08-05T01:44:49.310688Z","iopub.status.idle":"2023-08-05T01:44:49.698079Z","shell.execute_reply":"2023-08-05T01:44:49.696602Z"},"papermill":{"duration":2.293735,"end_time":"2023-08-05T01:44:49.700483","exception":false,"start_time":"2023-08-05T01:44:47.406748","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["0.9624047680565351\n"]}],"source":["from sklearn.metrics import accuracy_score\n","print(accuracy_score(y_test_enc, y_pred_enc))"]}],"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.12"},"papermill":{"default_parameters":{},"duration":5890.466662,"end_time":"2023-08-05T01:44:54.111787","environment_variables":{},"exception":null,"input_path":"__notebook__.ipynb","output_path":"__notebook__.ipynb","parameters":{},"start_time":"2023-08-05T00:06:43.645125","version":"2.4.0"}},"nbformat":4,"nbformat_minor":5} No newline at end of file
Copy link
Collaborator

@coderpotter coderpotter Aug 6, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Line #3.    from matplotlib import pyplot

Handle imports in one of these 2 ways:

  1. All at the top in the 1st cell (alphabetically sorted)
  2. All the ones relevant for a section at the top of each section (alphabetically sorted)

The convention is to write `import matplotlib.pyplot as plt`.


Reply via ReviewNB

@@ -0,0 +1 @@
{"cells":[{"source":"<a href=\"https://www.kaggle.com/code/manvinagdev/physical-activity-prediction?scriptVersionId=138941879\" target=\"_blank\"><img align=\"left\" alt=\"Kaggle\" title=\"Open in Kaggle\" src=\"https://kaggle.com/static/images/open-in-kaggle.svg\"></a>","metadata":{},"cell_type":"markdown"},{"cell_type":"markdown","id":"fea86c76","metadata":{"papermill":{"duration":0.008893,"end_time":"2023-08-05T00:06:53.536089","exception":false,"start_time":"2023-08-05T00:06:53.527196","status":"completed"},"tags":[]},"source":["## Importing libraries"]},{"cell_type":"code","execution_count":1,"id":"89d8bcdb","metadata":{"_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","execution":{"iopub.execute_input":"2023-08-05T00:06:53.554723Z","iopub.status.busy":"2023-08-05T00:06:53.554321Z","iopub.status.idle":"2023-08-05T00:07:02.412402Z","shell.execute_reply":"2023-08-05T00:07:02.410962Z"},"papermill":{"duration":8.870272,"end_time":"2023-08-05T00:07:02.414899","exception":false,"start_time":"2023-08-05T00:06:53.544627","status":"completed"},"tags":[]},"outputs":[{"name":"stderr","output_type":"stream","text":["/opt/conda/lib/python3.10/site-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.23.5\n"," warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n","/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/__init__.py:98: UserWarning: unable to load libtensorflow_io_plugins.so: unable to open file: libtensorflow_io_plugins.so, from paths: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so']\n","caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so: undefined symbol: _ZN3tsl6StatusC1EN10tensorflow5error4CodeESt17basic_string_viewIcSt11char_traitsIcEENS_14SourceLocationE']\n"," 
warnings.warn(f\"unable to load libtensorflow_io_plugins.so: {e}\")\n","/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/__init__.py:104: UserWarning: file system plugins are not loaded: unable to open file: libtensorflow_io.so, from paths: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so']\n","caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so: undefined symbol: _ZTVN10tensorflow13GcsFileSystemE']\n"," warnings.warn(f\"file system plugins are not loaded: {e}\")\n"]},{"name":"stdout","output_type":"stream","text":["/kaggle/input/fisical-activity-dataset/dataset2.csv\n"]}],"source":["import numpy as np\n","import pandas as pd\n","import tensorflow as tf\n","\n","import os\n","for dirname, _, filenames in os.walk('/kaggle/input'):\n"," for filename in filenames:\n"," print(os.path.join(dirname, filename))"]},{"cell_type":"markdown","id":"e1dee26f","metadata":{"papermill":{"duration":0.008233,"end_time":"2023-08-05T00:07:02.431643","exception":false,"start_time":"2023-08-05T00:07:02.42341","status":"completed"},"tags":[]},"source":["## Loading the dataset"]},{"cell_type":"code","execution_count":2,"id":"ce6fe559","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:02.452779Z","iopub.status.busy":"2023-08-05T00:07:02.451213Z","iopub.status.idle":"2023-08-05T00:07:24.443285Z","shell.execute_reply":"2023-08-05T00:07:24.442116Z"},"papermill":{"duration":22.004325,"end_time":"2023-08-05T00:07:24.445861","exception":false,"start_time":"2023-08-05T00:07:02.441536","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: 
right;\">\n"," <th></th>\n"," <th>activityID</th>\n"," <th>heart_rate</th>\n"," <th>hand temperature (°C)</th>\n"," <th>hand acceleration X ±16g</th>\n"," <th>hand acceleration Y ±16g</th>\n"," <th>hand acceleration Z ±16g</th>\n"," <th>hand gyroscope X</th>\n"," <th>hand gyroscope Y</th>\n"," <th>hand gyroscope Z</th>\n"," <th>hand magnetometer X</th>\n"," <th>...</th>\n"," <th>ankle acceleration X ±16g</th>\n"," <th>ankle acceleration Y ±16g</th>\n"," <th>ankle acceleration Z ±16g</th>\n"," <th>ankle gyroscope X</th>\n"," <th>ankle gyroscope Y</th>\n"," <th>ankle gyroscope Z</th>\n"," <th>ankle magnetometer X</th>\n"," <th>ankle magnetometer Y</th>\n"," <th>ankle magnetometer Z</th>\n"," <th>PeopleId</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.37223</td>\n"," <td>8.60074</td>\n"," <td>3.51048</td>\n"," <td>-0.092217</td>\n"," <td>0.056812</td>\n"," <td>-0.015845</td>\n"," <td>14.6806</td>\n"," <td>...</td>\n"," <td>9.65918</td>\n"," <td>-1.65569</td>\n"," <td>-0.099797</td>\n"," <td>0.008300</td>\n"," <td>0.009250</td>\n"," <td>-0.017580</td>\n"," <td>-61.1888</td>\n"," <td>-38.9599</td>\n"," <td>-58.1438</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.18837</td>\n"," <td>8.56560</td>\n"," <td>3.66179</td>\n"," <td>-0.024413</td>\n"," <td>0.047759</td>\n"," <td>0.006474</td>\n"," <td>14.8991</td>\n"," <td>...</td>\n"," <td>9.69370</td>\n"," <td>-1.57902</td>\n"," <td>-0.215687</td>\n"," <td>-0.006577</td>\n"," <td>-0.004638</td>\n"," <td>0.000368</td>\n"," <td>-59.8479</td>\n"," <td>-38.8919</td>\n"," <td>-58.5253</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.37357</td>\n"," <td>8.60107</td>\n"," <td>3.54898</td>\n"," <td>-0.057976</td>\n"," <td>0.032574</td>\n"," 
<td>-0.006988</td>\n"," <td>14.2420</td>\n"," <td>...</td>\n"," <td>9.58944</td>\n"," <td>-1.73276</td>\n"," <td>0.092914</td>\n"," <td>0.003014</td>\n"," <td>0.000148</td>\n"," <td>0.022495</td>\n"," <td>-60.7361</td>\n"," <td>-39.4138</td>\n"," <td>-58.3999</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.07473</td>\n"," <td>8.52853</td>\n"," <td>3.66021</td>\n"," <td>-0.002352</td>\n"," <td>0.032810</td>\n"," <td>-0.003747</td>\n"," <td>14.8908</td>\n"," <td>...</td>\n"," <td>9.58814</td>\n"," <td>-1.77040</td>\n"," <td>0.054545</td>\n"," <td>0.003175</td>\n"," <td>-0.020301</td>\n"," <td>0.011275</td>\n"," <td>-60.4091</td>\n"," <td>-38.7635</td>\n"," <td>-58.3956</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.22936</td>\n"," <td>8.83122</td>\n"," <td>3.70000</td>\n"," <td>0.012269</td>\n"," <td>0.018305</td>\n"," <td>-0.053325</td>\n"," <td>15.5612</td>\n"," <td>...</td>\n"," <td>9.69771</td>\n"," <td>-1.65625</td>\n"," <td>-0.060809</td>\n"," <td>0.012698</td>\n"," <td>-0.014303</td>\n"," <td>-0.002823</td>\n"," <td>-61.5199</td>\n"," <td>-39.3879</td>\n"," <td>-58.2694</td>\n"," <td>1</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>5 rows × 33 columns</p>\n","</div>"],"text/plain":[" activityID heart_rate hand temperature (°C) \\\n","0 transient activities 104.0 30.0 \n","1 transient activities 104.0 30.0 \n","2 transient activities 104.0 30.0 \n","3 transient activities 104.0 30.0 \n","4 transient activities 104.0 30.0 \n","\n"," hand acceleration X ±16g hand acceleration Y ±16g \\\n","0 2.37223 8.60074 \n","1 2.18837 8.56560 \n","2 2.37357 8.60107 \n","3 2.07473 8.52853 \n","4 2.22936 8.83122 \n","\n"," hand acceleration Z ±16g hand gyroscope X hand gyroscope Y \\\n","0 3.51048 -0.092217 0.056812 \n","1 3.66179 -0.024413 0.047759 \n","2 3.54898 -0.057976 0.032574 
\n","3 3.66021 -0.002352 0.032810 \n","4 3.70000 0.012269 0.018305 \n","\n"," hand gyroscope Z hand magnetometer X ... ankle acceleration X ±16g \\\n","0 -0.015845 14.6806 ... 9.65918 \n","1 0.006474 14.8991 ... 9.69370 \n","2 -0.006988 14.2420 ... 9.58944 \n","3 -0.003747 14.8908 ... 9.58814 \n","4 -0.053325 15.5612 ... 9.69771 \n","\n"," ankle acceleration Y ±16g ankle acceleration Z ±16g ankle gyroscope X \\\n","0 -1.65569 -0.099797 0.008300 \n","1 -1.57902 -0.215687 -0.006577 \n","2 -1.73276 0.092914 0.003014 \n","3 -1.77040 0.054545 0.003175 \n","4 -1.65625 -0.060809 0.012698 \n","\n"," ankle gyroscope Y ankle gyroscope Z ankle magnetometer X \\\n","0 0.009250 -0.017580 -61.1888 \n","1 -0.004638 0.000368 -59.8479 \n","2 0.000148 0.022495 -60.7361 \n","3 -0.020301 0.011275 -60.4091 \n","4 -0.014303 -0.002823 -61.5199 \n","\n"," ankle magnetometer Y ankle magnetometer Z PeopleId \n","0 -38.9599 -58.1438 1 \n","1 -38.8919 -58.5253 1 \n","2 -39.4138 -58.3999 1 \n","3 -38.7635 -58.3956 1 \n","4 -39.3879 -58.2694 1 \n","\n","[5 rows x 33 columns]"]},"execution_count":2,"metadata":{},"output_type":"execute_result"}],"source":["dataset = pd.read_csv('/kaggle/input/fisical-activity-dataset/dataset2.csv')\n","dataset.head()"]},{"cell_type":"code","execution_count":3,"id":"4c7d3445","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:24.466165Z","iopub.status.busy":"2023-08-05T00:07:24.465496Z","iopub.status.idle":"2023-08-05T00:07:24.70062Z","shell.execute_reply":"2023-08-05T00:07:24.699179Z"},"papermill":{"duration":0.24804,"end_time":"2023-08-05T00:07:24.703053","exception":false,"start_time":"2023-08-05T00:07:24.455013","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[104. 30. 2.37223 ... -61.1888 -38.9599 -58.1438 ]\n"," [104. 30. 2.18837 ... -59.8479 -38.8919 -58.5253 ]\n"," [104. 30. 2.37357 ... -60.7361 -39.4138 -58.3999 ]\n"," ...\n"," [140. 30.8125 -9.42745 ... -38.5541 -16.0535 24.6936 ]\n"," [140. 
30.8125 -9.47246 ... -38.8064 -16.04 24.9763 ]\n"," [140. 30.8125 -9.66621 ... -38.6814 -15.9175 24.9766 ]]\n","['transient activities' 'transient activities' 'transient activities' ...\n"," 'transient activities' 'transient activities' 'transient activities']\n"]}],"source":["X = dataset.iloc[:, 1:-1].values\n","y = dataset.iloc[:, 0].values\n","print(X)\n","print(y)"]},{"cell_type":"markdown","id":"e73af995","metadata":{"papermill":{"duration":0.00873,"end_time":"2023-08-05T00:07:24.720797","exception":false,"start_time":"2023-08-05T00:07:24.712067","status":"completed"},"tags":[]},"source":["## Encoding categorical data"]},{"cell_type":"code","execution_count":4,"id":"b2055003","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:24.740481Z","iopub.status.busy":"2023-08-05T00:07:24.740069Z","iopub.status.idle":"2023-08-05T00:07:26.930985Z","shell.execute_reply":"2023-08-05T00:07:26.929671Z"},"papermill":{"duration":2.203646,"end_time":"2023-08-05T00:07:26.933552","exception":false,"start_time":"2023-08-05T00:07:24.729906","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["['Nordic walking', 'ascending stairs', 'cycling', 'descending stairs', 'ironing', 'lying', 'rope jumping', 'running', 'sitting', 'standing', 'transient activities', 'vacuum cleaning', 'walking']\n","Counter({10: 927575, 12: 238761, 4: 238690, 5: 192523, 9: 189931, 0: 188107, 8: 185188, 11: 175353, 2: 164600, 1: 117216, 3: 104944, 7: 98199, 6: 42969})\n","Class=10, n=927575 (32.387%)\n","Class=5, n=192523 (6.722%)\n","Class=8, n=185188 (6.466%)\n","Class=9, n=189931 (6.632%)\n","Class=4, n=238690 (8.334%)\n","Class=11, n=175353 (6.123%)\n","Class=1, n=117216 (4.093%)\n","Class=3, n=104944 (3.664%)\n","Class=12, n=238761 (8.336%)\n","Class=0, n=188107 (6.568%)\n","Class=2, n=164600 (5.747%)\n","Class=7, n=98199 (3.429%)\n","Class=6, n=42969 (1.500%)\n"]},{"data":{"image/png":"","text/plain":["<Figure size 640x480 with 1 
Axes>"]},"metadata":{},"output_type":"display_data"}],"source":["from collections import Counter\n","from sklearn.preprocessing import LabelEncoder\n","from matplotlib import pyplot\n","\n","le = LabelEncoder()\n","y_temp = le.fit_transform(y)\n","print(list(le.classes_))\n","counter = Counter(y_temp)\n","\n","print(counter)\n","for k,v in counter.items():\n"," per = v / len(y_temp) * 100\n"," print('Class=%d, n=%d (%.3f%%)' % (k, v, per))\n","\n","# plot the distribution\n","pyplot.bar(counter.keys(), counter.values())\n","pyplot.show()"]},{"cell_type":"markdown","id":"2d62ec16","metadata":{"papermill":{"duration":0.009288,"end_time":"2023-08-05T00:07:26.952484","exception":false,"start_time":"2023-08-05T00:07:26.943196","status":"completed"},"tags":[]},"source":["## Method 1: Using Random Forest Classifier "]},{"cell_type":"markdown","id":"ec9d9fce","metadata":{"papermill":{"duration":0.009112,"end_time":"2023-08-05T00:07:26.971127","exception":false,"start_time":"2023-08-05T00:07:26.962015","status":"completed"},"tags":[]},"source":["## Handling missing values"]},{"cell_type":"code","execution_count":5,"id":"68d72f52","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:26.992642Z","iopub.status.busy":"2023-08-05T00:07:26.992214Z","iopub.status.idle":"2023-08-05T00:07:29.167243Z","shell.execute_reply":"2023-08-05T00:07:29.166002Z"},"papermill":{"duration":2.188981,"end_time":"2023-08-05T00:07:29.169898","exception":false,"start_time":"2023-08-05T00:07:26.980917","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.impute import SimpleImputer\n","imputer = SimpleImputer(missing_values=np.nan, strategy='mean')\n","imputer.fit(X)\n","X = imputer.transform(X)"]},{"cell_type":"markdown","id":"b99c6d79","metadata":{"papermill":{"duration":0.009124,"end_time":"2023-08-05T00:07:29.188693","exception":false,"start_time":"2023-08-05T00:07:29.179569","status":"completed"},"tags":[]},"source":["## 
Encoding"]},{"cell_type":"code","execution_count":6,"id":"57a332ea","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:29.209213Z","iopub.status.busy":"2023-08-05T00:07:29.20881Z","iopub.status.idle":"2023-08-05T00:07:29.792137Z","shell.execute_reply":"2023-08-05T00:07:29.791044Z"},"papermill":{"duration":0.596644,"end_time":"2023-08-05T00:07:29.794779","exception":false,"start_time":"2023-08-05T00:07:29.198135","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.preprocessing import LabelEncoder\n","le = LabelEncoder()\n","y = le.fit_transform(y)"]},{"cell_type":"code","execution_count":7,"id":"974612a4","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:29.81627Z","iopub.status.busy":"2023-08-05T00:07:29.815829Z","iopub.status.idle":"2023-08-05T00:07:31.145986Z","shell.execute_reply":"2023-08-05T00:07:31.144799Z"},"papermill":{"duration":1.343317,"end_time":"2023-08-05T00:07:31.148501","exception":false,"start_time":"2023-08-05T00:07:29.805184","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.model_selection import train_test_split\n","X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=0)"]},{"cell_type":"markdown","id":"8edfbb5a","metadata":{"papermill":{"duration":0.008929,"end_time":"2023-08-05T00:07:31.166893","exception":false,"start_time":"2023-08-05T00:07:31.157964","status":"completed"},"tags":[]},"source":["## Feature Scaling"]},{"cell_type":"code","execution_count":8,"id":"ba643853","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:31.187287Z","iopub.status.busy":"2023-08-05T00:07:31.186875Z","iopub.status.idle":"2023-08-05T00:07:32.475029Z","shell.execute_reply":"2023-08-05T00:07:32.474185Z"},"papermill":{"duration":1.301347,"end_time":"2023-08-05T00:07:32.477471","exception":false,"start_time":"2023-08-05T00:07:31.176124","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.preprocessing import StandardScaler\n","sc = 
StandardScaler()\n","X_train = sc.fit_transform(X_train)\n","X_test = sc.transform(X_test)"]},{"cell_type":"markdown","id":"14ac9a68","metadata":{"papermill":{"duration":0.009663,"end_time":"2023-08-05T00:07:32.496576","exception":false,"start_time":"2023-08-05T00:07:32.486913","status":"completed"},"tags":[]},"source":["## Training RandomForestClassifier"]},{"cell_type":"code","execution_count":9,"id":"1dfae9c0","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:32.517561Z","iopub.status.busy":"2023-08-05T00:07:32.517142Z","iopub.status.idle":"2023-08-05T00:15:35.089903Z","shell.execute_reply":"2023-08-05T00:15:35.088663Z"},"papermill":{"duration":482.594606,"end_time":"2023-08-05T00:15:35.101103","exception":false,"start_time":"2023-08-05T00:07:32.506497","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: 
\"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 
50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>RandomForestClassifier(criterion=&#x27;entropy&#x27;, n_estimators=10, random_state=0)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. 
<br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">RandomForestClassifier</label><div class=\"sk-toggleable__content\"><pre>RandomForestClassifier(criterion=&#x27;entropy&#x27;, n_estimators=10, random_state=0)</pre></div></div></div></div></div>"],"text/plain":["RandomForestClassifier(criterion='entropy', n_estimators=10, random_state=0)"]},"execution_count":9,"metadata":{},"output_type":"execute_result"}],"source":["from sklearn.ensemble import RandomForestClassifier\n","classifier = RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0)\n","classifier.fit(X_train, y_train)"]},{"cell_type":"markdown","id":"a6207e57","metadata":{"papermill":{"duration":0.009341,"end_time":"2023-08-05T00:15:35.120003","exception":false,"start_time":"2023-08-05T00:15:35.110662","status":"completed"},"tags":[]},"source":["## Predicting Results"]},{"cell_type":"code","execution_count":10,"id":"a963263d","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:35.140981Z","iopub.status.busy":"2023-08-05T00:15:35.140561Z","iopub.status.idle":"2023-08-05T00:15:37.22477Z","shell.execute_reply":"2023-08-05T00:15:37.223543Z"},"papermill":{"duration":2.097825,"end_time":"2023-08-05T00:15:37.227329","exception":false,"start_time":"2023-08-05T00:15:35.129504","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[ 4 4]\n"," [10 10]\n"," [ 8 8]\n"," ...\n"," [ 7 7]\n"," [ 8 8]\n"," [ 3 3]]\n"]}],"source":["y_pred = classifier.predict(X_test)\n","print(np.concatenate((y_pred.reshape(len(y_pred),1), 
y_test.reshape(len(y_test),1)),1))"]},{"cell_type":"markdown","id":"7846df5c","metadata":{"papermill":{"duration":0.009748,"end_time":"2023-08-05T00:15:37.247066","exception":false,"start_time":"2023-08-05T00:15:37.237318","status":"completed"},"tags":[]},"source":["## Calculating Accuracy"]},{"cell_type":"code","execution_count":11,"id":"ed6e3e8a","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:37.268284Z","iopub.status.busy":"2023-08-05T00:15:37.267859Z","iopub.status.idle":"2023-08-05T00:15:37.436445Z","shell.execute_reply":"2023-08-05T00:15:37.435065Z"},"papermill":{"duration":0.182461,"end_time":"2023-08-05T00:15:37.439113","exception":false,"start_time":"2023-08-05T00:15:37.256652","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[ 37634 0 0 0 0 0 0 0 0 0\n"," 8 0 0]\n"," [ 0 23179 0 23 0 0 0 0 0 0\n"," 81 0 0]\n"," [ 0 0 32839 0 0 0 0 0 0 0\n"," 21 0 0]\n"," [ 0 14 0 20904 0 0 0 0 0 0\n"," 43 0 0]\n"," [ 0 0 0 0 47928 0 0 0 0 14\n"," 6 0 0]\n"," [ 0 0 0 0 0 38287 0 0 3 0\n"," 17 0 0]\n"," [ 0 0 1 0 0 0 8591 2 0 0\n"," 5 0 0]\n"," [ 0 0 0 0 0 0 0 19492 0 0\n"," 8 1 0]\n"," [ 0 0 0 0 0 0 0 0 36812 2\n"," 32 0 0]\n"," [ 0 0 0 0 22 0 0 0 4 38082\n"," 57 0 0]\n"," [ 28 61 12 57 25 32 8 24 31 22\n"," 185589 24 20]\n"," [ 0 0 0 0 0 0 0 0 0 0\n"," 41 35025 0]\n"," [ 1 0 0 0 0 0 0 0 0 0\n"," 41 0 47659]]\n"]},{"data":{"text/plain":["0.9986190931754223"]},"execution_count":11,"metadata":{},"output_type":"execute_result"}],"source":["from sklearn.metrics import confusion_matrix, accuracy_score\n","cm = confusion_matrix(y_test, y_pred)\n","print(cm)\n","accuracy_score(y_test, 
y_pred)"]},{"cell_type":"code","execution_count":12,"id":"e609b003","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:37.462377Z","iopub.status.busy":"2023-08-05T00:15:37.461702Z","iopub.status.idle":"2023-08-05T00:15:39.654083Z","shell.execute_reply":"2023-08-05T00:15:39.652958Z"},"papermill":{"duration":2.206834,"end_time":"2023-08-05T00:15:39.656344","exception":false,"start_time":"2023-08-05T00:15:37.44951","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":[" <script type=\"text/javascript\">\n"," window.PlotlyConfig = {MathJaxConfig: 'local'};\n"," if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n"," if (typeof require !== 'undefined') {\n"," require.undef(\"plotly\");\n"," requirejs.config({\n"," paths: {\n"," 'plotly': ['https://cdn.plot.ly/plotly-2.24.1.min']\n"," }\n"," });\n"," require(['plotly'], function(Plotly) {\n"," window._Plotly = Plotly;\n"," });\n"," }\n"," </script>\n"," "]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["<div> <div id=\"be7930db-6c4e-497b-b832-416da626298c\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"be7930db-6c4e-497b-b832-416da626298c\")) { Plotly.newPlot( \"be7930db-6c4e-497b-b832-416da626298c\", 
[{\"coloraxis\":\"coloraxis\",\"name\":\"0\",\"texttemplate\":\"%{z}\",\"z\":[[37634,0,0,0,0,0,0,0,0,0,8,0,0],[0,23179,0,23,0,0,0,0,0,0,81,0,0],[0,0,32839,0,0,0,0,0,0,0,21,0,0],[0,14,0,20904,0,0,0,0,0,0,43,0,0],[0,0,0,0,47928,0,0,0,0,14,6,0,0],[0,0,0,0,0,38287,0,0,3,0,17,0,0],[0,0,1,0,0,0,8591,2,0,0,5,0,0],[0,0,0,0,0,0,0,19492,0,0,8,1,0],[0,0,0,0,0,0,0,0,36812,2,32,0,0],[0,0,0,0,22,0,0,0,4,38082,57,0,0],[28,61,12,57,25,32,8,24,31,22,185589,24,20],[0,0,0,0,0,0,0,0,0,0,41,35025,0],[1,0,0,0,0,0,0,0,0,0,41,0,47659]],\"type\":\"heatmap\",\"xaxis\":\"x\",\"yaxis\":\"y\",\"hovertemplate\":\"x: %{x}\\u003cbr\\u003ey: %{y}\\u003cbr\\u003ecolor: %{z}\\u003cextra\\u003e\\u003c\\u002fextra\\u003e\"}], {\"template\":{\"data\":{\"histogram2dcontour\":[{\"type\":\"histogram2dcontour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"choropleth\":[{\"type\":\"choropleth\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"histogram2d\":[{\"type\":\"histogram2d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmap\":[{\"type\":\"heatmap\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"]
,[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmapgl\":[{\"type\":\"heatmapgl\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"contourcarpet\":[{\"type\":\"contourcarpet\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"contour\":[{\"type\":\"contour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"surface\":[{\"type\":\"surface\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"mesh3d\":[{\"type\":\"mesh3d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"scatter\":[{\"fillpattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2},\"type\":\"scatter\"}],\"parcoords\":[{\"type\":\"parcoords\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolargl\":[{\"type\":\"scatterpolargl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"bar\":[{\"error_x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}
],\"scattergeo\":[{\"type\":\"scattergeo\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolar\":[{\"type\":\"scatterpolar\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"histogram\":[{\"marker\":{\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"histogram\"}],\"scattergl\":[{\"type\":\"scattergl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatter3d\":[{\"type\":\"scatter3d\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattermapbox\":[{\"type\":\"scattermapbox\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterternary\":[{\"type\":\"scatterternary\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattercarpet\":[{\"type\":\"scattercarpet\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"table\":[{\"cells\":{\"fill\":{\"color\":\"#EBF0F8\"},\"line\":{\"color\":\"white\"}},\"header\":{\"fill\":{\"color\":\"#C8D4E3\"},\"line\":{\"color\":\"white\"}},\"type\":\"table\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"pie\":[{\"automargin\":true,\"type\":\"pie\"}]},\"layout\":{\"autotypenumbers\":\"strict\",\"colorway\":[\"#636efa\",\"#EF553B\",\"#00cc96\",\"#ab63fa\",\"#FFA15A\",\"#19d3f3\",\"#FF6692\",\"#B6E880\",\"#FF97FF\",\"#FECB52\"],\"font\":{\"color\":\"#2a3f5f\"},\"hovermode\":\"closest\",\"hoverlabel\":{\"align\":\"left\"},\"paper_bgcolor\":\"white\",\"plot_bgcolor\":\"#E5EC
F6\",\"polar\":{\"bgcolor\":\"#E5ECF6\",\"angularaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"radialaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"ternary\":{\"bgcolor\":\"#E5ECF6\",\"aaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"baxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"caxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"coloraxis\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"colorscale\":{\"sequential\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"sequentialminus\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"diverging\":[[0,\"#8e0152\"],[0.1,\"#c51b7d\"],[0.2,\"#de77ae\"],[0.3,\"#f1b6da\"],[0.4,\"#fde0ef\"],[0.5,\"#f7f7f7\"],[0.6,\"#e6f5d0\"],[0.7,\"#b8e186\"],[0.8,\"#7fbc41\"],[0.9,\"#4d9221\"],[1,\"#276419\"]]},\"xaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"yaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"scene\":{\"xaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"yaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecol
or\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"zaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2}},\"shapedefaults\":{\"line\":{\"color\":\"#2a3f5f\"}},\"annotationdefaults\":{\"arrowcolor\":\"#2a3f5f\",\"arrowhead\":0,\"arrowwidth\":1},\"geo\":{\"bgcolor\":\"white\",\"landcolor\":\"#E5ECF6\",\"subunitcolor\":\"white\",\"showland\":true,\"showlakes\":true,\"lakecolor\":\"white\"},\"title\":{\"x\":0.05},\"mapbox\":{\"style\":\"light\"}}},\"xaxis\":{\"anchor\":\"y\",\"domain\":[0.0,1.0]},\"yaxis\":{\"anchor\":\"x\",\"domain\":[0.0,1.0],\"autorange\":\"reversed\"},\"coloraxis\":{\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]},\"margin\":{\"t\":60}}, {\"responsive\": true} ).then(function(){\n"," \n","var gd = document.getElementById('be7930db-6c4e-497b-b832-416da626298c');\n","var x = new MutationObserver(function (mutations, observer) {{\n"," var display = window.getComputedStyle(gd).display;\n"," if (!display || display === 'none') {{\n"," console.log([gd, 'removed!']);\n"," Plotly.purge(gd);\n"," observer.disconnect();\n"," }}\n","}});\n","\n","// Listen for the removal of the full notebook cells\n","var notebookContainer = gd.closest('#notebook-container');\n","if (notebookContainer) {{\n"," x.observe(notebookContainer, {childList: true});\n","}}\n","\n","// Listen for the clearing of the current output cell\n","var outputEl = gd.closest('.output');\n","if (outputEl) {{\n"," x.observe(outputEl, {childList: true});\n","}}\n","\n"," }) }; }); </script> </div>"]},"metadata":{},"output_type":"display_data"}],"source":["# import 
matplotlib.pyplot as plt\n","# from sklearn.metrics import ConfusionMatrixDisplay\n","# disp = ConfusionMatrixDisplay(confusion_matrix=cm,\n","# display_labels=classifier.classes_)\n","# disp.plot()\n","# plt.show()\n","import plotly.express as px\n","fig = px.imshow(cm, text_auto=True, aspect=\"auto\")\n","fig.show()"]},{"cell_type":"markdown","id":"6b68fad1","metadata":{"papermill":{"duration":0.010207,"end_time":"2023-08-05T00:15:39.677156","exception":false,"start_time":"2023-08-05T00:15:39.666949","status":"completed"},"tags":[]},"source":["Method 2: KNN"]},{"cell_type":"code","execution_count":13,"id":"219416d1","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:39.699914Z","iopub.status.busy":"2023-08-05T00:15:39.699066Z","iopub.status.idle":"2023-08-05T00:15:40.015603Z","shell.execute_reply":"2023-08-05T00:15:40.014559Z"},"papermill":{"duration":0.330691,"end_time":"2023-08-05T00:15:40.0181","exception":false,"start_time":"2023-08-05T00:15:39.687409","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<style>#sk-container-id-2 {color: black;background-color: white;}#sk-container-id-2 pre{padding: 0;}#sk-container-id-2 div.sk-toggleable {background-color: white;}#sk-container-id-2 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-2 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-2 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-2 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-2 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-2 
input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-2 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-2 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-2 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-2 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-2 div.sk-item {position: relative;z-index: 1;}#sk-container-id-2 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-2 div.sk-item::before, #sk-container-id-2 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-2 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: 
relative;background-color: white;}#sk-container-id-2 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-2 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-2 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-2 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-2 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-2 div.sk-label-container {text-align: center;}#sk-container-id-2 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-2 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>KNeighborsClassifier()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. 
<br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" checked><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">KNeighborsClassifier</label><div class=\"sk-toggleable__content\"><pre>KNeighborsClassifier()</pre></div></div></div></div></div>"],"text/plain":["KNeighborsClassifier()"]},"execution_count":13,"metadata":{},"output_type":"execute_result"}],"source":["from sklearn.neighbors import KNeighborsClassifier\n","knn_clf = KNeighborsClassifier()\n","knn_clf.fit(X_train,y_train)"]},{"cell_type":"code","execution_count":14,"id":"fe6052b4","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:40.041026Z","iopub.status.busy":"2023-08-05T00:15:40.040612Z","iopub.status.idle":"2023-08-05T01:09:53.645091Z","shell.execute_reply":"2023-08-05T01:09:53.643871Z"},"papermill":{"duration":3253.619328,"end_time":"2023-08-05T01:09:53.648066","exception":false,"start_time":"2023-08-05T00:15:40.028738","status":"completed"},"tags":[]},"outputs":[],"source":["y_pred=knn_clf.predict(X_test)"]},{"cell_type":"code","execution_count":15,"id":"80d0ea82","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:09:53.674186Z","iopub.status.busy":"2023-08-05T01:09:53.673791Z","iopub.status.idle":"2023-08-05T01:09:53.682771Z","shell.execute_reply":"2023-08-05T01:09:53.681553Z"},"papermill":{"duration":0.023497,"end_time":"2023-08-05T01:09:53.685168","exception":false,"start_time":"2023-08-05T01:09:53.661671","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[ 4 4]\n"," [10 10]\n"," [ 8 8]\n"," ...\n"," [ 7 7]\n"," [ 8 8]\n"," [ 3 3]]\n"]}],"source":["print(np.concatenate((y_pred.reshape(len(y_pred),1), 
y_test.reshape(len(y_test),1)),1))"]},{"cell_type":"markdown","id":"30642482","metadata":{"papermill":{"duration":0.01112,"end_time":"2023-08-05T01:09:53.711764","exception":false,"start_time":"2023-08-05T01:09:53.700644","status":"completed"},"tags":[]},"source":["## Method 3: Using ANN"]},{"cell_type":"code","execution_count":16,"id":"9fe82be2","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:09:53.739787Z","iopub.status.busy":"2023-08-05T01:09:53.738634Z","iopub.status.idle":"2023-08-05T01:09:55.132546Z","shell.execute_reply":"2023-08-05T01:09:55.131479Z"},"papermill":{"duration":1.410752,"end_time":"2023-08-05T01:09:55.135242","exception":false,"start_time":"2023-08-05T01:09:53.72449","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.model_selection import train_test_split\n","X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=0)"]},{"cell_type":"markdown","id":"571dcd4c","metadata":{"papermill":{"duration":0.010439,"end_time":"2023-08-05T01:09:55.156709","exception":false,"start_time":"2023-08-05T01:09:55.14627","status":"completed"},"tags":[]},"source":["## Oversampling the minority classes"]},{"cell_type":"code","execution_count":17,"id":"32271f2b","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:09:55.181016Z","iopub.status.busy":"2023-08-05T01:09:55.180057Z","iopub.status.idle":"2023-08-05T01:19:23.222784Z","shell.execute_reply":"2023-08-05T01:19:23.221544Z"},"papermill":{"duration":568.057647,"end_time":"2023-08-05T01:19:23.225424","exception":false,"start_time":"2023-08-05T01:09:55.167777","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["Class=4, n=741642 (7.692%)\n","Class=12, n=741642 (7.692%)\n","Class=10, n=741642 (7.692%)\n","Class=2, n=741642 (7.692%)\n","Class=9, n=741642 (7.692%)\n","Class=8, n=741642 (7.692%)\n","Class=0, n=741642 (7.692%)\n","Class=5, n=741642 (7.692%)\n","Class=1, n=741642 (7.692%)\n","Class=11, 
n=741642 (7.692%)\n","Class=6, n=741642 (7.692%)\n","Class=3, n=741642 (7.692%)\n","Class=7, n=741642 (7.692%)\n"]},{"data":{"image/png":"","text/plain":["<Figure size 640x480 with 1 Axes>"]},"metadata":{},"output_type":"display_data"}],"source":["from imblearn.over_sampling import SMOTE\n","oversample = SMOTE()\n","y_train = le.fit_transform(y_train)\n","X_train, y_train = oversample.fit_resample(X_train, y_train)\n","# summarize distribution\n","counter = Counter(y_train)\n","for k,v in counter.items():\n"," per = v / len(y_train) * 100\n"," print('Class=%d, n=%d (%.3f%%)' % (k, v, per))\n","# plot the distribution\n","pyplot.bar(counter.keys(), counter.values())\n","pyplot.show()"]},{"cell_type":"code","execution_count":18,"id":"aa3846f3","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:23.252818Z","iopub.status.busy":"2023-08-05T01:19:23.251956Z","iopub.status.idle":"2023-08-05T01:19:23.740771Z","shell.execute_reply":"2023-08-05T01:19:23.73973Z"},"papermill":{"duration":0.505699,"end_time":"2023-08-05T01:19:23.743348","exception":false,"start_time":"2023-08-05T01:19:23.237649","status":"completed"},"tags":[]},"outputs":[],"source":["from tensorflow.keras.utils import to_categorical\n","y_train = to_categorical(y_train)"]},{"cell_type":"code","execution_count":19,"id":"0a0017d2","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:23.767819Z","iopub.status.busy":"2023-08-05T01:19:23.767429Z","iopub.status.idle":"2023-08-05T01:19:29.065742Z","shell.execute_reply":"2023-08-05T01:19:29.064536Z"},"papermill":{"duration":5.313708,"end_time":"2023-08-05T01:19:29.068475","exception":false,"start_time":"2023-08-05T01:19:23.754767","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.preprocessing import StandardScaler\n","sc = StandardScaler()\n","X_train = sc.fit_transform(X_train)\n","X_test = 
sc.transform(X_test)"]},{"cell_type":"code","execution_count":20,"id":"c344eb49","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:29.092749Z","iopub.status.busy":"2023-08-05T01:19:29.092331Z","iopub.status.idle":"2023-08-05T01:19:29.209741Z","shell.execute_reply":"2023-08-05T01:19:29.20847Z"},"papermill":{"duration":0.132568,"end_time":"2023-08-05T01:19:29.21231","exception":false,"start_time":"2023-08-05T01:19:29.079742","status":"completed"},"tags":[]},"outputs":[],"source":["ann = tf.keras.models.Sequential()\n","ann.add(tf.keras.layers.Dense(units=26, activation='tanh'))\n","ann.add(tf.keras.layers.Dense(units=52, activation='tanh'))\n","ann.add(tf.keras.layers.Dense(units=52, activation='tanh'))\n","ann.add(tf.keras.layers.Dense(units=13, activation='softmax'))"]},{"cell_type":"code","execution_count":21,"id":"c1441ca6","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:29.236225Z","iopub.status.busy":"2023-08-05T01:19:29.23586Z","iopub.status.idle":"2023-08-05T01:19:29.268014Z","shell.execute_reply":"2023-08-05T01:19:29.266637Z"},"papermill":{"duration":0.046985,"end_time":"2023-08-05T01:19:29.270549","exception":false,"start_time":"2023-08-05T01:19:29.223564","status":"completed"},"tags":[]},"outputs":[],"source":["ann.compile(optimizer='adam' , loss='CategoricalCrossentropy' , metrics=['accuracy'])"]},{"cell_type":"code","execution_count":22,"id":"82e960ec","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:29.294423Z","iopub.status.busy":"2023-08-05T01:19:29.294016Z","iopub.status.idle":"2023-08-05T01:43:40.990731Z","shell.execute_reply":"2023-08-05T01:43:40.989437Z"},"papermill":{"duration":1451.711594,"end_time":"2023-08-05T01:43:40.99336","exception":false,"start_time":"2023-08-05T01:19:29.281766","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["Epoch 1/10\n","75324/75324 [==============================] - 145s 2ms/step - loss: 0.1724 - accuracy: 0.9415\n","Epoch 
2/10\n","75324/75324 [==============================] - 142s 2ms/step - loss: 0.0763 - accuracy: 0.9731\n","Epoch 3/10\n","75324/75324 [==============================] - 144s 2ms/step - loss: 0.0601 - accuracy: 0.9788\n","Epoch 4/10\n","75324/75324 [==============================] - 151s 2ms/step - loss: 0.0527 - accuracy: 0.9815\n","Epoch 5/10\n","75324/75324 [==============================] - 142s 2ms/step - loss: 0.0480 - accuracy: 0.9831\n","Epoch 6/10\n","75324/75324 [==============================] - 143s 2ms/step - loss: 0.0452 - accuracy: 0.9840\n","Epoch 7/10\n","75324/75324 [==============================] - 146s 2ms/step - loss: 0.0432 - accuracy: 0.9848\n","Epoch 8/10\n","75324/75324 [==============================] - 141s 2ms/step - loss: 0.0417 - accuracy: 0.9854\n","Epoch 9/10\n","75324/75324 [==============================] - 148s 2ms/step - loss: 0.0405 - accuracy: 0.9857\n","Epoch 10/10\n","75324/75324 [==============================] - 146s 2ms/step - loss: 0.0397 - accuracy: 0.9860\n"]}],"source":["history = ann.fit(X_train, y_train, batch_size=128, epochs=10)"]},{"cell_type":"code","execution_count":23,"id":"c66f3d9a","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:43:44.865958Z","iopub.status.busy":"2023-08-05T01:43:44.86558Z","iopub.status.idle":"2023-08-05T01:44:26.055791Z","shell.execute_reply":"2023-08-05T01:44:26.054616Z"},"papermill":{"duration":43.125175,"end_time":"2023-08-05T01:44:26.058358","exception":false,"start_time":"2023-08-05T01:43:42.933183","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["17901/17901 [==============================] - 22s 1ms/step\n"]}],"source":["y_pred = 
ann.predict(X_test)"]},{"cell_type":"code","execution_count":24,"id":"ca3d6088","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:29.962501Z","iopub.status.busy":"2023-08-05T01:44:29.962091Z","iopub.status.idle":"2023-08-05T01:44:29.991499Z","shell.execute_reply":"2023-08-05T01:44:29.990294Z"},"papermill":{"duration":1.967301,"end_time":"2023-08-05T01:44:29.994142","exception":false,"start_time":"2023-08-05T01:44:28.026841","status":"completed"},"tags":[]},"outputs":[],"source":["result = np.argmax(y_pred, axis=-1)"]},{"cell_type":"code","execution_count":25,"id":"4211d197","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:33.86521Z","iopub.status.busy":"2023-08-05T01:44:33.864484Z","iopub.status.idle":"2023-08-05T01:44:33.869618Z","shell.execute_reply":"2023-08-05T01:44:33.868861Z"},"papermill":{"duration":1.942499,"end_time":"2023-08-05T01:44:33.871763","exception":false,"start_time":"2023-08-05T01:44:31.929264","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[ 4 10 8 ... 
7 8 3]\n"]}],"source":["print(result)"]},{"cell_type":"code","execution_count":26,"id":"e36e98e0","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:37.612404Z","iopub.status.busy":"2023-08-05T01:44:37.61165Z","iopub.status.idle":"2023-08-05T01:44:37.616428Z","shell.execute_reply":"2023-08-05T01:44:37.615631Z"},"papermill":{"duration":1.837241,"end_time":"2023-08-05T01:44:37.618458","exception":false,"start_time":"2023-08-05T01:44:35.781217","status":"completed"},"tags":[]},"outputs":[],"source":["y_test_rs = np.argmax(y_test, axis=-1)"]},{"cell_type":"code","execution_count":27,"id":"49227522","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:41.477349Z","iopub.status.busy":"2023-08-05T01:44:41.476614Z","iopub.status.idle":"2023-08-05T01:44:41.481313Z","shell.execute_reply":"2023-08-05T01:44:41.48054Z"},"papermill":{"duration":1.939073,"end_time":"2023-08-05T01:44:41.484075","exception":false,"start_time":"2023-08-05T01:44:39.545002","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["9\n"]}],"source":["print(y_test_rs)"]},{"cell_type":"code","execution_count":28,"id":"72b1f910","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:45.446971Z","iopub.status.busy":"2023-08-05T01:44:45.446273Z","iopub.status.idle":"2023-08-05T01:44:45.569371Z","shell.execute_reply":"2023-08-05T01:44:45.568125Z"},"papermill":{"duration":2.161727,"end_time":"2023-08-05T01:44:45.571926","exception":false,"start_time":"2023-08-05T01:44:43.410199","status":"completed"},"tags":[]},"outputs":[],"source":["y_pred_enc = pd.get_dummies(result)\n","y_test_enc = 
pd.get_dummies(y_test)"]},{"cell_type":"code","execution_count":29,"id":"6b54f780","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:49.311088Z","iopub.status.busy":"2023-08-05T01:44:49.310688Z","iopub.status.idle":"2023-08-05T01:44:49.698079Z","shell.execute_reply":"2023-08-05T01:44:49.696602Z"},"papermill":{"duration":2.293735,"end_time":"2023-08-05T01:44:49.700483","exception":false,"start_time":"2023-08-05T01:44:47.406748","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["0.9624047680565351\n"]}],"source":["from sklearn.metrics import accuracy_score\n","print(accuracy_score(y_test_enc, y_pred_enc))"]}],"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.12"},"papermill":{"default_parameters":{},"duration":5890.466662,"end_time":"2023-08-05T01:44:54.111787","environment_variables":{},"exception":null,"input_path":"__notebook__.ipynb","output_path":"__notebook__.ipynb","parameters":{},"start_time":"2023-08-05T00:06:43.645125","version":"2.4.0"}},"nbformat":4,"nbformat_minor":5} No newline at end of file
Copy link
Collaborator

@coderpotter coderpotter Aug 6, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Line #6.    y_temp = le.fit_transform(y)

Rename `y_temp` to a descriptive name. If a variable is truly temporary (which this one is not), appending an `_` to its name should suffice.


Reply via ReviewNB

@@ -0,0 +1 @@
{"cells":[{"source":"<a href=\"https://www.kaggle.com/code/manvinagdev/physical-activity-prediction?scriptVersionId=138941879\" target=\"_blank\"><img align=\"left\" alt=\"Kaggle\" title=\"Open in Kaggle\" src=\"https://kaggle.com/static/images/open-in-kaggle.svg\"></a>","metadata":{},"cell_type":"markdown"},{"cell_type":"markdown","id":"fea86c76","metadata":{"papermill":{"duration":0.008893,"end_time":"2023-08-05T00:06:53.536089","exception":false,"start_time":"2023-08-05T00:06:53.527196","status":"completed"},"tags":[]},"source":["## Importing libraries"]},{"cell_type":"code","execution_count":1,"id":"89d8bcdb","metadata":{"_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","execution":{"iopub.execute_input":"2023-08-05T00:06:53.554723Z","iopub.status.busy":"2023-08-05T00:06:53.554321Z","iopub.status.idle":"2023-08-05T00:07:02.412402Z","shell.execute_reply":"2023-08-05T00:07:02.410962Z"},"papermill":{"duration":8.870272,"end_time":"2023-08-05T00:07:02.414899","exception":false,"start_time":"2023-08-05T00:06:53.544627","status":"completed"},"tags":[]},"outputs":[{"name":"stderr","output_type":"stream","text":["/opt/conda/lib/python3.10/site-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.23.5\n"," warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n","/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/__init__.py:98: UserWarning: unable to load libtensorflow_io_plugins.so: unable to open file: libtensorflow_io_plugins.so, from paths: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so']\n","caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so: undefined symbol: _ZN3tsl6StatusC1EN10tensorflow5error4CodeESt17basic_string_viewIcSt11char_traitsIcEENS_14SourceLocationE']\n"," 
warnings.warn(f\"unable to load libtensorflow_io_plugins.so: {e}\")\n","/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/__init__.py:104: UserWarning: file system plugins are not loaded: unable to open file: libtensorflow_io.so, from paths: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so']\n","caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so: undefined symbol: _ZTVN10tensorflow13GcsFileSystemE']\n"," warnings.warn(f\"file system plugins are not loaded: {e}\")\n"]},{"name":"stdout","output_type":"stream","text":["/kaggle/input/fisical-activity-dataset/dataset2.csv\n"]}],"source":["import numpy as np\n","import pandas as pd\n","import tensorflow as tf\n","\n","import os\n","for dirname, _, filenames in os.walk('/kaggle/input'):\n"," for filename in filenames:\n"," print(os.path.join(dirname, filename))"]},{"cell_type":"markdown","id":"e1dee26f","metadata":{"papermill":{"duration":0.008233,"end_time":"2023-08-05T00:07:02.431643","exception":false,"start_time":"2023-08-05T00:07:02.42341","status":"completed"},"tags":[]},"source":["## Loading the dataset"]},{"cell_type":"code","execution_count":2,"id":"ce6fe559","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:02.452779Z","iopub.status.busy":"2023-08-05T00:07:02.451213Z","iopub.status.idle":"2023-08-05T00:07:24.443285Z","shell.execute_reply":"2023-08-05T00:07:24.442116Z"},"papermill":{"duration":22.004325,"end_time":"2023-08-05T00:07:24.445861","exception":false,"start_time":"2023-08-05T00:07:02.441536","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: 
right;\">\n"," <th></th>\n"," <th>activityID</th>\n"," <th>heart_rate</th>\n"," <th>hand temperature (°C)</th>\n"," <th>hand acceleration X ±16g</th>\n"," <th>hand acceleration Y ±16g</th>\n"," <th>hand acceleration Z ±16g</th>\n"," <th>hand gyroscope X</th>\n"," <th>hand gyroscope Y</th>\n"," <th>hand gyroscope Z</th>\n"," <th>hand magnetometer X</th>\n"," <th>...</th>\n"," <th>ankle acceleration X ±16g</th>\n"," <th>ankle acceleration Y ±16g</th>\n"," <th>ankle acceleration Z ±16g</th>\n"," <th>ankle gyroscope X</th>\n"," <th>ankle gyroscope Y</th>\n"," <th>ankle gyroscope Z</th>\n"," <th>ankle magnetometer X</th>\n"," <th>ankle magnetometer Y</th>\n"," <th>ankle magnetometer Z</th>\n"," <th>PeopleId</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.37223</td>\n"," <td>8.60074</td>\n"," <td>3.51048</td>\n"," <td>-0.092217</td>\n"," <td>0.056812</td>\n"," <td>-0.015845</td>\n"," <td>14.6806</td>\n"," <td>...</td>\n"," <td>9.65918</td>\n"," <td>-1.65569</td>\n"," <td>-0.099797</td>\n"," <td>0.008300</td>\n"," <td>0.009250</td>\n"," <td>-0.017580</td>\n"," <td>-61.1888</td>\n"," <td>-38.9599</td>\n"," <td>-58.1438</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.18837</td>\n"," <td>8.56560</td>\n"," <td>3.66179</td>\n"," <td>-0.024413</td>\n"," <td>0.047759</td>\n"," <td>0.006474</td>\n"," <td>14.8991</td>\n"," <td>...</td>\n"," <td>9.69370</td>\n"," <td>-1.57902</td>\n"," <td>-0.215687</td>\n"," <td>-0.006577</td>\n"," <td>-0.004638</td>\n"," <td>0.000368</td>\n"," <td>-59.8479</td>\n"," <td>-38.8919</td>\n"," <td>-58.5253</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.37357</td>\n"," <td>8.60107</td>\n"," <td>3.54898</td>\n"," <td>-0.057976</td>\n"," <td>0.032574</td>\n"," 
<td>-0.006988</td>\n"," <td>14.2420</td>\n"," <td>...</td>\n"," <td>9.58944</td>\n"," <td>-1.73276</td>\n"," <td>0.092914</td>\n"," <td>0.003014</td>\n"," <td>0.000148</td>\n"," <td>0.022495</td>\n"," <td>-60.7361</td>\n"," <td>-39.4138</td>\n"," <td>-58.3999</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.07473</td>\n"," <td>8.52853</td>\n"," <td>3.66021</td>\n"," <td>-0.002352</td>\n"," <td>0.032810</td>\n"," <td>-0.003747</td>\n"," <td>14.8908</td>\n"," <td>...</td>\n"," <td>9.58814</td>\n"," <td>-1.77040</td>\n"," <td>0.054545</td>\n"," <td>0.003175</td>\n"," <td>-0.020301</td>\n"," <td>0.011275</td>\n"," <td>-60.4091</td>\n"," <td>-38.7635</td>\n"," <td>-58.3956</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.22936</td>\n"," <td>8.83122</td>\n"," <td>3.70000</td>\n"," <td>0.012269</td>\n"," <td>0.018305</td>\n"," <td>-0.053325</td>\n"," <td>15.5612</td>\n"," <td>...</td>\n"," <td>9.69771</td>\n"," <td>-1.65625</td>\n"," <td>-0.060809</td>\n"," <td>0.012698</td>\n"," <td>-0.014303</td>\n"," <td>-0.002823</td>\n"," <td>-61.5199</td>\n"," <td>-39.3879</td>\n"," <td>-58.2694</td>\n"," <td>1</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>5 rows × 33 columns</p>\n","</div>"],"text/plain":[" activityID heart_rate hand temperature (°C) \\\n","0 transient activities 104.0 30.0 \n","1 transient activities 104.0 30.0 \n","2 transient activities 104.0 30.0 \n","3 transient activities 104.0 30.0 \n","4 transient activities 104.0 30.0 \n","\n"," hand acceleration X ±16g hand acceleration Y ±16g \\\n","0 2.37223 8.60074 \n","1 2.18837 8.56560 \n","2 2.37357 8.60107 \n","3 2.07473 8.52853 \n","4 2.22936 8.83122 \n","\n"," hand acceleration Z ±16g hand gyroscope X hand gyroscope Y \\\n","0 3.51048 -0.092217 0.056812 \n","1 3.66179 -0.024413 0.047759 \n","2 3.54898 -0.057976 0.032574 
\n","3 3.66021 -0.002352 0.032810 \n","4 3.70000 0.012269 0.018305 \n","\n"," hand gyroscope Z hand magnetometer X ... ankle acceleration X ±16g \\\n","0 -0.015845 14.6806 ... 9.65918 \n","1 0.006474 14.8991 ... 9.69370 \n","2 -0.006988 14.2420 ... 9.58944 \n","3 -0.003747 14.8908 ... 9.58814 \n","4 -0.053325 15.5612 ... 9.69771 \n","\n"," ankle acceleration Y ±16g ankle acceleration Z ±16g ankle gyroscope X \\\n","0 -1.65569 -0.099797 0.008300 \n","1 -1.57902 -0.215687 -0.006577 \n","2 -1.73276 0.092914 0.003014 \n","3 -1.77040 0.054545 0.003175 \n","4 -1.65625 -0.060809 0.012698 \n","\n"," ankle gyroscope Y ankle gyroscope Z ankle magnetometer X \\\n","0 0.009250 -0.017580 -61.1888 \n","1 -0.004638 0.000368 -59.8479 \n","2 0.000148 0.022495 -60.7361 \n","3 -0.020301 0.011275 -60.4091 \n","4 -0.014303 -0.002823 -61.5199 \n","\n"," ankle magnetometer Y ankle magnetometer Z PeopleId \n","0 -38.9599 -58.1438 1 \n","1 -38.8919 -58.5253 1 \n","2 -39.4138 -58.3999 1 \n","3 -38.7635 -58.3956 1 \n","4 -39.3879 -58.2694 1 \n","\n","[5 rows x 33 columns]"]},"execution_count":2,"metadata":{},"output_type":"execute_result"}],"source":["dataset = pd.read_csv('/kaggle/input/fisical-activity-dataset/dataset2.csv')\n","dataset.head()"]},{"cell_type":"code","execution_count":3,"id":"4c7d3445","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:24.466165Z","iopub.status.busy":"2023-08-05T00:07:24.465496Z","iopub.status.idle":"2023-08-05T00:07:24.70062Z","shell.execute_reply":"2023-08-05T00:07:24.699179Z"},"papermill":{"duration":0.24804,"end_time":"2023-08-05T00:07:24.703053","exception":false,"start_time":"2023-08-05T00:07:24.455013","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[104. 30. 2.37223 ... -61.1888 -38.9599 -58.1438 ]\n"," [104. 30. 2.18837 ... -59.8479 -38.8919 -58.5253 ]\n"," [104. 30. 2.37357 ... -60.7361 -39.4138 -58.3999 ]\n"," ...\n"," [140. 30.8125 -9.42745 ... -38.5541 -16.0535 24.6936 ]\n"," [140. 
30.8125 -9.47246 ... -38.8064 -16.04 24.9763 ]\n"," [140. 30.8125 -9.66621 ... -38.6814 -15.9175 24.9766 ]]\n","['transient activities' 'transient activities' 'transient activities' ...\n"," 'transient activities' 'transient activities' 'transient activities']\n"]}],"source":["X = dataset.iloc[:, 1:-1].values\n","y = dataset.iloc[:, 0].values\n","print(X)\n","print(y)"]},{"cell_type":"markdown","id":"e73af995","metadata":{"papermill":{"duration":0.00873,"end_time":"2023-08-05T00:07:24.720797","exception":false,"start_time":"2023-08-05T00:07:24.712067","status":"completed"},"tags":[]},"source":["## Encoding categorical data"]},{"cell_type":"code","execution_count":4,"id":"b2055003","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:24.740481Z","iopub.status.busy":"2023-08-05T00:07:24.740069Z","iopub.status.idle":"2023-08-05T00:07:26.930985Z","shell.execute_reply":"2023-08-05T00:07:26.929671Z"},"papermill":{"duration":2.203646,"end_time":"2023-08-05T00:07:26.933552","exception":false,"start_time":"2023-08-05T00:07:24.729906","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["['Nordic walking', 'ascending stairs', 'cycling', 'descending stairs', 'ironing', 'lying', 'rope jumping', 'running', 'sitting', 'standing', 'transient activities', 'vacuum cleaning', 'walking']\n","Counter({10: 927575, 12: 238761, 4: 238690, 5: 192523, 9: 189931, 0: 188107, 8: 185188, 11: 175353, 2: 164600, 1: 117216, 3: 104944, 7: 98199, 6: 42969})\n","Class=10, n=927575 (32.387%)\n","Class=5, n=192523 (6.722%)\n","Class=8, n=185188 (6.466%)\n","Class=9, n=189931 (6.632%)\n","Class=4, n=238690 (8.334%)\n","Class=11, n=175353 (6.123%)\n","Class=1, n=117216 (4.093%)\n","Class=3, n=104944 (3.664%)\n","Class=12, n=238761 (8.336%)\n","Class=0, n=188107 (6.568%)\n","Class=2, n=164600 (5.747%)\n","Class=7, n=98199 (3.429%)\n","Class=6, n=42969 (1.500%)\n"]},{"data":{"image/png":"","text/plain":["<Figure size 640x480 with 1 
Axes>"]},"metadata":{},"output_type":"display_data"}],"source":["from collections import Counter\n","from sklearn.preprocessing import LabelEncoder\n","from matplotlib import pyplot\n","\n","le = LabelEncoder()\n","y_temp = le.fit_transform(y)\n","print(list(le.classes_))\n","counter = Counter(y_temp)\n","\n","print(counter)\n","for k,v in counter.items():\n"," per = v / len(y_temp) * 100\n"," print('Class=%d, n=%d (%.3f%%)' % (k, v, per))\n","\n","# plot the distribution\n","pyplot.bar(counter.keys(), counter.values())\n","pyplot.show()"]},{"cell_type":"markdown","id":"2d62ec16","metadata":{"papermill":{"duration":0.009288,"end_time":"2023-08-05T00:07:26.952484","exception":false,"start_time":"2023-08-05T00:07:26.943196","status":"completed"},"tags":[]},"source":["## Method 1: Using Random Forest Classifier "]},{"cell_type":"markdown","id":"ec9d9fce","metadata":{"papermill":{"duration":0.009112,"end_time":"2023-08-05T00:07:26.971127","exception":false,"start_time":"2023-08-05T00:07:26.962015","status":"completed"},"tags":[]},"source":["## Handling missing values"]},{"cell_type":"code","execution_count":5,"id":"68d72f52","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:26.992642Z","iopub.status.busy":"2023-08-05T00:07:26.992214Z","iopub.status.idle":"2023-08-05T00:07:29.167243Z","shell.execute_reply":"2023-08-05T00:07:29.166002Z"},"papermill":{"duration":2.188981,"end_time":"2023-08-05T00:07:29.169898","exception":false,"start_time":"2023-08-05T00:07:26.980917","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.impute import SimpleImputer\n","imputer = SimpleImputer(missing_values=np.nan, strategy='mean')\n","imputer.fit(X)\n","X = imputer.transform(X)"]},{"cell_type":"markdown","id":"b99c6d79","metadata":{"papermill":{"duration":0.009124,"end_time":"2023-08-05T00:07:29.188693","exception":false,"start_time":"2023-08-05T00:07:29.179569","status":"completed"},"tags":[]},"source":["## 
Encoding"]},{"cell_type":"code","execution_count":6,"id":"57a332ea","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:29.209213Z","iopub.status.busy":"2023-08-05T00:07:29.20881Z","iopub.status.idle":"2023-08-05T00:07:29.792137Z","shell.execute_reply":"2023-08-05T00:07:29.791044Z"},"papermill":{"duration":0.596644,"end_time":"2023-08-05T00:07:29.794779","exception":false,"start_time":"2023-08-05T00:07:29.198135","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.preprocessing import LabelEncoder\n","le = LabelEncoder()\n","y = le.fit_transform(y)"]},{"cell_type":"code","execution_count":7,"id":"974612a4","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:29.81627Z","iopub.status.busy":"2023-08-05T00:07:29.815829Z","iopub.status.idle":"2023-08-05T00:07:31.145986Z","shell.execute_reply":"2023-08-05T00:07:31.144799Z"},"papermill":{"duration":1.343317,"end_time":"2023-08-05T00:07:31.148501","exception":false,"start_time":"2023-08-05T00:07:29.805184","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.model_selection import train_test_split\n","X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=0)"]},{"cell_type":"markdown","id":"8edfbb5a","metadata":{"papermill":{"duration":0.008929,"end_time":"2023-08-05T00:07:31.166893","exception":false,"start_time":"2023-08-05T00:07:31.157964","status":"completed"},"tags":[]},"source":["## Feature Scaling"]},{"cell_type":"code","execution_count":8,"id":"ba643853","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:31.187287Z","iopub.status.busy":"2023-08-05T00:07:31.186875Z","iopub.status.idle":"2023-08-05T00:07:32.475029Z","shell.execute_reply":"2023-08-05T00:07:32.474185Z"},"papermill":{"duration":1.301347,"end_time":"2023-08-05T00:07:32.477471","exception":false,"start_time":"2023-08-05T00:07:31.176124","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.preprocessing import StandardScaler\n","sc = 
StandardScaler()\n","X_train = sc.fit_transform(X_train)\n","X_test = sc.transform(X_test)"]},{"cell_type":"markdown","id":"14ac9a68","metadata":{"papermill":{"duration":0.009663,"end_time":"2023-08-05T00:07:32.496576","exception":false,"start_time":"2023-08-05T00:07:32.486913","status":"completed"},"tags":[]},"source":["## Training RandomForestClassifier"]},{"cell_type":"code","execution_count":9,"id":"1dfae9c0","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:32.517561Z","iopub.status.busy":"2023-08-05T00:07:32.517142Z","iopub.status.idle":"2023-08-05T00:15:35.089903Z","shell.execute_reply":"2023-08-05T00:15:35.088663Z"},"papermill":{"duration":482.594606,"end_time":"2023-08-05T00:15:35.101103","exception":false,"start_time":"2023-08-05T00:07:32.506497","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: 
\"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 
50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>RandomForestClassifier(criterion=&#x27;entropy&#x27;, n_estimators=10, random_state=0)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. 
<br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">RandomForestClassifier</label><div class=\"sk-toggleable__content\"><pre>RandomForestClassifier(criterion=&#x27;entropy&#x27;, n_estimators=10, random_state=0)</pre></div></div></div></div></div>"],"text/plain":["RandomForestClassifier(criterion='entropy', n_estimators=10, random_state=0)"]},"execution_count":9,"metadata":{},"output_type":"execute_result"}],"source":["from sklearn.ensemble import RandomForestClassifier\n","classifier = RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0)\n","classifier.fit(X_train, y_train)"]},{"cell_type":"markdown","id":"a6207e57","metadata":{"papermill":{"duration":0.009341,"end_time":"2023-08-05T00:15:35.120003","exception":false,"start_time":"2023-08-05T00:15:35.110662","status":"completed"},"tags":[]},"source":["## Predicting Results"]},{"cell_type":"code","execution_count":10,"id":"a963263d","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:35.140981Z","iopub.status.busy":"2023-08-05T00:15:35.140561Z","iopub.status.idle":"2023-08-05T00:15:37.22477Z","shell.execute_reply":"2023-08-05T00:15:37.223543Z"},"papermill":{"duration":2.097825,"end_time":"2023-08-05T00:15:37.227329","exception":false,"start_time":"2023-08-05T00:15:35.129504","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[ 4 4]\n"," [10 10]\n"," [ 8 8]\n"," ...\n"," [ 7 7]\n"," [ 8 8]\n"," [ 3 3]]\n"]}],"source":["y_pred = classifier.predict(X_test)\n","print(np.concatenate((y_pred.reshape(len(y_pred),1), 
y_test.reshape(len(y_test),1)),1))"]},{"cell_type":"markdown","id":"7846df5c","metadata":{"papermill":{"duration":0.009748,"end_time":"2023-08-05T00:15:37.247066","exception":false,"start_time":"2023-08-05T00:15:37.237318","status":"completed"},"tags":[]},"source":["## Calculating Accuracy"]},{"cell_type":"code","execution_count":11,"id":"ed6e3e8a","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:37.268284Z","iopub.status.busy":"2023-08-05T00:15:37.267859Z","iopub.status.idle":"2023-08-05T00:15:37.436445Z","shell.execute_reply":"2023-08-05T00:15:37.435065Z"},"papermill":{"duration":0.182461,"end_time":"2023-08-05T00:15:37.439113","exception":false,"start_time":"2023-08-05T00:15:37.256652","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[ 37634 0 0 0 0 0 0 0 0 0\n"," 8 0 0]\n"," [ 0 23179 0 23 0 0 0 0 0 0\n"," 81 0 0]\n"," [ 0 0 32839 0 0 0 0 0 0 0\n"," 21 0 0]\n"," [ 0 14 0 20904 0 0 0 0 0 0\n"," 43 0 0]\n"," [ 0 0 0 0 47928 0 0 0 0 14\n"," 6 0 0]\n"," [ 0 0 0 0 0 38287 0 0 3 0\n"," 17 0 0]\n"," [ 0 0 1 0 0 0 8591 2 0 0\n"," 5 0 0]\n"," [ 0 0 0 0 0 0 0 19492 0 0\n"," 8 1 0]\n"," [ 0 0 0 0 0 0 0 0 36812 2\n"," 32 0 0]\n"," [ 0 0 0 0 22 0 0 0 4 38082\n"," 57 0 0]\n"," [ 28 61 12 57 25 32 8 24 31 22\n"," 185589 24 20]\n"," [ 0 0 0 0 0 0 0 0 0 0\n"," 41 35025 0]\n"," [ 1 0 0 0 0 0 0 0 0 0\n"," 41 0 47659]]\n"]},{"data":{"text/plain":["0.9986190931754223"]},"execution_count":11,"metadata":{},"output_type":"execute_result"}],"source":["from sklearn.metrics import confusion_matrix, accuracy_score\n","cm = confusion_matrix(y_test, y_pred)\n","print(cm)\n","accuracy_score(y_test, 
y_pred)"]},{"cell_type":"code","execution_count":12,"id":"e609b003","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:37.462377Z","iopub.status.busy":"2023-08-05T00:15:37.461702Z","iopub.status.idle":"2023-08-05T00:15:39.654083Z","shell.execute_reply":"2023-08-05T00:15:39.652958Z"},"papermill":{"duration":2.206834,"end_time":"2023-08-05T00:15:39.656344","exception":false,"start_time":"2023-08-05T00:15:37.44951","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":[" <script type=\"text/javascript\">\n"," window.PlotlyConfig = {MathJaxConfig: 'local'};\n"," if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n"," if (typeof require !== 'undefined') {\n"," require.undef(\"plotly\");\n"," requirejs.config({\n"," paths: {\n"," 'plotly': ['https://cdn.plot.ly/plotly-2.24.1.min']\n"," }\n"," });\n"," require(['plotly'], function(Plotly) {\n"," window._Plotly = Plotly;\n"," });\n"," }\n"," </script>\n"," "]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["<div> <div id=\"be7930db-6c4e-497b-b832-416da626298c\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"be7930db-6c4e-497b-b832-416da626298c\")) { Plotly.newPlot( \"be7930db-6c4e-497b-b832-416da626298c\", 
[{\"coloraxis\":\"coloraxis\",\"name\":\"0\",\"texttemplate\":\"%{z}\",\"z\":[[37634,0,0,0,0,0,0,0,0,0,8,0,0],[0,23179,0,23,0,0,0,0,0,0,81,0,0],[0,0,32839,0,0,0,0,0,0,0,21,0,0],[0,14,0,20904,0,0,0,0,0,0,43,0,0],[0,0,0,0,47928,0,0,0,0,14,6,0,0],[0,0,0,0,0,38287,0,0,3,0,17,0,0],[0,0,1,0,0,0,8591,2,0,0,5,0,0],[0,0,0,0,0,0,0,19492,0,0,8,1,0],[0,0,0,0,0,0,0,0,36812,2,32,0,0],[0,0,0,0,22,0,0,0,4,38082,57,0,0],[28,61,12,57,25,32,8,24,31,22,185589,24,20],[0,0,0,0,0,0,0,0,0,0,41,35025,0],[1,0,0,0,0,0,0,0,0,0,41,0,47659]],\"type\":\"heatmap\",\"xaxis\":\"x\",\"yaxis\":\"y\",\"hovertemplate\":\"x: %{x}\\u003cbr\\u003ey: %{y}\\u003cbr\\u003ecolor: %{z}\\u003cextra\\u003e\\u003c\\u002fextra\\u003e\"}], {\"template\":{\"data\":{\"histogram2dcontour\":[{\"type\":\"histogram2dcontour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"choropleth\":[{\"type\":\"choropleth\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"histogram2d\":[{\"type\":\"histogram2d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmap\":[{\"type\":\"heatmap\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"]
,[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmapgl\":[{\"type\":\"heatmapgl\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"contourcarpet\":[{\"type\":\"contourcarpet\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"contour\":[{\"type\":\"contour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"surface\":[{\"type\":\"surface\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"mesh3d\":[{\"type\":\"mesh3d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"scatter\":[{\"fillpattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2},\"type\":\"scatter\"}],\"parcoords\":[{\"type\":\"parcoords\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolargl\":[{\"type\":\"scatterpolargl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"bar\":[{\"error_x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}
],\"scattergeo\":[{\"type\":\"scattergeo\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolar\":[{\"type\":\"scatterpolar\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"histogram\":[{\"marker\":{\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"histogram\"}],\"scattergl\":[{\"type\":\"scattergl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatter3d\":[{\"type\":\"scatter3d\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattermapbox\":[{\"type\":\"scattermapbox\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterternary\":[{\"type\":\"scatterternary\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattercarpet\":[{\"type\":\"scattercarpet\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"table\":[{\"cells\":{\"fill\":{\"color\":\"#EBF0F8\"},\"line\":{\"color\":\"white\"}},\"header\":{\"fill\":{\"color\":\"#C8D4E3\"},\"line\":{\"color\":\"white\"}},\"type\":\"table\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"pie\":[{\"automargin\":true,\"type\":\"pie\"}]},\"layout\":{\"autotypenumbers\":\"strict\",\"colorway\":[\"#636efa\",\"#EF553B\",\"#00cc96\",\"#ab63fa\",\"#FFA15A\",\"#19d3f3\",\"#FF6692\",\"#B6E880\",\"#FF97FF\",\"#FECB52\"],\"font\":{\"color\":\"#2a3f5f\"},\"hovermode\":\"closest\",\"hoverlabel\":{\"align\":\"left\"},\"paper_bgcolor\":\"white\",\"plot_bgcolor\":\"#E5EC
F6\",\"polar\":{\"bgcolor\":\"#E5ECF6\",\"angularaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"radialaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"ternary\":{\"bgcolor\":\"#E5ECF6\",\"aaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"baxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"caxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"coloraxis\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"colorscale\":{\"sequential\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"sequentialminus\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"diverging\":[[0,\"#8e0152\"],[0.1,\"#c51b7d\"],[0.2,\"#de77ae\"],[0.3,\"#f1b6da\"],[0.4,\"#fde0ef\"],[0.5,\"#f7f7f7\"],[0.6,\"#e6f5d0\"],[0.7,\"#b8e186\"],[0.8,\"#7fbc41\"],[0.9,\"#4d9221\"],[1,\"#276419\"]]},\"xaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"yaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"scene\":{\"xaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"yaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecol
or\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"zaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2}},\"shapedefaults\":{\"line\":{\"color\":\"#2a3f5f\"}},\"annotationdefaults\":{\"arrowcolor\":\"#2a3f5f\",\"arrowhead\":0,\"arrowwidth\":1},\"geo\":{\"bgcolor\":\"white\",\"landcolor\":\"#E5ECF6\",\"subunitcolor\":\"white\",\"showland\":true,\"showlakes\":true,\"lakecolor\":\"white\"},\"title\":{\"x\":0.05},\"mapbox\":{\"style\":\"light\"}}},\"xaxis\":{\"anchor\":\"y\",\"domain\":[0.0,1.0]},\"yaxis\":{\"anchor\":\"x\",\"domain\":[0.0,1.0],\"autorange\":\"reversed\"},\"coloraxis\":{\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]},\"margin\":{\"t\":60}}, {\"responsive\": true} ).then(function(){\n"," \n","var gd = document.getElementById('be7930db-6c4e-497b-b832-416da626298c');\n","var x = new MutationObserver(function (mutations, observer) {{\n"," var display = window.getComputedStyle(gd).display;\n"," if (!display || display === 'none') {{\n"," console.log([gd, 'removed!']);\n"," Plotly.purge(gd);\n"," observer.disconnect();\n"," }}\n","}});\n","\n","// Listen for the removal of the full notebook cells\n","var notebookContainer = gd.closest('#notebook-container');\n","if (notebookContainer) {{\n"," x.observe(notebookContainer, {childList: true});\n","}}\n","\n","// Listen for the clearing of the current output cell\n","var outputEl = gd.closest('.output');\n","if (outputEl) {{\n"," x.observe(outputEl, {childList: true});\n","}}\n","\n"," }) }; }); </script> </div>"]},"metadata":{},"output_type":"display_data"}],"source":["# import 
matplotlib.pyplot as plt\n","# from sklearn.metrics import ConfusionMatrixDisplay\n","# disp = ConfusionMatrixDisplay(confusion_matrix=cm,\n","# display_labels=classifier.classes_)\n","# disp.plot()\n","# plt.show()\n","import plotly.express as px\n","fig = px.imshow(cm, text_auto=True, aspect=\"auto\")\n","fig.show()"]},{"cell_type":"markdown","id":"6b68fad1","metadata":{"papermill":{"duration":0.010207,"end_time":"2023-08-05T00:15:39.677156","exception":false,"start_time":"2023-08-05T00:15:39.666949","status":"completed"},"tags":[]},"source":["## Method 2: Using KNN"]},{"cell_type":"code","execution_count":13,"id":"219416d1","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:39.699914Z","iopub.status.busy":"2023-08-05T00:15:39.699066Z","iopub.status.idle":"2023-08-05T00:15:40.015603Z","shell.execute_reply":"2023-08-05T00:15:40.014559Z"},"papermill":{"duration":0.330691,"end_time":"2023-08-05T00:15:40.0181","exception":false,"start_time":"2023-08-05T00:15:39.687409","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<style>#sk-container-id-2 {color: black;background-color: white;}#sk-container-id-2 pre{padding: 0;}#sk-container-id-2 div.sk-toggleable {background-color: white;}#sk-container-id-2 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-2 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-2 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-2 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-2 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-2 
input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-2 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-2 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-2 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-2 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-2 div.sk-item {position: relative;z-index: 1;}#sk-container-id-2 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-2 div.sk-item::before, #sk-container-id-2 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-2 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: 
relative;background-color: white;}#sk-container-id-2 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-2 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-2 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-2 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-2 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-2 div.sk-label-container {text-align: center;}#sk-container-id-2 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-2 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>KNeighborsClassifier()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. 
<br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" checked><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">KNeighborsClassifier</label><div class=\"sk-toggleable__content\"><pre>KNeighborsClassifier()</pre></div></div></div></div></div>"],"text/plain":["KNeighborsClassifier()"]},"execution_count":13,"metadata":{},"output_type":"execute_result"}],"source":["from sklearn.neighbors import KNeighborsClassifier\n","knn_clf = KNeighborsClassifier()\n","knn_clf.fit(X_train,y_train)"]},{"cell_type":"code","execution_count":14,"id":"fe6052b4","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:40.041026Z","iopub.status.busy":"2023-08-05T00:15:40.040612Z","iopub.status.idle":"2023-08-05T01:09:53.645091Z","shell.execute_reply":"2023-08-05T01:09:53.643871Z"},"papermill":{"duration":3253.619328,"end_time":"2023-08-05T01:09:53.648066","exception":false,"start_time":"2023-08-05T00:15:40.028738","status":"completed"},"tags":[]},"outputs":[],"source":["y_pred=knn_clf.predict(X_test)"]},{"cell_type":"code","execution_count":15,"id":"80d0ea82","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:09:53.674186Z","iopub.status.busy":"2023-08-05T01:09:53.673791Z","iopub.status.idle":"2023-08-05T01:09:53.682771Z","shell.execute_reply":"2023-08-05T01:09:53.681553Z"},"papermill":{"duration":0.023497,"end_time":"2023-08-05T01:09:53.685168","exception":false,"start_time":"2023-08-05T01:09:53.661671","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[ 4 4]\n"," [10 10]\n"," [ 8 8]\n"," ...\n"," [ 7 7]\n"," [ 8 8]\n"," [ 3 3]]\n"]}],"source":["print(np.concatenate((y_pred.reshape(len(y_pred),1), 
y_test.reshape(len(y_test),1)),1))"]},{"cell_type":"markdown","id":"30642482","metadata":{"papermill":{"duration":0.01112,"end_time":"2023-08-05T01:09:53.711764","exception":false,"start_time":"2023-08-05T01:09:53.700644","status":"completed"},"tags":[]},"source":["## Method 2: Using ANN"]},{"cell_type":"code","execution_count":16,"id":"9fe82be2","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:09:53.739787Z","iopub.status.busy":"2023-08-05T01:09:53.738634Z","iopub.status.idle":"2023-08-05T01:09:55.132546Z","shell.execute_reply":"2023-08-05T01:09:55.131479Z"},"papermill":{"duration":1.410752,"end_time":"2023-08-05T01:09:55.135242","exception":false,"start_time":"2023-08-05T01:09:53.72449","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.model_selection import train_test_split\n","X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=0)"]},{"cell_type":"markdown","id":"571dcd4c","metadata":{"papermill":{"duration":0.010439,"end_time":"2023-08-05T01:09:55.156709","exception":false,"start_time":"2023-08-05T01:09:55.14627","status":"completed"},"tags":[]},"source":["## Oversampling the minority classes"]},{"cell_type":"code","execution_count":17,"id":"32271f2b","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:09:55.181016Z","iopub.status.busy":"2023-08-05T01:09:55.180057Z","iopub.status.idle":"2023-08-05T01:19:23.222784Z","shell.execute_reply":"2023-08-05T01:19:23.221544Z"},"papermill":{"duration":568.057647,"end_time":"2023-08-05T01:19:23.225424","exception":false,"start_time":"2023-08-05T01:09:55.167777","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["Class=4, n=741642 (7.692%)\n","Class=12, n=741642 (7.692%)\n","Class=10, n=741642 (7.692%)\n","Class=2, n=741642 (7.692%)\n","Class=9, n=741642 (7.692%)\n","Class=8, n=741642 (7.692%)\n","Class=0, n=741642 (7.692%)\n","Class=5, n=741642 (7.692%)\n","Class=1, n=741642 (7.692%)\n","Class=11, 
n=741642 (7.692%)\n","Class=6, n=741642 (7.692%)\n","Class=3, n=741642 (7.692%)\n","Class=7, n=741642 (7.692%)\n"]},{"data":{"image/png":"","text/plain":["<Figure size 640x480 with 1 Axes>"]},"metadata":{},"output_type":"display_data"}],"source":["from imblearn.over_sampling import SMOTE\n","oversample = SMOTE()\n","y_train = le.fit_transform(y_train)\n","X_train, y_train = oversample.fit_resample(X_train, y_train)\n","# summarize distribution\n","counter = Counter(y_train)\n","for k,v in counter.items():\n"," per = v / len(y_train) * 100\n"," print('Class=%d, n=%d (%.3f%%)' % (k, v, per))\n","# plot the distribution\n","pyplot.bar(counter.keys(), counter.values())\n","pyplot.show()"]},{"cell_type":"code","execution_count":18,"id":"aa3846f3","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:23.252818Z","iopub.status.busy":"2023-08-05T01:19:23.251956Z","iopub.status.idle":"2023-08-05T01:19:23.740771Z","shell.execute_reply":"2023-08-05T01:19:23.73973Z"},"papermill":{"duration":0.505699,"end_time":"2023-08-05T01:19:23.743348","exception":false,"start_time":"2023-08-05T01:19:23.237649","status":"completed"},"tags":[]},"outputs":[],"source":["from tensorflow.keras.utils import to_categorical\n","y_train = to_categorical(y_train)"]},{"cell_type":"code","execution_count":19,"id":"0a0017d2","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:23.767819Z","iopub.status.busy":"2023-08-05T01:19:23.767429Z","iopub.status.idle":"2023-08-05T01:19:29.065742Z","shell.execute_reply":"2023-08-05T01:19:29.064536Z"},"papermill":{"duration":5.313708,"end_time":"2023-08-05T01:19:29.068475","exception":false,"start_time":"2023-08-05T01:19:23.754767","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.preprocessing import StandardScaler\n","sc = StandardScaler()\n","X_train = sc.fit_transform(X_train)\n","X_test = 
sc.transform(X_test)"]},{"cell_type":"code","execution_count":20,"id":"c344eb49","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:29.092749Z","iopub.status.busy":"2023-08-05T01:19:29.092331Z","iopub.status.idle":"2023-08-05T01:19:29.209741Z","shell.execute_reply":"2023-08-05T01:19:29.20847Z"},"papermill":{"duration":0.132568,"end_time":"2023-08-05T01:19:29.21231","exception":false,"start_time":"2023-08-05T01:19:29.079742","status":"completed"},"tags":[]},"outputs":[],"source":["ann = tf.keras.models.Sequential()\n","ann.add(tf.keras.layers.Dense(units=26, activation='tanh'))\n","ann.add(tf.keras.layers.Dense(units=52, activation='tanh'))\n","ann.add(tf.keras.layers.Dense(units=52, activation='tanh'))\n","ann.add(tf.keras.layers.Dense(units=13, activation='softmax'))"]},{"cell_type":"code","execution_count":21,"id":"c1441ca6","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:29.236225Z","iopub.status.busy":"2023-08-05T01:19:29.23586Z","iopub.status.idle":"2023-08-05T01:19:29.268014Z","shell.execute_reply":"2023-08-05T01:19:29.266637Z"},"papermill":{"duration":0.046985,"end_time":"2023-08-05T01:19:29.270549","exception":false,"start_time":"2023-08-05T01:19:29.223564","status":"completed"},"tags":[]},"outputs":[],"source":["ann.compile(optimizer='adam' , loss='CategoricalCrossentropy' , metrics=['accuracy'])"]},{"cell_type":"code","execution_count":22,"id":"82e960ec","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:29.294423Z","iopub.status.busy":"2023-08-05T01:19:29.294016Z","iopub.status.idle":"2023-08-05T01:43:40.990731Z","shell.execute_reply":"2023-08-05T01:43:40.989437Z"},"papermill":{"duration":1451.711594,"end_time":"2023-08-05T01:43:40.99336","exception":false,"start_time":"2023-08-05T01:19:29.281766","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["Epoch 1/10\n","75324/75324 [==============================] - 145s 2ms/step - loss: 0.1724 - accuracy: 0.9415\n","Epoch 
2/10\n","75324/75324 [==============================] - 142s 2ms/step - loss: 0.0763 - accuracy: 0.9731\n","Epoch 3/10\n","75324/75324 [==============================] - 144s 2ms/step - loss: 0.0601 - accuracy: 0.9788\n","Epoch 4/10\n","75324/75324 [==============================] - 151s 2ms/step - loss: 0.0527 - accuracy: 0.9815\n","Epoch 5/10\n","75324/75324 [==============================] - 142s 2ms/step - loss: 0.0480 - accuracy: 0.9831\n","Epoch 6/10\n","75324/75324 [==============================] - 143s 2ms/step - loss: 0.0452 - accuracy: 0.9840\n","Epoch 7/10\n","75324/75324 [==============================] - 146s 2ms/step - loss: 0.0432 - accuracy: 0.9848\n","Epoch 8/10\n","75324/75324 [==============================] - 141s 2ms/step - loss: 0.0417 - accuracy: 0.9854\n","Epoch 9/10\n","75324/75324 [==============================] - 148s 2ms/step - loss: 0.0405 - accuracy: 0.9857\n","Epoch 10/10\n","75324/75324 [==============================] - 146s 2ms/step - loss: 0.0397 - accuracy: 0.9860\n"]}],"source":["history = ann.fit(X_train, y_train, batch_size=128, epochs=10)"]},{"cell_type":"code","execution_count":23,"id":"c66f3d9a","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:43:44.865958Z","iopub.status.busy":"2023-08-05T01:43:44.86558Z","iopub.status.idle":"2023-08-05T01:44:26.055791Z","shell.execute_reply":"2023-08-05T01:44:26.054616Z"},"papermill":{"duration":43.125175,"end_time":"2023-08-05T01:44:26.058358","exception":false,"start_time":"2023-08-05T01:43:42.933183","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["17901/17901 [==============================] - 22s 1ms/step\n"]}],"source":["y_pred = 
ann.predict(X_test)"]},{"cell_type":"code","execution_count":24,"id":"ca3d6088","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:29.962501Z","iopub.status.busy":"2023-08-05T01:44:29.962091Z","iopub.status.idle":"2023-08-05T01:44:29.991499Z","shell.execute_reply":"2023-08-05T01:44:29.990294Z"},"papermill":{"duration":1.967301,"end_time":"2023-08-05T01:44:29.994142","exception":false,"start_time":"2023-08-05T01:44:28.026841","status":"completed"},"tags":[]},"outputs":[],"source":["result = np.argmax(y_pred, axis=-1)"]},{"cell_type":"code","execution_count":25,"id":"4211d197","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:33.86521Z","iopub.status.busy":"2023-08-05T01:44:33.864484Z","iopub.status.idle":"2023-08-05T01:44:33.869618Z","shell.execute_reply":"2023-08-05T01:44:33.868861Z"},"papermill":{"duration":1.942499,"end_time":"2023-08-05T01:44:33.871763","exception":false,"start_time":"2023-08-05T01:44:31.929264","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[ 4 10 8 ... 
7 8 3]\n"]}],"source":["print(result)"]},{"cell_type":"code","execution_count":26,"id":"e36e98e0","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:37.612404Z","iopub.status.busy":"2023-08-05T01:44:37.61165Z","iopub.status.idle":"2023-08-05T01:44:37.616428Z","shell.execute_reply":"2023-08-05T01:44:37.615631Z"},"papermill":{"duration":1.837241,"end_time":"2023-08-05T01:44:37.618458","exception":false,"start_time":"2023-08-05T01:44:35.781217","status":"completed"},"tags":[]},"outputs":[],"source":["y_test_rs = np.argmax(y_test, axis=-1)"]},{"cell_type":"code","execution_count":27,"id":"49227522","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:41.477349Z","iopub.status.busy":"2023-08-05T01:44:41.476614Z","iopub.status.idle":"2023-08-05T01:44:41.481313Z","shell.execute_reply":"2023-08-05T01:44:41.48054Z"},"papermill":{"duration":1.939073,"end_time":"2023-08-05T01:44:41.484075","exception":false,"start_time":"2023-08-05T01:44:39.545002","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["9\n"]}],"source":["print(y_test_rs)"]},{"cell_type":"code","execution_count":28,"id":"72b1f910","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:45.446971Z","iopub.status.busy":"2023-08-05T01:44:45.446273Z","iopub.status.idle":"2023-08-05T01:44:45.569371Z","shell.execute_reply":"2023-08-05T01:44:45.568125Z"},"papermill":{"duration":2.161727,"end_time":"2023-08-05T01:44:45.571926","exception":false,"start_time":"2023-08-05T01:44:43.410199","status":"completed"},"tags":[]},"outputs":[],"source":["y_pred_enc = pd.get_dummies(result)\n","y_test_enc = 
pd.get_dummies(y_test)"]},{"cell_type":"code","execution_count":29,"id":"6b54f780","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:49.311088Z","iopub.status.busy":"2023-08-05T01:44:49.310688Z","iopub.status.idle":"2023-08-05T01:44:49.698079Z","shell.execute_reply":"2023-08-05T01:44:49.696602Z"},"papermill":{"duration":2.293735,"end_time":"2023-08-05T01:44:49.700483","exception":false,"start_time":"2023-08-05T01:44:47.406748","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["0.9624047680565351\n"]}],"source":["from sklearn.metrics import accuracy_score\n","print(accuracy_score(y_test_enc, y_pred_enc))"]}],"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.12"},"papermill":{"default_parameters":{},"duration":5890.466662,"end_time":"2023-08-05T01:44:54.111787","environment_variables":{},"exception":null,"input_path":"__notebook__.ipynb","output_path":"__notebook__.ipynb","parameters":{},"start_time":"2023-08-05T00:06:43.645125","version":"2.4.0"}},"nbformat":4,"nbformat_minor":5} No newline at end of file
Copy link
Collaborator

@coderpotter coderpotter Aug 6, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Line #4.    X = imputer.transform(X)

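Create a new variable for the imputed array (for example, `X_imputed = imputer.transform(X)`) instead of reassigning `X`. Once `X` is overwritten here, the original un-imputed `X` is lost forever.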


Reply via ReviewNB

@@ -0,0 +1 @@
{"cells":[{"source":"<a href=\"https://www.kaggle.com/code/manvinagdev/physical-activity-prediction?scriptVersionId=138941879\" target=\"_blank\"><img align=\"left\" alt=\"Kaggle\" title=\"Open in Kaggle\" src=\"https://kaggle.com/static/images/open-in-kaggle.svg\"></a>","metadata":{},"cell_type":"markdown"},{"cell_type":"markdown","id":"fea86c76","metadata":{"papermill":{"duration":0.008893,"end_time":"2023-08-05T00:06:53.536089","exception":false,"start_time":"2023-08-05T00:06:53.527196","status":"completed"},"tags":[]},"source":["## Importing libraries"]},{"cell_type":"code","execution_count":1,"id":"89d8bcdb","metadata":{"_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","execution":{"iopub.execute_input":"2023-08-05T00:06:53.554723Z","iopub.status.busy":"2023-08-05T00:06:53.554321Z","iopub.status.idle":"2023-08-05T00:07:02.412402Z","shell.execute_reply":"2023-08-05T00:07:02.410962Z"},"papermill":{"duration":8.870272,"end_time":"2023-08-05T00:07:02.414899","exception":false,"start_time":"2023-08-05T00:06:53.544627","status":"completed"},"tags":[]},"outputs":[{"name":"stderr","output_type":"stream","text":["/opt/conda/lib/python3.10/site-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.23.5\n"," warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n","/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/__init__.py:98: UserWarning: unable to load libtensorflow_io_plugins.so: unable to open file: libtensorflow_io_plugins.so, from paths: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so']\n","caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so: undefined symbol: _ZN3tsl6StatusC1EN10tensorflow5error4CodeESt17basic_string_viewIcSt11char_traitsIcEENS_14SourceLocationE']\n"," 
warnings.warn(f\"unable to load libtensorflow_io_plugins.so: {e}\")\n","/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/__init__.py:104: UserWarning: file system plugins are not loaded: unable to open file: libtensorflow_io.so, from paths: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so']\n","caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so: undefined symbol: _ZTVN10tensorflow13GcsFileSystemE']\n"," warnings.warn(f\"file system plugins are not loaded: {e}\")\n"]},{"name":"stdout","output_type":"stream","text":["/kaggle/input/fisical-activity-dataset/dataset2.csv\n"]}],"source":["import numpy as np\n","import pandas as pd\n","import tensorflow as tf\n","\n","import os\n","for dirname, _, filenames in os.walk('/kaggle/input'):\n"," for filename in filenames:\n"," print(os.path.join(dirname, filename))"]},{"cell_type":"markdown","id":"e1dee26f","metadata":{"papermill":{"duration":0.008233,"end_time":"2023-08-05T00:07:02.431643","exception":false,"start_time":"2023-08-05T00:07:02.42341","status":"completed"},"tags":[]},"source":["## Loading the dataset"]},{"cell_type":"code","execution_count":2,"id":"ce6fe559","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:02.452779Z","iopub.status.busy":"2023-08-05T00:07:02.451213Z","iopub.status.idle":"2023-08-05T00:07:24.443285Z","shell.execute_reply":"2023-08-05T00:07:24.442116Z"},"papermill":{"duration":22.004325,"end_time":"2023-08-05T00:07:24.445861","exception":false,"start_time":"2023-08-05T00:07:02.441536","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: 
right;\">\n"," <th></th>\n"," <th>activityID</th>\n"," <th>heart_rate</th>\n"," <th>hand temperature (°C)</th>\n"," <th>hand acceleration X ±16g</th>\n"," <th>hand acceleration Y ±16g</th>\n"," <th>hand acceleration Z ±16g</th>\n"," <th>hand gyroscope X</th>\n"," <th>hand gyroscope Y</th>\n"," <th>hand gyroscope Z</th>\n"," <th>hand magnetometer X</th>\n"," <th>...</th>\n"," <th>ankle acceleration X ±16g</th>\n"," <th>ankle acceleration Y ±16g</th>\n"," <th>ankle acceleration Z ±16g</th>\n"," <th>ankle gyroscope X</th>\n"," <th>ankle gyroscope Y</th>\n"," <th>ankle gyroscope Z</th>\n"," <th>ankle magnetometer X</th>\n"," <th>ankle magnetometer Y</th>\n"," <th>ankle magnetometer Z</th>\n"," <th>PeopleId</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.37223</td>\n"," <td>8.60074</td>\n"," <td>3.51048</td>\n"," <td>-0.092217</td>\n"," <td>0.056812</td>\n"," <td>-0.015845</td>\n"," <td>14.6806</td>\n"," <td>...</td>\n"," <td>9.65918</td>\n"," <td>-1.65569</td>\n"," <td>-0.099797</td>\n"," <td>0.008300</td>\n"," <td>0.009250</td>\n"," <td>-0.017580</td>\n"," <td>-61.1888</td>\n"," <td>-38.9599</td>\n"," <td>-58.1438</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.18837</td>\n"," <td>8.56560</td>\n"," <td>3.66179</td>\n"," <td>-0.024413</td>\n"," <td>0.047759</td>\n"," <td>0.006474</td>\n"," <td>14.8991</td>\n"," <td>...</td>\n"," <td>9.69370</td>\n"," <td>-1.57902</td>\n"," <td>-0.215687</td>\n"," <td>-0.006577</td>\n"," <td>-0.004638</td>\n"," <td>0.000368</td>\n"," <td>-59.8479</td>\n"," <td>-38.8919</td>\n"," <td>-58.5253</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.37357</td>\n"," <td>8.60107</td>\n"," <td>3.54898</td>\n"," <td>-0.057976</td>\n"," <td>0.032574</td>\n"," 
<td>-0.006988</td>\n"," <td>14.2420</td>\n"," <td>...</td>\n"," <td>9.58944</td>\n"," <td>-1.73276</td>\n"," <td>0.092914</td>\n"," <td>0.003014</td>\n"," <td>0.000148</td>\n"," <td>0.022495</td>\n"," <td>-60.7361</td>\n"," <td>-39.4138</td>\n"," <td>-58.3999</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.07473</td>\n"," <td>8.52853</td>\n"," <td>3.66021</td>\n"," <td>-0.002352</td>\n"," <td>0.032810</td>\n"," <td>-0.003747</td>\n"," <td>14.8908</td>\n"," <td>...</td>\n"," <td>9.58814</td>\n"," <td>-1.77040</td>\n"," <td>0.054545</td>\n"," <td>0.003175</td>\n"," <td>-0.020301</td>\n"," <td>0.011275</td>\n"," <td>-60.4091</td>\n"," <td>-38.7635</td>\n"," <td>-58.3956</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.22936</td>\n"," <td>8.83122</td>\n"," <td>3.70000</td>\n"," <td>0.012269</td>\n"," <td>0.018305</td>\n"," <td>-0.053325</td>\n"," <td>15.5612</td>\n"," <td>...</td>\n"," <td>9.69771</td>\n"," <td>-1.65625</td>\n"," <td>-0.060809</td>\n"," <td>0.012698</td>\n"," <td>-0.014303</td>\n"," <td>-0.002823</td>\n"," <td>-61.5199</td>\n"," <td>-39.3879</td>\n"," <td>-58.2694</td>\n"," <td>1</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>5 rows × 33 columns</p>\n","</div>"],"text/plain":[" activityID heart_rate hand temperature (°C) \\\n","0 transient activities 104.0 30.0 \n","1 transient activities 104.0 30.0 \n","2 transient activities 104.0 30.0 \n","3 transient activities 104.0 30.0 \n","4 transient activities 104.0 30.0 \n","\n"," hand acceleration X ±16g hand acceleration Y ±16g \\\n","0 2.37223 8.60074 \n","1 2.18837 8.56560 \n","2 2.37357 8.60107 \n","3 2.07473 8.52853 \n","4 2.22936 8.83122 \n","\n"," hand acceleration Z ±16g hand gyroscope X hand gyroscope Y \\\n","0 3.51048 -0.092217 0.056812 \n","1 3.66179 -0.024413 0.047759 \n","2 3.54898 -0.057976 0.032574 
\n","3 3.66021 -0.002352 0.032810 \n","4 3.70000 0.012269 0.018305 \n","\n"," hand gyroscope Z hand magnetometer X ... ankle acceleration X ±16g \\\n","0 -0.015845 14.6806 ... 9.65918 \n","1 0.006474 14.8991 ... 9.69370 \n","2 -0.006988 14.2420 ... 9.58944 \n","3 -0.003747 14.8908 ... 9.58814 \n","4 -0.053325 15.5612 ... 9.69771 \n","\n"," ankle acceleration Y ±16g ankle acceleration Z ±16g ankle gyroscope X \\\n","0 -1.65569 -0.099797 0.008300 \n","1 -1.57902 -0.215687 -0.006577 \n","2 -1.73276 0.092914 0.003014 \n","3 -1.77040 0.054545 0.003175 \n","4 -1.65625 -0.060809 0.012698 \n","\n"," ankle gyroscope Y ankle gyroscope Z ankle magnetometer X \\\n","0 0.009250 -0.017580 -61.1888 \n","1 -0.004638 0.000368 -59.8479 \n","2 0.000148 0.022495 -60.7361 \n","3 -0.020301 0.011275 -60.4091 \n","4 -0.014303 -0.002823 -61.5199 \n","\n"," ankle magnetometer Y ankle magnetometer Z PeopleId \n","0 -38.9599 -58.1438 1 \n","1 -38.8919 -58.5253 1 \n","2 -39.4138 -58.3999 1 \n","3 -38.7635 -58.3956 1 \n","4 -39.3879 -58.2694 1 \n","\n","[5 rows x 33 columns]"]},"execution_count":2,"metadata":{},"output_type":"execute_result"}],"source":["dataset = pd.read_csv('/kaggle/input/fisical-activity-dataset/dataset2.csv')\n","dataset.head()"]},{"cell_type":"code","execution_count":3,"id":"4c7d3445","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:24.466165Z","iopub.status.busy":"2023-08-05T00:07:24.465496Z","iopub.status.idle":"2023-08-05T00:07:24.70062Z","shell.execute_reply":"2023-08-05T00:07:24.699179Z"},"papermill":{"duration":0.24804,"end_time":"2023-08-05T00:07:24.703053","exception":false,"start_time":"2023-08-05T00:07:24.455013","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[104. 30. 2.37223 ... -61.1888 -38.9599 -58.1438 ]\n"," [104. 30. 2.18837 ... -59.8479 -38.8919 -58.5253 ]\n"," [104. 30. 2.37357 ... -60.7361 -39.4138 -58.3999 ]\n"," ...\n"," [140. 30.8125 -9.42745 ... -38.5541 -16.0535 24.6936 ]\n"," [140. 
30.8125 -9.47246 ... -38.8064 -16.04 24.9763 ]\n"," [140. 30.8125 -9.66621 ... -38.6814 -15.9175 24.9766 ]]\n","['transient activities' 'transient activities' 'transient activities' ...\n"," 'transient activities' 'transient activities' 'transient activities']\n"]}],"source":["X = dataset.iloc[:, 1:-1].values\n","y = dataset.iloc[:, 0].values\n","print(X)\n","print(y)"]},{"cell_type":"markdown","id":"e73af995","metadata":{"papermill":{"duration":0.00873,"end_time":"2023-08-05T00:07:24.720797","exception":false,"start_time":"2023-08-05T00:07:24.712067","status":"completed"},"tags":[]},"source":["## Encoding categorical data"]},{"cell_type":"code","execution_count":4,"id":"b2055003","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:24.740481Z","iopub.status.busy":"2023-08-05T00:07:24.740069Z","iopub.status.idle":"2023-08-05T00:07:26.930985Z","shell.execute_reply":"2023-08-05T00:07:26.929671Z"},"papermill":{"duration":2.203646,"end_time":"2023-08-05T00:07:26.933552","exception":false,"start_time":"2023-08-05T00:07:24.729906","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["['Nordic walking', 'ascending stairs', 'cycling', 'descending stairs', 'ironing', 'lying', 'rope jumping', 'running', 'sitting', 'standing', 'transient activities', 'vacuum cleaning', 'walking']\n","Counter({10: 927575, 12: 238761, 4: 238690, 5: 192523, 9: 189931, 0: 188107, 8: 185188, 11: 175353, 2: 164600, 1: 117216, 3: 104944, 7: 98199, 6: 42969})\n","Class=10, n=927575 (32.387%)\n","Class=5, n=192523 (6.722%)\n","Class=8, n=185188 (6.466%)\n","Class=9, n=189931 (6.632%)\n","Class=4, n=238690 (8.334%)\n","Class=11, n=175353 (6.123%)\n","Class=1, n=117216 (4.093%)\n","Class=3, n=104944 (3.664%)\n","Class=12, n=238761 (8.336%)\n","Class=0, n=188107 (6.568%)\n","Class=2, n=164600 (5.747%)\n","Class=7, n=98199 (3.429%)\n","Class=6, n=42969 (1.500%)\n"]},{"data":{"image/png":"","text/plain":["<Figure size 640x480 with 1 
Axes>"]},"metadata":{},"output_type":"display_data"}],"source":["from collections import Counter\n","from sklearn.preprocessing import LabelEncoder\n","from matplotlib import pyplot\n","\n","le = LabelEncoder()\n","y_temp = le.fit_transform(y)\n","print(list(le.classes_))\n","counter = Counter(y_temp)\n","\n","print(counter)\n","for k,v in counter.items():\n"," per = v / len(y_temp) * 100\n"," print('Class=%d, n=%d (%.3f%%)' % (k, v, per))\n","\n","# plot the distribution\n","pyplot.bar(counter.keys(), counter.values())\n","pyplot.show()"]},{"cell_type":"markdown","id":"2d62ec16","metadata":{"papermill":{"duration":0.009288,"end_time":"2023-08-05T00:07:26.952484","exception":false,"start_time":"2023-08-05T00:07:26.943196","status":"completed"},"tags":[]},"source":["## Method 1: Using Random Forest Classifier "]},{"cell_type":"markdown","id":"ec9d9fce","metadata":{"papermill":{"duration":0.009112,"end_time":"2023-08-05T00:07:26.971127","exception":false,"start_time":"2023-08-05T00:07:26.962015","status":"completed"},"tags":[]},"source":["## Handling missing values"]},{"cell_type":"code","execution_count":5,"id":"68d72f52","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:26.992642Z","iopub.status.busy":"2023-08-05T00:07:26.992214Z","iopub.status.idle":"2023-08-05T00:07:29.167243Z","shell.execute_reply":"2023-08-05T00:07:29.166002Z"},"papermill":{"duration":2.188981,"end_time":"2023-08-05T00:07:29.169898","exception":false,"start_time":"2023-08-05T00:07:26.980917","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.impute import SimpleImputer\n","imputer = SimpleImputer(missing_values=np.nan, strategy='mean')\n","imputer.fit(X)\n","X = imputer.transform(X)"]},{"cell_type":"markdown","id":"b99c6d79","metadata":{"papermill":{"duration":0.009124,"end_time":"2023-08-05T00:07:29.188693","exception":false,"start_time":"2023-08-05T00:07:29.179569","status":"completed"},"tags":[]},"source":["## 
Encoding"]},{"cell_type":"code","execution_count":6,"id":"57a332ea","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:29.209213Z","iopub.status.busy":"2023-08-05T00:07:29.20881Z","iopub.status.idle":"2023-08-05T00:07:29.792137Z","shell.execute_reply":"2023-08-05T00:07:29.791044Z"},"papermill":{"duration":0.596644,"end_time":"2023-08-05T00:07:29.794779","exception":false,"start_time":"2023-08-05T00:07:29.198135","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.preprocessing import LabelEncoder\n","le = LabelEncoder()\n","y = le.fit_transform(y)"]},{"cell_type":"code","execution_count":7,"id":"974612a4","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:29.81627Z","iopub.status.busy":"2023-08-05T00:07:29.815829Z","iopub.status.idle":"2023-08-05T00:07:31.145986Z","shell.execute_reply":"2023-08-05T00:07:31.144799Z"},"papermill":{"duration":1.343317,"end_time":"2023-08-05T00:07:31.148501","exception":false,"start_time":"2023-08-05T00:07:29.805184","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.model_selection import train_test_split\n","X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=0)"]},{"cell_type":"markdown","id":"8edfbb5a","metadata":{"papermill":{"duration":0.008929,"end_time":"2023-08-05T00:07:31.166893","exception":false,"start_time":"2023-08-05T00:07:31.157964","status":"completed"},"tags":[]},"source":["## Feature Scaling"]},{"cell_type":"code","execution_count":8,"id":"ba643853","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:31.187287Z","iopub.status.busy":"2023-08-05T00:07:31.186875Z","iopub.status.idle":"2023-08-05T00:07:32.475029Z","shell.execute_reply":"2023-08-05T00:07:32.474185Z"},"papermill":{"duration":1.301347,"end_time":"2023-08-05T00:07:32.477471","exception":false,"start_time":"2023-08-05T00:07:31.176124","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.preprocessing import StandardScaler\n","sc = 
StandardScaler()\n","X_train = sc.fit_transform(X_train)\n","X_test = sc.transform(X_test)"]},{"cell_type":"markdown","id":"14ac9a68","metadata":{"papermill":{"duration":0.009663,"end_time":"2023-08-05T00:07:32.496576","exception":false,"start_time":"2023-08-05T00:07:32.486913","status":"completed"},"tags":[]},"source":["## Training RandomForestClassifier"]},{"cell_type":"code","execution_count":9,"id":"1dfae9c0","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:32.517561Z","iopub.status.busy":"2023-08-05T00:07:32.517142Z","iopub.status.idle":"2023-08-05T00:15:35.089903Z","shell.execute_reply":"2023-08-05T00:15:35.088663Z"},"papermill":{"duration":482.594606,"end_time":"2023-08-05T00:15:35.101103","exception":false,"start_time":"2023-08-05T00:07:32.506497","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: 
\"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 
50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>RandomForestClassifier(criterion=&#x27;entropy&#x27;, n_estimators=10, random_state=0)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. 
<br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">RandomForestClassifier</label><div class=\"sk-toggleable__content\"><pre>RandomForestClassifier(criterion=&#x27;entropy&#x27;, n_estimators=10, random_state=0)</pre></div></div></div></div></div>"],"text/plain":["RandomForestClassifier(criterion='entropy', n_estimators=10, random_state=0)"]},"execution_count":9,"metadata":{},"output_type":"execute_result"}],"source":["from sklearn.ensemble import RandomForestClassifier\n","classifier = RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0)\n","classifier.fit(X_train, y_train)"]},{"cell_type":"markdown","id":"a6207e57","metadata":{"papermill":{"duration":0.009341,"end_time":"2023-08-05T00:15:35.120003","exception":false,"start_time":"2023-08-05T00:15:35.110662","status":"completed"},"tags":[]},"source":["## Predicting Results"]},{"cell_type":"code","execution_count":10,"id":"a963263d","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:35.140981Z","iopub.status.busy":"2023-08-05T00:15:35.140561Z","iopub.status.idle":"2023-08-05T00:15:37.22477Z","shell.execute_reply":"2023-08-05T00:15:37.223543Z"},"papermill":{"duration":2.097825,"end_time":"2023-08-05T00:15:37.227329","exception":false,"start_time":"2023-08-05T00:15:35.129504","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[ 4 4]\n"," [10 10]\n"," [ 8 8]\n"," ...\n"," [ 7 7]\n"," [ 8 8]\n"," [ 3 3]]\n"]}],"source":["y_pred = classifier.predict(X_test)\n","print(np.concatenate((y_pred.reshape(len(y_pred),1), 
y_test.reshape(len(y_test),1)),1))"]},{"cell_type":"markdown","id":"7846df5c","metadata":{"papermill":{"duration":0.009748,"end_time":"2023-08-05T00:15:37.247066","exception":false,"start_time":"2023-08-05T00:15:37.237318","status":"completed"},"tags":[]},"source":["## Calculating Accuracy"]},{"cell_type":"code","execution_count":11,"id":"ed6e3e8a","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:37.268284Z","iopub.status.busy":"2023-08-05T00:15:37.267859Z","iopub.status.idle":"2023-08-05T00:15:37.436445Z","shell.execute_reply":"2023-08-05T00:15:37.435065Z"},"papermill":{"duration":0.182461,"end_time":"2023-08-05T00:15:37.439113","exception":false,"start_time":"2023-08-05T00:15:37.256652","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[ 37634 0 0 0 0 0 0 0 0 0\n"," 8 0 0]\n"," [ 0 23179 0 23 0 0 0 0 0 0\n"," 81 0 0]\n"," [ 0 0 32839 0 0 0 0 0 0 0\n"," 21 0 0]\n"," [ 0 14 0 20904 0 0 0 0 0 0\n"," 43 0 0]\n"," [ 0 0 0 0 47928 0 0 0 0 14\n"," 6 0 0]\n"," [ 0 0 0 0 0 38287 0 0 3 0\n"," 17 0 0]\n"," [ 0 0 1 0 0 0 8591 2 0 0\n"," 5 0 0]\n"," [ 0 0 0 0 0 0 0 19492 0 0\n"," 8 1 0]\n"," [ 0 0 0 0 0 0 0 0 36812 2\n"," 32 0 0]\n"," [ 0 0 0 0 22 0 0 0 4 38082\n"," 57 0 0]\n"," [ 28 61 12 57 25 32 8 24 31 22\n"," 185589 24 20]\n"," [ 0 0 0 0 0 0 0 0 0 0\n"," 41 35025 0]\n"," [ 1 0 0 0 0 0 0 0 0 0\n"," 41 0 47659]]\n"]},{"data":{"text/plain":["0.9986190931754223"]},"execution_count":11,"metadata":{},"output_type":"execute_result"}],"source":["from sklearn.metrics import confusion_matrix, accuracy_score\n","cm = confusion_matrix(y_test, y_pred)\n","print(cm)\n","accuracy_score(y_test, 
y_pred)"]},{"cell_type":"code","execution_count":12,"id":"e609b003","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:37.462377Z","iopub.status.busy":"2023-08-05T00:15:37.461702Z","iopub.status.idle":"2023-08-05T00:15:39.654083Z","shell.execute_reply":"2023-08-05T00:15:39.652958Z"},"papermill":{"duration":2.206834,"end_time":"2023-08-05T00:15:39.656344","exception":false,"start_time":"2023-08-05T00:15:37.44951","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":[" <script type=\"text/javascript\">\n"," window.PlotlyConfig = {MathJaxConfig: 'local'};\n"," if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n"," if (typeof require !== 'undefined') {\n"," require.undef(\"plotly\");\n"," requirejs.config({\n"," paths: {\n"," 'plotly': ['https://cdn.plot.ly/plotly-2.24.1.min']\n"," }\n"," });\n"," require(['plotly'], function(Plotly) {\n"," window._Plotly = Plotly;\n"," });\n"," }\n"," </script>\n"," "]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["<div> <div id=\"be7930db-6c4e-497b-b832-416da626298c\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"be7930db-6c4e-497b-b832-416da626298c\")) { Plotly.newPlot( \"be7930db-6c4e-497b-b832-416da626298c\", 
[{\"coloraxis\":\"coloraxis\",\"name\":\"0\",\"texttemplate\":\"%{z}\",\"z\":[[37634,0,0,0,0,0,0,0,0,0,8,0,0],[0,23179,0,23,0,0,0,0,0,0,81,0,0],[0,0,32839,0,0,0,0,0,0,0,21,0,0],[0,14,0,20904,0,0,0,0,0,0,43,0,0],[0,0,0,0,47928,0,0,0,0,14,6,0,0],[0,0,0,0,0,38287,0,0,3,0,17,0,0],[0,0,1,0,0,0,8591,2,0,0,5,0,0],[0,0,0,0,0,0,0,19492,0,0,8,1,0],[0,0,0,0,0,0,0,0,36812,2,32,0,0],[0,0,0,0,22,0,0,0,4,38082,57,0,0],[28,61,12,57,25,32,8,24,31,22,185589,24,20],[0,0,0,0,0,0,0,0,0,0,41,35025,0],[1,0,0,0,0,0,0,0,0,0,41,0,47659]],\"type\":\"heatmap\",\"xaxis\":\"x\",\"yaxis\":\"y\",\"hovertemplate\":\"x: %{x}\\u003cbr\\u003ey: %{y}\\u003cbr\\u003ecolor: %{z}\\u003cextra\\u003e\\u003c\\u002fextra\\u003e\"}], {\"template\":{\"data\":{\"histogram2dcontour\":[{\"type\":\"histogram2dcontour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"choropleth\":[{\"type\":\"choropleth\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"histogram2d\":[{\"type\":\"histogram2d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmap\":[{\"type\":\"heatmap\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"]
,[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmapgl\":[{\"type\":\"heatmapgl\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"contourcarpet\":[{\"type\":\"contourcarpet\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"contour\":[{\"type\":\"contour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"surface\":[{\"type\":\"surface\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"mesh3d\":[{\"type\":\"mesh3d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"scatter\":[{\"fillpattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2},\"type\":\"scatter\"}],\"parcoords\":[{\"type\":\"parcoords\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolargl\":[{\"type\":\"scatterpolargl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"bar\":[{\"error_x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}
],\"scattergeo\":[{\"type\":\"scattergeo\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolar\":[{\"type\":\"scatterpolar\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"histogram\":[{\"marker\":{\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"histogram\"}],\"scattergl\":[{\"type\":\"scattergl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatter3d\":[{\"type\":\"scatter3d\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattermapbox\":[{\"type\":\"scattermapbox\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterternary\":[{\"type\":\"scatterternary\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattercarpet\":[{\"type\":\"scattercarpet\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"table\":[{\"cells\":{\"fill\":{\"color\":\"#EBF0F8\"},\"line\":{\"color\":\"white\"}},\"header\":{\"fill\":{\"color\":\"#C8D4E3\"},\"line\":{\"color\":\"white\"}},\"type\":\"table\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"pie\":[{\"automargin\":true,\"type\":\"pie\"}]},\"layout\":{\"autotypenumbers\":\"strict\",\"colorway\":[\"#636efa\",\"#EF553B\",\"#00cc96\",\"#ab63fa\",\"#FFA15A\",\"#19d3f3\",\"#FF6692\",\"#B6E880\",\"#FF97FF\",\"#FECB52\"],\"font\":{\"color\":\"#2a3f5f\"},\"hovermode\":\"closest\",\"hoverlabel\":{\"align\":\"left\"},\"paper_bgcolor\":\"white\",\"plot_bgcolor\":\"#E5EC
F6\",\"polar\":{\"bgcolor\":\"#E5ECF6\",\"angularaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"radialaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"ternary\":{\"bgcolor\":\"#E5ECF6\",\"aaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"baxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"caxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"coloraxis\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"colorscale\":{\"sequential\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"sequentialminus\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"diverging\":[[0,\"#8e0152\"],[0.1,\"#c51b7d\"],[0.2,\"#de77ae\"],[0.3,\"#f1b6da\"],[0.4,\"#fde0ef\"],[0.5,\"#f7f7f7\"],[0.6,\"#e6f5d0\"],[0.7,\"#b8e186\"],[0.8,\"#7fbc41\"],[0.9,\"#4d9221\"],[1,\"#276419\"]]},\"xaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"yaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"scene\":{\"xaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"yaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecol
or\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"zaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2}},\"shapedefaults\":{\"line\":{\"color\":\"#2a3f5f\"}},\"annotationdefaults\":{\"arrowcolor\":\"#2a3f5f\",\"arrowhead\":0,\"arrowwidth\":1},\"geo\":{\"bgcolor\":\"white\",\"landcolor\":\"#E5ECF6\",\"subunitcolor\":\"white\",\"showland\":true,\"showlakes\":true,\"lakecolor\":\"white\"},\"title\":{\"x\":0.05},\"mapbox\":{\"style\":\"light\"}}},\"xaxis\":{\"anchor\":\"y\",\"domain\":[0.0,1.0]},\"yaxis\":{\"anchor\":\"x\",\"domain\":[0.0,1.0],\"autorange\":\"reversed\"},\"coloraxis\":{\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]},\"margin\":{\"t\":60}}, {\"responsive\": true} ).then(function(){\n"," \n","var gd = document.getElementById('be7930db-6c4e-497b-b832-416da626298c');\n","var x = new MutationObserver(function (mutations, observer) {{\n"," var display = window.getComputedStyle(gd).display;\n"," if (!display || display === 'none') {{\n"," console.log([gd, 'removed!']);\n"," Plotly.purge(gd);\n"," observer.disconnect();\n"," }}\n","}});\n","\n","// Listen for the removal of the full notebook cells\n","var notebookContainer = gd.closest('#notebook-container');\n","if (notebookContainer) {{\n"," x.observe(notebookContainer, {childList: true});\n","}}\n","\n","// Listen for the clearing of the current output cell\n","var outputEl = gd.closest('.output');\n","if (outputEl) {{\n"," x.observe(outputEl, {childList: true});\n","}}\n","\n"," }) }; }); </script> </div>"]},"metadata":{},"output_type":"display_data"}],"source":["# import 
matplotlib.pyplot as plt\n","# from sklearn.metrics import ConfusionMatrixDisplay\n","# disp = ConfusionMatrixDisplay(confusion_matrix=cm,\n","# display_labels=classifier.classes_)\n","# disp.plot()\n","# plt.show()\n","import plotly.express as px\n","fig = px.imshow(cm, text_auto=True, aspect=\"auto\")\n","fig.show()"]},{"cell_type":"markdown","id":"6b68fad1","metadata":{"papermill":{"duration":0.010207,"end_time":"2023-08-05T00:15:39.677156","exception":false,"start_time":"2023-08-05T00:15:39.666949","status":"completed"},"tags":[]},"source":["Method 2: KNN"]},{"cell_type":"code","execution_count":13,"id":"219416d1","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:39.699914Z","iopub.status.busy":"2023-08-05T00:15:39.699066Z","iopub.status.idle":"2023-08-05T00:15:40.015603Z","shell.execute_reply":"2023-08-05T00:15:40.014559Z"},"papermill":{"duration":0.330691,"end_time":"2023-08-05T00:15:40.0181","exception":false,"start_time":"2023-08-05T00:15:39.687409","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<style>#sk-container-id-2 {color: black;background-color: white;}#sk-container-id-2 pre{padding: 0;}#sk-container-id-2 div.sk-toggleable {background-color: white;}#sk-container-id-2 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-2 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-2 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-2 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-2 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-2 
input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-2 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-2 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-2 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-2 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-2 div.sk-item {position: relative;z-index: 1;}#sk-container-id-2 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-2 div.sk-item::before, #sk-container-id-2 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-2 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: 
relative;background-color: white;}#sk-container-id-2 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-2 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-2 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-2 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-2 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-2 div.sk-label-container {text-align: center;}#sk-container-id-2 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-2 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>KNeighborsClassifier()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. 
<br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" checked><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">KNeighborsClassifier</label><div class=\"sk-toggleable__content\"><pre>KNeighborsClassifier()</pre></div></div></div></div></div>"],"text/plain":["KNeighborsClassifier()"]},"execution_count":13,"metadata":{},"output_type":"execute_result"}],"source":["from sklearn.neighbors import KNeighborsClassifier\n","knn_clf = KNeighborsClassifier()\n","knn_clf.fit(X_train,y_train)"]},{"cell_type":"code","execution_count":14,"id":"fe6052b4","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:40.041026Z","iopub.status.busy":"2023-08-05T00:15:40.040612Z","iopub.status.idle":"2023-08-05T01:09:53.645091Z","shell.execute_reply":"2023-08-05T01:09:53.643871Z"},"papermill":{"duration":3253.619328,"end_time":"2023-08-05T01:09:53.648066","exception":false,"start_time":"2023-08-05T00:15:40.028738","status":"completed"},"tags":[]},"outputs":[],"source":["y_pred=knn_clf.predict(X_test)"]},{"cell_type":"code","execution_count":15,"id":"80d0ea82","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:09:53.674186Z","iopub.status.busy":"2023-08-05T01:09:53.673791Z","iopub.status.idle":"2023-08-05T01:09:53.682771Z","shell.execute_reply":"2023-08-05T01:09:53.681553Z"},"papermill":{"duration":0.023497,"end_time":"2023-08-05T01:09:53.685168","exception":false,"start_time":"2023-08-05T01:09:53.661671","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[ 4 4]\n"," [10 10]\n"," [ 8 8]\n"," ...\n"," [ 7 7]\n"," [ 8 8]\n"," [ 3 3]]\n"]}],"source":["print(np.concatenate((y_pred.reshape(len(y_pred),1), 
y_test.reshape(len(y_test),1)),1))"]},{"cell_type":"markdown","id":"30642482","metadata":{"papermill":{"duration":0.01112,"end_time":"2023-08-05T01:09:53.711764","exception":false,"start_time":"2023-08-05T01:09:53.700644","status":"completed"},"tags":[]},"source":["## Method 3: Using ANN"]},{"cell_type":"code","execution_count":16,"id":"9fe82be2","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:09:53.739787Z","iopub.status.busy":"2023-08-05T01:09:53.738634Z","iopub.status.idle":"2023-08-05T01:09:55.132546Z","shell.execute_reply":"2023-08-05T01:09:55.131479Z"},"papermill":{"duration":1.410752,"end_time":"2023-08-05T01:09:55.135242","exception":false,"start_time":"2023-08-05T01:09:53.72449","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.model_selection import train_test_split\n","X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=0)"]},{"cell_type":"markdown","id":"571dcd4c","metadata":{"papermill":{"duration":0.010439,"end_time":"2023-08-05T01:09:55.156709","exception":false,"start_time":"2023-08-05T01:09:55.14627","status":"completed"},"tags":[]},"source":["## Oversampling the minority classes"]},{"cell_type":"code","execution_count":17,"id":"32271f2b","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:09:55.181016Z","iopub.status.busy":"2023-08-05T01:09:55.180057Z","iopub.status.idle":"2023-08-05T01:19:23.222784Z","shell.execute_reply":"2023-08-05T01:19:23.221544Z"},"papermill":{"duration":568.057647,"end_time":"2023-08-05T01:19:23.225424","exception":false,"start_time":"2023-08-05T01:09:55.167777","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["Class=4, n=741642 (7.692%)\n","Class=12, n=741642 (7.692%)\n","Class=10, n=741642 (7.692%)\n","Class=2, n=741642 (7.692%)\n","Class=9, n=741642 (7.692%)\n","Class=8, n=741642 (7.692%)\n","Class=0, n=741642 (7.692%)\n","Class=5, n=741642 (7.692%)\n","Class=1, n=741642 (7.692%)\n","Class=11, 
n=741642 (7.692%)\n","Class=6, n=741642 (7.692%)\n","Class=3, n=741642 (7.692%)\n","Class=7, n=741642 (7.692%)\n"]},{"data":{"image/png":"","text/plain":["<Figure size 640x480 with 1 Axes>"]},"metadata":{},"output_type":"display_data"}],"source":["from imblearn.over_sampling import SMOTE\n","oversample = SMOTE()\n","y_train = le.fit_transform(y_train)\n","X_train, y_train = oversample.fit_resample(X_train, y_train)\n","# summarize distribution\n","counter = Counter(y_train)\n","for k,v in counter.items():\n"," per = v / len(y_train) * 100\n"," print('Class=%d, n=%d (%.3f%%)' % (k, v, per))\n","# plot the distribution\n","pyplot.bar(counter.keys(), counter.values())\n","pyplot.show()"]},{"cell_type":"code","execution_count":18,"id":"aa3846f3","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:23.252818Z","iopub.status.busy":"2023-08-05T01:19:23.251956Z","iopub.status.idle":"2023-08-05T01:19:23.740771Z","shell.execute_reply":"2023-08-05T01:19:23.73973Z"},"papermill":{"duration":0.505699,"end_time":"2023-08-05T01:19:23.743348","exception":false,"start_time":"2023-08-05T01:19:23.237649","status":"completed"},"tags":[]},"outputs":[],"source":["from tensorflow.keras.utils import to_categorical\n","y_train = to_categorical(y_train)"]},{"cell_type":"code","execution_count":19,"id":"0a0017d2","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:23.767819Z","iopub.status.busy":"2023-08-05T01:19:23.767429Z","iopub.status.idle":"2023-08-05T01:19:29.065742Z","shell.execute_reply":"2023-08-05T01:19:29.064536Z"},"papermill":{"duration":5.313708,"end_time":"2023-08-05T01:19:29.068475","exception":false,"start_time":"2023-08-05T01:19:23.754767","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.preprocessing import StandardScaler\n","sc = StandardScaler()\n","X_train = sc.fit_transform(X_train)\n","X_test = 
sc.transform(X_test)"]},{"cell_type":"code","execution_count":20,"id":"c344eb49","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:29.092749Z","iopub.status.busy":"2023-08-05T01:19:29.092331Z","iopub.status.idle":"2023-08-05T01:19:29.209741Z","shell.execute_reply":"2023-08-05T01:19:29.20847Z"},"papermill":{"duration":0.132568,"end_time":"2023-08-05T01:19:29.21231","exception":false,"start_time":"2023-08-05T01:19:29.079742","status":"completed"},"tags":[]},"outputs":[],"source":["ann = tf.keras.models.Sequential()\n","ann.add(tf.keras.layers.Dense(units=26, activation='tanh'))\n","ann.add(tf.keras.layers.Dense(units=52, activation='tanh'))\n","ann.add(tf.keras.layers.Dense(units=52, activation='tanh'))\n","ann.add(tf.keras.layers.Dense(units=13, activation='softmax'))"]},{"cell_type":"code","execution_count":21,"id":"c1441ca6","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:29.236225Z","iopub.status.busy":"2023-08-05T01:19:29.23586Z","iopub.status.idle":"2023-08-05T01:19:29.268014Z","shell.execute_reply":"2023-08-05T01:19:29.266637Z"},"papermill":{"duration":0.046985,"end_time":"2023-08-05T01:19:29.270549","exception":false,"start_time":"2023-08-05T01:19:29.223564","status":"completed"},"tags":[]},"outputs":[],"source":["ann.compile(optimizer='adam' , loss='CategoricalCrossentropy' , metrics=['accuracy'])"]},{"cell_type":"code","execution_count":22,"id":"82e960ec","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:29.294423Z","iopub.status.busy":"2023-08-05T01:19:29.294016Z","iopub.status.idle":"2023-08-05T01:43:40.990731Z","shell.execute_reply":"2023-08-05T01:43:40.989437Z"},"papermill":{"duration":1451.711594,"end_time":"2023-08-05T01:43:40.99336","exception":false,"start_time":"2023-08-05T01:19:29.281766","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["Epoch 1/10\n","75324/75324 [==============================] - 145s 2ms/step - loss: 0.1724 - accuracy: 0.9415\n","Epoch 
2/10\n","75324/75324 [==============================] - 142s 2ms/step - loss: 0.0763 - accuracy: 0.9731\n","Epoch 3/10\n","75324/75324 [==============================] - 144s 2ms/step - loss: 0.0601 - accuracy: 0.9788\n","Epoch 4/10\n","75324/75324 [==============================] - 151s 2ms/step - loss: 0.0527 - accuracy: 0.9815\n","Epoch 5/10\n","75324/75324 [==============================] - 142s 2ms/step - loss: 0.0480 - accuracy: 0.9831\n","Epoch 6/10\n","75324/75324 [==============================] - 143s 2ms/step - loss: 0.0452 - accuracy: 0.9840\n","Epoch 7/10\n","75324/75324 [==============================] - 146s 2ms/step - loss: 0.0432 - accuracy: 0.9848\n","Epoch 8/10\n","75324/75324 [==============================] - 141s 2ms/step - loss: 0.0417 - accuracy: 0.9854\n","Epoch 9/10\n","75324/75324 [==============================] - 148s 2ms/step - loss: 0.0405 - accuracy: 0.9857\n","Epoch 10/10\n","75324/75324 [==============================] - 146s 2ms/step - loss: 0.0397 - accuracy: 0.9860\n"]}],"source":["history = ann.fit(X_train, y_train, batch_size=128, epochs=10)"]},{"cell_type":"code","execution_count":23,"id":"c66f3d9a","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:43:44.865958Z","iopub.status.busy":"2023-08-05T01:43:44.86558Z","iopub.status.idle":"2023-08-05T01:44:26.055791Z","shell.execute_reply":"2023-08-05T01:44:26.054616Z"},"papermill":{"duration":43.125175,"end_time":"2023-08-05T01:44:26.058358","exception":false,"start_time":"2023-08-05T01:43:42.933183","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["17901/17901 [==============================] - 22s 1ms/step\n"]}],"source":["y_pred = 
ann.predict(X_test)"]},{"cell_type":"code","execution_count":24,"id":"ca3d6088","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:29.962501Z","iopub.status.busy":"2023-08-05T01:44:29.962091Z","iopub.status.idle":"2023-08-05T01:44:29.991499Z","shell.execute_reply":"2023-08-05T01:44:29.990294Z"},"papermill":{"duration":1.967301,"end_time":"2023-08-05T01:44:29.994142","exception":false,"start_time":"2023-08-05T01:44:28.026841","status":"completed"},"tags":[]},"outputs":[],"source":["result = np.argmax(y_pred, axis=-1)"]},{"cell_type":"code","execution_count":25,"id":"4211d197","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:33.86521Z","iopub.status.busy":"2023-08-05T01:44:33.864484Z","iopub.status.idle":"2023-08-05T01:44:33.869618Z","shell.execute_reply":"2023-08-05T01:44:33.868861Z"},"papermill":{"duration":1.942499,"end_time":"2023-08-05T01:44:33.871763","exception":false,"start_time":"2023-08-05T01:44:31.929264","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[ 4 10 8 ... 
7 8 3]\n"]}],"source":["print(result)"]},{"cell_type":"code","execution_count":26,"id":"e36e98e0","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:37.612404Z","iopub.status.busy":"2023-08-05T01:44:37.61165Z","iopub.status.idle":"2023-08-05T01:44:37.616428Z","shell.execute_reply":"2023-08-05T01:44:37.615631Z"},"papermill":{"duration":1.837241,"end_time":"2023-08-05T01:44:37.618458","exception":false,"start_time":"2023-08-05T01:44:35.781217","status":"completed"},"tags":[]},"outputs":[],"source":["y_test_rs = np.argmax(y_test, axis=-1)"]},{"cell_type":"code","execution_count":27,"id":"49227522","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:41.477349Z","iopub.status.busy":"2023-08-05T01:44:41.476614Z","iopub.status.idle":"2023-08-05T01:44:41.481313Z","shell.execute_reply":"2023-08-05T01:44:41.48054Z"},"papermill":{"duration":1.939073,"end_time":"2023-08-05T01:44:41.484075","exception":false,"start_time":"2023-08-05T01:44:39.545002","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["9\n"]}],"source":["print(y_test_rs)"]},{"cell_type":"code","execution_count":28,"id":"72b1f910","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:45.446971Z","iopub.status.busy":"2023-08-05T01:44:45.446273Z","iopub.status.idle":"2023-08-05T01:44:45.569371Z","shell.execute_reply":"2023-08-05T01:44:45.568125Z"},"papermill":{"duration":2.161727,"end_time":"2023-08-05T01:44:45.571926","exception":false,"start_time":"2023-08-05T01:44:43.410199","status":"completed"},"tags":[]},"outputs":[],"source":["y_pred_enc = pd.get_dummies(result)\n","y_test_enc = 
pd.get_dummies(y_test)"]},{"cell_type":"code","execution_count":29,"id":"6b54f780","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:49.311088Z","iopub.status.busy":"2023-08-05T01:44:49.310688Z","iopub.status.idle":"2023-08-05T01:44:49.698079Z","shell.execute_reply":"2023-08-05T01:44:49.696602Z"},"papermill":{"duration":2.293735,"end_time":"2023-08-05T01:44:49.700483","exception":false,"start_time":"2023-08-05T01:44:47.406748","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["0.9624047680565351\n"]}],"source":["from sklearn.metrics import accuracy_score\n","print(accuracy_score(y_test_enc, y_pred_enc))"]}],"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.12"},"papermill":{"default_parameters":{},"duration":5890.466662,"end_time":"2023-08-05T01:44:54.111787","environment_variables":{},"exception":null,"input_path":"__notebook__.ipynb","output_path":"__notebook__.ipynb","parameters":{},"start_time":"2023-08-05T00:06:43.645125","version":"2.4.0"}},"nbformat":4,"nbformat_minor":5} No newline at end of file
Copy link
Collaborator

@coderpotter coderpotter Aug 6, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Line #2.    print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

no print needed


Reply via ReviewNB

@@ -0,0 +1 @@
{"cells":[{"source":"<a href=\"https://www.kaggle.com/code/manvinagdev/physical-activity-prediction?scriptVersionId=138941879\" target=\"_blank\"><img align=\"left\" alt=\"Kaggle\" title=\"Open in Kaggle\" src=\"https://kaggle.com/static/images/open-in-kaggle.svg\"></a>","metadata":{},"cell_type":"markdown"},{"cell_type":"markdown","id":"fea86c76","metadata":{"papermill":{"duration":0.008893,"end_time":"2023-08-05T00:06:53.536089","exception":false,"start_time":"2023-08-05T00:06:53.527196","status":"completed"},"tags":[]},"source":["## Importing libraries"]},{"cell_type":"code","execution_count":1,"id":"89d8bcdb","metadata":{"_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","execution":{"iopub.execute_input":"2023-08-05T00:06:53.554723Z","iopub.status.busy":"2023-08-05T00:06:53.554321Z","iopub.status.idle":"2023-08-05T00:07:02.412402Z","shell.execute_reply":"2023-08-05T00:07:02.410962Z"},"papermill":{"duration":8.870272,"end_time":"2023-08-05T00:07:02.414899","exception":false,"start_time":"2023-08-05T00:06:53.544627","status":"completed"},"tags":[]},"outputs":[{"name":"stderr","output_type":"stream","text":["/opt/conda/lib/python3.10/site-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.23.5\n"," warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n","/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/__init__.py:98: UserWarning: unable to load libtensorflow_io_plugins.so: unable to open file: libtensorflow_io_plugins.so, from paths: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so']\n","caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so: undefined symbol: _ZN3tsl6StatusC1EN10tensorflow5error4CodeESt17basic_string_viewIcSt11char_traitsIcEENS_14SourceLocationE']\n"," 
warnings.warn(f\"unable to load libtensorflow_io_plugins.so: {e}\")\n","/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/__init__.py:104: UserWarning: file system plugins are not loaded: unable to open file: libtensorflow_io.so, from paths: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so']\n","caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so: undefined symbol: _ZTVN10tensorflow13GcsFileSystemE']\n"," warnings.warn(f\"file system plugins are not loaded: {e}\")\n"]},{"name":"stdout","output_type":"stream","text":["/kaggle/input/fisical-activity-dataset/dataset2.csv\n"]}],"source":["import numpy as np\n","import pandas as pd\n","import tensorflow as tf\n","\n","import os\n","for dirname, _, filenames in os.walk('/kaggle/input'):\n"," for filename in filenames:\n"," print(os.path.join(dirname, filename))"]},{"cell_type":"markdown","id":"e1dee26f","metadata":{"papermill":{"duration":0.008233,"end_time":"2023-08-05T00:07:02.431643","exception":false,"start_time":"2023-08-05T00:07:02.42341","status":"completed"},"tags":[]},"source":["## Loading the dataset"]},{"cell_type":"code","execution_count":2,"id":"ce6fe559","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:02.452779Z","iopub.status.busy":"2023-08-05T00:07:02.451213Z","iopub.status.idle":"2023-08-05T00:07:24.443285Z","shell.execute_reply":"2023-08-05T00:07:24.442116Z"},"papermill":{"duration":22.004325,"end_time":"2023-08-05T00:07:24.445861","exception":false,"start_time":"2023-08-05T00:07:02.441536","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: 
right;\">\n"," <th></th>\n"," <th>activityID</th>\n"," <th>heart_rate</th>\n"," <th>hand temperature (°C)</th>\n"," <th>hand acceleration X ±16g</th>\n"," <th>hand acceleration Y ±16g</th>\n"," <th>hand acceleration Z ±16g</th>\n"," <th>hand gyroscope X</th>\n"," <th>hand gyroscope Y</th>\n"," <th>hand gyroscope Z</th>\n"," <th>hand magnetometer X</th>\n"," <th>...</th>\n"," <th>ankle acceleration X ±16g</th>\n"," <th>ankle acceleration Y ±16g</th>\n"," <th>ankle acceleration Z ±16g</th>\n"," <th>ankle gyroscope X</th>\n"," <th>ankle gyroscope Y</th>\n"," <th>ankle gyroscope Z</th>\n"," <th>ankle magnetometer X</th>\n"," <th>ankle magnetometer Y</th>\n"," <th>ankle magnetometer Z</th>\n"," <th>PeopleId</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.37223</td>\n"," <td>8.60074</td>\n"," <td>3.51048</td>\n"," <td>-0.092217</td>\n"," <td>0.056812</td>\n"," <td>-0.015845</td>\n"," <td>14.6806</td>\n"," <td>...</td>\n"," <td>9.65918</td>\n"," <td>-1.65569</td>\n"," <td>-0.099797</td>\n"," <td>0.008300</td>\n"," <td>0.009250</td>\n"," <td>-0.017580</td>\n"," <td>-61.1888</td>\n"," <td>-38.9599</td>\n"," <td>-58.1438</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.18837</td>\n"," <td>8.56560</td>\n"," <td>3.66179</td>\n"," <td>-0.024413</td>\n"," <td>0.047759</td>\n"," <td>0.006474</td>\n"," <td>14.8991</td>\n"," <td>...</td>\n"," <td>9.69370</td>\n"," <td>-1.57902</td>\n"," <td>-0.215687</td>\n"," <td>-0.006577</td>\n"," <td>-0.004638</td>\n"," <td>0.000368</td>\n"," <td>-59.8479</td>\n"," <td>-38.8919</td>\n"," <td>-58.5253</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.37357</td>\n"," <td>8.60107</td>\n"," <td>3.54898</td>\n"," <td>-0.057976</td>\n"," <td>0.032574</td>\n"," 
<td>-0.006988</td>\n"," <td>14.2420</td>\n"," <td>...</td>\n"," <td>9.58944</td>\n"," <td>-1.73276</td>\n"," <td>0.092914</td>\n"," <td>0.003014</td>\n"," <td>0.000148</td>\n"," <td>0.022495</td>\n"," <td>-60.7361</td>\n"," <td>-39.4138</td>\n"," <td>-58.3999</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.07473</td>\n"," <td>8.52853</td>\n"," <td>3.66021</td>\n"," <td>-0.002352</td>\n"," <td>0.032810</td>\n"," <td>-0.003747</td>\n"," <td>14.8908</td>\n"," <td>...</td>\n"," <td>9.58814</td>\n"," <td>-1.77040</td>\n"," <td>0.054545</td>\n"," <td>0.003175</td>\n"," <td>-0.020301</td>\n"," <td>0.011275</td>\n"," <td>-60.4091</td>\n"," <td>-38.7635</td>\n"," <td>-58.3956</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.22936</td>\n"," <td>8.83122</td>\n"," <td>3.70000</td>\n"," <td>0.012269</td>\n"," <td>0.018305</td>\n"," <td>-0.053325</td>\n"," <td>15.5612</td>\n"," <td>...</td>\n"," <td>9.69771</td>\n"," <td>-1.65625</td>\n"," <td>-0.060809</td>\n"," <td>0.012698</td>\n"," <td>-0.014303</td>\n"," <td>-0.002823</td>\n"," <td>-61.5199</td>\n"," <td>-39.3879</td>\n"," <td>-58.2694</td>\n"," <td>1</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>5 rows × 33 columns</p>\n","</div>"],"text/plain":[" activityID heart_rate hand temperature (°C) \\\n","0 transient activities 104.0 30.0 \n","1 transient activities 104.0 30.0 \n","2 transient activities 104.0 30.0 \n","3 transient activities 104.0 30.0 \n","4 transient activities 104.0 30.0 \n","\n"," hand acceleration X ±16g hand acceleration Y ±16g \\\n","0 2.37223 8.60074 \n","1 2.18837 8.56560 \n","2 2.37357 8.60107 \n","3 2.07473 8.52853 \n","4 2.22936 8.83122 \n","\n"," hand acceleration Z ±16g hand gyroscope X hand gyroscope Y \\\n","0 3.51048 -0.092217 0.056812 \n","1 3.66179 -0.024413 0.047759 \n","2 3.54898 -0.057976 0.032574 
\n","3 3.66021 -0.002352 0.032810 \n","4 3.70000 0.012269 0.018305 \n","\n"," hand gyroscope Z hand magnetometer X ... ankle acceleration X ±16g \\\n","0 -0.015845 14.6806 ... 9.65918 \n","1 0.006474 14.8991 ... 9.69370 \n","2 -0.006988 14.2420 ... 9.58944 \n","3 -0.003747 14.8908 ... 9.58814 \n","4 -0.053325 15.5612 ... 9.69771 \n","\n"," ankle acceleration Y ±16g ankle acceleration Z ±16g ankle gyroscope X \\\n","0 -1.65569 -0.099797 0.008300 \n","1 -1.57902 -0.215687 -0.006577 \n","2 -1.73276 0.092914 0.003014 \n","3 -1.77040 0.054545 0.003175 \n","4 -1.65625 -0.060809 0.012698 \n","\n"," ankle gyroscope Y ankle gyroscope Z ankle magnetometer X \\\n","0 0.009250 -0.017580 -61.1888 \n","1 -0.004638 0.000368 -59.8479 \n","2 0.000148 0.022495 -60.7361 \n","3 -0.020301 0.011275 -60.4091 \n","4 -0.014303 -0.002823 -61.5199 \n","\n"," ankle magnetometer Y ankle magnetometer Z PeopleId \n","0 -38.9599 -58.1438 1 \n","1 -38.8919 -58.5253 1 \n","2 -39.4138 -58.3999 1 \n","3 -38.7635 -58.3956 1 \n","4 -39.3879 -58.2694 1 \n","\n","[5 rows x 33 columns]"]},"execution_count":2,"metadata":{},"output_type":"execute_result"}],"source":["dataset = pd.read_csv('/kaggle/input/fisical-activity-dataset/dataset2.csv')\n","dataset.head()"]},{"cell_type":"code","execution_count":3,"id":"4c7d3445","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:24.466165Z","iopub.status.busy":"2023-08-05T00:07:24.465496Z","iopub.status.idle":"2023-08-05T00:07:24.70062Z","shell.execute_reply":"2023-08-05T00:07:24.699179Z"},"papermill":{"duration":0.24804,"end_time":"2023-08-05T00:07:24.703053","exception":false,"start_time":"2023-08-05T00:07:24.455013","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[104. 30. 2.37223 ... -61.1888 -38.9599 -58.1438 ]\n"," [104. 30. 2.18837 ... -59.8479 -38.8919 -58.5253 ]\n"," [104. 30. 2.37357 ... -60.7361 -39.4138 -58.3999 ]\n"," ...\n"," [140. 30.8125 -9.42745 ... -38.5541 -16.0535 24.6936 ]\n"," [140. 
30.8125 -9.47246 ... -38.8064 -16.04 24.9763 ]\n"," [140. 30.8125 -9.66621 ... -38.6814 -15.9175 24.9766 ]]\n","['transient activities' 'transient activities' 'transient activities' ...\n"," 'transient activities' 'transient activities' 'transient activities']\n"]}],"source":["X = dataset.iloc[:, 1:-1].values\n","y = dataset.iloc[:, 0].values\n","print(X)\n","print(y)"]},{"cell_type":"markdown","id":"e73af995","metadata":{"papermill":{"duration":0.00873,"end_time":"2023-08-05T00:07:24.720797","exception":false,"start_time":"2023-08-05T00:07:24.712067","status":"completed"},"tags":[]},"source":["## Encoding categorical data"]},{"cell_type":"code","execution_count":4,"id":"b2055003","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:24.740481Z","iopub.status.busy":"2023-08-05T00:07:24.740069Z","iopub.status.idle":"2023-08-05T00:07:26.930985Z","shell.execute_reply":"2023-08-05T00:07:26.929671Z"},"papermill":{"duration":2.203646,"end_time":"2023-08-05T00:07:26.933552","exception":false,"start_time":"2023-08-05T00:07:24.729906","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["['Nordic walking', 'ascending stairs', 'cycling', 'descending stairs', 'ironing', 'lying', 'rope jumping', 'running', 'sitting', 'standing', 'transient activities', 'vacuum cleaning', 'walking']\n","Counter({10: 927575, 12: 238761, 4: 238690, 5: 192523, 9: 189931, 0: 188107, 8: 185188, 11: 175353, 2: 164600, 1: 117216, 3: 104944, 7: 98199, 6: 42969})\n","Class=10, n=927575 (32.387%)\n","Class=5, n=192523 (6.722%)\n","Class=8, n=185188 (6.466%)\n","Class=9, n=189931 (6.632%)\n","Class=4, n=238690 (8.334%)\n","Class=11, n=175353 (6.123%)\n","Class=1, n=117216 (4.093%)\n","Class=3, n=104944 (3.664%)\n","Class=12, n=238761 (8.336%)\n","Class=0, n=188107 (6.568%)\n","Class=2, n=164600 (5.747%)\n","Class=7, n=98199 (3.429%)\n","Class=6, n=42969 (1.500%)\n"]},{"data":{"image/png":"","text/plain":["<Figure size 640x480 with 1 
Axes>"]},"metadata":{},"output_type":"display_data"}],"source":["from collections import Counter\n","from sklearn.preprocessing import LabelEncoder\n","from matplotlib import pyplot\n","\n","le = LabelEncoder()\n","y_temp = le.fit_transform(y)\n","print(list(le.classes_))\n","counter = Counter(y_temp)\n","\n","print(counter)\n","for k,v in counter.items():\n"," per = v / len(y_temp) * 100\n"," print('Class=%d, n=%d (%.3f%%)' % (k, v, per))\n","\n","# plot the distribution\n","pyplot.bar(counter.keys(), counter.values())\n","pyplot.show()"]},{"cell_type":"markdown","id":"2d62ec16","metadata":{"papermill":{"duration":0.009288,"end_time":"2023-08-05T00:07:26.952484","exception":false,"start_time":"2023-08-05T00:07:26.943196","status":"completed"},"tags":[]},"source":["## Method 1: Using Random Forest Classifier "]},{"cell_type":"markdown","id":"ec9d9fce","metadata":{"papermill":{"duration":0.009112,"end_time":"2023-08-05T00:07:26.971127","exception":false,"start_time":"2023-08-05T00:07:26.962015","status":"completed"},"tags":[]},"source":["## Handling missing values"]},{"cell_type":"code","execution_count":5,"id":"68d72f52","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:26.992642Z","iopub.status.busy":"2023-08-05T00:07:26.992214Z","iopub.status.idle":"2023-08-05T00:07:29.167243Z","shell.execute_reply":"2023-08-05T00:07:29.166002Z"},"papermill":{"duration":2.188981,"end_time":"2023-08-05T00:07:29.169898","exception":false,"start_time":"2023-08-05T00:07:26.980917","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.impute import SimpleImputer\n","imputer = SimpleImputer(missing_values=np.nan, strategy='mean')\n","imputer.fit(X)\n","X = imputer.transform(X)"]},{"cell_type":"markdown","id":"b99c6d79","metadata":{"papermill":{"duration":0.009124,"end_time":"2023-08-05T00:07:29.188693","exception":false,"start_time":"2023-08-05T00:07:29.179569","status":"completed"},"tags":[]},"source":["## 
Encoding"]},{"cell_type":"code","execution_count":6,"id":"57a332ea","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:29.209213Z","iopub.status.busy":"2023-08-05T00:07:29.20881Z","iopub.status.idle":"2023-08-05T00:07:29.792137Z","shell.execute_reply":"2023-08-05T00:07:29.791044Z"},"papermill":{"duration":0.596644,"end_time":"2023-08-05T00:07:29.794779","exception":false,"start_time":"2023-08-05T00:07:29.198135","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.preprocessing import LabelEncoder\n","le = LabelEncoder()\n","y = le.fit_transform(y)"]},{"cell_type":"code","execution_count":7,"id":"974612a4","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:29.81627Z","iopub.status.busy":"2023-08-05T00:07:29.815829Z","iopub.status.idle":"2023-08-05T00:07:31.145986Z","shell.execute_reply":"2023-08-05T00:07:31.144799Z"},"papermill":{"duration":1.343317,"end_time":"2023-08-05T00:07:31.148501","exception":false,"start_time":"2023-08-05T00:07:29.805184","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.model_selection import train_test_split\n","X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=0)"]},{"cell_type":"markdown","id":"8edfbb5a","metadata":{"papermill":{"duration":0.008929,"end_time":"2023-08-05T00:07:31.166893","exception":false,"start_time":"2023-08-05T00:07:31.157964","status":"completed"},"tags":[]},"source":["## Feature Scaling"]},{"cell_type":"code","execution_count":8,"id":"ba643853","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:31.187287Z","iopub.status.busy":"2023-08-05T00:07:31.186875Z","iopub.status.idle":"2023-08-05T00:07:32.475029Z","shell.execute_reply":"2023-08-05T00:07:32.474185Z"},"papermill":{"duration":1.301347,"end_time":"2023-08-05T00:07:32.477471","exception":false,"start_time":"2023-08-05T00:07:31.176124","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.preprocessing import StandardScaler\n","sc = 
StandardScaler()\n","X_train = sc.fit_transform(X_train)\n","X_test = sc.transform(X_test)"]},{"cell_type":"markdown","id":"14ac9a68","metadata":{"papermill":{"duration":0.009663,"end_time":"2023-08-05T00:07:32.496576","exception":false,"start_time":"2023-08-05T00:07:32.486913","status":"completed"},"tags":[]},"source":["## Training RandomForestClassifier"]},{"cell_type":"code","execution_count":9,"id":"1dfae9c0","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:32.517561Z","iopub.status.busy":"2023-08-05T00:07:32.517142Z","iopub.status.idle":"2023-08-05T00:15:35.089903Z","shell.execute_reply":"2023-08-05T00:15:35.088663Z"},"papermill":{"duration":482.594606,"end_time":"2023-08-05T00:15:35.101103","exception":false,"start_time":"2023-08-05T00:07:32.506497","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: 
\"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 
50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>RandomForestClassifier(criterion=&#x27;entropy&#x27;, n_estimators=10, random_state=0)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. 
<br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">RandomForestClassifier</label><div class=\"sk-toggleable__content\"><pre>RandomForestClassifier(criterion=&#x27;entropy&#x27;, n_estimators=10, random_state=0)</pre></div></div></div></div></div>"],"text/plain":["RandomForestClassifier(criterion='entropy', n_estimators=10, random_state=0)"]},"execution_count":9,"metadata":{},"output_type":"execute_result"}],"source":["from sklearn.ensemble import RandomForestClassifier\n","classifier = RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0)\n","classifier.fit(X_train, y_train)"]},{"cell_type":"markdown","id":"a6207e57","metadata":{"papermill":{"duration":0.009341,"end_time":"2023-08-05T00:15:35.120003","exception":false,"start_time":"2023-08-05T00:15:35.110662","status":"completed"},"tags":[]},"source":["## Predicting Results"]},{"cell_type":"code","execution_count":10,"id":"a963263d","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:35.140981Z","iopub.status.busy":"2023-08-05T00:15:35.140561Z","iopub.status.idle":"2023-08-05T00:15:37.22477Z","shell.execute_reply":"2023-08-05T00:15:37.223543Z"},"papermill":{"duration":2.097825,"end_time":"2023-08-05T00:15:37.227329","exception":false,"start_time":"2023-08-05T00:15:35.129504","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[ 4 4]\n"," [10 10]\n"," [ 8 8]\n"," ...\n"," [ 7 7]\n"," [ 8 8]\n"," [ 3 3]]\n"]}],"source":["y_pred = classifier.predict(X_test)\n","print(np.concatenate((y_pred.reshape(len(y_pred),1), 
y_test.reshape(len(y_test),1)),1))"]},{"cell_type":"markdown","id":"7846df5c","metadata":{"papermill":{"duration":0.009748,"end_time":"2023-08-05T00:15:37.247066","exception":false,"start_time":"2023-08-05T00:15:37.237318","status":"completed"},"tags":[]},"source":["## Calculating Accuracy"]},{"cell_type":"code","execution_count":11,"id":"ed6e3e8a","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:37.268284Z","iopub.status.busy":"2023-08-05T00:15:37.267859Z","iopub.status.idle":"2023-08-05T00:15:37.436445Z","shell.execute_reply":"2023-08-05T00:15:37.435065Z"},"papermill":{"duration":0.182461,"end_time":"2023-08-05T00:15:37.439113","exception":false,"start_time":"2023-08-05T00:15:37.256652","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[ 37634 0 0 0 0 0 0 0 0 0\n"," 8 0 0]\n"," [ 0 23179 0 23 0 0 0 0 0 0\n"," 81 0 0]\n"," [ 0 0 32839 0 0 0 0 0 0 0\n"," 21 0 0]\n"," [ 0 14 0 20904 0 0 0 0 0 0\n"," 43 0 0]\n"," [ 0 0 0 0 47928 0 0 0 0 14\n"," 6 0 0]\n"," [ 0 0 0 0 0 38287 0 0 3 0\n"," 17 0 0]\n"," [ 0 0 1 0 0 0 8591 2 0 0\n"," 5 0 0]\n"," [ 0 0 0 0 0 0 0 19492 0 0\n"," 8 1 0]\n"," [ 0 0 0 0 0 0 0 0 36812 2\n"," 32 0 0]\n"," [ 0 0 0 0 22 0 0 0 4 38082\n"," 57 0 0]\n"," [ 28 61 12 57 25 32 8 24 31 22\n"," 185589 24 20]\n"," [ 0 0 0 0 0 0 0 0 0 0\n"," 41 35025 0]\n"," [ 1 0 0 0 0 0 0 0 0 0\n"," 41 0 47659]]\n"]},{"data":{"text/plain":["0.9986190931754223"]},"execution_count":11,"metadata":{},"output_type":"execute_result"}],"source":["from sklearn.metrics import confusion_matrix, accuracy_score\n","cm = confusion_matrix(y_test, y_pred)\n","print(cm)\n","accuracy_score(y_test, 
y_pred)"]},{"cell_type":"code","execution_count":12,"id":"e609b003","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:37.462377Z","iopub.status.busy":"2023-08-05T00:15:37.461702Z","iopub.status.idle":"2023-08-05T00:15:39.654083Z","shell.execute_reply":"2023-08-05T00:15:39.652958Z"},"papermill":{"duration":2.206834,"end_time":"2023-08-05T00:15:39.656344","exception":false,"start_time":"2023-08-05T00:15:37.44951","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":[" <script type=\"text/javascript\">\n"," window.PlotlyConfig = {MathJaxConfig: 'local'};\n"," if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n"," if (typeof require !== 'undefined') {\n"," require.undef(\"plotly\");\n"," requirejs.config({\n"," paths: {\n"," 'plotly': ['https://cdn.plot.ly/plotly-2.24.1.min']\n"," }\n"," });\n"," require(['plotly'], function(Plotly) {\n"," window._Plotly = Plotly;\n"," });\n"," }\n"," </script>\n"," "]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["<div> <div id=\"be7930db-6c4e-497b-b832-416da626298c\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"be7930db-6c4e-497b-b832-416da626298c\")) { Plotly.newPlot( \"be7930db-6c4e-497b-b832-416da626298c\", 
[{\"coloraxis\":\"coloraxis\",\"name\":\"0\",\"texttemplate\":\"%{z}\",\"z\":[[37634,0,0,0,0,0,0,0,0,0,8,0,0],[0,23179,0,23,0,0,0,0,0,0,81,0,0],[0,0,32839,0,0,0,0,0,0,0,21,0,0],[0,14,0,20904,0,0,0,0,0,0,43,0,0],[0,0,0,0,47928,0,0,0,0,14,6,0,0],[0,0,0,0,0,38287,0,0,3,0,17,0,0],[0,0,1,0,0,0,8591,2,0,0,5,0,0],[0,0,0,0,0,0,0,19492,0,0,8,1,0],[0,0,0,0,0,0,0,0,36812,2,32,0,0],[0,0,0,0,22,0,0,0,4,38082,57,0,0],[28,61,12,57,25,32,8,24,31,22,185589,24,20],[0,0,0,0,0,0,0,0,0,0,41,35025,0],[1,0,0,0,0,0,0,0,0,0,41,0,47659]],\"type\":\"heatmap\",\"xaxis\":\"x\",\"yaxis\":\"y\",\"hovertemplate\":\"x: %{x}\\u003cbr\\u003ey: %{y}\\u003cbr\\u003ecolor: %{z}\\u003cextra\\u003e\\u003c\\u002fextra\\u003e\"}], {\"template\":{\"data\":{\"histogram2dcontour\":[{\"type\":\"histogram2dcontour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"choropleth\":[{\"type\":\"choropleth\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"histogram2d\":[{\"type\":\"histogram2d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmap\":[{\"type\":\"heatmap\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"]
,[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmapgl\":[{\"type\":\"heatmapgl\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"contourcarpet\":[{\"type\":\"contourcarpet\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"contour\":[{\"type\":\"contour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"surface\":[{\"type\":\"surface\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"mesh3d\":[{\"type\":\"mesh3d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"scatter\":[{\"fillpattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2},\"type\":\"scatter\"}],\"parcoords\":[{\"type\":\"parcoords\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolargl\":[{\"type\":\"scatterpolargl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"bar\":[{\"error_x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}
],\"scattergeo\":[{\"type\":\"scattergeo\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolar\":[{\"type\":\"scatterpolar\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"histogram\":[{\"marker\":{\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"histogram\"}],\"scattergl\":[{\"type\":\"scattergl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatter3d\":[{\"type\":\"scatter3d\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattermapbox\":[{\"type\":\"scattermapbox\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterternary\":[{\"type\":\"scatterternary\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattercarpet\":[{\"type\":\"scattercarpet\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"table\":[{\"cells\":{\"fill\":{\"color\":\"#EBF0F8\"},\"line\":{\"color\":\"white\"}},\"header\":{\"fill\":{\"color\":\"#C8D4E3\"},\"line\":{\"color\":\"white\"}},\"type\":\"table\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"pie\":[{\"automargin\":true,\"type\":\"pie\"}]},\"layout\":{\"autotypenumbers\":\"strict\",\"colorway\":[\"#636efa\",\"#EF553B\",\"#00cc96\",\"#ab63fa\",\"#FFA15A\",\"#19d3f3\",\"#FF6692\",\"#B6E880\",\"#FF97FF\",\"#FECB52\"],\"font\":{\"color\":\"#2a3f5f\"},\"hovermode\":\"closest\",\"hoverlabel\":{\"align\":\"left\"},\"paper_bgcolor\":\"white\",\"plot_bgcolor\":\"#E5EC
F6\",\"polar\":{\"bgcolor\":\"#E5ECF6\",\"angularaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"radialaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"ternary\":{\"bgcolor\":\"#E5ECF6\",\"aaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"baxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"caxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"coloraxis\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"colorscale\":{\"sequential\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"sequentialminus\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"diverging\":[[0,\"#8e0152\"],[0.1,\"#c51b7d\"],[0.2,\"#de77ae\"],[0.3,\"#f1b6da\"],[0.4,\"#fde0ef\"],[0.5,\"#f7f7f7\"],[0.6,\"#e6f5d0\"],[0.7,\"#b8e186\"],[0.8,\"#7fbc41\"],[0.9,\"#4d9221\"],[1,\"#276419\"]]},\"xaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"yaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"scene\":{\"xaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"yaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecol
or\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"zaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2}},\"shapedefaults\":{\"line\":{\"color\":\"#2a3f5f\"}},\"annotationdefaults\":{\"arrowcolor\":\"#2a3f5f\",\"arrowhead\":0,\"arrowwidth\":1},\"geo\":{\"bgcolor\":\"white\",\"landcolor\":\"#E5ECF6\",\"subunitcolor\":\"white\",\"showland\":true,\"showlakes\":true,\"lakecolor\":\"white\"},\"title\":{\"x\":0.05},\"mapbox\":{\"style\":\"light\"}}},\"xaxis\":{\"anchor\":\"y\",\"domain\":[0.0,1.0]},\"yaxis\":{\"anchor\":\"x\",\"domain\":[0.0,1.0],\"autorange\":\"reversed\"},\"coloraxis\":{\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]},\"margin\":{\"t\":60}}, {\"responsive\": true} ).then(function(){\n"," \n","var gd = document.getElementById('be7930db-6c4e-497b-b832-416da626298c');\n","var x = new MutationObserver(function (mutations, observer) {{\n"," var display = window.getComputedStyle(gd).display;\n"," if (!display || display === 'none') {{\n"," console.log([gd, 'removed!']);\n"," Plotly.purge(gd);\n"," observer.disconnect();\n"," }}\n","}});\n","\n","// Listen for the removal of the full notebook cells\n","var notebookContainer = gd.closest('#notebook-container');\n","if (notebookContainer) {{\n"," x.observe(notebookContainer, {childList: true});\n","}}\n","\n","// Listen for the clearing of the current output cell\n","var outputEl = gd.closest('.output');\n","if (outputEl) {{\n"," x.observe(outputEl, {childList: true});\n","}}\n","\n"," }) }; }); </script> </div>"]},"metadata":{},"output_type":"display_data"}],"source":["# import 
matplotlib.pyplot as plt\n","# from sklearn.metrics import ConfusionMatrixDisplay\n","# disp = ConfusionMatrixDisplay(confusion_matrix=cm,\n","# display_labels=classifier.classes_)\n","# disp.plot()\n","# plt.show()\n","import plotly.express as px\n","fig = px.imshow(cm, text_auto=True, aspect=\"auto\")\n","fig.show()"]},{"cell_type":"markdown","id":"6b68fad1","metadata":{"papermill":{"duration":0.010207,"end_time":"2023-08-05T00:15:39.677156","exception":false,"start_time":"2023-08-05T00:15:39.666949","status":"completed"},"tags":[]},"source":["## Method 2: Using KNN"]},{"cell_type":"code","execution_count":13,"id":"219416d1","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:39.699914Z","iopub.status.busy":"2023-08-05T00:15:39.699066Z","iopub.status.idle":"2023-08-05T00:15:40.015603Z","shell.execute_reply":"2023-08-05T00:15:40.014559Z"},"papermill":{"duration":0.330691,"end_time":"2023-08-05T00:15:40.0181","exception":false,"start_time":"2023-08-05T00:15:39.687409","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<style>#sk-container-id-2 {color: black;background-color: white;}#sk-container-id-2 pre{padding: 0;}#sk-container-id-2 div.sk-toggleable {background-color: white;}#sk-container-id-2 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-2 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-2 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-2 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-2 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-2 
input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-2 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-2 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-2 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-2 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-2 div.sk-item {position: relative;z-index: 1;}#sk-container-id-2 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-2 div.sk-item::before, #sk-container-id-2 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-2 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: 
relative;background-color: white;}#sk-container-id-2 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-2 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-2 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-2 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-2 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-2 div.sk-label-container {text-align: center;}#sk-container-id-2 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-2 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>KNeighborsClassifier()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. 
<br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" checked><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">KNeighborsClassifier</label><div class=\"sk-toggleable__content\"><pre>KNeighborsClassifier()</pre></div></div></div></div></div>"],"text/plain":["KNeighborsClassifier()"]},"execution_count":13,"metadata":{},"output_type":"execute_result"}],"source":["from sklearn.neighbors import KNeighborsClassifier\n","knn_clf = KNeighborsClassifier()\n","knn_clf.fit(X_train,y_train)"]},{"cell_type":"code","execution_count":14,"id":"fe6052b4","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:40.041026Z","iopub.status.busy":"2023-08-05T00:15:40.040612Z","iopub.status.idle":"2023-08-05T01:09:53.645091Z","shell.execute_reply":"2023-08-05T01:09:53.643871Z"},"papermill":{"duration":3253.619328,"end_time":"2023-08-05T01:09:53.648066","exception":false,"start_time":"2023-08-05T00:15:40.028738","status":"completed"},"tags":[]},"outputs":[],"source":["y_pred=knn_clf.predict(X_test)"]},{"cell_type":"code","execution_count":15,"id":"80d0ea82","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:09:53.674186Z","iopub.status.busy":"2023-08-05T01:09:53.673791Z","iopub.status.idle":"2023-08-05T01:09:53.682771Z","shell.execute_reply":"2023-08-05T01:09:53.681553Z"},"papermill":{"duration":0.023497,"end_time":"2023-08-05T01:09:53.685168","exception":false,"start_time":"2023-08-05T01:09:53.661671","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[ 4 4]\n"," [10 10]\n"," [ 8 8]\n"," ...\n"," [ 7 7]\n"," [ 8 8]\n"," [ 3 3]]\n"]}],"source":["print(np.concatenate((y_pred.reshape(len(y_pred),1), 
y_test.reshape(len(y_test),1)),1))"]},{"cell_type":"markdown","id":"30642482","metadata":{"papermill":{"duration":0.01112,"end_time":"2023-08-05T01:09:53.711764","exception":false,"start_time":"2023-08-05T01:09:53.700644","status":"completed"},"tags":[]},"source":["## Method 3: Using ANN"]},{"cell_type":"code","execution_count":16,"id":"9fe82be2","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:09:53.739787Z","iopub.status.busy":"2023-08-05T01:09:53.738634Z","iopub.status.idle":"2023-08-05T01:09:55.132546Z","shell.execute_reply":"2023-08-05T01:09:55.131479Z"},"papermill":{"duration":1.410752,"end_time":"2023-08-05T01:09:55.135242","exception":false,"start_time":"2023-08-05T01:09:53.72449","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.model_selection import train_test_split\n","X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=0)"]},{"cell_type":"markdown","id":"571dcd4c","metadata":{"papermill":{"duration":0.010439,"end_time":"2023-08-05T01:09:55.156709","exception":false,"start_time":"2023-08-05T01:09:55.14627","status":"completed"},"tags":[]},"source":["## Oversampling the minority classes"]},{"cell_type":"code","execution_count":17,"id":"32271f2b","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:09:55.181016Z","iopub.status.busy":"2023-08-05T01:09:55.180057Z","iopub.status.idle":"2023-08-05T01:19:23.222784Z","shell.execute_reply":"2023-08-05T01:19:23.221544Z"},"papermill":{"duration":568.057647,"end_time":"2023-08-05T01:19:23.225424","exception":false,"start_time":"2023-08-05T01:09:55.167777","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["Class=4, n=741642 (7.692%)\n","Class=12, n=741642 (7.692%)\n","Class=10, n=741642 (7.692%)\n","Class=2, n=741642 (7.692%)\n","Class=9, n=741642 (7.692%)\n","Class=8, n=741642 (7.692%)\n","Class=0, n=741642 (7.692%)\n","Class=5, n=741642 (7.692%)\n","Class=1, n=741642 (7.692%)\n","Class=11, 
n=741642 (7.692%)\n","Class=6, n=741642 (7.692%)\n","Class=3, n=741642 (7.692%)\n","Class=7, n=741642 (7.692%)\n"]},{"data":{"image/png":"","text/plain":["<Figure size 640x480 with 1 Axes>"]},"metadata":{},"output_type":"display_data"}],"source":["from imblearn.over_sampling import SMOTE\n","oversample = SMOTE()\n","y_train = le.fit_transform(y_train)\n","X_train, y_train = oversample.fit_resample(X_train, y_train)\n","# summarize distribution\n","counter = Counter(y_train)\n","for k,v in counter.items():\n"," per = v / len(y_train) * 100\n"," print('Class=%d, n=%d (%.3f%%)' % (k, v, per))\n","# plot the distribution\n","pyplot.bar(counter.keys(), counter.values())\n","pyplot.show()"]},{"cell_type":"code","execution_count":18,"id":"aa3846f3","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:23.252818Z","iopub.status.busy":"2023-08-05T01:19:23.251956Z","iopub.status.idle":"2023-08-05T01:19:23.740771Z","shell.execute_reply":"2023-08-05T01:19:23.73973Z"},"papermill":{"duration":0.505699,"end_time":"2023-08-05T01:19:23.743348","exception":false,"start_time":"2023-08-05T01:19:23.237649","status":"completed"},"tags":[]},"outputs":[],"source":["from tensorflow.keras.utils import to_categorical\n","y_train = to_categorical(y_train)"]},{"cell_type":"code","execution_count":19,"id":"0a0017d2","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:23.767819Z","iopub.status.busy":"2023-08-05T01:19:23.767429Z","iopub.status.idle":"2023-08-05T01:19:29.065742Z","shell.execute_reply":"2023-08-05T01:19:29.064536Z"},"papermill":{"duration":5.313708,"end_time":"2023-08-05T01:19:29.068475","exception":false,"start_time":"2023-08-05T01:19:23.754767","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.preprocessing import StandardScaler\n","sc = StandardScaler()\n","X_train = sc.fit_transform(X_train)\n","X_test = 
sc.transform(X_test)"]},{"cell_type":"code","execution_count":20,"id":"c344eb49","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:29.092749Z","iopub.status.busy":"2023-08-05T01:19:29.092331Z","iopub.status.idle":"2023-08-05T01:19:29.209741Z","shell.execute_reply":"2023-08-05T01:19:29.20847Z"},"papermill":{"duration":0.132568,"end_time":"2023-08-05T01:19:29.21231","exception":false,"start_time":"2023-08-05T01:19:29.079742","status":"completed"},"tags":[]},"outputs":[],"source":["ann = tf.keras.models.Sequential()\n","ann.add(tf.keras.layers.Dense(units=26, activation='tanh'))\n","ann.add(tf.keras.layers.Dense(units=52, activation='tanh'))\n","ann.add(tf.keras.layers.Dense(units=52, activation='tanh'))\n","ann.add(tf.keras.layers.Dense(units=13, activation='softmax'))"]},{"cell_type":"code","execution_count":21,"id":"c1441ca6","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:29.236225Z","iopub.status.busy":"2023-08-05T01:19:29.23586Z","iopub.status.idle":"2023-08-05T01:19:29.268014Z","shell.execute_reply":"2023-08-05T01:19:29.266637Z"},"papermill":{"duration":0.046985,"end_time":"2023-08-05T01:19:29.270549","exception":false,"start_time":"2023-08-05T01:19:29.223564","status":"completed"},"tags":[]},"outputs":[],"source":["ann.compile(optimizer='adam' , loss='CategoricalCrossentropy' , metrics=['accuracy'])"]},{"cell_type":"code","execution_count":22,"id":"82e960ec","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:29.294423Z","iopub.status.busy":"2023-08-05T01:19:29.294016Z","iopub.status.idle":"2023-08-05T01:43:40.990731Z","shell.execute_reply":"2023-08-05T01:43:40.989437Z"},"papermill":{"duration":1451.711594,"end_time":"2023-08-05T01:43:40.99336","exception":false,"start_time":"2023-08-05T01:19:29.281766","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["Epoch 1/10\n","75324/75324 [==============================] - 145s 2ms/step - loss: 0.1724 - accuracy: 0.9415\n","Epoch 
2/10\n","75324/75324 [==============================] - 142s 2ms/step - loss: 0.0763 - accuracy: 0.9731\n","Epoch 3/10\n","75324/75324 [==============================] - 144s 2ms/step - loss: 0.0601 - accuracy: 0.9788\n","Epoch 4/10\n","75324/75324 [==============================] - 151s 2ms/step - loss: 0.0527 - accuracy: 0.9815\n","Epoch 5/10\n","75324/75324 [==============================] - 142s 2ms/step - loss: 0.0480 - accuracy: 0.9831\n","Epoch 6/10\n","75324/75324 [==============================] - 143s 2ms/step - loss: 0.0452 - accuracy: 0.9840\n","Epoch 7/10\n","75324/75324 [==============================] - 146s 2ms/step - loss: 0.0432 - accuracy: 0.9848\n","Epoch 8/10\n","75324/75324 [==============================] - 141s 2ms/step - loss: 0.0417 - accuracy: 0.9854\n","Epoch 9/10\n","75324/75324 [==============================] - 148s 2ms/step - loss: 0.0405 - accuracy: 0.9857\n","Epoch 10/10\n","75324/75324 [==============================] - 146s 2ms/step - loss: 0.0397 - accuracy: 0.9860\n"]}],"source":["history = ann.fit(X_train, y_train, batch_size=128, epochs=10)"]},{"cell_type":"code","execution_count":23,"id":"c66f3d9a","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:43:44.865958Z","iopub.status.busy":"2023-08-05T01:43:44.86558Z","iopub.status.idle":"2023-08-05T01:44:26.055791Z","shell.execute_reply":"2023-08-05T01:44:26.054616Z"},"papermill":{"duration":43.125175,"end_time":"2023-08-05T01:44:26.058358","exception":false,"start_time":"2023-08-05T01:43:42.933183","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["17901/17901 [==============================] - 22s 1ms/step\n"]}],"source":["y_pred = 
ann.predict(X_test)"]},{"cell_type":"code","execution_count":24,"id":"ca3d6088","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:29.962501Z","iopub.status.busy":"2023-08-05T01:44:29.962091Z","iopub.status.idle":"2023-08-05T01:44:29.991499Z","shell.execute_reply":"2023-08-05T01:44:29.990294Z"},"papermill":{"duration":1.967301,"end_time":"2023-08-05T01:44:29.994142","exception":false,"start_time":"2023-08-05T01:44:28.026841","status":"completed"},"tags":[]},"outputs":[],"source":["result = np.argmax(y_pred, axis=-1)"]},{"cell_type":"code","execution_count":25,"id":"4211d197","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:33.86521Z","iopub.status.busy":"2023-08-05T01:44:33.864484Z","iopub.status.idle":"2023-08-05T01:44:33.869618Z","shell.execute_reply":"2023-08-05T01:44:33.868861Z"},"papermill":{"duration":1.942499,"end_time":"2023-08-05T01:44:33.871763","exception":false,"start_time":"2023-08-05T01:44:31.929264","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[ 4 10 8 ... 
7 8 3]\n"]}],"source":["print(result)"]},{"cell_type":"code","execution_count":26,"id":"e36e98e0","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:37.612404Z","iopub.status.busy":"2023-08-05T01:44:37.61165Z","iopub.status.idle":"2023-08-05T01:44:37.616428Z","shell.execute_reply":"2023-08-05T01:44:37.615631Z"},"papermill":{"duration":1.837241,"end_time":"2023-08-05T01:44:37.618458","exception":false,"start_time":"2023-08-05T01:44:35.781217","status":"completed"},"tags":[]},"outputs":[],"source":["y_test_rs = np.argmax(y_test, axis=-1)"]},{"cell_type":"code","execution_count":27,"id":"49227522","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:41.477349Z","iopub.status.busy":"2023-08-05T01:44:41.476614Z","iopub.status.idle":"2023-08-05T01:44:41.481313Z","shell.execute_reply":"2023-08-05T01:44:41.48054Z"},"papermill":{"duration":1.939073,"end_time":"2023-08-05T01:44:41.484075","exception":false,"start_time":"2023-08-05T01:44:39.545002","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["9\n"]}],"source":["print(y_test_rs)"]},{"cell_type":"code","execution_count":28,"id":"72b1f910","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:45.446971Z","iopub.status.busy":"2023-08-05T01:44:45.446273Z","iopub.status.idle":"2023-08-05T01:44:45.569371Z","shell.execute_reply":"2023-08-05T01:44:45.568125Z"},"papermill":{"duration":2.161727,"end_time":"2023-08-05T01:44:45.571926","exception":false,"start_time":"2023-08-05T01:44:43.410199","status":"completed"},"tags":[]},"outputs":[],"source":["y_pred_enc = pd.get_dummies(result)\n","y_test_enc = 
pd.get_dummies(y_test)"]},{"cell_type":"code","execution_count":29,"id":"6b54f780","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:49.311088Z","iopub.status.busy":"2023-08-05T01:44:49.310688Z","iopub.status.idle":"2023-08-05T01:44:49.698079Z","shell.execute_reply":"2023-08-05T01:44:49.696602Z"},"papermill":{"duration":2.293735,"end_time":"2023-08-05T01:44:49.700483","exception":false,"start_time":"2023-08-05T01:44:47.406748","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["0.9624047680565351\n"]}],"source":["from sklearn.metrics import accuracy_score\n","print(accuracy_score(y_test_enc, y_pred_enc))"]}],"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.12"},"papermill":{"default_parameters":{},"duration":5890.466662,"end_time":"2023-08-05T01:44:54.111787","environment_variables":{},"exception":null,"input_path":"__notebook__.ipynb","output_path":"__notebook__.ipynb","parameters":{},"start_time":"2023-08-05T00:06:43.645125","version":"2.4.0"}},"nbformat":4,"nbformat_minor":5} No newline at end of file
Copy link
Collaborator

@coderpotter coderpotter Aug 6, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this a section?


Reply via ReviewNB

@@ -0,0 +1 @@
{"cells":[{"source":"<a href=\"https://www.kaggle.com/code/manvinagdev/physical-activity-prediction?scriptVersionId=138941879\" target=\"_blank\"><img align=\"left\" alt=\"Kaggle\" title=\"Open in Kaggle\" src=\"https://kaggle.com/static/images/open-in-kaggle.svg\"></a>","metadata":{},"cell_type":"markdown"},{"cell_type":"markdown","id":"fea86c76","metadata":{"papermill":{"duration":0.008893,"end_time":"2023-08-05T00:06:53.536089","exception":false,"start_time":"2023-08-05T00:06:53.527196","status":"completed"},"tags":[]},"source":["## Importing libraries"]},{"cell_type":"code","execution_count":1,"id":"89d8bcdb","metadata":{"_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","execution":{"iopub.execute_input":"2023-08-05T00:06:53.554723Z","iopub.status.busy":"2023-08-05T00:06:53.554321Z","iopub.status.idle":"2023-08-05T00:07:02.412402Z","shell.execute_reply":"2023-08-05T00:07:02.410962Z"},"papermill":{"duration":8.870272,"end_time":"2023-08-05T00:07:02.414899","exception":false,"start_time":"2023-08-05T00:06:53.544627","status":"completed"},"tags":[]},"outputs":[{"name":"stderr","output_type":"stream","text":["/opt/conda/lib/python3.10/site-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.23.5\n"," warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n","/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/__init__.py:98: UserWarning: unable to load libtensorflow_io_plugins.so: unable to open file: libtensorflow_io_plugins.so, from paths: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so']\n","caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so: undefined symbol: _ZN3tsl6StatusC1EN10tensorflow5error4CodeESt17basic_string_viewIcSt11char_traitsIcEENS_14SourceLocationE']\n"," 
warnings.warn(f\"unable to load libtensorflow_io_plugins.so: {e}\")\n","/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/__init__.py:104: UserWarning: file system plugins are not loaded: unable to open file: libtensorflow_io.so, from paths: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so']\n","caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so: undefined symbol: _ZTVN10tensorflow13GcsFileSystemE']\n"," warnings.warn(f\"file system plugins are not loaded: {e}\")\n"]},{"name":"stdout","output_type":"stream","text":["/kaggle/input/fisical-activity-dataset/dataset2.csv\n"]}],"source":["import numpy as np\n","import pandas as pd\n","import tensorflow as tf\n","\n","import os\n","for dirname, _, filenames in os.walk('/kaggle/input'):\n"," for filename in filenames:\n"," print(os.path.join(dirname, filename))"]},{"cell_type":"markdown","id":"e1dee26f","metadata":{"papermill":{"duration":0.008233,"end_time":"2023-08-05T00:07:02.431643","exception":false,"start_time":"2023-08-05T00:07:02.42341","status":"completed"},"tags":[]},"source":["## Loading the dataset"]},{"cell_type":"code","execution_count":2,"id":"ce6fe559","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:02.452779Z","iopub.status.busy":"2023-08-05T00:07:02.451213Z","iopub.status.idle":"2023-08-05T00:07:24.443285Z","shell.execute_reply":"2023-08-05T00:07:24.442116Z"},"papermill":{"duration":22.004325,"end_time":"2023-08-05T00:07:24.445861","exception":false,"start_time":"2023-08-05T00:07:02.441536","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: 
right;\">\n"," <th></th>\n"," <th>activityID</th>\n"," <th>heart_rate</th>\n"," <th>hand temperature (°C)</th>\n"," <th>hand acceleration X ±16g</th>\n"," <th>hand acceleration Y ±16g</th>\n"," <th>hand acceleration Z ±16g</th>\n"," <th>hand gyroscope X</th>\n"," <th>hand gyroscope Y</th>\n"," <th>hand gyroscope Z</th>\n"," <th>hand magnetometer X</th>\n"," <th>...</th>\n"," <th>ankle acceleration X ±16g</th>\n"," <th>ankle acceleration Y ±16g</th>\n"," <th>ankle acceleration Z ±16g</th>\n"," <th>ankle gyroscope X</th>\n"," <th>ankle gyroscope Y</th>\n"," <th>ankle gyroscope Z</th>\n"," <th>ankle magnetometer X</th>\n"," <th>ankle magnetometer Y</th>\n"," <th>ankle magnetometer Z</th>\n"," <th>PeopleId</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.37223</td>\n"," <td>8.60074</td>\n"," <td>3.51048</td>\n"," <td>-0.092217</td>\n"," <td>0.056812</td>\n"," <td>-0.015845</td>\n"," <td>14.6806</td>\n"," <td>...</td>\n"," <td>9.65918</td>\n"," <td>-1.65569</td>\n"," <td>-0.099797</td>\n"," <td>0.008300</td>\n"," <td>0.009250</td>\n"," <td>-0.017580</td>\n"," <td>-61.1888</td>\n"," <td>-38.9599</td>\n"," <td>-58.1438</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.18837</td>\n"," <td>8.56560</td>\n"," <td>3.66179</td>\n"," <td>-0.024413</td>\n"," <td>0.047759</td>\n"," <td>0.006474</td>\n"," <td>14.8991</td>\n"," <td>...</td>\n"," <td>9.69370</td>\n"," <td>-1.57902</td>\n"," <td>-0.215687</td>\n"," <td>-0.006577</td>\n"," <td>-0.004638</td>\n"," <td>0.000368</td>\n"," <td>-59.8479</td>\n"," <td>-38.8919</td>\n"," <td>-58.5253</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.37357</td>\n"," <td>8.60107</td>\n"," <td>3.54898</td>\n"," <td>-0.057976</td>\n"," <td>0.032574</td>\n"," 
<td>-0.006988</td>\n"," <td>14.2420</td>\n"," <td>...</td>\n"," <td>9.58944</td>\n"," <td>-1.73276</td>\n"," <td>0.092914</td>\n"," <td>0.003014</td>\n"," <td>0.000148</td>\n"," <td>0.022495</td>\n"," <td>-60.7361</td>\n"," <td>-39.4138</td>\n"," <td>-58.3999</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.07473</td>\n"," <td>8.52853</td>\n"," <td>3.66021</td>\n"," <td>-0.002352</td>\n"," <td>0.032810</td>\n"," <td>-0.003747</td>\n"," <td>14.8908</td>\n"," <td>...</td>\n"," <td>9.58814</td>\n"," <td>-1.77040</td>\n"," <td>0.054545</td>\n"," <td>0.003175</td>\n"," <td>-0.020301</td>\n"," <td>0.011275</td>\n"," <td>-60.4091</td>\n"," <td>-38.7635</td>\n"," <td>-58.3956</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.22936</td>\n"," <td>8.83122</td>\n"," <td>3.70000</td>\n"," <td>0.012269</td>\n"," <td>0.018305</td>\n"," <td>-0.053325</td>\n"," <td>15.5612</td>\n"," <td>...</td>\n"," <td>9.69771</td>\n"," <td>-1.65625</td>\n"," <td>-0.060809</td>\n"," <td>0.012698</td>\n"," <td>-0.014303</td>\n"," <td>-0.002823</td>\n"," <td>-61.5199</td>\n"," <td>-39.3879</td>\n"," <td>-58.2694</td>\n"," <td>1</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>5 rows × 33 columns</p>\n","</div>"],"text/plain":[" activityID heart_rate hand temperature (°C) \\\n","0 transient activities 104.0 30.0 \n","1 transient activities 104.0 30.0 \n","2 transient activities 104.0 30.0 \n","3 transient activities 104.0 30.0 \n","4 transient activities 104.0 30.0 \n","\n"," hand acceleration X ±16g hand acceleration Y ±16g \\\n","0 2.37223 8.60074 \n","1 2.18837 8.56560 \n","2 2.37357 8.60107 \n","3 2.07473 8.52853 \n","4 2.22936 8.83122 \n","\n"," hand acceleration Z ±16g hand gyroscope X hand gyroscope Y \\\n","0 3.51048 -0.092217 0.056812 \n","1 3.66179 -0.024413 0.047759 \n","2 3.54898 -0.057976 0.032574 
\n","3 3.66021 -0.002352 0.032810 \n","4 3.70000 0.012269 0.018305 \n","\n"," hand gyroscope Z hand magnetometer X ... ankle acceleration X ±16g \\\n","0 -0.015845 14.6806 ... 9.65918 \n","1 0.006474 14.8991 ... 9.69370 \n","2 -0.006988 14.2420 ... 9.58944 \n","3 -0.003747 14.8908 ... 9.58814 \n","4 -0.053325 15.5612 ... 9.69771 \n","\n"," ankle acceleration Y ±16g ankle acceleration Z ±16g ankle gyroscope X \\\n","0 -1.65569 -0.099797 0.008300 \n","1 -1.57902 -0.215687 -0.006577 \n","2 -1.73276 0.092914 0.003014 \n","3 -1.77040 0.054545 0.003175 \n","4 -1.65625 -0.060809 0.012698 \n","\n"," ankle gyroscope Y ankle gyroscope Z ankle magnetometer X \\\n","0 0.009250 -0.017580 -61.1888 \n","1 -0.004638 0.000368 -59.8479 \n","2 0.000148 0.022495 -60.7361 \n","3 -0.020301 0.011275 -60.4091 \n","4 -0.014303 -0.002823 -61.5199 \n","\n"," ankle magnetometer Y ankle magnetometer Z PeopleId \n","0 -38.9599 -58.1438 1 \n","1 -38.8919 -58.5253 1 \n","2 -39.4138 -58.3999 1 \n","3 -38.7635 -58.3956 1 \n","4 -39.3879 -58.2694 1 \n","\n","[5 rows x 33 columns]"]},"execution_count":2,"metadata":{},"output_type":"execute_result"}],"source":["dataset = pd.read_csv('/kaggle/input/fisical-activity-dataset/dataset2.csv')\n","dataset.head()"]},{"cell_type":"code","execution_count":3,"id":"4c7d3445","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:24.466165Z","iopub.status.busy":"2023-08-05T00:07:24.465496Z","iopub.status.idle":"2023-08-05T00:07:24.70062Z","shell.execute_reply":"2023-08-05T00:07:24.699179Z"},"papermill":{"duration":0.24804,"end_time":"2023-08-05T00:07:24.703053","exception":false,"start_time":"2023-08-05T00:07:24.455013","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[104. 30. 2.37223 ... -61.1888 -38.9599 -58.1438 ]\n"," [104. 30. 2.18837 ... -59.8479 -38.8919 -58.5253 ]\n"," [104. 30. 2.37357 ... -60.7361 -39.4138 -58.3999 ]\n"," ...\n"," [140. 30.8125 -9.42745 ... -38.5541 -16.0535 24.6936 ]\n"," [140. 
30.8125 -9.47246 ... -38.8064 -16.04 24.9763 ]\n"," [140. 30.8125 -9.66621 ... -38.6814 -15.9175 24.9766 ]]\n","['transient activities' 'transient activities' 'transient activities' ...\n"," 'transient activities' 'transient activities' 'transient activities']\n"]}],"source":["X = dataset.iloc[:, 1:-1].values\n","y = dataset.iloc[:, 0].values\n","print(X)\n","print(y)"]},{"cell_type":"markdown","id":"e73af995","metadata":{"papermill":{"duration":0.00873,"end_time":"2023-08-05T00:07:24.720797","exception":false,"start_time":"2023-08-05T00:07:24.712067","status":"completed"},"tags":[]},"source":["## Encoding categorical data"]},{"cell_type":"code","execution_count":4,"id":"b2055003","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:24.740481Z","iopub.status.busy":"2023-08-05T00:07:24.740069Z","iopub.status.idle":"2023-08-05T00:07:26.930985Z","shell.execute_reply":"2023-08-05T00:07:26.929671Z"},"papermill":{"duration":2.203646,"end_time":"2023-08-05T00:07:26.933552","exception":false,"start_time":"2023-08-05T00:07:24.729906","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["['Nordic walking', 'ascending stairs', 'cycling', 'descending stairs', 'ironing', 'lying', 'rope jumping', 'running', 'sitting', 'standing', 'transient activities', 'vacuum cleaning', 'walking']\n","Counter({10: 927575, 12: 238761, 4: 238690, 5: 192523, 9: 189931, 0: 188107, 8: 185188, 11: 175353, 2: 164600, 1: 117216, 3: 104944, 7: 98199, 6: 42969})\n","Class=10, n=927575 (32.387%)\n","Class=5, n=192523 (6.722%)\n","Class=8, n=185188 (6.466%)\n","Class=9, n=189931 (6.632%)\n","Class=4, n=238690 (8.334%)\n","Class=11, n=175353 (6.123%)\n","Class=1, n=117216 (4.093%)\n","Class=3, n=104944 (3.664%)\n","Class=12, n=238761 (8.336%)\n","Class=0, n=188107 (6.568%)\n","Class=2, n=164600 (5.747%)\n","Class=7, n=98199 (3.429%)\n","Class=6, n=42969 (1.500%)\n"]},{"data":{"image/png":"","text/plain":["<Figure size 640x480 with 1 
Axes>"]},"metadata":{},"output_type":"display_data"}],"source":["from collections import Counter\n","from sklearn.preprocessing import LabelEncoder\n","from matplotlib import pyplot\n","\n","le = LabelEncoder()\n","y_temp = le.fit_transform(y)\n","print(list(le.classes_))\n","counter = Counter(y_temp)\n","\n","print(counter)\n","for k,v in counter.items():\n"," per = v / len(y_temp) * 100\n"," print('Class=%d, n=%d (%.3f%%)' % (k, v, per))\n","\n","# plot the distribution\n","pyplot.bar(counter.keys(), counter.values())\n","pyplot.show()"]},{"cell_type":"markdown","id":"2d62ec16","metadata":{"papermill":{"duration":0.009288,"end_time":"2023-08-05T00:07:26.952484","exception":false,"start_time":"2023-08-05T00:07:26.943196","status":"completed"},"tags":[]},"source":["## Method 1: Using Random Forest Classifier "]},{"cell_type":"markdown","id":"ec9d9fce","metadata":{"papermill":{"duration":0.009112,"end_time":"2023-08-05T00:07:26.971127","exception":false,"start_time":"2023-08-05T00:07:26.962015","status":"completed"},"tags":[]},"source":["## Handling missing values"]},{"cell_type":"code","execution_count":5,"id":"68d72f52","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:26.992642Z","iopub.status.busy":"2023-08-05T00:07:26.992214Z","iopub.status.idle":"2023-08-05T00:07:29.167243Z","shell.execute_reply":"2023-08-05T00:07:29.166002Z"},"papermill":{"duration":2.188981,"end_time":"2023-08-05T00:07:29.169898","exception":false,"start_time":"2023-08-05T00:07:26.980917","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.impute import SimpleImputer\n","imputer = SimpleImputer(missing_values=np.nan, strategy='mean')\n","imputer.fit(X)\n","X = imputer.transform(X)"]},{"cell_type":"markdown","id":"b99c6d79","metadata":{"papermill":{"duration":0.009124,"end_time":"2023-08-05T00:07:29.188693","exception":false,"start_time":"2023-08-05T00:07:29.179569","status":"completed"},"tags":[]},"source":["## 
Encoding"]},{"cell_type":"code","execution_count":6,"id":"57a332ea","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:29.209213Z","iopub.status.busy":"2023-08-05T00:07:29.20881Z","iopub.status.idle":"2023-08-05T00:07:29.792137Z","shell.execute_reply":"2023-08-05T00:07:29.791044Z"},"papermill":{"duration":0.596644,"end_time":"2023-08-05T00:07:29.794779","exception":false,"start_time":"2023-08-05T00:07:29.198135","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.preprocessing import LabelEncoder\n","le = LabelEncoder()\n","y = le.fit_transform(y)"]},{"cell_type":"code","execution_count":7,"id":"974612a4","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:29.81627Z","iopub.status.busy":"2023-08-05T00:07:29.815829Z","iopub.status.idle":"2023-08-05T00:07:31.145986Z","shell.execute_reply":"2023-08-05T00:07:31.144799Z"},"papermill":{"duration":1.343317,"end_time":"2023-08-05T00:07:31.148501","exception":false,"start_time":"2023-08-05T00:07:29.805184","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.model_selection import train_test_split\n","X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=0)"]},{"cell_type":"markdown","id":"8edfbb5a","metadata":{"papermill":{"duration":0.008929,"end_time":"2023-08-05T00:07:31.166893","exception":false,"start_time":"2023-08-05T00:07:31.157964","status":"completed"},"tags":[]},"source":["## Feature Scaling"]},{"cell_type":"code","execution_count":8,"id":"ba643853","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:31.187287Z","iopub.status.busy":"2023-08-05T00:07:31.186875Z","iopub.status.idle":"2023-08-05T00:07:32.475029Z","shell.execute_reply":"2023-08-05T00:07:32.474185Z"},"papermill":{"duration":1.301347,"end_time":"2023-08-05T00:07:32.477471","exception":false,"start_time":"2023-08-05T00:07:31.176124","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.preprocessing import StandardScaler\n","sc = 
StandardScaler()\n","X_train = sc.fit_transform(X_train)\n","X_test = sc.transform(X_test)"]},{"cell_type":"markdown","id":"14ac9a68","metadata":{"papermill":{"duration":0.009663,"end_time":"2023-08-05T00:07:32.496576","exception":false,"start_time":"2023-08-05T00:07:32.486913","status":"completed"},"tags":[]},"source":["## Training RandomForestClassifier"]},{"cell_type":"code","execution_count":9,"id":"1dfae9c0","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:32.517561Z","iopub.status.busy":"2023-08-05T00:07:32.517142Z","iopub.status.idle":"2023-08-05T00:15:35.089903Z","shell.execute_reply":"2023-08-05T00:15:35.088663Z"},"papermill":{"duration":482.594606,"end_time":"2023-08-05T00:15:35.101103","exception":false,"start_time":"2023-08-05T00:07:32.506497","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: 
\"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 
50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>RandomForestClassifier(criterion=&#x27;entropy&#x27;, n_estimators=10, random_state=0)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. 
<br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">RandomForestClassifier</label><div class=\"sk-toggleable__content\"><pre>RandomForestClassifier(criterion=&#x27;entropy&#x27;, n_estimators=10, random_state=0)</pre></div></div></div></div></div>"],"text/plain":["RandomForestClassifier(criterion='entropy', n_estimators=10, random_state=0)"]},"execution_count":9,"metadata":{},"output_type":"execute_result"}],"source":["from sklearn.ensemble import RandomForestClassifier\n","classifier = RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0)\n","classifier.fit(X_train, y_train)"]},{"cell_type":"markdown","id":"a6207e57","metadata":{"papermill":{"duration":0.009341,"end_time":"2023-08-05T00:15:35.120003","exception":false,"start_time":"2023-08-05T00:15:35.110662","status":"completed"},"tags":[]},"source":["## Predicting Results"]},{"cell_type":"code","execution_count":10,"id":"a963263d","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:35.140981Z","iopub.status.busy":"2023-08-05T00:15:35.140561Z","iopub.status.idle":"2023-08-05T00:15:37.22477Z","shell.execute_reply":"2023-08-05T00:15:37.223543Z"},"papermill":{"duration":2.097825,"end_time":"2023-08-05T00:15:37.227329","exception":false,"start_time":"2023-08-05T00:15:35.129504","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[ 4 4]\n"," [10 10]\n"," [ 8 8]\n"," ...\n"," [ 7 7]\n"," [ 8 8]\n"," [ 3 3]]\n"]}],"source":["y_pred = classifier.predict(X_test)\n","print(np.concatenate((y_pred.reshape(len(y_pred),1), 
y_test.reshape(len(y_test),1)),1))"]},{"cell_type":"markdown","id":"7846df5c","metadata":{"papermill":{"duration":0.009748,"end_time":"2023-08-05T00:15:37.247066","exception":false,"start_time":"2023-08-05T00:15:37.237318","status":"completed"},"tags":[]},"source":["## Calculating Accuracy"]},{"cell_type":"code","execution_count":11,"id":"ed6e3e8a","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:37.268284Z","iopub.status.busy":"2023-08-05T00:15:37.267859Z","iopub.status.idle":"2023-08-05T00:15:37.436445Z","shell.execute_reply":"2023-08-05T00:15:37.435065Z"},"papermill":{"duration":0.182461,"end_time":"2023-08-05T00:15:37.439113","exception":false,"start_time":"2023-08-05T00:15:37.256652","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[ 37634 0 0 0 0 0 0 0 0 0\n"," 8 0 0]\n"," [ 0 23179 0 23 0 0 0 0 0 0\n"," 81 0 0]\n"," [ 0 0 32839 0 0 0 0 0 0 0\n"," 21 0 0]\n"," [ 0 14 0 20904 0 0 0 0 0 0\n"," 43 0 0]\n"," [ 0 0 0 0 47928 0 0 0 0 14\n"," 6 0 0]\n"," [ 0 0 0 0 0 38287 0 0 3 0\n"," 17 0 0]\n"," [ 0 0 1 0 0 0 8591 2 0 0\n"," 5 0 0]\n"," [ 0 0 0 0 0 0 0 19492 0 0\n"," 8 1 0]\n"," [ 0 0 0 0 0 0 0 0 36812 2\n"," 32 0 0]\n"," [ 0 0 0 0 22 0 0 0 4 38082\n"," 57 0 0]\n"," [ 28 61 12 57 25 32 8 24 31 22\n"," 185589 24 20]\n"," [ 0 0 0 0 0 0 0 0 0 0\n"," 41 35025 0]\n"," [ 1 0 0 0 0 0 0 0 0 0\n"," 41 0 47659]]\n"]},{"data":{"text/plain":["0.9986190931754223"]},"execution_count":11,"metadata":{},"output_type":"execute_result"}],"source":["from sklearn.metrics import confusion_matrix, accuracy_score\n","cm = confusion_matrix(y_test, y_pred)\n","print(cm)\n","accuracy_score(y_test, 
y_pred)"]},{"cell_type":"code","execution_count":12,"id":"e609b003","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:37.462377Z","iopub.status.busy":"2023-08-05T00:15:37.461702Z","iopub.status.idle":"2023-08-05T00:15:39.654083Z","shell.execute_reply":"2023-08-05T00:15:39.652958Z"},"papermill":{"duration":2.206834,"end_time":"2023-08-05T00:15:39.656344","exception":false,"start_time":"2023-08-05T00:15:37.44951","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":[" <script type=\"text/javascript\">\n"," window.PlotlyConfig = {MathJaxConfig: 'local'};\n"," if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n"," if (typeof require !== 'undefined') {\n"," require.undef(\"plotly\");\n"," requirejs.config({\n"," paths: {\n"," 'plotly': ['https://cdn.plot.ly/plotly-2.24.1.min']\n"," }\n"," });\n"," require(['plotly'], function(Plotly) {\n"," window._Plotly = Plotly;\n"," });\n"," }\n"," </script>\n"," "]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["<div> <div id=\"be7930db-6c4e-497b-b832-416da626298c\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"be7930db-6c4e-497b-b832-416da626298c\")) { Plotly.newPlot( \"be7930db-6c4e-497b-b832-416da626298c\", 
[{\"coloraxis\":\"coloraxis\",\"name\":\"0\",\"texttemplate\":\"%{z}\",\"z\":[[37634,0,0,0,0,0,0,0,0,0,8,0,0],[0,23179,0,23,0,0,0,0,0,0,81,0,0],[0,0,32839,0,0,0,0,0,0,0,21,0,0],[0,14,0,20904,0,0,0,0,0,0,43,0,0],[0,0,0,0,47928,0,0,0,0,14,6,0,0],[0,0,0,0,0,38287,0,0,3,0,17,0,0],[0,0,1,0,0,0,8591,2,0,0,5,0,0],[0,0,0,0,0,0,0,19492,0,0,8,1,0],[0,0,0,0,0,0,0,0,36812,2,32,0,0],[0,0,0,0,22,0,0,0,4,38082,57,0,0],[28,61,12,57,25,32,8,24,31,22,185589,24,20],[0,0,0,0,0,0,0,0,0,0,41,35025,0],[1,0,0,0,0,0,0,0,0,0,41,0,47659]],\"type\":\"heatmap\",\"xaxis\":\"x\",\"yaxis\":\"y\",\"hovertemplate\":\"x: %{x}\\u003cbr\\u003ey: %{y}\\u003cbr\\u003ecolor: %{z}\\u003cextra\\u003e\\u003c\\u002fextra\\u003e\"}], {\"template\":{\"data\":{\"histogram2dcontour\":[{\"type\":\"histogram2dcontour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"choropleth\":[{\"type\":\"choropleth\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"histogram2d\":[{\"type\":\"histogram2d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmap\":[{\"type\":\"heatmap\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"]
,[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmapgl\":[{\"type\":\"heatmapgl\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"contourcarpet\":[{\"type\":\"contourcarpet\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"contour\":[{\"type\":\"contour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"surface\":[{\"type\":\"surface\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"mesh3d\":[{\"type\":\"mesh3d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"scatter\":[{\"fillpattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2},\"type\":\"scatter\"}],\"parcoords\":[{\"type\":\"parcoords\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolargl\":[{\"type\":\"scatterpolargl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"bar\":[{\"error_x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}
],\"scattergeo\":[{\"type\":\"scattergeo\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolar\":[{\"type\":\"scatterpolar\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"histogram\":[{\"marker\":{\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"histogram\"}],\"scattergl\":[{\"type\":\"scattergl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatter3d\":[{\"type\":\"scatter3d\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattermapbox\":[{\"type\":\"scattermapbox\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterternary\":[{\"type\":\"scatterternary\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattercarpet\":[{\"type\":\"scattercarpet\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"table\":[{\"cells\":{\"fill\":{\"color\":\"#EBF0F8\"},\"line\":{\"color\":\"white\"}},\"header\":{\"fill\":{\"color\":\"#C8D4E3\"},\"line\":{\"color\":\"white\"}},\"type\":\"table\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"pie\":[{\"automargin\":true,\"type\":\"pie\"}]},\"layout\":{\"autotypenumbers\":\"strict\",\"colorway\":[\"#636efa\",\"#EF553B\",\"#00cc96\",\"#ab63fa\",\"#FFA15A\",\"#19d3f3\",\"#FF6692\",\"#B6E880\",\"#FF97FF\",\"#FECB52\"],\"font\":{\"color\":\"#2a3f5f\"},\"hovermode\":\"closest\",\"hoverlabel\":{\"align\":\"left\"},\"paper_bgcolor\":\"white\",\"plot_bgcolor\":\"#E5EC
F6\",\"polar\":{\"bgcolor\":\"#E5ECF6\",\"angularaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"radialaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"ternary\":{\"bgcolor\":\"#E5ECF6\",\"aaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"baxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"caxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"coloraxis\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"colorscale\":{\"sequential\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"sequentialminus\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"diverging\":[[0,\"#8e0152\"],[0.1,\"#c51b7d\"],[0.2,\"#de77ae\"],[0.3,\"#f1b6da\"],[0.4,\"#fde0ef\"],[0.5,\"#f7f7f7\"],[0.6,\"#e6f5d0\"],[0.7,\"#b8e186\"],[0.8,\"#7fbc41\"],[0.9,\"#4d9221\"],[1,\"#276419\"]]},\"xaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"yaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"scene\":{\"xaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"yaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecol
or\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"zaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2}},\"shapedefaults\":{\"line\":{\"color\":\"#2a3f5f\"}},\"annotationdefaults\":{\"arrowcolor\":\"#2a3f5f\",\"arrowhead\":0,\"arrowwidth\":1},\"geo\":{\"bgcolor\":\"white\",\"landcolor\":\"#E5ECF6\",\"subunitcolor\":\"white\",\"showland\":true,\"showlakes\":true,\"lakecolor\":\"white\"},\"title\":{\"x\":0.05},\"mapbox\":{\"style\":\"light\"}}},\"xaxis\":{\"anchor\":\"y\",\"domain\":[0.0,1.0]},\"yaxis\":{\"anchor\":\"x\",\"domain\":[0.0,1.0],\"autorange\":\"reversed\"},\"coloraxis\":{\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]},\"margin\":{\"t\":60}}, {\"responsive\": true} ).then(function(){\n"," \n","var gd = document.getElementById('be7930db-6c4e-497b-b832-416da626298c');\n","var x = new MutationObserver(function (mutations, observer) {{\n"," var display = window.getComputedStyle(gd).display;\n"," if (!display || display === 'none') {{\n"," console.log([gd, 'removed!']);\n"," Plotly.purge(gd);\n"," observer.disconnect();\n"," }}\n","}});\n","\n","// Listen for the removal of the full notebook cells\n","var notebookContainer = gd.closest('#notebook-container');\n","if (notebookContainer) {{\n"," x.observe(notebookContainer, {childList: true});\n","}}\n","\n","// Listen for the clearing of the current output cell\n","var outputEl = gd.closest('.output');\n","if (outputEl) {{\n"," x.observe(outputEl, {childList: true});\n","}}\n","\n"," }) }; }); </script> </div>"]},"metadata":{},"output_type":"display_data"}],"source":["# import 
matplotlib.pyplot as plt\n","# from sklearn.metrics import ConfusionMatrixDisplay\n","# disp = ConfusionMatrixDisplay(confusion_matrix=cm,\n","# display_labels=classifier.classes_)\n","# disp.plot()\n","# plt.show()\n","import plotly.express as px\n","fig = px.imshow(cm, text_auto=True, aspect=\"auto\")\n","fig.show()"]},{"cell_type":"markdown","id":"6b68fad1","metadata":{"papermill":{"duration":0.010207,"end_time":"2023-08-05T00:15:39.677156","exception":false,"start_time":"2023-08-05T00:15:39.666949","status":"completed"},"tags":[]},"source":["## Method 2: Using KNN"]},{"cell_type":"code","execution_count":13,"id":"219416d1","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:39.699914Z","iopub.status.busy":"2023-08-05T00:15:39.699066Z","iopub.status.idle":"2023-08-05T00:15:40.015603Z","shell.execute_reply":"2023-08-05T00:15:40.014559Z"},"papermill":{"duration":0.330691,"end_time":"2023-08-05T00:15:40.0181","exception":false,"start_time":"2023-08-05T00:15:39.687409","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<style>#sk-container-id-2 {color: black;background-color: white;}#sk-container-id-2 pre{padding: 0;}#sk-container-id-2 div.sk-toggleable {background-color: white;}#sk-container-id-2 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-2 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-2 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-2 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-2 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-2
input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-2 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-2 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-2 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-2 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-2 div.sk-item {position: relative;z-index: 1;}#sk-container-id-2 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-2 div.sk-item::before, #sk-container-id-2 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-2 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: 
relative;background-color: white;}#sk-container-id-2 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-2 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-2 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-2 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-2 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-2 div.sk-label-container {text-align: center;}#sk-container-id-2 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-2 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>KNeighborsClassifier()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. 
<br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" checked><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">KNeighborsClassifier</label><div class=\"sk-toggleable__content\"><pre>KNeighborsClassifier()</pre></div></div></div></div></div>"],"text/plain":["KNeighborsClassifier()"]},"execution_count":13,"metadata":{},"output_type":"execute_result"}],"source":["from sklearn.neighbors import KNeighborsClassifier\n","knn_clf = KNeighborsClassifier()\n","knn_clf.fit(X_train,y_train)"]},{"cell_type":"code","execution_count":14,"id":"fe6052b4","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:40.041026Z","iopub.status.busy":"2023-08-05T00:15:40.040612Z","iopub.status.idle":"2023-08-05T01:09:53.645091Z","shell.execute_reply":"2023-08-05T01:09:53.643871Z"},"papermill":{"duration":3253.619328,"end_time":"2023-08-05T01:09:53.648066","exception":false,"start_time":"2023-08-05T00:15:40.028738","status":"completed"},"tags":[]},"outputs":[],"source":["y_pred=knn_clf.predict(X_test)"]},{"cell_type":"code","execution_count":15,"id":"80d0ea82","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:09:53.674186Z","iopub.status.busy":"2023-08-05T01:09:53.673791Z","iopub.status.idle":"2023-08-05T01:09:53.682771Z","shell.execute_reply":"2023-08-05T01:09:53.681553Z"},"papermill":{"duration":0.023497,"end_time":"2023-08-05T01:09:53.685168","exception":false,"start_time":"2023-08-05T01:09:53.661671","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[ 4 4]\n"," [10 10]\n"," [ 8 8]\n"," ...\n"," [ 7 7]\n"," [ 8 8]\n"," [ 3 3]]\n"]}],"source":["print(np.concatenate((y_pred.reshape(len(y_pred),1), 
y_test.reshape(len(y_test),1)),1))"]},{"cell_type":"markdown","id":"30642482","metadata":{"papermill":{"duration":0.01112,"end_time":"2023-08-05T01:09:53.711764","exception":false,"start_time":"2023-08-05T01:09:53.700644","status":"completed"},"tags":[]},"source":["## Method 3: Using ANN"]},{"cell_type":"code","execution_count":16,"id":"9fe82be2","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:09:53.739787Z","iopub.status.busy":"2023-08-05T01:09:53.738634Z","iopub.status.idle":"2023-08-05T01:09:55.132546Z","shell.execute_reply":"2023-08-05T01:09:55.131479Z"},"papermill":{"duration":1.410752,"end_time":"2023-08-05T01:09:55.135242","exception":false,"start_time":"2023-08-05T01:09:53.72449","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.model_selection import train_test_split\n","X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=0)"]},{"cell_type":"markdown","id":"571dcd4c","metadata":{"papermill":{"duration":0.010439,"end_time":"2023-08-05T01:09:55.156709","exception":false,"start_time":"2023-08-05T01:09:55.14627","status":"completed"},"tags":[]},"source":["## Oversampling the minority classes"]},{"cell_type":"code","execution_count":17,"id":"32271f2b","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:09:55.181016Z","iopub.status.busy":"2023-08-05T01:09:55.180057Z","iopub.status.idle":"2023-08-05T01:19:23.222784Z","shell.execute_reply":"2023-08-05T01:19:23.221544Z"},"papermill":{"duration":568.057647,"end_time":"2023-08-05T01:19:23.225424","exception":false,"start_time":"2023-08-05T01:09:55.167777","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["Class=4, n=741642 (7.692%)\n","Class=12, n=741642 (7.692%)\n","Class=10, n=741642 (7.692%)\n","Class=2, n=741642 (7.692%)\n","Class=9, n=741642 (7.692%)\n","Class=8, n=741642 (7.692%)\n","Class=0, n=741642 (7.692%)\n","Class=5, n=741642 (7.692%)\n","Class=1, n=741642 (7.692%)\n","Class=11, 
n=741642 (7.692%)\n","Class=6, n=741642 (7.692%)\n","Class=3, n=741642 (7.692%)\n","Class=7, n=741642 (7.692%)\n"]},{"data":{"image/png":"","text/plain":["<Figure size 640x480 with 1 Axes>"]},"metadata":{},"output_type":"display_data"}],"source":["from imblearn.over_sampling import SMOTE\n","oversample = SMOTE()\n","y_train = le.fit_transform(y_train)\n","X_train, y_train = oversample.fit_resample(X_train, y_train)\n","# summarize distribution\n","counter = Counter(y_train)\n","for k,v in counter.items():\n"," per = v / len(y_train) * 100\n"," print('Class=%d, n=%d (%.3f%%)' % (k, v, per))\n","# plot the distribution\n","pyplot.bar(counter.keys(), counter.values())\n","pyplot.show()"]},{"cell_type":"code","execution_count":18,"id":"aa3846f3","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:23.252818Z","iopub.status.busy":"2023-08-05T01:19:23.251956Z","iopub.status.idle":"2023-08-05T01:19:23.740771Z","shell.execute_reply":"2023-08-05T01:19:23.73973Z"},"papermill":{"duration":0.505699,"end_time":"2023-08-05T01:19:23.743348","exception":false,"start_time":"2023-08-05T01:19:23.237649","status":"completed"},"tags":[]},"outputs":[],"source":["from tensorflow.keras.utils import to_categorical\n","y_train = to_categorical(y_train)"]},{"cell_type":"code","execution_count":19,"id":"0a0017d2","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:23.767819Z","iopub.status.busy":"2023-08-05T01:19:23.767429Z","iopub.status.idle":"2023-08-05T01:19:29.065742Z","shell.execute_reply":"2023-08-05T01:19:29.064536Z"},"papermill":{"duration":5.313708,"end_time":"2023-08-05T01:19:29.068475","exception":false,"start_time":"2023-08-05T01:19:23.754767","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.preprocessing import StandardScaler\n","sc = StandardScaler()\n","X_train = sc.fit_transform(X_train)\n","X_test = 
sc.transform(X_test)"]},{"cell_type":"code","execution_count":20,"id":"c344eb49","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:29.092749Z","iopub.status.busy":"2023-08-05T01:19:29.092331Z","iopub.status.idle":"2023-08-05T01:19:29.209741Z","shell.execute_reply":"2023-08-05T01:19:29.20847Z"},"papermill":{"duration":0.132568,"end_time":"2023-08-05T01:19:29.21231","exception":false,"start_time":"2023-08-05T01:19:29.079742","status":"completed"},"tags":[]},"outputs":[],"source":["ann = tf.keras.models.Sequential()\n","ann.add(tf.keras.layers.Dense(units=26, activation='tanh'))\n","ann.add(tf.keras.layers.Dense(units=52, activation='tanh'))\n","ann.add(tf.keras.layers.Dense(units=52, activation='tanh'))\n","ann.add(tf.keras.layers.Dense(units=13, activation='softmax'))"]},{"cell_type":"code","execution_count":21,"id":"c1441ca6","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:29.236225Z","iopub.status.busy":"2023-08-05T01:19:29.23586Z","iopub.status.idle":"2023-08-05T01:19:29.268014Z","shell.execute_reply":"2023-08-05T01:19:29.266637Z"},"papermill":{"duration":0.046985,"end_time":"2023-08-05T01:19:29.270549","exception":false,"start_time":"2023-08-05T01:19:29.223564","status":"completed"},"tags":[]},"outputs":[],"source":["ann.compile(optimizer='adam' , loss='CategoricalCrossentropy' , metrics=['accuracy'])"]},{"cell_type":"code","execution_count":22,"id":"82e960ec","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:29.294423Z","iopub.status.busy":"2023-08-05T01:19:29.294016Z","iopub.status.idle":"2023-08-05T01:43:40.990731Z","shell.execute_reply":"2023-08-05T01:43:40.989437Z"},"papermill":{"duration":1451.711594,"end_time":"2023-08-05T01:43:40.99336","exception":false,"start_time":"2023-08-05T01:19:29.281766","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["Epoch 1/10\n","75324/75324 [==============================] - 145s 2ms/step - loss: 0.1724 - accuracy: 0.9415\n","Epoch 
2/10\n","75324/75324 [==============================] - 142s 2ms/step - loss: 0.0763 - accuracy: 0.9731\n","Epoch 3/10\n","75324/75324 [==============================] - 144s 2ms/step - loss: 0.0601 - accuracy: 0.9788\n","Epoch 4/10\n","75324/75324 [==============================] - 151s 2ms/step - loss: 0.0527 - accuracy: 0.9815\n","Epoch 5/10\n","75324/75324 [==============================] - 142s 2ms/step - loss: 0.0480 - accuracy: 0.9831\n","Epoch 6/10\n","75324/75324 [==============================] - 143s 2ms/step - loss: 0.0452 - accuracy: 0.9840\n","Epoch 7/10\n","75324/75324 [==============================] - 146s 2ms/step - loss: 0.0432 - accuracy: 0.9848\n","Epoch 8/10\n","75324/75324 [==============================] - 141s 2ms/step - loss: 0.0417 - accuracy: 0.9854\n","Epoch 9/10\n","75324/75324 [==============================] - 148s 2ms/step - loss: 0.0405 - accuracy: 0.9857\n","Epoch 10/10\n","75324/75324 [==============================] - 146s 2ms/step - loss: 0.0397 - accuracy: 0.9860\n"]}],"source":["history = ann.fit(X_train, y_train, batch_size=128, epochs=10)"]},{"cell_type":"code","execution_count":23,"id":"c66f3d9a","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:43:44.865958Z","iopub.status.busy":"2023-08-05T01:43:44.86558Z","iopub.status.idle":"2023-08-05T01:44:26.055791Z","shell.execute_reply":"2023-08-05T01:44:26.054616Z"},"papermill":{"duration":43.125175,"end_time":"2023-08-05T01:44:26.058358","exception":false,"start_time":"2023-08-05T01:43:42.933183","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["17901/17901 [==============================] - 22s 1ms/step\n"]}],"source":["y_pred = 
ann.predict(X_test)"]},{"cell_type":"code","execution_count":24,"id":"ca3d6088","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:29.962501Z","iopub.status.busy":"2023-08-05T01:44:29.962091Z","iopub.status.idle":"2023-08-05T01:44:29.991499Z","shell.execute_reply":"2023-08-05T01:44:29.990294Z"},"papermill":{"duration":1.967301,"end_time":"2023-08-05T01:44:29.994142","exception":false,"start_time":"2023-08-05T01:44:28.026841","status":"completed"},"tags":[]},"outputs":[],"source":["result = np.argmax(y_pred, axis=-1)"]},{"cell_type":"code","execution_count":25,"id":"4211d197","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:33.86521Z","iopub.status.busy":"2023-08-05T01:44:33.864484Z","iopub.status.idle":"2023-08-05T01:44:33.869618Z","shell.execute_reply":"2023-08-05T01:44:33.868861Z"},"papermill":{"duration":1.942499,"end_time":"2023-08-05T01:44:33.871763","exception":false,"start_time":"2023-08-05T01:44:31.929264","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[ 4 10 8 ... 
7 8 3]\n"]}],"source":["print(result)"]},{"cell_type":"code","execution_count":26,"id":"e36e98e0","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:37.612404Z","iopub.status.busy":"2023-08-05T01:44:37.61165Z","iopub.status.idle":"2023-08-05T01:44:37.616428Z","shell.execute_reply":"2023-08-05T01:44:37.615631Z"},"papermill":{"duration":1.837241,"end_time":"2023-08-05T01:44:37.618458","exception":false,"start_time":"2023-08-05T01:44:35.781217","status":"completed"},"tags":[]},"outputs":[],"source":["y_test_rs = np.argmax(y_test, axis=-1)"]},{"cell_type":"code","execution_count":27,"id":"49227522","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:41.477349Z","iopub.status.busy":"2023-08-05T01:44:41.476614Z","iopub.status.idle":"2023-08-05T01:44:41.481313Z","shell.execute_reply":"2023-08-05T01:44:41.48054Z"},"papermill":{"duration":1.939073,"end_time":"2023-08-05T01:44:41.484075","exception":false,"start_time":"2023-08-05T01:44:39.545002","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["9\n"]}],"source":["print(y_test_rs)"]},{"cell_type":"code","execution_count":28,"id":"72b1f910","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:45.446971Z","iopub.status.busy":"2023-08-05T01:44:45.446273Z","iopub.status.idle":"2023-08-05T01:44:45.569371Z","shell.execute_reply":"2023-08-05T01:44:45.568125Z"},"papermill":{"duration":2.161727,"end_time":"2023-08-05T01:44:45.571926","exception":false,"start_time":"2023-08-05T01:44:43.410199","status":"completed"},"tags":[]},"outputs":[],"source":["y_pred_enc = pd.get_dummies(result)\n","y_test_enc = 
pd.get_dummies(y_test)"]},{"cell_type":"code","execution_count":29,"id":"6b54f780","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:49.311088Z","iopub.status.busy":"2023-08-05T01:44:49.310688Z","iopub.status.idle":"2023-08-05T01:44:49.698079Z","shell.execute_reply":"2023-08-05T01:44:49.696602Z"},"papermill":{"duration":2.293735,"end_time":"2023-08-05T01:44:49.700483","exception":false,"start_time":"2023-08-05T01:44:47.406748","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["0.9624047680565351\n"]}],"source":["from sklearn.metrics import accuracy_score\n","print(accuracy_score(y_test_enc, y_pred_enc))"]}],"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.12"},"papermill":{"default_parameters":{},"duration":5890.466662,"end_time":"2023-08-05T01:44:54.111787","environment_variables":{},"exception":null,"input_path":"__notebook__.ipynb","output_path":"__notebook__.ipynb","parameters":{},"start_time":"2023-08-05T00:06:43.645125","version":"2.4.0"}},"nbformat":4,"nbformat_minor":5} No newline at end of file
Copy link
Collaborator

@coderpotter coderpotter Aug 6, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DYM method 3? This section needs a lot of work. Retry


Reply via ReviewNB

@@ -0,0 +1 @@
{"cells":[{"source":"<a href=\"https://www.kaggle.com/code/manvinagdev/physical-activity-prediction?scriptVersionId=138941879\" target=\"_blank\"><img align=\"left\" alt=\"Kaggle\" title=\"Open in Kaggle\" src=\"https://kaggle.com/static/images/open-in-kaggle.svg\"></a>","metadata":{},"cell_type":"markdown"},{"cell_type":"markdown","id":"fea86c76","metadata":{"papermill":{"duration":0.008893,"end_time":"2023-08-05T00:06:53.536089","exception":false,"start_time":"2023-08-05T00:06:53.527196","status":"completed"},"tags":[]},"source":["## Importing libraries"]},{"cell_type":"code","execution_count":1,"id":"89d8bcdb","metadata":{"_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","execution":{"iopub.execute_input":"2023-08-05T00:06:53.554723Z","iopub.status.busy":"2023-08-05T00:06:53.554321Z","iopub.status.idle":"2023-08-05T00:07:02.412402Z","shell.execute_reply":"2023-08-05T00:07:02.410962Z"},"papermill":{"duration":8.870272,"end_time":"2023-08-05T00:07:02.414899","exception":false,"start_time":"2023-08-05T00:06:53.544627","status":"completed"},"tags":[]},"outputs":[{"name":"stderr","output_type":"stream","text":["/opt/conda/lib/python3.10/site-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.23.5\n"," warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n","/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/__init__.py:98: UserWarning: unable to load libtensorflow_io_plugins.so: unable to open file: libtensorflow_io_plugins.so, from paths: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so']\n","caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so: undefined symbol: _ZN3tsl6StatusC1EN10tensorflow5error4CodeESt17basic_string_viewIcSt11char_traitsIcEENS_14SourceLocationE']\n"," 
warnings.warn(f\"unable to load libtensorflow_io_plugins.so: {e}\")\n","/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/__init__.py:104: UserWarning: file system plugins are not loaded: unable to open file: libtensorflow_io.so, from paths: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so']\n","caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so: undefined symbol: _ZTVN10tensorflow13GcsFileSystemE']\n"," warnings.warn(f\"file system plugins are not loaded: {e}\")\n"]},{"name":"stdout","output_type":"stream","text":["/kaggle/input/fisical-activity-dataset/dataset2.csv\n"]}],"source":["import numpy as np\n","import pandas as pd\n","import tensorflow as tf\n","\n","import os\n","for dirname, _, filenames in os.walk('/kaggle/input'):\n"," for filename in filenames:\n"," print(os.path.join(dirname, filename))"]},{"cell_type":"markdown","id":"e1dee26f","metadata":{"papermill":{"duration":0.008233,"end_time":"2023-08-05T00:07:02.431643","exception":false,"start_time":"2023-08-05T00:07:02.42341","status":"completed"},"tags":[]},"source":["## Loading the dataset"]},{"cell_type":"code","execution_count":2,"id":"ce6fe559","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:02.452779Z","iopub.status.busy":"2023-08-05T00:07:02.451213Z","iopub.status.idle":"2023-08-05T00:07:24.443285Z","shell.execute_reply":"2023-08-05T00:07:24.442116Z"},"papermill":{"duration":22.004325,"end_time":"2023-08-05T00:07:24.445861","exception":false,"start_time":"2023-08-05T00:07:02.441536","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: 
right;\">\n"," <th></th>\n"," <th>activityID</th>\n"," <th>heart_rate</th>\n"," <th>hand temperature (°C)</th>\n"," <th>hand acceleration X ±16g</th>\n"," <th>hand acceleration Y ±16g</th>\n"," <th>hand acceleration Z ±16g</th>\n"," <th>hand gyroscope X</th>\n"," <th>hand gyroscope Y</th>\n"," <th>hand gyroscope Z</th>\n"," <th>hand magnetometer X</th>\n"," <th>...</th>\n"," <th>ankle acceleration X ±16g</th>\n"," <th>ankle acceleration Y ±16g</th>\n"," <th>ankle acceleration Z ±16g</th>\n"," <th>ankle gyroscope X</th>\n"," <th>ankle gyroscope Y</th>\n"," <th>ankle gyroscope Z</th>\n"," <th>ankle magnetometer X</th>\n"," <th>ankle magnetometer Y</th>\n"," <th>ankle magnetometer Z</th>\n"," <th>PeopleId</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.37223</td>\n"," <td>8.60074</td>\n"," <td>3.51048</td>\n"," <td>-0.092217</td>\n"," <td>0.056812</td>\n"," <td>-0.015845</td>\n"," <td>14.6806</td>\n"," <td>...</td>\n"," <td>9.65918</td>\n"," <td>-1.65569</td>\n"," <td>-0.099797</td>\n"," <td>0.008300</td>\n"," <td>0.009250</td>\n"," <td>-0.017580</td>\n"," <td>-61.1888</td>\n"," <td>-38.9599</td>\n"," <td>-58.1438</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.18837</td>\n"," <td>8.56560</td>\n"," <td>3.66179</td>\n"," <td>-0.024413</td>\n"," <td>0.047759</td>\n"," <td>0.006474</td>\n"," <td>14.8991</td>\n"," <td>...</td>\n"," <td>9.69370</td>\n"," <td>-1.57902</td>\n"," <td>-0.215687</td>\n"," <td>-0.006577</td>\n"," <td>-0.004638</td>\n"," <td>0.000368</td>\n"," <td>-59.8479</td>\n"," <td>-38.8919</td>\n"," <td>-58.5253</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.37357</td>\n"," <td>8.60107</td>\n"," <td>3.54898</td>\n"," <td>-0.057976</td>\n"," <td>0.032574</td>\n"," 
<td>-0.006988</td>\n"," <td>14.2420</td>\n"," <td>...</td>\n"," <td>9.58944</td>\n"," <td>-1.73276</td>\n"," <td>0.092914</td>\n"," <td>0.003014</td>\n"," <td>0.000148</td>\n"," <td>0.022495</td>\n"," <td>-60.7361</td>\n"," <td>-39.4138</td>\n"," <td>-58.3999</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.07473</td>\n"," <td>8.52853</td>\n"," <td>3.66021</td>\n"," <td>-0.002352</td>\n"," <td>0.032810</td>\n"," <td>-0.003747</td>\n"," <td>14.8908</td>\n"," <td>...</td>\n"," <td>9.58814</td>\n"," <td>-1.77040</td>\n"," <td>0.054545</td>\n"," <td>0.003175</td>\n"," <td>-0.020301</td>\n"," <td>0.011275</td>\n"," <td>-60.4091</td>\n"," <td>-38.7635</td>\n"," <td>-58.3956</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.22936</td>\n"," <td>8.83122</td>\n"," <td>3.70000</td>\n"," <td>0.012269</td>\n"," <td>0.018305</td>\n"," <td>-0.053325</td>\n"," <td>15.5612</td>\n"," <td>...</td>\n"," <td>9.69771</td>\n"," <td>-1.65625</td>\n"," <td>-0.060809</td>\n"," <td>0.012698</td>\n"," <td>-0.014303</td>\n"," <td>-0.002823</td>\n"," <td>-61.5199</td>\n"," <td>-39.3879</td>\n"," <td>-58.2694</td>\n"," <td>1</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>5 rows × 33 columns</p>\n","</div>"],"text/plain":[" activityID heart_rate hand temperature (°C) \\\n","0 transient activities 104.0 30.0 \n","1 transient activities 104.0 30.0 \n","2 transient activities 104.0 30.0 \n","3 transient activities 104.0 30.0 \n","4 transient activities 104.0 30.0 \n","\n"," hand acceleration X ±16g hand acceleration Y ±16g \\\n","0 2.37223 8.60074 \n","1 2.18837 8.56560 \n","2 2.37357 8.60107 \n","3 2.07473 8.52853 \n","4 2.22936 8.83122 \n","\n"," hand acceleration Z ±16g hand gyroscope X hand gyroscope Y \\\n","0 3.51048 -0.092217 0.056812 \n","1 3.66179 -0.024413 0.047759 \n","2 3.54898 -0.057976 0.032574 
\n","3 3.66021 -0.002352 0.032810 \n","4 3.70000 0.012269 0.018305 \n","\n"," hand gyroscope Z hand magnetometer X ... ankle acceleration X ±16g \\\n","0 -0.015845 14.6806 ... 9.65918 \n","1 0.006474 14.8991 ... 9.69370 \n","2 -0.006988 14.2420 ... 9.58944 \n","3 -0.003747 14.8908 ... 9.58814 \n","4 -0.053325 15.5612 ... 9.69771 \n","\n"," ankle acceleration Y ±16g ankle acceleration Z ±16g ankle gyroscope X \\\n","0 -1.65569 -0.099797 0.008300 \n","1 -1.57902 -0.215687 -0.006577 \n","2 -1.73276 0.092914 0.003014 \n","3 -1.77040 0.054545 0.003175 \n","4 -1.65625 -0.060809 0.012698 \n","\n"," ankle gyroscope Y ankle gyroscope Z ankle magnetometer X \\\n","0 0.009250 -0.017580 -61.1888 \n","1 -0.004638 0.000368 -59.8479 \n","2 0.000148 0.022495 -60.7361 \n","3 -0.020301 0.011275 -60.4091 \n","4 -0.014303 -0.002823 -61.5199 \n","\n"," ankle magnetometer Y ankle magnetometer Z PeopleId \n","0 -38.9599 -58.1438 1 \n","1 -38.8919 -58.5253 1 \n","2 -39.4138 -58.3999 1 \n","3 -38.7635 -58.3956 1 \n","4 -39.3879 -58.2694 1 \n","\n","[5 rows x 33 columns]"]},"execution_count":2,"metadata":{},"output_type":"execute_result"}],"source":["dataset = pd.read_csv('/kaggle/input/fisical-activity-dataset/dataset2.csv')\n","dataset.head()"]},{"cell_type":"code","execution_count":3,"id":"4c7d3445","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:24.466165Z","iopub.status.busy":"2023-08-05T00:07:24.465496Z","iopub.status.idle":"2023-08-05T00:07:24.70062Z","shell.execute_reply":"2023-08-05T00:07:24.699179Z"},"papermill":{"duration":0.24804,"end_time":"2023-08-05T00:07:24.703053","exception":false,"start_time":"2023-08-05T00:07:24.455013","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[104. 30. 2.37223 ... -61.1888 -38.9599 -58.1438 ]\n"," [104. 30. 2.18837 ... -59.8479 -38.8919 -58.5253 ]\n"," [104. 30. 2.37357 ... -60.7361 -39.4138 -58.3999 ]\n"," ...\n"," [140. 30.8125 -9.42745 ... -38.5541 -16.0535 24.6936 ]\n"," [140. 
30.8125 -9.47246 ... -38.8064 -16.04 24.9763 ]\n"," [140. 30.8125 -9.66621 ... -38.6814 -15.9175 24.9766 ]]\n","['transient activities' 'transient activities' 'transient activities' ...\n"," 'transient activities' 'transient activities' 'transient activities']\n"]}],"source":["X = dataset.iloc[:, 1:-1].values\n","y = dataset.iloc[:, 0].values\n","print(X)\n","print(y)"]},{"cell_type":"markdown","id":"e73af995","metadata":{"papermill":{"duration":0.00873,"end_time":"2023-08-05T00:07:24.720797","exception":false,"start_time":"2023-08-05T00:07:24.712067","status":"completed"},"tags":[]},"source":["## Encoding categorical data"]},{"cell_type":"code","execution_count":4,"id":"b2055003","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:24.740481Z","iopub.status.busy":"2023-08-05T00:07:24.740069Z","iopub.status.idle":"2023-08-05T00:07:26.930985Z","shell.execute_reply":"2023-08-05T00:07:26.929671Z"},"papermill":{"duration":2.203646,"end_time":"2023-08-05T00:07:26.933552","exception":false,"start_time":"2023-08-05T00:07:24.729906","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["['Nordic walking', 'ascending stairs', 'cycling', 'descending stairs', 'ironing', 'lying', 'rope jumping', 'running', 'sitting', 'standing', 'transient activities', 'vacuum cleaning', 'walking']\n","Counter({10: 927575, 12: 238761, 4: 238690, 5: 192523, 9: 189931, 0: 188107, 8: 185188, 11: 175353, 2: 164600, 1: 117216, 3: 104944, 7: 98199, 6: 42969})\n","Class=10, n=927575 (32.387%)\n","Class=5, n=192523 (6.722%)\n","Class=8, n=185188 (6.466%)\n","Class=9, n=189931 (6.632%)\n","Class=4, n=238690 (8.334%)\n","Class=11, n=175353 (6.123%)\n","Class=1, n=117216 (4.093%)\n","Class=3, n=104944 (3.664%)\n","Class=12, n=238761 (8.336%)\n","Class=0, n=188107 (6.568%)\n","Class=2, n=164600 (5.747%)\n","Class=7, n=98199 (3.429%)\n","Class=6, n=42969 (1.500%)\n"]},{"data":{"image/png":"","text/plain":["<Figure size 640x480 with 1 
Axes>"]},"metadata":{},"output_type":"display_data"}],"source":["from collections import Counter\n","from sklearn.preprocessing import LabelEncoder\n","from matplotlib import pyplot\n","\n","le = LabelEncoder()\n","y_temp = le.fit_transform(y)\n","print(list(le.classes_))\n","counter = Counter(y_temp)\n","\n","print(counter)\n","for k,v in counter.items():\n"," per = v / len(y_temp) * 100\n"," print('Class=%d, n=%d (%.3f%%)' % (k, v, per))\n","\n","# plot the distribution\n","pyplot.bar(counter.keys(), counter.values())\n","pyplot.show()"]},{"cell_type":"markdown","id":"2d62ec16","metadata":{"papermill":{"duration":0.009288,"end_time":"2023-08-05T00:07:26.952484","exception":false,"start_time":"2023-08-05T00:07:26.943196","status":"completed"},"tags":[]},"source":["## Method 1: Using Random Forest Classifier "]},{"cell_type":"markdown","id":"ec9d9fce","metadata":{"papermill":{"duration":0.009112,"end_time":"2023-08-05T00:07:26.971127","exception":false,"start_time":"2023-08-05T00:07:26.962015","status":"completed"},"tags":[]},"source":["## Handling missing values"]},{"cell_type":"code","execution_count":5,"id":"68d72f52","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:26.992642Z","iopub.status.busy":"2023-08-05T00:07:26.992214Z","iopub.status.idle":"2023-08-05T00:07:29.167243Z","shell.execute_reply":"2023-08-05T00:07:29.166002Z"},"papermill":{"duration":2.188981,"end_time":"2023-08-05T00:07:29.169898","exception":false,"start_time":"2023-08-05T00:07:26.980917","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.impute import SimpleImputer\n","imputer = SimpleImputer(missing_values=np.nan, strategy='mean')\n","imputer.fit(X)\n","X = imputer.transform(X)"]},{"cell_type":"markdown","id":"b99c6d79","metadata":{"papermill":{"duration":0.009124,"end_time":"2023-08-05T00:07:29.188693","exception":false,"start_time":"2023-08-05T00:07:29.179569","status":"completed"},"tags":[]},"source":["## 
Encoding"]},{"cell_type":"code","execution_count":6,"id":"57a332ea","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:29.209213Z","iopub.status.busy":"2023-08-05T00:07:29.20881Z","iopub.status.idle":"2023-08-05T00:07:29.792137Z","shell.execute_reply":"2023-08-05T00:07:29.791044Z"},"papermill":{"duration":0.596644,"end_time":"2023-08-05T00:07:29.794779","exception":false,"start_time":"2023-08-05T00:07:29.198135","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.preprocessing import LabelEncoder\n","le = LabelEncoder()\n","y = le.fit_transform(y)"]},{"cell_type":"code","execution_count":7,"id":"974612a4","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:29.81627Z","iopub.status.busy":"2023-08-05T00:07:29.815829Z","iopub.status.idle":"2023-08-05T00:07:31.145986Z","shell.execute_reply":"2023-08-05T00:07:31.144799Z"},"papermill":{"duration":1.343317,"end_time":"2023-08-05T00:07:31.148501","exception":false,"start_time":"2023-08-05T00:07:29.805184","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.model_selection import train_test_split\n","X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=0)"]},{"cell_type":"markdown","id":"8edfbb5a","metadata":{"papermill":{"duration":0.008929,"end_time":"2023-08-05T00:07:31.166893","exception":false,"start_time":"2023-08-05T00:07:31.157964","status":"completed"},"tags":[]},"source":["## Feature Scaling"]},{"cell_type":"code","execution_count":8,"id":"ba643853","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:31.187287Z","iopub.status.busy":"2023-08-05T00:07:31.186875Z","iopub.status.idle":"2023-08-05T00:07:32.475029Z","shell.execute_reply":"2023-08-05T00:07:32.474185Z"},"papermill":{"duration":1.301347,"end_time":"2023-08-05T00:07:32.477471","exception":false,"start_time":"2023-08-05T00:07:31.176124","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.preprocessing import StandardScaler\n","sc = 
StandardScaler()\n","X_train = sc.fit_transform(X_train)\n","X_test = sc.transform(X_test)"]},{"cell_type":"markdown","id":"14ac9a68","metadata":{"papermill":{"duration":0.009663,"end_time":"2023-08-05T00:07:32.496576","exception":false,"start_time":"2023-08-05T00:07:32.486913","status":"completed"},"tags":[]},"source":["## Training RandomForestClassifier"]},{"cell_type":"code","execution_count":9,"id":"1dfae9c0","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:32.517561Z","iopub.status.busy":"2023-08-05T00:07:32.517142Z","iopub.status.idle":"2023-08-05T00:15:35.089903Z","shell.execute_reply":"2023-08-05T00:15:35.088663Z"},"papermill":{"duration":482.594606,"end_time":"2023-08-05T00:15:35.101103","exception":false,"start_time":"2023-08-05T00:07:32.506497","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: 
\"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 
50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>RandomForestClassifier(criterion=&#x27;entropy&#x27;, n_estimators=10, random_state=0)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. 
<br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">RandomForestClassifier</label><div class=\"sk-toggleable__content\"><pre>RandomForestClassifier(criterion=&#x27;entropy&#x27;, n_estimators=10, random_state=0)</pre></div></div></div></div></div>"],"text/plain":["RandomForestClassifier(criterion='entropy', n_estimators=10, random_state=0)"]},"execution_count":9,"metadata":{},"output_type":"execute_result"}],"source":["from sklearn.ensemble import RandomForestClassifier\n","classifier = RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0)\n","classifier.fit(X_train, y_train)"]},{"cell_type":"markdown","id":"a6207e57","metadata":{"papermill":{"duration":0.009341,"end_time":"2023-08-05T00:15:35.120003","exception":false,"start_time":"2023-08-05T00:15:35.110662","status":"completed"},"tags":[]},"source":["## Predicting Results"]},{"cell_type":"code","execution_count":10,"id":"a963263d","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:35.140981Z","iopub.status.busy":"2023-08-05T00:15:35.140561Z","iopub.status.idle":"2023-08-05T00:15:37.22477Z","shell.execute_reply":"2023-08-05T00:15:37.223543Z"},"papermill":{"duration":2.097825,"end_time":"2023-08-05T00:15:37.227329","exception":false,"start_time":"2023-08-05T00:15:35.129504","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[ 4 4]\n"," [10 10]\n"," [ 8 8]\n"," ...\n"," [ 7 7]\n"," [ 8 8]\n"," [ 3 3]]\n"]}],"source":["y_pred = classifier.predict(X_test)\n","print(np.concatenate((y_pred.reshape(len(y_pred),1), 
y_test.reshape(len(y_test),1)),1))"]},{"cell_type":"markdown","id":"7846df5c","metadata":{"papermill":{"duration":0.009748,"end_time":"2023-08-05T00:15:37.247066","exception":false,"start_time":"2023-08-05T00:15:37.237318","status":"completed"},"tags":[]},"source":["## Calculating Accuracy"]},{"cell_type":"code","execution_count":11,"id":"ed6e3e8a","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:37.268284Z","iopub.status.busy":"2023-08-05T00:15:37.267859Z","iopub.status.idle":"2023-08-05T00:15:37.436445Z","shell.execute_reply":"2023-08-05T00:15:37.435065Z"},"papermill":{"duration":0.182461,"end_time":"2023-08-05T00:15:37.439113","exception":false,"start_time":"2023-08-05T00:15:37.256652","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[ 37634 0 0 0 0 0 0 0 0 0\n"," 8 0 0]\n"," [ 0 23179 0 23 0 0 0 0 0 0\n"," 81 0 0]\n"," [ 0 0 32839 0 0 0 0 0 0 0\n"," 21 0 0]\n"," [ 0 14 0 20904 0 0 0 0 0 0\n"," 43 0 0]\n"," [ 0 0 0 0 47928 0 0 0 0 14\n"," 6 0 0]\n"," [ 0 0 0 0 0 38287 0 0 3 0\n"," 17 0 0]\n"," [ 0 0 1 0 0 0 8591 2 0 0\n"," 5 0 0]\n"," [ 0 0 0 0 0 0 0 19492 0 0\n"," 8 1 0]\n"," [ 0 0 0 0 0 0 0 0 36812 2\n"," 32 0 0]\n"," [ 0 0 0 0 22 0 0 0 4 38082\n"," 57 0 0]\n"," [ 28 61 12 57 25 32 8 24 31 22\n"," 185589 24 20]\n"," [ 0 0 0 0 0 0 0 0 0 0\n"," 41 35025 0]\n"," [ 1 0 0 0 0 0 0 0 0 0\n"," 41 0 47659]]\n"]},{"data":{"text/plain":["0.9986190931754223"]},"execution_count":11,"metadata":{},"output_type":"execute_result"}],"source":["from sklearn.metrics import confusion_matrix, accuracy_score\n","cm = confusion_matrix(y_test, y_pred)\n","print(cm)\n","accuracy_score(y_test, 
y_pred)"]},{"cell_type":"code","execution_count":12,"id":"e609b003","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:37.462377Z","iopub.status.busy":"2023-08-05T00:15:37.461702Z","iopub.status.idle":"2023-08-05T00:15:39.654083Z","shell.execute_reply":"2023-08-05T00:15:39.652958Z"},"papermill":{"duration":2.206834,"end_time":"2023-08-05T00:15:39.656344","exception":false,"start_time":"2023-08-05T00:15:37.44951","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":[" <script type=\"text/javascript\">\n"," window.PlotlyConfig = {MathJaxConfig: 'local'};\n"," if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n"," if (typeof require !== 'undefined') {\n"," require.undef(\"plotly\");\n"," requirejs.config({\n"," paths: {\n"," 'plotly': ['https://cdn.plot.ly/plotly-2.24.1.min']\n"," }\n"," });\n"," require(['plotly'], function(Plotly) {\n"," window._Plotly = Plotly;\n"," });\n"," }\n"," </script>\n"," "]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["<div> <div id=\"be7930db-6c4e-497b-b832-416da626298c\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"be7930db-6c4e-497b-b832-416da626298c\")) { Plotly.newPlot( \"be7930db-6c4e-497b-b832-416da626298c\", 
[{\"coloraxis\":\"coloraxis\",\"name\":\"0\",\"texttemplate\":\"%{z}\",\"z\":[[37634,0,0,0,0,0,0,0,0,0,8,0,0],[0,23179,0,23,0,0,0,0,0,0,81,0,0],[0,0,32839,0,0,0,0,0,0,0,21,0,0],[0,14,0,20904,0,0,0,0,0,0,43,0,0],[0,0,0,0,47928,0,0,0,0,14,6,0,0],[0,0,0,0,0,38287,0,0,3,0,17,0,0],[0,0,1,0,0,0,8591,2,0,0,5,0,0],[0,0,0,0,0,0,0,19492,0,0,8,1,0],[0,0,0,0,0,0,0,0,36812,2,32,0,0],[0,0,0,0,22,0,0,0,4,38082,57,0,0],[28,61,12,57,25,32,8,24,31,22,185589,24,20],[0,0,0,0,0,0,0,0,0,0,41,35025,0],[1,0,0,0,0,0,0,0,0,0,41,0,47659]],\"type\":\"heatmap\",\"xaxis\":\"x\",\"yaxis\":\"y\",\"hovertemplate\":\"x: %{x}\\u003cbr\\u003ey: %{y}\\u003cbr\\u003ecolor: %{z}\\u003cextra\\u003e\\u003c\\u002fextra\\u003e\"}], {\"template\":{\"data\":{\"histogram2dcontour\":[{\"type\":\"histogram2dcontour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"choropleth\":[{\"type\":\"choropleth\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"histogram2d\":[{\"type\":\"histogram2d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmap\":[{\"type\":\"heatmap\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"]
,[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmapgl\":[{\"type\":\"heatmapgl\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"contourcarpet\":[{\"type\":\"contourcarpet\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"contour\":[{\"type\":\"contour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"surface\":[{\"type\":\"surface\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"mesh3d\":[{\"type\":\"mesh3d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"scatter\":[{\"fillpattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2},\"type\":\"scatter\"}],\"parcoords\":[{\"type\":\"parcoords\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolargl\":[{\"type\":\"scatterpolargl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"bar\":[{\"error_x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}
],\"scattergeo\":[{\"type\":\"scattergeo\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolar\":[{\"type\":\"scatterpolar\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"histogram\":[{\"marker\":{\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"histogram\"}],\"scattergl\":[{\"type\":\"scattergl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatter3d\":[{\"type\":\"scatter3d\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattermapbox\":[{\"type\":\"scattermapbox\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterternary\":[{\"type\":\"scatterternary\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattercarpet\":[{\"type\":\"scattercarpet\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"table\":[{\"cells\":{\"fill\":{\"color\":\"#EBF0F8\"},\"line\":{\"color\":\"white\"}},\"header\":{\"fill\":{\"color\":\"#C8D4E3\"},\"line\":{\"color\":\"white\"}},\"type\":\"table\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"pie\":[{\"automargin\":true,\"type\":\"pie\"}]},\"layout\":{\"autotypenumbers\":\"strict\",\"colorway\":[\"#636efa\",\"#EF553B\",\"#00cc96\",\"#ab63fa\",\"#FFA15A\",\"#19d3f3\",\"#FF6692\",\"#B6E880\",\"#FF97FF\",\"#FECB52\"],\"font\":{\"color\":\"#2a3f5f\"},\"hovermode\":\"closest\",\"hoverlabel\":{\"align\":\"left\"},\"paper_bgcolor\":\"white\",\"plot_bgcolor\":\"#E5EC
F6\",\"polar\":{\"bgcolor\":\"#E5ECF6\",\"angularaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"radialaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"ternary\":{\"bgcolor\":\"#E5ECF6\",\"aaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"baxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"caxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"coloraxis\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"colorscale\":{\"sequential\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"sequentialminus\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"diverging\":[[0,\"#8e0152\"],[0.1,\"#c51b7d\"],[0.2,\"#de77ae\"],[0.3,\"#f1b6da\"],[0.4,\"#fde0ef\"],[0.5,\"#f7f7f7\"],[0.6,\"#e6f5d0\"],[0.7,\"#b8e186\"],[0.8,\"#7fbc41\"],[0.9,\"#4d9221\"],[1,\"#276419\"]]},\"xaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"yaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"scene\":{\"xaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"yaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecol
or\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"zaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2}},\"shapedefaults\":{\"line\":{\"color\":\"#2a3f5f\"}},\"annotationdefaults\":{\"arrowcolor\":\"#2a3f5f\",\"arrowhead\":0,\"arrowwidth\":1},\"geo\":{\"bgcolor\":\"white\",\"landcolor\":\"#E5ECF6\",\"subunitcolor\":\"white\",\"showland\":true,\"showlakes\":true,\"lakecolor\":\"white\"},\"title\":{\"x\":0.05},\"mapbox\":{\"style\":\"light\"}}},\"xaxis\":{\"anchor\":\"y\",\"domain\":[0.0,1.0]},\"yaxis\":{\"anchor\":\"x\",\"domain\":[0.0,1.0],\"autorange\":\"reversed\"},\"coloraxis\":{\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]},\"margin\":{\"t\":60}}, {\"responsive\": true} ).then(function(){\n"," \n","var gd = document.getElementById('be7930db-6c4e-497b-b832-416da626298c');\n","var x = new MutationObserver(function (mutations, observer) {{\n"," var display = window.getComputedStyle(gd).display;\n"," if (!display || display === 'none') {{\n"," console.log([gd, 'removed!']);\n"," Plotly.purge(gd);\n"," observer.disconnect();\n"," }}\n","}});\n","\n","// Listen for the removal of the full notebook cells\n","var notebookContainer = gd.closest('#notebook-container');\n","if (notebookContainer) {{\n"," x.observe(notebookContainer, {childList: true});\n","}}\n","\n","// Listen for the clearing of the current output cell\n","var outputEl = gd.closest('.output');\n","if (outputEl) {{\n"," x.observe(outputEl, {childList: true});\n","}}\n","\n"," }) }; }); </script> </div>"]},"metadata":{},"output_type":"display_data"}],"source":["# import 
matplotlib.pyplot as plt\n","# from sklearn.metrics import ConfusionMatrixDisplay\n","# disp = ConfusionMatrixDisplay(confusion_matrix=cm,\n","# display_labels=classifier.classes_)\n","# disp.plot()\n","# plt.show()\n","import plotly.express as px\n","fig = px.imshow(cm, text_auto=True, aspect=\"auto\")\n","fig.show()"]},{"cell_type":"markdown","id":"6b68fad1","metadata":{"papermill":{"duration":0.010207,"end_time":"2023-08-05T00:15:39.677156","exception":false,"start_time":"2023-08-05T00:15:39.666949","status":"completed"},"tags":[]},"source":["## Method 2: KNN"]},{"cell_type":"code","execution_count":13,"id":"219416d1","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:39.699914Z","iopub.status.busy":"2023-08-05T00:15:39.699066Z","iopub.status.idle":"2023-08-05T00:15:40.015603Z","shell.execute_reply":"2023-08-05T00:15:40.014559Z"},"papermill":{"duration":0.330691,"end_time":"2023-08-05T00:15:40.0181","exception":false,"start_time":"2023-08-05T00:15:39.687409","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<style>#sk-container-id-2 {color: black;background-color: white;}#sk-container-id-2 pre{padding: 0;}#sk-container-id-2 div.sk-toggleable {background-color: white;}#sk-container-id-2 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-2 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-2 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-2 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-2 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-2 
input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-2 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-2 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-2 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-2 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-2 div.sk-item {position: relative;z-index: 1;}#sk-container-id-2 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-2 div.sk-item::before, #sk-container-id-2 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-2 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: 
relative;background-color: white;}#sk-container-id-2 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-2 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-2 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-2 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-2 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-2 div.sk-label-container {text-align: center;}#sk-container-id-2 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-2 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>KNeighborsClassifier()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. 
<br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" checked><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">KNeighborsClassifier</label><div class=\"sk-toggleable__content\"><pre>KNeighborsClassifier()</pre></div></div></div></div></div>"],"text/plain":["KNeighborsClassifier()"]},"execution_count":13,"metadata":{},"output_type":"execute_result"}],"source":["from sklearn.neighbors import KNeighborsClassifier\n","knn_clf = KNeighborsClassifier()\n","knn_clf.fit(X_train,y_train)"]},{"cell_type":"code","execution_count":14,"id":"fe6052b4","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:40.041026Z","iopub.status.busy":"2023-08-05T00:15:40.040612Z","iopub.status.idle":"2023-08-05T01:09:53.645091Z","shell.execute_reply":"2023-08-05T01:09:53.643871Z"},"papermill":{"duration":3253.619328,"end_time":"2023-08-05T01:09:53.648066","exception":false,"start_time":"2023-08-05T00:15:40.028738","status":"completed"},"tags":[]},"outputs":[],"source":["y_pred=knn_clf.predict(X_test)"]},{"cell_type":"code","execution_count":15,"id":"80d0ea82","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:09:53.674186Z","iopub.status.busy":"2023-08-05T01:09:53.673791Z","iopub.status.idle":"2023-08-05T01:09:53.682771Z","shell.execute_reply":"2023-08-05T01:09:53.681553Z"},"papermill":{"duration":0.023497,"end_time":"2023-08-05T01:09:53.685168","exception":false,"start_time":"2023-08-05T01:09:53.661671","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[ 4 4]\n"," [10 10]\n"," [ 8 8]\n"," ...\n"," [ 7 7]\n"," [ 8 8]\n"," [ 3 3]]\n"]}],"source":["print(np.concatenate((y_pred.reshape(len(y_pred),1), 
y_test.reshape(len(y_test),1)),1))"]},{"cell_type":"markdown","id":"30642482","metadata":{"papermill":{"duration":0.01112,"end_time":"2023-08-05T01:09:53.711764","exception":false,"start_time":"2023-08-05T01:09:53.700644","status":"completed"},"tags":[]},"source":["## Method 3: Using ANN"]},{"cell_type":"code","execution_count":16,"id":"9fe82be2","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:09:53.739787Z","iopub.status.busy":"2023-08-05T01:09:53.738634Z","iopub.status.idle":"2023-08-05T01:09:55.132546Z","shell.execute_reply":"2023-08-05T01:09:55.131479Z"},"papermill":{"duration":1.410752,"end_time":"2023-08-05T01:09:55.135242","exception":false,"start_time":"2023-08-05T01:09:53.72449","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.model_selection import train_test_split\n","X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=0)"]},{"cell_type":"markdown","id":"571dcd4c","metadata":{"papermill":{"duration":0.010439,"end_time":"2023-08-05T01:09:55.156709","exception":false,"start_time":"2023-08-05T01:09:55.14627","status":"completed"},"tags":[]},"source":["## Oversampling the minority classes"]},{"cell_type":"code","execution_count":17,"id":"32271f2b","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:09:55.181016Z","iopub.status.busy":"2023-08-05T01:09:55.180057Z","iopub.status.idle":"2023-08-05T01:19:23.222784Z","shell.execute_reply":"2023-08-05T01:19:23.221544Z"},"papermill":{"duration":568.057647,"end_time":"2023-08-05T01:19:23.225424","exception":false,"start_time":"2023-08-05T01:09:55.167777","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["Class=4, n=741642 (7.692%)\n","Class=12, n=741642 (7.692%)\n","Class=10, n=741642 (7.692%)\n","Class=2, n=741642 (7.692%)\n","Class=9, n=741642 (7.692%)\n","Class=8, n=741642 (7.692%)\n","Class=0, n=741642 (7.692%)\n","Class=5, n=741642 (7.692%)\n","Class=1, n=741642 (7.692%)\n","Class=11, 
n=741642 (7.692%)\n","Class=6, n=741642 (7.692%)\n","Class=3, n=741642 (7.692%)\n","Class=7, n=741642 (7.692%)\n"]},{"data":{"image/png":"","text/plain":["<Figure size 640x480 with 1 Axes>"]},"metadata":{},"output_type":"display_data"}],"source":["from imblearn.over_sampling import SMOTE\n","oversample = SMOTE()\n","y_train = le.fit_transform(y_train)\n","X_train, y_train = oversample.fit_resample(X_train, y_train)\n","# summarize distribution\n","counter = Counter(y_train)\n","for k,v in counter.items():\n"," per = v / len(y_train) * 100\n"," print('Class=%d, n=%d (%.3f%%)' % (k, v, per))\n","# plot the distribution\n","pyplot.bar(counter.keys(), counter.values())\n","pyplot.show()"]},{"cell_type":"code","execution_count":18,"id":"aa3846f3","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:23.252818Z","iopub.status.busy":"2023-08-05T01:19:23.251956Z","iopub.status.idle":"2023-08-05T01:19:23.740771Z","shell.execute_reply":"2023-08-05T01:19:23.73973Z"},"papermill":{"duration":0.505699,"end_time":"2023-08-05T01:19:23.743348","exception":false,"start_time":"2023-08-05T01:19:23.237649","status":"completed"},"tags":[]},"outputs":[],"source":["from tensorflow.keras.utils import to_categorical\n","y_train = to_categorical(y_train)"]},{"cell_type":"code","execution_count":19,"id":"0a0017d2","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:23.767819Z","iopub.status.busy":"2023-08-05T01:19:23.767429Z","iopub.status.idle":"2023-08-05T01:19:29.065742Z","shell.execute_reply":"2023-08-05T01:19:29.064536Z"},"papermill":{"duration":5.313708,"end_time":"2023-08-05T01:19:29.068475","exception":false,"start_time":"2023-08-05T01:19:23.754767","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.preprocessing import StandardScaler\n","sc = StandardScaler()\n","X_train = sc.fit_transform(X_train)\n","X_test = 
sc.transform(X_test)"]},{"cell_type":"code","execution_count":20,"id":"c344eb49","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:29.092749Z","iopub.status.busy":"2023-08-05T01:19:29.092331Z","iopub.status.idle":"2023-08-05T01:19:29.209741Z","shell.execute_reply":"2023-08-05T01:19:29.20847Z"},"papermill":{"duration":0.132568,"end_time":"2023-08-05T01:19:29.21231","exception":false,"start_time":"2023-08-05T01:19:29.079742","status":"completed"},"tags":[]},"outputs":[],"source":["ann = tf.keras.models.Sequential()\n","ann.add(tf.keras.layers.Dense(units=26, activation='tanh'))\n","ann.add(tf.keras.layers.Dense(units=52, activation='tanh'))\n","ann.add(tf.keras.layers.Dense(units=52, activation='tanh'))\n","ann.add(tf.keras.layers.Dense(units=13, activation='softmax'))"]},{"cell_type":"code","execution_count":21,"id":"c1441ca6","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:29.236225Z","iopub.status.busy":"2023-08-05T01:19:29.23586Z","iopub.status.idle":"2023-08-05T01:19:29.268014Z","shell.execute_reply":"2023-08-05T01:19:29.266637Z"},"papermill":{"duration":0.046985,"end_time":"2023-08-05T01:19:29.270549","exception":false,"start_time":"2023-08-05T01:19:29.223564","status":"completed"},"tags":[]},"outputs":[],"source":["ann.compile(optimizer='adam' , loss='CategoricalCrossentropy' , metrics=['accuracy'])"]},{"cell_type":"code","execution_count":22,"id":"82e960ec","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:29.294423Z","iopub.status.busy":"2023-08-05T01:19:29.294016Z","iopub.status.idle":"2023-08-05T01:43:40.990731Z","shell.execute_reply":"2023-08-05T01:43:40.989437Z"},"papermill":{"duration":1451.711594,"end_time":"2023-08-05T01:43:40.99336","exception":false,"start_time":"2023-08-05T01:19:29.281766","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["Epoch 1/10\n","75324/75324 [==============================] - 145s 2ms/step - loss: 0.1724 - accuracy: 0.9415\n","Epoch 
2/10\n","75324/75324 [==============================] - 142s 2ms/step - loss: 0.0763 - accuracy: 0.9731\n","Epoch 3/10\n","75324/75324 [==============================] - 144s 2ms/step - loss: 0.0601 - accuracy: 0.9788\n","Epoch 4/10\n","75324/75324 [==============================] - 151s 2ms/step - loss: 0.0527 - accuracy: 0.9815\n","Epoch 5/10\n","75324/75324 [==============================] - 142s 2ms/step - loss: 0.0480 - accuracy: 0.9831\n","Epoch 6/10\n","75324/75324 [==============================] - 143s 2ms/step - loss: 0.0452 - accuracy: 0.9840\n","Epoch 7/10\n","75324/75324 [==============================] - 146s 2ms/step - loss: 0.0432 - accuracy: 0.9848\n","Epoch 8/10\n","75324/75324 [==============================] - 141s 2ms/step - loss: 0.0417 - accuracy: 0.9854\n","Epoch 9/10\n","75324/75324 [==============================] - 148s 2ms/step - loss: 0.0405 - accuracy: 0.9857\n","Epoch 10/10\n","75324/75324 [==============================] - 146s 2ms/step - loss: 0.0397 - accuracy: 0.9860\n"]}],"source":["history = ann.fit(X_train, y_train, batch_size=128, epochs=10)"]},{"cell_type":"code","execution_count":23,"id":"c66f3d9a","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:43:44.865958Z","iopub.status.busy":"2023-08-05T01:43:44.86558Z","iopub.status.idle":"2023-08-05T01:44:26.055791Z","shell.execute_reply":"2023-08-05T01:44:26.054616Z"},"papermill":{"duration":43.125175,"end_time":"2023-08-05T01:44:26.058358","exception":false,"start_time":"2023-08-05T01:43:42.933183","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["17901/17901 [==============================] - 22s 1ms/step\n"]}],"source":["y_pred = 
ann.predict(X_test)"]},{"cell_type":"code","execution_count":24,"id":"ca3d6088","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:29.962501Z","iopub.status.busy":"2023-08-05T01:44:29.962091Z","iopub.status.idle":"2023-08-05T01:44:29.991499Z","shell.execute_reply":"2023-08-05T01:44:29.990294Z"},"papermill":{"duration":1.967301,"end_time":"2023-08-05T01:44:29.994142","exception":false,"start_time":"2023-08-05T01:44:28.026841","status":"completed"},"tags":[]},"outputs":[],"source":["result = np.argmax(y_pred, axis=-1)"]},{"cell_type":"code","execution_count":25,"id":"4211d197","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:33.86521Z","iopub.status.busy":"2023-08-05T01:44:33.864484Z","iopub.status.idle":"2023-08-05T01:44:33.869618Z","shell.execute_reply":"2023-08-05T01:44:33.868861Z"},"papermill":{"duration":1.942499,"end_time":"2023-08-05T01:44:33.871763","exception":false,"start_time":"2023-08-05T01:44:31.929264","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[ 4 10 8 ... 
7 8 3]\n"]}],"source":["print(result)"]},{"cell_type":"code","execution_count":26,"id":"e36e98e0","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:37.612404Z","iopub.status.busy":"2023-08-05T01:44:37.61165Z","iopub.status.idle":"2023-08-05T01:44:37.616428Z","shell.execute_reply":"2023-08-05T01:44:37.615631Z"},"papermill":{"duration":1.837241,"end_time":"2023-08-05T01:44:37.618458","exception":false,"start_time":"2023-08-05T01:44:35.781217","status":"completed"},"tags":[]},"outputs":[],"source":["y_test_rs = np.argmax(y_test, axis=-1)"]},{"cell_type":"code","execution_count":27,"id":"49227522","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:41.477349Z","iopub.status.busy":"2023-08-05T01:44:41.476614Z","iopub.status.idle":"2023-08-05T01:44:41.481313Z","shell.execute_reply":"2023-08-05T01:44:41.48054Z"},"papermill":{"duration":1.939073,"end_time":"2023-08-05T01:44:41.484075","exception":false,"start_time":"2023-08-05T01:44:39.545002","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["9\n"]}],"source":["print(y_test_rs)"]},{"cell_type":"code","execution_count":28,"id":"72b1f910","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:45.446971Z","iopub.status.busy":"2023-08-05T01:44:45.446273Z","iopub.status.idle":"2023-08-05T01:44:45.569371Z","shell.execute_reply":"2023-08-05T01:44:45.568125Z"},"papermill":{"duration":2.161727,"end_time":"2023-08-05T01:44:45.571926","exception":false,"start_time":"2023-08-05T01:44:43.410199","status":"completed"},"tags":[]},"outputs":[],"source":["y_pred_enc = pd.get_dummies(result)\n","y_test_enc = 
pd.get_dummies(y_test)"]},{"cell_type":"code","execution_count":29,"id":"6b54f780","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:49.311088Z","iopub.status.busy":"2023-08-05T01:44:49.310688Z","iopub.status.idle":"2023-08-05T01:44:49.698079Z","shell.execute_reply":"2023-08-05T01:44:49.696602Z"},"papermill":{"duration":2.293735,"end_time":"2023-08-05T01:44:49.700483","exception":false,"start_time":"2023-08-05T01:44:47.406748","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["0.9624047680565351\n"]}],"source":["from sklearn.metrics import accuracy_score\n","print(accuracy_score(y_test_enc, y_pred_enc))"]}],"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.12"},"papermill":{"default_parameters":{},"duration":5890.466662,"end_time":"2023-08-05T01:44:54.111787","environment_variables":{},"exception":null,"input_path":"__notebook__.ipynb","output_path":"__notebook__.ipynb","parameters":{},"start_time":"2023-08-05T00:06:43.645125","version":"2.4.0"}},"nbformat":4,"nbformat_minor":5} No newline at end of file
Copy link
Collaborator

@coderpotter coderpotter Aug 6, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Line #1.    from imblearn.over_sampling import SMOTE

Why SMOTE? Did you try anything else? Do you think oversampling is the best way to go here?


Reply via ReviewNB

@@ -0,0 +1 @@
{"cells":[{"source":"<a href=\"https://www.kaggle.com/code/manvinagdev/physical-activity-prediction?scriptVersionId=138941879\" target=\"_blank\"><img align=\"left\" alt=\"Kaggle\" title=\"Open in Kaggle\" src=\"https://kaggle.com/static/images/open-in-kaggle.svg\"></a>","metadata":{},"cell_type":"markdown"},{"cell_type":"markdown","id":"fea86c76","metadata":{"papermill":{"duration":0.008893,"end_time":"2023-08-05T00:06:53.536089","exception":false,"start_time":"2023-08-05T00:06:53.527196","status":"completed"},"tags":[]},"source":["## Importing libraries"]},{"cell_type":"code","execution_count":1,"id":"89d8bcdb","metadata":{"_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","execution":{"iopub.execute_input":"2023-08-05T00:06:53.554723Z","iopub.status.busy":"2023-08-05T00:06:53.554321Z","iopub.status.idle":"2023-08-05T00:07:02.412402Z","shell.execute_reply":"2023-08-05T00:07:02.410962Z"},"papermill":{"duration":8.870272,"end_time":"2023-08-05T00:07:02.414899","exception":false,"start_time":"2023-08-05T00:06:53.544627","status":"completed"},"tags":[]},"outputs":[{"name":"stderr","output_type":"stream","text":["/opt/conda/lib/python3.10/site-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.23.5\n"," warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n","/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/__init__.py:98: UserWarning: unable to load libtensorflow_io_plugins.so: unable to open file: libtensorflow_io_plugins.so, from paths: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so']\n","caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so: undefined symbol: _ZN3tsl6StatusC1EN10tensorflow5error4CodeESt17basic_string_viewIcSt11char_traitsIcEENS_14SourceLocationE']\n"," 
warnings.warn(f\"unable to load libtensorflow_io_plugins.so: {e}\")\n","/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/__init__.py:104: UserWarning: file system plugins are not loaded: unable to open file: libtensorflow_io.so, from paths: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so']\n","caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so: undefined symbol: _ZTVN10tensorflow13GcsFileSystemE']\n"," warnings.warn(f\"file system plugins are not loaded: {e}\")\n"]},{"name":"stdout","output_type":"stream","text":["/kaggle/input/fisical-activity-dataset/dataset2.csv\n"]}],"source":["import numpy as np\n","import pandas as pd\n","import tensorflow as tf\n","\n","import os\n","for dirname, _, filenames in os.walk('/kaggle/input'):\n"," for filename in filenames:\n"," print(os.path.join(dirname, filename))"]},{"cell_type":"markdown","id":"e1dee26f","metadata":{"papermill":{"duration":0.008233,"end_time":"2023-08-05T00:07:02.431643","exception":false,"start_time":"2023-08-05T00:07:02.42341","status":"completed"},"tags":[]},"source":["## Loading the dataset"]},{"cell_type":"code","execution_count":2,"id":"ce6fe559","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:02.452779Z","iopub.status.busy":"2023-08-05T00:07:02.451213Z","iopub.status.idle":"2023-08-05T00:07:24.443285Z","shell.execute_reply":"2023-08-05T00:07:24.442116Z"},"papermill":{"duration":22.004325,"end_time":"2023-08-05T00:07:24.445861","exception":false,"start_time":"2023-08-05T00:07:02.441536","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: 
right;\">\n"," <th></th>\n"," <th>activityID</th>\n"," <th>heart_rate</th>\n"," <th>hand temperature (°C)</th>\n"," <th>hand acceleration X ±16g</th>\n"," <th>hand acceleration Y ±16g</th>\n"," <th>hand acceleration Z ±16g</th>\n"," <th>hand gyroscope X</th>\n"," <th>hand gyroscope Y</th>\n"," <th>hand gyroscope Z</th>\n"," <th>hand magnetometer X</th>\n"," <th>...</th>\n"," <th>ankle acceleration X ±16g</th>\n"," <th>ankle acceleration Y ±16g</th>\n"," <th>ankle acceleration Z ±16g</th>\n"," <th>ankle gyroscope X</th>\n"," <th>ankle gyroscope Y</th>\n"," <th>ankle gyroscope Z</th>\n"," <th>ankle magnetometer X</th>\n"," <th>ankle magnetometer Y</th>\n"," <th>ankle magnetometer Z</th>\n"," <th>PeopleId</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.37223</td>\n"," <td>8.60074</td>\n"," <td>3.51048</td>\n"," <td>-0.092217</td>\n"," <td>0.056812</td>\n"," <td>-0.015845</td>\n"," <td>14.6806</td>\n"," <td>...</td>\n"," <td>9.65918</td>\n"," <td>-1.65569</td>\n"," <td>-0.099797</td>\n"," <td>0.008300</td>\n"," <td>0.009250</td>\n"," <td>-0.017580</td>\n"," <td>-61.1888</td>\n"," <td>-38.9599</td>\n"," <td>-58.1438</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.18837</td>\n"," <td>8.56560</td>\n"," <td>3.66179</td>\n"," <td>-0.024413</td>\n"," <td>0.047759</td>\n"," <td>0.006474</td>\n"," <td>14.8991</td>\n"," <td>...</td>\n"," <td>9.69370</td>\n"," <td>-1.57902</td>\n"," <td>-0.215687</td>\n"," <td>-0.006577</td>\n"," <td>-0.004638</td>\n"," <td>0.000368</td>\n"," <td>-59.8479</td>\n"," <td>-38.8919</td>\n"," <td>-58.5253</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.37357</td>\n"," <td>8.60107</td>\n"," <td>3.54898</td>\n"," <td>-0.057976</td>\n"," <td>0.032574</td>\n"," 
<td>-0.006988</td>\n"," <td>14.2420</td>\n"," <td>...</td>\n"," <td>9.58944</td>\n"," <td>-1.73276</td>\n"," <td>0.092914</td>\n"," <td>0.003014</td>\n"," <td>0.000148</td>\n"," <td>0.022495</td>\n"," <td>-60.7361</td>\n"," <td>-39.4138</td>\n"," <td>-58.3999</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.07473</td>\n"," <td>8.52853</td>\n"," <td>3.66021</td>\n"," <td>-0.002352</td>\n"," <td>0.032810</td>\n"," <td>-0.003747</td>\n"," <td>14.8908</td>\n"," <td>...</td>\n"," <td>9.58814</td>\n"," <td>-1.77040</td>\n"," <td>0.054545</td>\n"," <td>0.003175</td>\n"," <td>-0.020301</td>\n"," <td>0.011275</td>\n"," <td>-60.4091</td>\n"," <td>-38.7635</td>\n"," <td>-58.3956</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>transient activities</td>\n"," <td>104.0</td>\n"," <td>30.0</td>\n"," <td>2.22936</td>\n"," <td>8.83122</td>\n"," <td>3.70000</td>\n"," <td>0.012269</td>\n"," <td>0.018305</td>\n"," <td>-0.053325</td>\n"," <td>15.5612</td>\n"," <td>...</td>\n"," <td>9.69771</td>\n"," <td>-1.65625</td>\n"," <td>-0.060809</td>\n"," <td>0.012698</td>\n"," <td>-0.014303</td>\n"," <td>-0.002823</td>\n"," <td>-61.5199</td>\n"," <td>-39.3879</td>\n"," <td>-58.2694</td>\n"," <td>1</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>5 rows × 33 columns</p>\n","</div>"],"text/plain":[" activityID heart_rate hand temperature (°C) \\\n","0 transient activities 104.0 30.0 \n","1 transient activities 104.0 30.0 \n","2 transient activities 104.0 30.0 \n","3 transient activities 104.0 30.0 \n","4 transient activities 104.0 30.0 \n","\n"," hand acceleration X ±16g hand acceleration Y ±16g \\\n","0 2.37223 8.60074 \n","1 2.18837 8.56560 \n","2 2.37357 8.60107 \n","3 2.07473 8.52853 \n","4 2.22936 8.83122 \n","\n"," hand acceleration Z ±16g hand gyroscope X hand gyroscope Y \\\n","0 3.51048 -0.092217 0.056812 \n","1 3.66179 -0.024413 0.047759 \n","2 3.54898 -0.057976 0.032574 
\n","3 3.66021 -0.002352 0.032810 \n","4 3.70000 0.012269 0.018305 \n","\n"," hand gyroscope Z hand magnetometer X ... ankle acceleration X ±16g \\\n","0 -0.015845 14.6806 ... 9.65918 \n","1 0.006474 14.8991 ... 9.69370 \n","2 -0.006988 14.2420 ... 9.58944 \n","3 -0.003747 14.8908 ... 9.58814 \n","4 -0.053325 15.5612 ... 9.69771 \n","\n"," ankle acceleration Y ±16g ankle acceleration Z ±16g ankle gyroscope X \\\n","0 -1.65569 -0.099797 0.008300 \n","1 -1.57902 -0.215687 -0.006577 \n","2 -1.73276 0.092914 0.003014 \n","3 -1.77040 0.054545 0.003175 \n","4 -1.65625 -0.060809 0.012698 \n","\n"," ankle gyroscope Y ankle gyroscope Z ankle magnetometer X \\\n","0 0.009250 -0.017580 -61.1888 \n","1 -0.004638 0.000368 -59.8479 \n","2 0.000148 0.022495 -60.7361 \n","3 -0.020301 0.011275 -60.4091 \n","4 -0.014303 -0.002823 -61.5199 \n","\n"," ankle magnetometer Y ankle magnetometer Z PeopleId \n","0 -38.9599 -58.1438 1 \n","1 -38.8919 -58.5253 1 \n","2 -39.4138 -58.3999 1 \n","3 -38.7635 -58.3956 1 \n","4 -39.3879 -58.2694 1 \n","\n","[5 rows x 33 columns]"]},"execution_count":2,"metadata":{},"output_type":"execute_result"}],"source":["dataset = pd.read_csv('/kaggle/input/fisical-activity-dataset/dataset2.csv')\n","dataset.head()"]},{"cell_type":"code","execution_count":3,"id":"4c7d3445","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:24.466165Z","iopub.status.busy":"2023-08-05T00:07:24.465496Z","iopub.status.idle":"2023-08-05T00:07:24.70062Z","shell.execute_reply":"2023-08-05T00:07:24.699179Z"},"papermill":{"duration":0.24804,"end_time":"2023-08-05T00:07:24.703053","exception":false,"start_time":"2023-08-05T00:07:24.455013","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[104. 30. 2.37223 ... -61.1888 -38.9599 -58.1438 ]\n"," [104. 30. 2.18837 ... -59.8479 -38.8919 -58.5253 ]\n"," [104. 30. 2.37357 ... -60.7361 -39.4138 -58.3999 ]\n"," ...\n"," [140. 30.8125 -9.42745 ... -38.5541 -16.0535 24.6936 ]\n"," [140. 
30.8125 -9.47246 ... -38.8064 -16.04 24.9763 ]\n"," [140. 30.8125 -9.66621 ... -38.6814 -15.9175 24.9766 ]]\n","['transient activities' 'transient activities' 'transient activities' ...\n"," 'transient activities' 'transient activities' 'transient activities']\n"]}],"source":["X = dataset.iloc[:, 1:-1].values\n","y = dataset.iloc[:, 0].values\n","print(X)\n","print(y)"]},{"cell_type":"markdown","id":"e73af995","metadata":{"papermill":{"duration":0.00873,"end_time":"2023-08-05T00:07:24.720797","exception":false,"start_time":"2023-08-05T00:07:24.712067","status":"completed"},"tags":[]},"source":["## Encoding categorical data"]},{"cell_type":"code","execution_count":4,"id":"b2055003","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:24.740481Z","iopub.status.busy":"2023-08-05T00:07:24.740069Z","iopub.status.idle":"2023-08-05T00:07:26.930985Z","shell.execute_reply":"2023-08-05T00:07:26.929671Z"},"papermill":{"duration":2.203646,"end_time":"2023-08-05T00:07:26.933552","exception":false,"start_time":"2023-08-05T00:07:24.729906","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["['Nordic walking', 'ascending stairs', 'cycling', 'descending stairs', 'ironing', 'lying', 'rope jumping', 'running', 'sitting', 'standing', 'transient activities', 'vacuum cleaning', 'walking']\n","Counter({10: 927575, 12: 238761, 4: 238690, 5: 192523, 9: 189931, 0: 188107, 8: 185188, 11: 175353, 2: 164600, 1: 117216, 3: 104944, 7: 98199, 6: 42969})\n","Class=10, n=927575 (32.387%)\n","Class=5, n=192523 (6.722%)\n","Class=8, n=185188 (6.466%)\n","Class=9, n=189931 (6.632%)\n","Class=4, n=238690 (8.334%)\n","Class=11, n=175353 (6.123%)\n","Class=1, n=117216 (4.093%)\n","Class=3, n=104944 (3.664%)\n","Class=12, n=238761 (8.336%)\n","Class=0, n=188107 (6.568%)\n","Class=2, n=164600 (5.747%)\n","Class=7, n=98199 (3.429%)\n","Class=6, n=42969 (1.500%)\n"]},{"data":{"image/png":"","text/plain":["<Figure size 640x480 with 1 
Axes>"]},"metadata":{},"output_type":"display_data"}],"source":["from collections import Counter\n","from sklearn.preprocessing import LabelEncoder\n","from matplotlib import pyplot\n","\n","le = LabelEncoder()\n","y_temp = le.fit_transform(y)\n","print(list(le.classes_))\n","counter = Counter(y_temp)\n","\n","print(counter)\n","for k,v in counter.items():\n"," per = v / len(y_temp) * 100\n"," print('Class=%d, n=%d (%.3f%%)' % (k, v, per))\n","\n","# plot the distribution\n","pyplot.bar(counter.keys(), counter.values())\n","pyplot.show()"]},{"cell_type":"markdown","id":"2d62ec16","metadata":{"papermill":{"duration":0.009288,"end_time":"2023-08-05T00:07:26.952484","exception":false,"start_time":"2023-08-05T00:07:26.943196","status":"completed"},"tags":[]},"source":["## Method 1: Using Random Forest Classifier "]},{"cell_type":"markdown","id":"ec9d9fce","metadata":{"papermill":{"duration":0.009112,"end_time":"2023-08-05T00:07:26.971127","exception":false,"start_time":"2023-08-05T00:07:26.962015","status":"completed"},"tags":[]},"source":["## Handling missing values"]},{"cell_type":"code","execution_count":5,"id":"68d72f52","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:26.992642Z","iopub.status.busy":"2023-08-05T00:07:26.992214Z","iopub.status.idle":"2023-08-05T00:07:29.167243Z","shell.execute_reply":"2023-08-05T00:07:29.166002Z"},"papermill":{"duration":2.188981,"end_time":"2023-08-05T00:07:29.169898","exception":false,"start_time":"2023-08-05T00:07:26.980917","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.impute import SimpleImputer\n","imputer = SimpleImputer(missing_values=np.nan, strategy='mean')\n","imputer.fit(X)\n","X = imputer.transform(X)"]},{"cell_type":"markdown","id":"b99c6d79","metadata":{"papermill":{"duration":0.009124,"end_time":"2023-08-05T00:07:29.188693","exception":false,"start_time":"2023-08-05T00:07:29.179569","status":"completed"},"tags":[]},"source":["## 
Encoding"]},{"cell_type":"code","execution_count":6,"id":"57a332ea","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:29.209213Z","iopub.status.busy":"2023-08-05T00:07:29.20881Z","iopub.status.idle":"2023-08-05T00:07:29.792137Z","shell.execute_reply":"2023-08-05T00:07:29.791044Z"},"papermill":{"duration":0.596644,"end_time":"2023-08-05T00:07:29.794779","exception":false,"start_time":"2023-08-05T00:07:29.198135","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.preprocessing import LabelEncoder\n","le = LabelEncoder()\n","y = le.fit_transform(y)"]},{"cell_type":"code","execution_count":7,"id":"974612a4","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:29.81627Z","iopub.status.busy":"2023-08-05T00:07:29.815829Z","iopub.status.idle":"2023-08-05T00:07:31.145986Z","shell.execute_reply":"2023-08-05T00:07:31.144799Z"},"papermill":{"duration":1.343317,"end_time":"2023-08-05T00:07:31.148501","exception":false,"start_time":"2023-08-05T00:07:29.805184","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.model_selection import train_test_split\n","X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=0)"]},{"cell_type":"markdown","id":"8edfbb5a","metadata":{"papermill":{"duration":0.008929,"end_time":"2023-08-05T00:07:31.166893","exception":false,"start_time":"2023-08-05T00:07:31.157964","status":"completed"},"tags":[]},"source":["## Feature Scaling"]},{"cell_type":"code","execution_count":8,"id":"ba643853","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:31.187287Z","iopub.status.busy":"2023-08-05T00:07:31.186875Z","iopub.status.idle":"2023-08-05T00:07:32.475029Z","shell.execute_reply":"2023-08-05T00:07:32.474185Z"},"papermill":{"duration":1.301347,"end_time":"2023-08-05T00:07:32.477471","exception":false,"start_time":"2023-08-05T00:07:31.176124","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.preprocessing import StandardScaler\n","sc = 
StandardScaler()\n","X_train = sc.fit_transform(X_train)\n","X_test = sc.transform(X_test)"]},{"cell_type":"markdown","id":"14ac9a68","metadata":{"papermill":{"duration":0.009663,"end_time":"2023-08-05T00:07:32.496576","exception":false,"start_time":"2023-08-05T00:07:32.486913","status":"completed"},"tags":[]},"source":["## Training RandomForestClassifier"]},{"cell_type":"code","execution_count":9,"id":"1dfae9c0","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:07:32.517561Z","iopub.status.busy":"2023-08-05T00:07:32.517142Z","iopub.status.idle":"2023-08-05T00:15:35.089903Z","shell.execute_reply":"2023-08-05T00:15:35.088663Z"},"papermill":{"duration":482.594606,"end_time":"2023-08-05T00:15:35.101103","exception":false,"start_time":"2023-08-05T00:07:32.506497","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: 
\"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 
50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>RandomForestClassifier(criterion=&#x27;entropy&#x27;, n_estimators=10, random_state=0)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. 
<br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">RandomForestClassifier</label><div class=\"sk-toggleable__content\"><pre>RandomForestClassifier(criterion=&#x27;entropy&#x27;, n_estimators=10, random_state=0)</pre></div></div></div></div></div>"],"text/plain":["RandomForestClassifier(criterion='entropy', n_estimators=10, random_state=0)"]},"execution_count":9,"metadata":{},"output_type":"execute_result"}],"source":["from sklearn.ensemble import RandomForestClassifier\n","classifier = RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0)\n","classifier.fit(X_train, y_train)"]},{"cell_type":"markdown","id":"a6207e57","metadata":{"papermill":{"duration":0.009341,"end_time":"2023-08-05T00:15:35.120003","exception":false,"start_time":"2023-08-05T00:15:35.110662","status":"completed"},"tags":[]},"source":["## Predicting Results"]},{"cell_type":"code","execution_count":10,"id":"a963263d","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:35.140981Z","iopub.status.busy":"2023-08-05T00:15:35.140561Z","iopub.status.idle":"2023-08-05T00:15:37.22477Z","shell.execute_reply":"2023-08-05T00:15:37.223543Z"},"papermill":{"duration":2.097825,"end_time":"2023-08-05T00:15:37.227329","exception":false,"start_time":"2023-08-05T00:15:35.129504","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[ 4 4]\n"," [10 10]\n"," [ 8 8]\n"," ...\n"," [ 7 7]\n"," [ 8 8]\n"," [ 3 3]]\n"]}],"source":["y_pred = classifier.predict(X_test)\n","print(np.concatenate((y_pred.reshape(len(y_pred),1), 
y_test.reshape(len(y_test),1)),1))"]},{"cell_type":"markdown","id":"7846df5c","metadata":{"papermill":{"duration":0.009748,"end_time":"2023-08-05T00:15:37.247066","exception":false,"start_time":"2023-08-05T00:15:37.237318","status":"completed"},"tags":[]},"source":["## Calculating Accuracy"]},{"cell_type":"code","execution_count":11,"id":"ed6e3e8a","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:37.268284Z","iopub.status.busy":"2023-08-05T00:15:37.267859Z","iopub.status.idle":"2023-08-05T00:15:37.436445Z","shell.execute_reply":"2023-08-05T00:15:37.435065Z"},"papermill":{"duration":0.182461,"end_time":"2023-08-05T00:15:37.439113","exception":false,"start_time":"2023-08-05T00:15:37.256652","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[ 37634 0 0 0 0 0 0 0 0 0\n"," 8 0 0]\n"," [ 0 23179 0 23 0 0 0 0 0 0\n"," 81 0 0]\n"," [ 0 0 32839 0 0 0 0 0 0 0\n"," 21 0 0]\n"," [ 0 14 0 20904 0 0 0 0 0 0\n"," 43 0 0]\n"," [ 0 0 0 0 47928 0 0 0 0 14\n"," 6 0 0]\n"," [ 0 0 0 0 0 38287 0 0 3 0\n"," 17 0 0]\n"," [ 0 0 1 0 0 0 8591 2 0 0\n"," 5 0 0]\n"," [ 0 0 0 0 0 0 0 19492 0 0\n"," 8 1 0]\n"," [ 0 0 0 0 0 0 0 0 36812 2\n"," 32 0 0]\n"," [ 0 0 0 0 22 0 0 0 4 38082\n"," 57 0 0]\n"," [ 28 61 12 57 25 32 8 24 31 22\n"," 185589 24 20]\n"," [ 0 0 0 0 0 0 0 0 0 0\n"," 41 35025 0]\n"," [ 1 0 0 0 0 0 0 0 0 0\n"," 41 0 47659]]\n"]},{"data":{"text/plain":["0.9986190931754223"]},"execution_count":11,"metadata":{},"output_type":"execute_result"}],"source":["from sklearn.metrics import confusion_matrix, accuracy_score\n","cm = confusion_matrix(y_test, y_pred)\n","print(cm)\n","accuracy_score(y_test, 
y_pred)"]},{"cell_type":"code","execution_count":12,"id":"e609b003","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:37.462377Z","iopub.status.busy":"2023-08-05T00:15:37.461702Z","iopub.status.idle":"2023-08-05T00:15:39.654083Z","shell.execute_reply":"2023-08-05T00:15:39.652958Z"},"papermill":{"duration":2.206834,"end_time":"2023-08-05T00:15:39.656344","exception":false,"start_time":"2023-08-05T00:15:37.44951","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":[" <script type=\"text/javascript\">\n"," window.PlotlyConfig = {MathJaxConfig: 'local'};\n"," if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n"," if (typeof require !== 'undefined') {\n"," require.undef(\"plotly\");\n"," requirejs.config({\n"," paths: {\n"," 'plotly': ['https://cdn.plot.ly/plotly-2.24.1.min']\n"," }\n"," });\n"," require(['plotly'], function(Plotly) {\n"," window._Plotly = Plotly;\n"," });\n"," }\n"," </script>\n"," "]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["<div> <div id=\"be7930db-6c4e-497b-b832-416da626298c\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"be7930db-6c4e-497b-b832-416da626298c\")) { Plotly.newPlot( \"be7930db-6c4e-497b-b832-416da626298c\", 
[{\"coloraxis\":\"coloraxis\",\"name\":\"0\",\"texttemplate\":\"%{z}\",\"z\":[[37634,0,0,0,0,0,0,0,0,0,8,0,0],[0,23179,0,23,0,0,0,0,0,0,81,0,0],[0,0,32839,0,0,0,0,0,0,0,21,0,0],[0,14,0,20904,0,0,0,0,0,0,43,0,0],[0,0,0,0,47928,0,0,0,0,14,6,0,0],[0,0,0,0,0,38287,0,0,3,0,17,0,0],[0,0,1,0,0,0,8591,2,0,0,5,0,0],[0,0,0,0,0,0,0,19492,0,0,8,1,0],[0,0,0,0,0,0,0,0,36812,2,32,0,0],[0,0,0,0,22,0,0,0,4,38082,57,0,0],[28,61,12,57,25,32,8,24,31,22,185589,24,20],[0,0,0,0,0,0,0,0,0,0,41,35025,0],[1,0,0,0,0,0,0,0,0,0,41,0,47659]],\"type\":\"heatmap\",\"xaxis\":\"x\",\"yaxis\":\"y\",\"hovertemplate\":\"x: %{x}\\u003cbr\\u003ey: %{y}\\u003cbr\\u003ecolor: %{z}\\u003cextra\\u003e\\u003c\\u002fextra\\u003e\"}], {\"template\":{\"data\":{\"histogram2dcontour\":[{\"type\":\"histogram2dcontour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"choropleth\":[{\"type\":\"choropleth\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"histogram2d\":[{\"type\":\"histogram2d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmap\":[{\"type\":\"heatmap\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"]
,[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmapgl\":[{\"type\":\"heatmapgl\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"contourcarpet\":[{\"type\":\"contourcarpet\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"contour\":[{\"type\":\"contour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"surface\":[{\"type\":\"surface\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"mesh3d\":[{\"type\":\"mesh3d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"scatter\":[{\"fillpattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2},\"type\":\"scatter\"}],\"parcoords\":[{\"type\":\"parcoords\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolargl\":[{\"type\":\"scatterpolargl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"bar\":[{\"error_x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}
],\"scattergeo\":[{\"type\":\"scattergeo\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolar\":[{\"type\":\"scatterpolar\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"histogram\":[{\"marker\":{\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"histogram\"}],\"scattergl\":[{\"type\":\"scattergl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatter3d\":[{\"type\":\"scatter3d\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattermapbox\":[{\"type\":\"scattermapbox\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterternary\":[{\"type\":\"scatterternary\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattercarpet\":[{\"type\":\"scattercarpet\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"table\":[{\"cells\":{\"fill\":{\"color\":\"#EBF0F8\"},\"line\":{\"color\":\"white\"}},\"header\":{\"fill\":{\"color\":\"#C8D4E3\"},\"line\":{\"color\":\"white\"}},\"type\":\"table\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"pie\":[{\"automargin\":true,\"type\":\"pie\"}]},\"layout\":{\"autotypenumbers\":\"strict\",\"colorway\":[\"#636efa\",\"#EF553B\",\"#00cc96\",\"#ab63fa\",\"#FFA15A\",\"#19d3f3\",\"#FF6692\",\"#B6E880\",\"#FF97FF\",\"#FECB52\"],\"font\":{\"color\":\"#2a3f5f\"},\"hovermode\":\"closest\",\"hoverlabel\":{\"align\":\"left\"},\"paper_bgcolor\":\"white\",\"plot_bgcolor\":\"#E5EC
F6\",\"polar\":{\"bgcolor\":\"#E5ECF6\",\"angularaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"radialaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"ternary\":{\"bgcolor\":\"#E5ECF6\",\"aaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"baxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"caxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"coloraxis\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"colorscale\":{\"sequential\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"sequentialminus\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"diverging\":[[0,\"#8e0152\"],[0.1,\"#c51b7d\"],[0.2,\"#de77ae\"],[0.3,\"#f1b6da\"],[0.4,\"#fde0ef\"],[0.5,\"#f7f7f7\"],[0.6,\"#e6f5d0\"],[0.7,\"#b8e186\"],[0.8,\"#7fbc41\"],[0.9,\"#4d9221\"],[1,\"#276419\"]]},\"xaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"yaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"scene\":{\"xaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"yaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecol
or\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"zaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2}},\"shapedefaults\":{\"line\":{\"color\":\"#2a3f5f\"}},\"annotationdefaults\":{\"arrowcolor\":\"#2a3f5f\",\"arrowhead\":0,\"arrowwidth\":1},\"geo\":{\"bgcolor\":\"white\",\"landcolor\":\"#E5ECF6\",\"subunitcolor\":\"white\",\"showland\":true,\"showlakes\":true,\"lakecolor\":\"white\"},\"title\":{\"x\":0.05},\"mapbox\":{\"style\":\"light\"}}},\"xaxis\":{\"anchor\":\"y\",\"domain\":[0.0,1.0]},\"yaxis\":{\"anchor\":\"x\",\"domain\":[0.0,1.0],\"autorange\":\"reversed\"},\"coloraxis\":{\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]},\"margin\":{\"t\":60}}, {\"responsive\": true} ).then(function(){\n"," \n","var gd = document.getElementById('be7930db-6c4e-497b-b832-416da626298c');\n","var x = new MutationObserver(function (mutations, observer) {{\n"," var display = window.getComputedStyle(gd).display;\n"," if (!display || display === 'none') {{\n"," console.log([gd, 'removed!']);\n"," Plotly.purge(gd);\n"," observer.disconnect();\n"," }}\n","}});\n","\n","// Listen for the removal of the full notebook cells\n","var notebookContainer = gd.closest('#notebook-container');\n","if (notebookContainer) {{\n"," x.observe(notebookContainer, {childList: true});\n","}}\n","\n","// Listen for the clearing of the current output cell\n","var outputEl = gd.closest('.output');\n","if (outputEl) {{\n"," x.observe(outputEl, {childList: true});\n","}}\n","\n"," }) }; }); </script> </div>"]},"metadata":{},"output_type":"display_data"}],"source":["# import 
matplotlib.pyplot as plt\n","# from sklearn.metrics import ConfusionMatrixDisplay\n","# disp = ConfusionMatrixDisplay(confusion_matrix=cm,\n","# display_labels=classifier.classes_)\n","# disp.plot()\n","# plt.show()\n","import plotly.express as px\n","fig = px.imshow(cm, text_auto=True, aspect=\"auto\")\n","fig.show()"]},{"cell_type":"markdown","id":"6b68fad1","metadata":{"papermill":{"duration":0.010207,"end_time":"2023-08-05T00:15:39.677156","exception":false,"start_time":"2023-08-05T00:15:39.666949","status":"completed"},"tags":[]},"source":["## Method 2: KNN"]},{"cell_type":"code","execution_count":13,"id":"219416d1","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:39.699914Z","iopub.status.busy":"2023-08-05T00:15:39.699066Z","iopub.status.idle":"2023-08-05T00:15:40.015603Z","shell.execute_reply":"2023-08-05T00:15:40.014559Z"},"papermill":{"duration":0.330691,"end_time":"2023-08-05T00:15:40.0181","exception":false,"start_time":"2023-08-05T00:15:39.687409","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<style>#sk-container-id-2 {color: black;background-color: white;}#sk-container-id-2 pre{padding: 0;}#sk-container-id-2 div.sk-toggleable {background-color: white;}#sk-container-id-2 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-2 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-2 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-2 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-2 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-2 
input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-2 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-2 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-2 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-2 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-2 div.sk-item {position: relative;z-index: 1;}#sk-container-id-2 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-2 div.sk-item::before, #sk-container-id-2 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-2 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: 
relative;background-color: white;}#sk-container-id-2 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-2 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-2 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-2 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-2 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-2 div.sk-label-container {text-align: center;}#sk-container-id-2 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-2 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>KNeighborsClassifier()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. 
<br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" checked><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">KNeighborsClassifier</label><div class=\"sk-toggleable__content\"><pre>KNeighborsClassifier()</pre></div></div></div></div></div>"],"text/plain":["KNeighborsClassifier()"]},"execution_count":13,"metadata":{},"output_type":"execute_result"}],"source":["from sklearn.neighbors import KNeighborsClassifier\n","knn_clf = KNeighborsClassifier()\n","knn_clf.fit(X_train,y_train)"]},{"cell_type":"code","execution_count":14,"id":"fe6052b4","metadata":{"execution":{"iopub.execute_input":"2023-08-05T00:15:40.041026Z","iopub.status.busy":"2023-08-05T00:15:40.040612Z","iopub.status.idle":"2023-08-05T01:09:53.645091Z","shell.execute_reply":"2023-08-05T01:09:53.643871Z"},"papermill":{"duration":3253.619328,"end_time":"2023-08-05T01:09:53.648066","exception":false,"start_time":"2023-08-05T00:15:40.028738","status":"completed"},"tags":[]},"outputs":[],"source":["y_pred=knn_clf.predict(X_test)"]},{"cell_type":"code","execution_count":15,"id":"80d0ea82","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:09:53.674186Z","iopub.status.busy":"2023-08-05T01:09:53.673791Z","iopub.status.idle":"2023-08-05T01:09:53.682771Z","shell.execute_reply":"2023-08-05T01:09:53.681553Z"},"papermill":{"duration":0.023497,"end_time":"2023-08-05T01:09:53.685168","exception":false,"start_time":"2023-08-05T01:09:53.661671","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[[ 4 4]\n"," [10 10]\n"," [ 8 8]\n"," ...\n"," [ 7 7]\n"," [ 8 8]\n"," [ 3 3]]\n"]}],"source":["print(np.concatenate((y_pred.reshape(len(y_pred),1), 
y_test.reshape(len(y_test),1)),1))"]},{"cell_type":"markdown","id":"30642482","metadata":{"papermill":{"duration":0.01112,"end_time":"2023-08-05T01:09:53.711764","exception":false,"start_time":"2023-08-05T01:09:53.700644","status":"completed"},"tags":[]},"source":["## Method 3: Using ANN"]},{"cell_type":"code","execution_count":16,"id":"9fe82be2","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:09:53.739787Z","iopub.status.busy":"2023-08-05T01:09:53.738634Z","iopub.status.idle":"2023-08-05T01:09:55.132546Z","shell.execute_reply":"2023-08-05T01:09:55.131479Z"},"papermill":{"duration":1.410752,"end_time":"2023-08-05T01:09:55.135242","exception":false,"start_time":"2023-08-05T01:09:53.72449","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.model_selection import train_test_split\n","X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=0)"]},{"cell_type":"markdown","id":"571dcd4c","metadata":{"papermill":{"duration":0.010439,"end_time":"2023-08-05T01:09:55.156709","exception":false,"start_time":"2023-08-05T01:09:55.14627","status":"completed"},"tags":[]},"source":["## Oversampling the minority classes"]},{"cell_type":"code","execution_count":17,"id":"32271f2b","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:09:55.181016Z","iopub.status.busy":"2023-08-05T01:09:55.180057Z","iopub.status.idle":"2023-08-05T01:19:23.222784Z","shell.execute_reply":"2023-08-05T01:19:23.221544Z"},"papermill":{"duration":568.057647,"end_time":"2023-08-05T01:19:23.225424","exception":false,"start_time":"2023-08-05T01:09:55.167777","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["Class=4, n=741642 (7.692%)\n","Class=12, n=741642 (7.692%)\n","Class=10, n=741642 (7.692%)\n","Class=2, n=741642 (7.692%)\n","Class=9, n=741642 (7.692%)\n","Class=8, n=741642 (7.692%)\n","Class=0, n=741642 (7.692%)\n","Class=5, n=741642 (7.692%)\n","Class=1, n=741642 (7.692%)\n","Class=11, 
n=741642 (7.692%)\n","Class=6, n=741642 (7.692%)\n","Class=3, n=741642 (7.692%)\n","Class=7, n=741642 (7.692%)\n"]},{"data":{"image/png":"","text/plain":["<Figure size 640x480 with 1 Axes>"]},"metadata":{},"output_type":"display_data"}],"source":["from imblearn.over_sampling import SMOTE\n","oversample = SMOTE()\n","y_train = le.fit_transform(y_train)\n","X_train, y_train = oversample.fit_resample(X_train, y_train)\n","# summarize distribution\n","counter = Counter(y_train)\n","for k,v in counter.items():\n"," per = v / len(y_train) * 100\n"," print('Class=%d, n=%d (%.3f%%)' % (k, v, per))\n","# plot the distribution\n","pyplot.bar(counter.keys(), counter.values())\n","pyplot.show()"]},{"cell_type":"code","execution_count":18,"id":"aa3846f3","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:23.252818Z","iopub.status.busy":"2023-08-05T01:19:23.251956Z","iopub.status.idle":"2023-08-05T01:19:23.740771Z","shell.execute_reply":"2023-08-05T01:19:23.73973Z"},"papermill":{"duration":0.505699,"end_time":"2023-08-05T01:19:23.743348","exception":false,"start_time":"2023-08-05T01:19:23.237649","status":"completed"},"tags":[]},"outputs":[],"source":["from tensorflow.keras.utils import to_categorical\n","y_train = to_categorical(y_train)"]},{"cell_type":"code","execution_count":19,"id":"0a0017d2","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:23.767819Z","iopub.status.busy":"2023-08-05T01:19:23.767429Z","iopub.status.idle":"2023-08-05T01:19:29.065742Z","shell.execute_reply":"2023-08-05T01:19:29.064536Z"},"papermill":{"duration":5.313708,"end_time":"2023-08-05T01:19:29.068475","exception":false,"start_time":"2023-08-05T01:19:23.754767","status":"completed"},"tags":[]},"outputs":[],"source":["from sklearn.preprocessing import StandardScaler\n","sc = StandardScaler()\n","X_train = sc.fit_transform(X_train)\n","X_test = 
sc.transform(X_test)"]},{"cell_type":"code","execution_count":20,"id":"c344eb49","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:29.092749Z","iopub.status.busy":"2023-08-05T01:19:29.092331Z","iopub.status.idle":"2023-08-05T01:19:29.209741Z","shell.execute_reply":"2023-08-05T01:19:29.20847Z"},"papermill":{"duration":0.132568,"end_time":"2023-08-05T01:19:29.21231","exception":false,"start_time":"2023-08-05T01:19:29.079742","status":"completed"},"tags":[]},"outputs":[],"source":["ann = tf.keras.models.Sequential()\n","ann.add(tf.keras.layers.Dense(units=26, activation='tanh'))\n","ann.add(tf.keras.layers.Dense(units=52, activation='tanh'))\n","ann.add(tf.keras.layers.Dense(units=52, activation='tanh'))\n","ann.add(tf.keras.layers.Dense(units=13, activation='softmax'))"]},{"cell_type":"code","execution_count":21,"id":"c1441ca6","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:29.236225Z","iopub.status.busy":"2023-08-05T01:19:29.23586Z","iopub.status.idle":"2023-08-05T01:19:29.268014Z","shell.execute_reply":"2023-08-05T01:19:29.266637Z"},"papermill":{"duration":0.046985,"end_time":"2023-08-05T01:19:29.270549","exception":false,"start_time":"2023-08-05T01:19:29.223564","status":"completed"},"tags":[]},"outputs":[],"source":["ann.compile(optimizer='adam' , loss='CategoricalCrossentropy' , metrics=['accuracy'])"]},{"cell_type":"code","execution_count":22,"id":"82e960ec","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:19:29.294423Z","iopub.status.busy":"2023-08-05T01:19:29.294016Z","iopub.status.idle":"2023-08-05T01:43:40.990731Z","shell.execute_reply":"2023-08-05T01:43:40.989437Z"},"papermill":{"duration":1451.711594,"end_time":"2023-08-05T01:43:40.99336","exception":false,"start_time":"2023-08-05T01:19:29.281766","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["Epoch 1/10\n","75324/75324 [==============================] - 145s 2ms/step - loss: 0.1724 - accuracy: 0.9415\n","Epoch 
2/10\n","75324/75324 [==============================] - 142s 2ms/step - loss: 0.0763 - accuracy: 0.9731\n","Epoch 3/10\n","75324/75324 [==============================] - 144s 2ms/step - loss: 0.0601 - accuracy: 0.9788\n","Epoch 4/10\n","75324/75324 [==============================] - 151s 2ms/step - loss: 0.0527 - accuracy: 0.9815\n","Epoch 5/10\n","75324/75324 [==============================] - 142s 2ms/step - loss: 0.0480 - accuracy: 0.9831\n","Epoch 6/10\n","75324/75324 [==============================] - 143s 2ms/step - loss: 0.0452 - accuracy: 0.9840\n","Epoch 7/10\n","75324/75324 [==============================] - 146s 2ms/step - loss: 0.0432 - accuracy: 0.9848\n","Epoch 8/10\n","75324/75324 [==============================] - 141s 2ms/step - loss: 0.0417 - accuracy: 0.9854\n","Epoch 9/10\n","75324/75324 [==============================] - 148s 2ms/step - loss: 0.0405 - accuracy: 0.9857\n","Epoch 10/10\n","75324/75324 [==============================] - 146s 2ms/step - loss: 0.0397 - accuracy: 0.9860\n"]}],"source":["history = ann.fit(X_train, y_train, batch_size=128, epochs=10)"]},{"cell_type":"code","execution_count":23,"id":"c66f3d9a","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:43:44.865958Z","iopub.status.busy":"2023-08-05T01:43:44.86558Z","iopub.status.idle":"2023-08-05T01:44:26.055791Z","shell.execute_reply":"2023-08-05T01:44:26.054616Z"},"papermill":{"duration":43.125175,"end_time":"2023-08-05T01:44:26.058358","exception":false,"start_time":"2023-08-05T01:43:42.933183","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["17901/17901 [==============================] - 22s 1ms/step\n"]}],"source":["y_pred = 
ann.predict(X_test)"]},{"cell_type":"code","execution_count":24,"id":"ca3d6088","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:29.962501Z","iopub.status.busy":"2023-08-05T01:44:29.962091Z","iopub.status.idle":"2023-08-05T01:44:29.991499Z","shell.execute_reply":"2023-08-05T01:44:29.990294Z"},"papermill":{"duration":1.967301,"end_time":"2023-08-05T01:44:29.994142","exception":false,"start_time":"2023-08-05T01:44:28.026841","status":"completed"},"tags":[]},"outputs":[],"source":["result = np.argmax(y_pred, axis=-1)"]},{"cell_type":"code","execution_count":25,"id":"4211d197","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:33.86521Z","iopub.status.busy":"2023-08-05T01:44:33.864484Z","iopub.status.idle":"2023-08-05T01:44:33.869618Z","shell.execute_reply":"2023-08-05T01:44:33.868861Z"},"papermill":{"duration":1.942499,"end_time":"2023-08-05T01:44:33.871763","exception":false,"start_time":"2023-08-05T01:44:31.929264","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["[ 4 10 8 ... 
7 8 3]\n"]}],"source":["print(result)"]},{"cell_type":"code","execution_count":26,"id":"e36e98e0","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:37.612404Z","iopub.status.busy":"2023-08-05T01:44:37.61165Z","iopub.status.idle":"2023-08-05T01:44:37.616428Z","shell.execute_reply":"2023-08-05T01:44:37.615631Z"},"papermill":{"duration":1.837241,"end_time":"2023-08-05T01:44:37.618458","exception":false,"start_time":"2023-08-05T01:44:35.781217","status":"completed"},"tags":[]},"outputs":[],"source":["y_test_rs = np.argmax(y_test, axis=-1)"]},{"cell_type":"code","execution_count":27,"id":"49227522","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:41.477349Z","iopub.status.busy":"2023-08-05T01:44:41.476614Z","iopub.status.idle":"2023-08-05T01:44:41.481313Z","shell.execute_reply":"2023-08-05T01:44:41.48054Z"},"papermill":{"duration":1.939073,"end_time":"2023-08-05T01:44:41.484075","exception":false,"start_time":"2023-08-05T01:44:39.545002","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["9\n"]}],"source":["print(y_test_rs)"]},{"cell_type":"code","execution_count":28,"id":"72b1f910","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:45.446971Z","iopub.status.busy":"2023-08-05T01:44:45.446273Z","iopub.status.idle":"2023-08-05T01:44:45.569371Z","shell.execute_reply":"2023-08-05T01:44:45.568125Z"},"papermill":{"duration":2.161727,"end_time":"2023-08-05T01:44:45.571926","exception":false,"start_time":"2023-08-05T01:44:43.410199","status":"completed"},"tags":[]},"outputs":[],"source":["y_pred_enc = pd.get_dummies(result)\n","y_test_enc = 
pd.get_dummies(y_test)"]},{"cell_type":"code","execution_count":29,"id":"6b54f780","metadata":{"execution":{"iopub.execute_input":"2023-08-05T01:44:49.311088Z","iopub.status.busy":"2023-08-05T01:44:49.310688Z","iopub.status.idle":"2023-08-05T01:44:49.698079Z","shell.execute_reply":"2023-08-05T01:44:49.696602Z"},"papermill":{"duration":2.293735,"end_time":"2023-08-05T01:44:49.700483","exception":false,"start_time":"2023-08-05T01:44:47.406748","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["0.9624047680565351\n"]}],"source":["from sklearn.metrics import accuracy_score\n","print(accuracy_score(y_test_enc, y_pred_enc))"]}],"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.12"},"papermill":{"default_parameters":{},"duration":5890.466662,"end_time":"2023-08-05T01:44:54.111787","environment_variables":{},"exception":null,"input_path":"__notebook__.ipynb","output_path":"__notebook__.ipynb","parameters":{},"start_time":"2023-08-05T00:06:43.645125","version":"2.4.0"}},"nbformat":4,"nbformat_minor":5} No newline at end of file
Copy link
Collaborator

@coderpotter coderpotter Aug 6, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

an explanation for this architecture would be nice


Reply via ReviewNB

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants