SergeyZ06 · SergeyZ06 · May 8, 2021 · May 10, 2021 · May 15, 2021
diff --git a/6th hometask/1th_task.ipynb b/6th hometask/1th_task.ipynb
diff --git a/6th hometask/2th_task.ipynb b/6th hometask/2th_task.ipynb
diff --git a/6th hometask/3th_task.ipynb b/6th hometask/3th_task.ipynb
@@ -0,0 +1,292 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "fa9c98d3",
+   "metadata": {},
+   "source": [
+    "### *Задание 3"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "07cc21bb",
+   "metadata": {},
+   "source": [
+    "Вызовите документацию для класса RandomForestRegressor, найдите информацию об атрибуте feature_importances_."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "f97b27dc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from sklearn.ensemble import RandomForestRegressor"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "b1b12422",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "RandomForestRegressor?"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "96f3059f",
+   "metadata": {},
+   "source": [
+    "```text\n",
+    "feature_importances_ : ndarray of shape (n_features,)\n",
+    "    The impurity-based feature importances.\n",
+    "    The higher, the more important the feature.\n",
+    "    The importance of a feature is computed as the (normalized)\n",
+    "    total reduction of the criterion brought by that feature.  It is also\n",
+    "    known as the Gini importance.\n",
+    "\n",
+    "    Warning: impurity-based feature importances can be misleading for\n",
+    "    high cardinality features (many unique values). See\n",
+    "    :func:`sklearn.inspection.permutation_importance` as an alternative.\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d7004d95",
+   "metadata": {},
+   "source": [
+    "С помощью этого атрибута найдите сумму всех показателей важности,\n",
+    "установите, какие два признака показывают наибольшую важность."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "745c9be1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load the pre-split train/test features and targets (paths are relative to the working directory).\n",
+    "# NOTE(review): pd.read_pickle can execute arbitrary code while unpickling -- only load trusted files.\n",
+    "x_train, y_train = (pd.read_pickle('data/x_train.pkl'),\n",
+    "                    pd.read_pickle('data/y_train.pkl'))\n",
+    "x_test, y_test = (pd.read_pickle('data/x_test.pkl'),\n",
+    "                  pd.read_pickle('data/y_test.pkl'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "fd0cfe6f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Hyperparameters are kept together; random_state is pinned so re-runs build the same forest.\n",
+    "forest_params = dict(n_estimators=1000, max_depth=12, random_state=42)\n",
+    "model = RandomForestRegressor(**forest_params)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "b7de19ab",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "RandomForestRegressor(max_depth=12, n_estimators=1000, random_state=42)"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Use the first column of y_train as a 1-D target array.\n",
+    "target = y_train.values[:, 0]\n",
+    "model.fit(X=x_train, y=target)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "7b36b206",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([0.03167574, 0.00154252, 0.00713813, 0.00123624, 0.01426897,\n",
+       "       0.40268179, 0.01429864, 0.06397257, 0.00528122, 0.01152493,\n",
+       "       0.01808108, 0.01245085, 0.41584732])"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.feature_importances_  # one importance score per input feature"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "5229466a",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1.0"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "np.sum(model.feature_importances_)  # the importances are normalized, so the total should be 1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "814158fc",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>CRIM</th>\n",
+       "      <th>ZN</th>\n",
+       "      <th>INDUS</th>\n",
+       "      <th>CHAS</th>\n",
+       "      <th>NOX</th>\n",
+       "      <th>RM</th>\n",
+       "      <th>AGE</th>\n",
+       "      <th>DIS</th>\n",
+       "      <th>RAD</th>\n",
+       "      <th>TAX</th>\n",
+       "      <th>PTRATIO</th>\n",
+       "      <th>B</th>\n",
+       "      <th>LSTAT</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0.031676</td>\n",
+       "      <td>0.001543</td>\n",
+       "      <td>0.007138</td>\n",
+       "      <td>0.001236</td>\n",
+       "      <td>0.014269</td>\n",
+       "      <td>0.402682</td>\n",
+       "      <td>0.014299</td>\n",
+       "      <td>0.063973</td>\n",
+       "      <td>0.005281</td>\n",
+       "      <td>0.011525</td>\n",
+       "      <td>0.018081</td>\n",
+       "      <td>0.012451</td>\n",
+       "      <td>0.415847</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       CRIM        ZN     INDUS      CHAS       NOX        RM       AGE  \\\n",
+       "0  0.031676  0.001543  0.007138  0.001236  0.014269  0.402682  0.014299   \n",
+       "\n",
+       "        DIS       RAD       TAX   PTRATIO         B     LSTAT  \n",
+       "0  0.063973  0.005281  0.011525  0.018081  0.012451  0.415847  "
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Label the importances with the columns of x_train -- the frame the model was\n",
+    "# actually fitted on -- rather than borrowing the column index of the test split.\n",
+    "# The values are wrapped in a list so the result is a one-row frame, one column per feature.\n",
+    "feature_importances = pd.DataFrame([model.feature_importances_], columns=x_train.columns)\n",
+    "feature_importances"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "id": "14becbc9",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "LSTAT    0.415847\n",
+       "RM       0.402682\n",
+       "dtype: float64"
+      ]
+     },
+     "execution_count": 33,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# The frame has a single row, so max() just flattens it into a Series keyed by feature name.\n",
+    "top_two = feature_importances.max(axis=0).nlargest(2)\n",
+    "top_two"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d4a0b4d4",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}