From 99555bb2743606de013d126a94523c2f678c3c1f Mon Sep 17 00:00:00 2001 From: SergeyZ06 Date: Sat, 8 May 2021 19:10:55 +0300 Subject: [PATCH 1/3] Solving the sixth hometask without supplementary tasks yet --- 1th_task.ipynb | 1469 ++++++++++++++++++++++++++++++++++++ 2th_task.ipynb | 420 +++++++++++ 3th_task.ipynb | 292 +++++++ 4th_task.ipynb | 1888 ++++++++++++++++++++++++++++++++++++++++++++++ data/x_test.pkl | Bin 0 -> 17920 bytes data/x_train.pkl | Bin 0 -> 40546 bytes data/y_test.pkl | Bin 0 -> 3224 bytes data/y_train.pkl | Bin 0 -> 6458 bytes score/scores.pkl | Bin 0 -> 798 bytes 9 files changed, 4069 insertions(+) create mode 100644 1th_task.ipynb create mode 100644 2th_task.ipynb create mode 100644 3th_task.ipynb create mode 100644 4th_task.ipynb create mode 100644 data/x_test.pkl create mode 100644 data/x_train.pkl create mode 100644 data/y_test.pkl create mode 100644 data/y_train.pkl create mode 100644 score/scores.pkl diff --git a/1th_task.ipynb b/1th_task.ipynb new file mode 100644 index 0000000..458053a --- /dev/null +++ b/1th_task.ipynb @@ -0,0 +1,1469 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "b8e01db7", + "metadata": {}, + "source": [ + "### Задание 1" + ] + }, + { + "cell_type": "markdown", + "id": "f56dd684", + "metadata": {}, + "source": [ + "Импортируйте библиотеки pandas и numpy" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "7b6be97f", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "id": "d3e4295d", + "metadata": {}, + "source": [ + "Загрузите \"Boston House Prices dataset\" из встроенных наборов данных библиотеки sklearn. Создайте датафреймы X и y из этих данных." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "e0dd0aaf", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.datasets import load_boston" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "992c81bc", + "metadata": {}, + "outputs": [], + "source": [ + "boston = load_boston()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "76613d69", + "metadata": {}, + "outputs": [], + "source": [ + "x = pd.DataFrame(boston.data, columns = boston.feature_names)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "c8b73627", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CRIMZNINDUSCHASNOXRMAGEDISRADTAXPTRATIOBLSTAT
00.0063218.02.310.00.5386.57565.24.09001.0296.015.3396.904.98
10.027310.07.070.00.4696.42178.94.96712.0242.017.8396.909.14
20.027290.07.070.00.4697.18561.14.96712.0242.017.8392.834.03
30.032370.02.180.00.4586.99845.86.06223.0222.018.7394.632.94
40.069050.02.180.00.4587.14754.26.06223.0222.018.7396.905.33
\n", + "
" + ], + "text/plain": [ + " CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX \\\n", + "0 0.00632 18.0 2.31 0.0 0.538 6.575 65.2 4.0900 1.0 296.0 \n", + "1 0.02731 0.0 7.07 0.0 0.469 6.421 78.9 4.9671 2.0 242.0 \n", + "2 0.02729 0.0 7.07 0.0 0.469 7.185 61.1 4.9671 2.0 242.0 \n", + "3 0.03237 0.0 2.18 0.0 0.458 6.998 45.8 6.0622 3.0 222.0 \n", + "4 0.06905 0.0 2.18 0.0 0.458 7.147 54.2 6.0622 3.0 222.0 \n", + "\n", + " PTRATIO B LSTAT \n", + "0 15.3 396.90 4.98 \n", + "1 17.8 396.90 9.14 \n", + "2 17.8 392.83 4.03 \n", + "3 18.7 394.63 2.94 \n", + "4 18.7 396.90 5.33 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "6d4d6c4c", + "metadata": {}, + "outputs": [], + "source": [ + "y = pd.DataFrame(boston.target, columns = ['Price'])" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "687ecc51", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Price
024.0
121.6
234.7
333.4
436.2
\n", + "
" + ], + "text/plain": [ + " Price\n", + "0 24.0\n", + "1 21.6\n", + "2 34.7\n", + "3 33.4\n", + "4 36.2" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y.head()" + ] + }, + { + "cell_type": "markdown", + "id": "4fd3308a", + "metadata": {}, + "source": [ + "Разбейте эти датафреймы на тренировочные (X_train, y_train) и тестовые (X_test, y_test) с помощью функции train_test_split так, чтобы размер тестовой выборки составлял 30% от всех данных, при этом аргумент random_state должен быть равен 42." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "ca9dadc2", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "c3f658ee", + "metadata": {}, + "outputs": [], + "source": [ + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.3, random_state = 42)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "0e66bbdb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CRIMZNINDUSCHASNOXRMAGEDISRADTAXPTRATIOBLSTAT
count506.000000506.000000506.000000506.000000506.000000506.000000506.000000506.000000506.000000506.000000506.000000506.000000506.000000
mean3.61352411.36363611.1367790.0691700.5546956.28463468.5749013.7950439.549407408.23715418.455534356.67403212.653063
std8.60154523.3224536.8603530.2539940.1158780.70261728.1488612.1057108.707259168.5371162.16494691.2948647.141062
min0.0063200.0000000.4600000.0000000.3850003.5610002.9000001.1296001.000000187.00000012.6000000.3200001.730000
25%0.0820450.0000005.1900000.0000000.4490005.88550045.0250002.1001754.000000279.00000017.400000375.3775006.950000
50%0.2565100.0000009.6900000.0000000.5380006.20850077.5000003.2074505.000000330.00000019.050000391.44000011.360000
75%3.67708312.50000018.1000000.0000000.6240006.62350094.0750005.18842524.000000666.00000020.200000396.22500016.955000
max88.976200100.00000027.7400001.0000000.8710008.780000100.00000012.12650024.000000711.00000022.000000396.90000037.970000
\n", + "
" + ], + "text/plain": [ + " CRIM ZN INDUS CHAS NOX RM \\\n", + "count 506.000000 506.000000 506.000000 506.000000 506.000000 506.000000 \n", + "mean 3.613524 11.363636 11.136779 0.069170 0.554695 6.284634 \n", + "std 8.601545 23.322453 6.860353 0.253994 0.115878 0.702617 \n", + "min 0.006320 0.000000 0.460000 0.000000 0.385000 3.561000 \n", + "25% 0.082045 0.000000 5.190000 0.000000 0.449000 5.885500 \n", + "50% 0.256510 0.000000 9.690000 0.000000 0.538000 6.208500 \n", + "75% 3.677083 12.500000 18.100000 0.000000 0.624000 6.623500 \n", + "max 88.976200 100.000000 27.740000 1.000000 0.871000 8.780000 \n", + "\n", + " AGE DIS RAD TAX PTRATIO B \\\n", + "count 506.000000 506.000000 506.000000 506.000000 506.000000 506.000000 \n", + "mean 68.574901 3.795043 9.549407 408.237154 18.455534 356.674032 \n", + "std 28.148861 2.105710 8.707259 168.537116 2.164946 91.294864 \n", + "min 2.900000 1.129600 1.000000 187.000000 12.600000 0.320000 \n", + "25% 45.025000 2.100175 4.000000 279.000000 17.400000 375.377500 \n", + "50% 77.500000 3.207450 5.000000 330.000000 19.050000 391.440000 \n", + "75% 94.075000 5.188425 24.000000 666.000000 20.200000 396.225000 \n", + "max 100.000000 12.126500 24.000000 711.000000 22.000000 396.900000 \n", + "\n", + " LSTAT \n", + "count 506.000000 \n", + "mean 12.653063 \n", + "std 7.141062 \n", + "min 1.730000 \n", + "25% 6.950000 \n", + "50% 11.360000 \n", + "75% 16.955000 \n", + "max 37.970000 " + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "61c8df99", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CRIMZNINDUSCHASNOXRMAGEDISRADTAXPTRATIOBLSTAT
count354.000000354.000000354.000000354.000000354.000000354.000000354.000000354.000000354.000000354.000000354.000000354.000000354.000000
mean3.46988711.40395511.1330510.0734460.5572596.32567268.7997183.7658749.437853407.04237318.277966359.70180812.421130
std8.31583122.6084576.9386510.2612370.1167920.71921127.6653602.1260328.639971166.5222372.25679286.9247797.112402
min0.0090600.0000001.2100000.0000000.3850003.8630002.9000001.1296001.000000187.00000012.6000000.3200001.730000
25%0.0823200.0000005.1300000.0000000.4500005.88950046.0250002.0737004.000000279.00000016.650000376.7825006.862500
50%0.2565100.0000009.6900000.0000000.5380006.22750077.3500003.1073005.000000330.00000018.600000391.60000010.925000
75%3.28168820.00000018.1000000.0000000.6310006.68175093.4750005.40070024.000000666.00000020.200000396.17250016.225000
max88.97620095.00000027.7400001.0000000.8710008.780000100.00000012.12650024.000000711.00000022.000000396.90000037.970000
\n", + "
" + ], + "text/plain": [ + " CRIM ZN INDUS CHAS NOX RM \\\n", + "count 354.000000 354.000000 354.000000 354.000000 354.000000 354.000000 \n", + "mean 3.469887 11.403955 11.133051 0.073446 0.557259 6.325672 \n", + "std 8.315831 22.608457 6.938651 0.261237 0.116792 0.719211 \n", + "min 0.009060 0.000000 1.210000 0.000000 0.385000 3.863000 \n", + "25% 0.082320 0.000000 5.130000 0.000000 0.450000 5.889500 \n", + "50% 0.256510 0.000000 9.690000 0.000000 0.538000 6.227500 \n", + "75% 3.281688 20.000000 18.100000 0.000000 0.631000 6.681750 \n", + "max 88.976200 95.000000 27.740000 1.000000 0.871000 8.780000 \n", + "\n", + " AGE DIS RAD TAX PTRATIO B \\\n", + "count 354.000000 354.000000 354.000000 354.000000 354.000000 354.000000 \n", + "mean 68.799718 3.765874 9.437853 407.042373 18.277966 359.701808 \n", + "std 27.665360 2.126032 8.639971 166.522237 2.256792 86.924779 \n", + "min 2.900000 1.129600 1.000000 187.000000 12.600000 0.320000 \n", + "25% 46.025000 2.073700 4.000000 279.000000 16.650000 376.782500 \n", + "50% 77.350000 3.107300 5.000000 330.000000 18.600000 391.600000 \n", + "75% 93.475000 5.400700 24.000000 666.000000 20.200000 396.172500 \n", + "max 100.000000 12.126500 24.000000 711.000000 22.000000 396.900000 \n", + "\n", + " LSTAT \n", + "count 354.000000 \n", + "mean 12.421130 \n", + "std 7.112402 \n", + "min 1.730000 \n", + "25% 6.862500 \n", + "50% 10.925000 \n", + "75% 16.225000 \n", + "max 37.970000 " + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x_train.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "1c13b16b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CRIMZNINDUSCHASNOXRMAGEDISRADTAXPTRATIOBLSTAT
count152.000000152.000000152.000000152.000000152.000000152.000000152.000000152.000000152.000000152.000000152.000000152.000000152.000000
mean3.94804611.26973711.1454610.0592110.5487236.18905968.0513163.8629759.809211411.01973718.869079349.62250013.193224
std9.25284424.9838286.6970320.2367990.1138770.65470729.3308442.0629358.885388173.6662121.876845100.6932187.201917
min0.0063200.0000000.4600000.0000000.3920003.5610006.2000001.1691001.000000188.00000013.0000002.5200002.880000
25%0.0797100.0000005.6925000.0000000.4487505.87275039.7000002.2167504.000000280.00000017.800000370.1450007.482500
50%0.2430350.0000009.6900000.0000000.5320006.15950078.5000003.5849505.000000330.00000019.200000390.93000012.020000
75%4.3671630.00000018.1000000.0000000.6140006.46350095.0000005.03567524.000000666.00000020.200000396.30750017.665000
max73.534100100.00000027.7400001.0000000.8710008.725000100.00000010.71030024.000000711.00000022.000000396.90000036.980000
\n", + "
" + ], + "text/plain": [ + " CRIM ZN INDUS CHAS NOX RM \\\n", + "count 152.000000 152.000000 152.000000 152.000000 152.000000 152.000000 \n", + "mean 3.948046 11.269737 11.145461 0.059211 0.548723 6.189059 \n", + "std 9.252844 24.983828 6.697032 0.236799 0.113877 0.654707 \n", + "min 0.006320 0.000000 0.460000 0.000000 0.392000 3.561000 \n", + "25% 0.079710 0.000000 5.692500 0.000000 0.448750 5.872750 \n", + "50% 0.243035 0.000000 9.690000 0.000000 0.532000 6.159500 \n", + "75% 4.367163 0.000000 18.100000 0.000000 0.614000 6.463500 \n", + "max 73.534100 100.000000 27.740000 1.000000 0.871000 8.725000 \n", + "\n", + " AGE DIS RAD TAX PTRATIO B \\\n", + "count 152.000000 152.000000 152.000000 152.000000 152.000000 152.000000 \n", + "mean 68.051316 3.862975 9.809211 411.019737 18.869079 349.622500 \n", + "std 29.330844 2.062935 8.885388 173.666212 1.876845 100.693218 \n", + "min 6.200000 1.169100 1.000000 188.000000 13.000000 2.520000 \n", + "25% 39.700000 2.216750 4.000000 280.000000 17.800000 370.145000 \n", + "50% 78.500000 3.584950 5.000000 330.000000 19.200000 390.930000 \n", + "75% 95.000000 5.035675 24.000000 666.000000 20.200000 396.307500 \n", + "max 100.000000 10.710300 24.000000 711.000000 22.000000 396.900000 \n", + "\n", + " LSTAT \n", + "count 152.000000 \n", + "mean 13.193224 \n", + "std 7.201917 \n", + "min 2.880000 \n", + "25% 7.482500 \n", + "50% 12.020000 \n", + "75% 17.665000 \n", + "max 36.980000 " + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x_test.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "1efdc5fa", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Price
count506.000000
mean22.532806
std9.197104
min5.000000
25%17.025000
50%21.200000
75%25.000000
max50.000000
\n", + "
" + ], + "text/plain": [ + " Price\n", + "count 506.000000\n", + "mean 22.532806\n", + "std 9.197104\n", + "min 5.000000\n", + "25% 17.025000\n", + "50% 21.200000\n", + "75% 25.000000\n", + "max 50.000000" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "01c09fd3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Price
count354.000000
mean23.015819
std9.388585
min5.000000
25%17.400000
50%21.750000
75%26.600000
max50.000000
\n", + "
" + ], + "text/plain": [ + " Price\n", + "count 354.000000\n", + "mean 23.015819\n", + "std 9.388585\n", + "min 5.000000\n", + "25% 17.400000\n", + "50% 21.750000\n", + "75% 26.600000\n", + "max 50.000000" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_train.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "eb6a8d0d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Price
count152.000000
mean21.407895
std8.660633
min5.000000
25%16.325000
50%20.000000
75%24.125000
max50.000000
\n", + "
" + ], + "text/plain": [ + " Price\n", + "count 152.000000\n", + "mean 21.407895\n", + "std 8.660633\n", + "min 5.000000\n", + "25% 16.325000\n", + "50% 20.000000\n", + "75% 24.125000\n", + "max 50.000000" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_test.describe()" + ] + }, + { + "cell_type": "markdown", + "id": "fafc1d21", + "metadata": {}, + "source": [ + "Создайте модель линейной регрессии под названием lr с помощью класса LinearRegression из модуля sklearn.linear_model." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "ee2c5fc1", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import LinearRegression" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "31f423ca", + "metadata": {}, + "outputs": [], + "source": [ + "lr = LinearRegression()" + ] + }, + { + "cell_type": "markdown", + "id": "061127fe", + "metadata": {}, + "source": [ + "Обучите модель на тренировочных данных (используйте все признаки) и сделайте предсказание на тестовых." + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "b6b64a8f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LinearRegression()" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lr.fit(x_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "b657274c", + "metadata": {}, + "outputs": [], + "source": [ + "y_pred = lr.predict(x_test)" + ] + }, + { + "cell_type": "markdown", + "id": "8e9e2228", + "metadata": {}, + "source": [ + "Вычислите R2 полученных предказаний с помощью r2_score из модуля sklearn.metrics." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "1f9547b7", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.metrics import r2_score" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "7e15062a", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.711226005748496" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lr_r2_score = r2_score(y_test, y_pred)\n", + "lr_r2_score" + ] + }, + { + "cell_type": "markdown", + "id": "6ae23844", + "metadata": {}, + "source": [ + "Сохранение результатов для использования в следующих заданиях" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "888b0771", + "metadata": {}, + "outputs": [], + "source": [ + "x_train.to_pickle('data/x_train.pkl')\n", + "y_train.to_pickle('data/y_train.pkl')\n", + "\n", + "x_test.to_pickle('data/x_test.pkl')\n", + "y_test.to_pickle('data/y_test.pkl')" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "7644ca78", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
lr_r2_score
00.711226
\n", + "
" + ], + "text/plain": [ + " lr_r2_score\n", + "0 0.711226" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "scores = pd.DataFrame()\n", + "scores['lr_r2_score'] = [lr_r2_score]\n", + "scores" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "ff3ebdac", + "metadata": {}, + "outputs": [], + "source": [ + "scores.to_pickle('score/scores.pkl')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "44a16517", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/2th_task.ipynb b/2th_task.ipynb new file mode 100644 index 0000000..94ee83e --- /dev/null +++ b/2th_task.ipynb @@ -0,0 +1,420 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "6b09e506", + "metadata": {}, + "source": [ + "### Задание 2" + ] + }, + { + "cell_type": "markdown", + "id": "60671eb0", + "metadata": {}, + "source": [ + "Создайте модель под названием model с помощью RandomForestRegressor из модуля sklearn.ensemble." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d131b423", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from sklearn.ensemble import RandomForestRegressor" + ] + }, + { + "cell_type": "markdown", + "id": "36eba70b", + "metadata": {}, + "source": [ + "Сделайте агрумент n_estimators равным 1000,\n", + "max_depth должен быть равен 12 и random_state сделайте равным 42." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "d520c5ac", + "metadata": {}, + "outputs": [], + "source": [ + "model = RandomForestRegressor( n_estimators = 1000,\n", + " max_depth = 12,\n", + " random_state = 42)" + ] + }, + { + "cell_type": "markdown", + "id": "5aefeaca", + "metadata": {}, + "source": [ + "Обучите модель на тренировочных данных аналогично тому, как вы обучали модель LinearRegression,\n", + "но при этом в метод fit вместо датафрейма y_train поставьте y_train.values[:, 0],\n", + "чтобы получить из датафрейма одномерный массив Numpy,\n", + "так как для класса RandomForestRegressor в данном методе для аргумента y предпочтительно применение массивов вместо датафрейма." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "4d77514d", + "metadata": {}, + "outputs": [], + "source": [ + "x_train = pd.read_pickle('data/x_train.pkl')\n", + "y_train = pd.read_pickle('data/y_train.pkl')\n", + "\n", + "x_test = pd.read_pickle('data/x_test.pkl')\n", + "y_test = pd.read_pickle('data/y_test.pkl')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d8b1f148", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(354, 13)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x_train.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "b4ab3cbe", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(354, 1)" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_train.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "c36ea22b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(152, 13)" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x_test.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "de82d32b", + 
"metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(152, 1)" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_test.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "8910b188", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RandomForestRegressor(max_depth=12, n_estimators=1000, random_state=42)" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.fit(x_train, y_train.values[:, 0])" + ] + }, + { + "cell_type": "markdown", + "id": "2e94fe97", + "metadata": {}, + "source": [ + "Сделайте предсказание на тестовых данных и посчитайте R2." + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "bd44a8ba", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.metrics import r2_score" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "9ec5c4f1", + "metadata": {}, + "outputs": [], + "source": [ + "y_pred = model.predict(x_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "74bdd189", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.87472606157312" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rfr = r2_score(y_test, y_pred)\n", + "rfr" + ] + }, + { + "cell_type": "markdown", + "id": "96ac37ce", + "metadata": {}, + "source": [ + "Сравните с результатом из предыдущего задания." + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "fa012958", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
lr_r2_score
00.711226
\n", + "
" + ], + "text/plain": [ + " lr_r2_score\n", + "0 0.711226" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "scores = pd.read_pickle('score/scores.pkl')\n", + "scores" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "d413281f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
lr_r2_scorerfr_r2_score
00.7112260.874726
\n", + "
" + ], + "text/plain": [ + " lr_r2_score rfr_r2_score\n", + "0 0.711226 0.874726" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "scores['rfr_r2_score'] = [rfr]\n", + "scores.to_pickle('score/scores.pkl')\n", + "scores" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "f7c628db", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model \"Random Forest Regression\" is more accurate than model \"Linear Regression\"!\n" + ] + } + ], + "source": [ + "if scores['lr_r2_score'].values[0] > scores['rfr_r2_score'].values[0]:\n", + " print(f'Model \"Linear Regression\" is more accurate than model \"Random Forest Regression\"!')\n", + "elif scores['lr_r2_score'].values[0] == scores['rfr_r2_score'].values[0]:\n", + " print(f'Model \"Linear Regression\" has the same accuracy as model \"Random Forest Regression\"!')\n", + "elif scores['lr_r2_score'].values[0] < scores['rfr_r2_score'].values[0]:\n", + " print(f'Model \"Random Forest Regression\" is more accurate than model \"Linear Regression\"!')" + ] + }, + { + "cell_type": "markdown", + "id": "fb9138d7", + "metadata": {}, + "source": [ + "Напишите в комментариях к коду, какая модель в данном случае работает лучше." 
+ ] + }, + { + "cell_type": "markdown", + "id": "a5fa7c0c", + "metadata": {}, + "source": [ + "RandomForestRegressor" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0829d1e", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/3th_task.ipynb b/3th_task.ipynb new file mode 100644 index 0000000..558e3bf --- /dev/null +++ b/3th_task.ipynb @@ -0,0 +1,292 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "fa9c98d3", + "metadata": {}, + "source": [ + "### *Задание 3" + ] + }, + { + "cell_type": "markdown", + "id": "07cc21bb", + "metadata": {}, + "source": [ + "Вызовите документацию для класса RandomForestRegressor, найдите информацию об атрибуте feature_importances_." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "f97b27dc", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from sklearn.ensemble import RandomForestRegressor" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "b1b12422", + "metadata": {}, + "outputs": [], + "source": [ + "? RandomForestRegressor" + ] + }, + { + "cell_type": "markdown", + "id": "96f3059f", + "metadata": {}, + "source": [ + "feature_importances_ : ndarray of shape (n_features,)\n", + " The impurity-based feature importances.\n", + " The higher, the more important the feature.\n", + " The importance of a feature is computed as the (normalized)\n", + " total reduction of the criterion brought by that feature. 
It is also\n", + " known as the Gini importance.\n", + "\n", + " Warning: impurity-based feature importances can be misleading for\n", + " high cardinality features (many unique values). See\n", + " :func:`sklearn.inspection.permutation_importance` as an alternative." + ] + }, + { + "cell_type": "markdown", + "id": "d7004d95", + "metadata": {}, + "source": [ + "С помощью этого атрибута найдите сумму всех показателей важности,\n", + "установите, какие два признака показывают наибольшую важность." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "745c9be1", + "metadata": {}, + "outputs": [], + "source": [ + "x_train = pd.read_pickle('data/x_train.pkl')\n", + "y_train = pd.read_pickle('data/y_train.pkl')\n", + "\n", + "x_test = pd.read_pickle('data/x_test.pkl')\n", + "y_test = pd.read_pickle('data/y_test.pkl')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "fd0cfe6f", + "metadata": {}, + "outputs": [], + "source": [ + "model = RandomForestRegressor( n_estimators = 1000,\n", + " max_depth = 12,\n", + " random_state = 42)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "b7de19ab", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RandomForestRegressor(max_depth=12, n_estimators=1000, random_state=42)" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.fit(x_train, y_train.values[:, 0])" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "7b36b206", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0.03167574, 0.00154252, 0.00713813, 0.00123624, 0.01426897,\n", + " 0.40268179, 0.01429864, 0.06397257, 0.00528122, 0.01152493,\n", + " 0.01808108, 0.01245085, 0.41584732])" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.feature_importances_" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": 
"5229466a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1.0" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.feature_importances_.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "814158fc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CRIMZNINDUSCHASNOXRMAGEDISRADTAXPTRATIOBLSTAT
00.0316760.0015430.0071380.0012360.0142690.4026820.0142990.0639730.0052810.0115250.0180810.0124510.415847
\n", + "
" + ], + "text/plain": [ + " CRIM ZN INDUS CHAS NOX RM AGE \\\n", + "0 0.031676 0.001543 0.007138 0.001236 0.014269 0.402682 0.014299 \n", + "\n", + " DIS RAD TAX PTRATIO B LSTAT \n", + "0 0.063973 0.005281 0.011525 0.018081 0.012451 0.415847 " + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "feature_importances = pd.DataFrame([model.feature_importances_], columns = x_test.columns)\n", + "feature_importances" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "14becbc9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LSTAT 0.415847\n", + "RM 0.402682\n", + "dtype: float64" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "feature_importances.max().nlargest(n = 2, keep = 'first')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d4a0b4d4", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/4th_task.ipynb b/4th_task.ipynb new file mode 100644 index 0000000..ab2d27f --- /dev/null +++ b/4th_task.ipynb @@ -0,0 +1,1888 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "6f6067a1", + "metadata": {}, + "source": [ + "### *Задание 4" + ] + }, + { + "cell_type": "markdown", + "id": "2a1b6811", + "metadata": {}, + "source": [ + "В этом задании мы будем работать с датасетом, с которым мы уже знакомы по домашнему заданию по библиотеке Matplotlib, это датасет Credit Card Fraud Detection.Для этого датасета мы будем решать задачу классификации - будем 
определять, какие из транзакций по кредитной карте являются мошенническими. Данный датасет сильно несбалансирован (так как случаи мошенничества относительно редки), так что применение метрики accuracy не принесет пользы и не поможет выбрать лучшую модель. Мы будем вычислять AUC, то есть площадь под кривой ROC." + ] + }, + { + "cell_type": "markdown", + "id": "6f202fcb", + "metadata": {}, + "source": [ + "Импортируйте из соответствующих модулей RandomForestClassifier, GridSearchCV и train_test_split." + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "55193f37", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.model_selection import GridSearchCV" + ] + }, + { + "cell_type": "markdown", + "id": "2c00f911", + "metadata": {}, + "source": [ + "Загрузите датасет creditcard.csv и создайте датафрейм df." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "58d24857", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('../4th hometask/creditcard.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "793caebd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + "
TimeV1V2V3V4V5V6V7V8V9...V21V22V23V24V25V26V27V28AmountClass
count284807.0000002.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+05...2.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+05284807.000000284807.000000
mean94813.8595751.168375e-153.416908e-16-1.379537e-152.074095e-159.604066e-161.487313e-15-5.556467e-161.213481e-16-2.406331e-15...1.654067e-16-3.568593e-162.578648e-164.473266e-155.340915e-161.683437e-15-3.660091e-16-1.227390e-1688.3496190.001727
std47488.1459551.958696e+001.651309e+001.516255e+001.415869e+001.380247e+001.332271e+001.237094e+001.194353e+001.098632e+00...7.345240e-017.257016e-016.244603e-016.056471e-015.212781e-014.822270e-014.036325e-013.300833e-01250.1201090.041527
min0.000000-5.640751e+01-7.271573e+01-4.832559e+01-5.683171e+00-1.137433e+02-2.616051e+01-4.355724e+01-7.321672e+01-1.343407e+01...-3.483038e+01-1.093314e+01-4.480774e+01-2.836627e+00-1.029540e+01-2.604551e+00-2.256568e+01-1.543008e+010.0000000.000000
25%54201.500000-9.203734e-01-5.985499e-01-8.903648e-01-8.486401e-01-6.915971e-01-7.682956e-01-5.540759e-01-2.086297e-01-6.430976e-01...-2.283949e-01-5.423504e-01-1.618463e-01-3.545861e-01-3.171451e-01-3.269839e-01-7.083953e-02-5.295979e-025.6000000.000000
50%84692.0000001.810880e-026.548556e-021.798463e-01-1.984653e-02-5.433583e-02-2.741871e-014.010308e-022.235804e-02-5.142873e-02...-2.945017e-026.781943e-03-1.119293e-024.097606e-021.659350e-02-5.213911e-021.342146e-031.124383e-0222.0000000.000000
75%139320.5000001.315642e+008.037239e-011.027196e+007.433413e-016.119264e-013.985649e-015.704361e-013.273459e-015.971390e-01...1.863772e-015.285536e-011.476421e-014.395266e-013.507156e-012.409522e-019.104512e-027.827995e-0277.1650000.000000
max172792.0000002.454930e+002.205773e+019.382558e+001.687534e+013.480167e+017.330163e+011.205895e+022.000721e+011.559499e+01...2.720284e+011.050309e+012.252841e+014.584549e+007.519589e+003.517346e+003.161220e+013.384781e+0125691.1600001.000000
\n", + "

8 rows × 31 columns

\n", + "
" + ], + "text/plain": [ + " Time V1 V2 V3 V4 \\\n", + "count 284807.000000 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 \n", + "mean 94813.859575 1.168375e-15 3.416908e-16 -1.379537e-15 2.074095e-15 \n", + "std 47488.145955 1.958696e+00 1.651309e+00 1.516255e+00 1.415869e+00 \n", + "min 0.000000 -5.640751e+01 -7.271573e+01 -4.832559e+01 -5.683171e+00 \n", + "25% 54201.500000 -9.203734e-01 -5.985499e-01 -8.903648e-01 -8.486401e-01 \n", + "50% 84692.000000 1.810880e-02 6.548556e-02 1.798463e-01 -1.984653e-02 \n", + "75% 139320.500000 1.315642e+00 8.037239e-01 1.027196e+00 7.433413e-01 \n", + "max 172792.000000 2.454930e+00 2.205773e+01 9.382558e+00 1.687534e+01 \n", + "\n", + " V5 V6 V7 V8 V9 \\\n", + "count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 \n", + "mean 9.604066e-16 1.487313e-15 -5.556467e-16 1.213481e-16 -2.406331e-15 \n", + "std 1.380247e+00 1.332271e+00 1.237094e+00 1.194353e+00 1.098632e+00 \n", + "min -1.137433e+02 -2.616051e+01 -4.355724e+01 -7.321672e+01 -1.343407e+01 \n", + "25% -6.915971e-01 -7.682956e-01 -5.540759e-01 -2.086297e-01 -6.430976e-01 \n", + "50% -5.433583e-02 -2.741871e-01 4.010308e-02 2.235804e-02 -5.142873e-02 \n", + "75% 6.119264e-01 3.985649e-01 5.704361e-01 3.273459e-01 5.971390e-01 \n", + "max 3.480167e+01 7.330163e+01 1.205895e+02 2.000721e+01 1.559499e+01 \n", + "\n", + " ... V21 V22 V23 V24 \\\n", + "count ... 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 \n", + "mean ... 1.654067e-16 -3.568593e-16 2.578648e-16 4.473266e-15 \n", + "std ... 7.345240e-01 7.257016e-01 6.244603e-01 6.056471e-01 \n", + "min ... -3.483038e+01 -1.093314e+01 -4.480774e+01 -2.836627e+00 \n", + "25% ... -2.283949e-01 -5.423504e-01 -1.618463e-01 -3.545861e-01 \n", + "50% ... -2.945017e-02 6.781943e-03 -1.119293e-02 4.097606e-02 \n", + "75% ... 1.863772e-01 5.285536e-01 1.476421e-01 4.395266e-01 \n", + "max ... 
2.720284e+01 1.050309e+01 2.252841e+01 4.584549e+00 \n", + "\n", + " V25 V26 V27 V28 Amount \\\n", + "count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 284807.000000 \n", + "mean 5.340915e-16 1.683437e-15 -3.660091e-16 -1.227390e-16 88.349619 \n", + "std 5.212781e-01 4.822270e-01 4.036325e-01 3.300833e-01 250.120109 \n", + "min -1.029540e+01 -2.604551e+00 -2.256568e+01 -1.543008e+01 0.000000 \n", + "25% -3.171451e-01 -3.269839e-01 -7.083953e-02 -5.295979e-02 5.600000 \n", + "50% 1.659350e-02 -5.213911e-02 1.342146e-03 1.124383e-02 22.000000 \n", + "75% 3.507156e-01 2.409522e-01 9.104512e-02 7.827995e-02 77.165000 \n", + "max 7.519589e+00 3.517346e+00 3.161220e+01 3.384781e+01 25691.160000 \n", + "\n", + " Class \n", + "count 284807.000000 \n", + "mean 0.001727 \n", + "std 0.041527 \n", + "min 0.000000 \n", + "25% 0.000000 \n", + "50% 0.000000 \n", + "75% 0.000000 \n", + "max 1.000000 \n", + "\n", + "[8 rows x 31 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe()" + ] + }, + { + "cell_type": "markdown", + "id": "f4fba9cd", + "metadata": {}, + "source": [ + "С помощью метода value_counts с аргументом normalize=True убедитесь в том, что выборка несбалансирована." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "392ebbbf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Time V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20 V21 V22 V23 V24 V25 V26 V27 V28 Amount Class\n", + "163152.0 -1.196037 1.585949 2.883976 3.378471 1.511706 3.717077 0.585362 -0.156001 0.122648 4.217934 1.385525 -0.709405 -0.256168 -1.564352 1.693218 -0.785210 -0.228008 -0.412833 0.234834 1.375790 -0.370294 0.524395 -0.355170 -0.869790 -0.133198 0.327804 -0.035702 -0.858197 7.56 0 0.000063\n", + " -1.203617 1.574009 2.889277 3.381404 1.538663 3.698747 0.560211 -0.150911 0.124136 4.220998 1.384569 -0.706897 -0.256274 -1.562583 1.692915 -0.787338 -0.226776 -0.412354 0.234322 1.385597 -0.366727 0.522223 -0.357329 -0.870174 -0.134166 0.327019 -0.042648 -0.855262 1.51 0 0.000063\n", + "43153.0 -2.086016 2.203265 1.654339 2.941050 -1.683045 0.529728 -1.352162 1.793449 -0.723686 0.600365 -0.982212 -0.551636 -1.337000 0.834403 1.251862 0.033455 1.067978 0.160510 0.213087 0.079002 0.216444 0.567241 -0.035345 0.370201 0.157378 0.440341 0.210230 0.090558 0.76 0 0.000032\n", + "170731.0 2.033492 0.766969 -2.107555 3.631952 1.348594 -0.499907 0.945159 -0.286392 -1.370581 1.653073 -1.600434 -1.510901 -2.143280 1.189850 -0.875588 0.175808 -0.419433 -0.464717 -1.414528 -0.430560 0.241894 0.658545 -0.102644 0.580535 0.643637 0.347240 -0.116618 -0.078601 0.76 0 0.000032\n", + "68207.0 -13.192671 12.785971 -9.906650 3.320337 -4.801176 5.760059 -18.750889 -37.353443 -0.391540 -5.052502 4.406806 -4.610756 -1.909488 -9.072711 -0.226074 -6.211557 -6.248145 -3.149247 0.051576 -3.493050 27.202839 -8.887017 5.303607 -0.639435 0.263203 -0.108877 1.269566 0.939407 1.00 1 0.000021\n", + " ... 
\n", + "65149.0 -0.608037 0.277482 2.333740 0.713876 -0.686327 0.424502 0.158410 0.277078 0.005665 -0.574444 -0.383596 0.063757 0.435809 -0.294166 1.561564 0.430549 -0.512260 0.321857 -1.089111 0.192164 0.425425 1.077523 0.095700 0.080007 -0.087784 -0.253436 0.077868 0.055774 115.98 0 0.000004\n", + " 0.890428 -0.914533 0.916273 0.533497 -1.417793 -0.283902 -0.520284 0.002223 -1.050330 0.827726 1.336306 0.961705 0.778165 0.101997 0.352339 -0.892199 -0.538873 1.792922 -1.092627 -0.119284 -0.239564 -0.634749 -0.018377 0.482486 0.102384 -0.559266 0.040121 0.067240 192.05 0 0.000004\n", + "65150.0 -0.819167 1.289630 1.155617 -0.356589 0.742668 -1.179886 1.114827 -0.105033 -1.169136 -1.218791 1.841286 0.558376 0.081792 -1.028918 -0.400824 0.722658 0.402985 0.613975 -0.605494 -0.014715 -0.011025 -0.125263 -0.385443 0.449483 0.536560 0.252429 -0.020876 0.072608 0.76 0 0.000004\n", + " -0.283939 1.355339 0.553398 0.255501 0.561040 -1.338352 1.056880 -0.229176 -0.738105 -1.157676 0.000759 -0.543236 -0.313497 -1.370815 0.770587 0.452886 1.064176 0.458320 -0.199074 -0.019922 -0.076192 -0.211969 -0.256209 0.259185 0.096589 0.327896 0.021232 0.083294 0.76 0 0.000004\n", + "172792.0 -0.533413 -0.189733 0.703337 -0.506271 -0.012546 -0.649617 1.577006 -0.414650 0.486180 -0.915427 -1.040458 -0.031513 -0.188093 -0.084316 0.041333 -0.302620 -0.660377 0.167430 -0.256117 0.382948 0.261057 0.643078 0.376777 0.008797 -0.473649 -0.818267 -0.002415 0.013649 217.00 0 0.000004\n", + "Length: 283726, dtype: float64" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.value_counts(normalize = True)" + ] + }, + { + "cell_type": "markdown", + "id": "b6deb0d7", + "metadata": {}, + "source": [ + "Используя метод info, проверьте, все ли столбцы содержат числовые данные и нет ли в них пропусков." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "0dd0d108", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 284807 entries, 0 to 284806\n", + "Data columns (total 31 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Time 284807 non-null float64\n", + " 1 V1 284807 non-null float64\n", + " 2 V2 284807 non-null float64\n", + " 3 V3 284807 non-null float64\n", + " 4 V4 284807 non-null float64\n", + " 5 V5 284807 non-null float64\n", + " 6 V6 284807 non-null float64\n", + " 7 V7 284807 non-null float64\n", + " 8 V8 284807 non-null float64\n", + " 9 V9 284807 non-null float64\n", + " 10 V10 284807 non-null float64\n", + " 11 V11 284807 non-null float64\n", + " 12 V12 284807 non-null float64\n", + " 13 V13 284807 non-null float64\n", + " 14 V14 284807 non-null float64\n", + " 15 V15 284807 non-null float64\n", + " 16 V16 284807 non-null float64\n", + " 17 V17 284807 non-null float64\n", + " 18 V18 284807 non-null float64\n", + " 19 V19 284807 non-null float64\n", + " 20 V20 284807 non-null float64\n", + " 21 V21 284807 non-null float64\n", + " 22 V22 284807 non-null float64\n", + " 23 V23 284807 non-null float64\n", + " 24 V24 284807 non-null float64\n", + " 25 V25 284807 non-null float64\n", + " 26 V26 284807 non-null float64\n", + " 27 V27 284807 non-null float64\n", + " 28 V28 284807 non-null float64\n", + " 29 Amount 284807 non-null float64\n", + " 30 Class 284807 non-null int64 \n", + "dtypes: float64(30), int64(1)\n", + "memory usage: 67.4 MB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "markdown", + "id": "8c98fba9", + "metadata": {}, + "source": [ + "Примените следующую настройку, чтобы можно было просматривать все столбцы датафрейма:\n", + "pd.options.display.max_columns = 100." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "16a2b2cc", + "metadata": {}, + "outputs": [], + "source": [ + "pd.options.display.max_columns = 100" + ] + }, + { + "cell_type": "markdown", + "id": "79d14576", + "metadata": {}, + "source": [ + "Просмотрите первые 10 строк датафрейма df." + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "a072aecf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TimeV1V2V3V4V5V6V7V8V9V10V11V12V13V14V15V16V17V18V19V20V21V22V23V24V25V26V27V28AmountClass
00.0-1.359807-0.0727812.5363471.378155-0.3383210.4623880.2395990.0986980.3637870.090794-0.551600-0.617801-0.991390-0.3111691.468177-0.4704010.2079710.0257910.4039930.251412-0.0183070.277838-0.1104740.0669280.128539-0.1891150.133558-0.021053149.620
10.01.1918570.2661510.1664800.4481540.060018-0.082361-0.0788030.085102-0.255425-0.1669741.6127271.0652350.489095-0.1437720.6355580.463917-0.114805-0.183361-0.145783-0.069083-0.225775-0.6386720.101288-0.3398460.1671700.125895-0.0089830.0147242.690
21.0-1.358354-1.3401631.7732090.379780-0.5031981.8004990.7914610.247676-1.5146540.2076430.6245010.0660840.717293-0.1659462.345865-2.8900831.109969-0.121359-2.2618570.5249800.2479980.7716790.909412-0.689281-0.327642-0.139097-0.055353-0.059752378.660
31.0-0.966272-0.1852261.792993-0.863291-0.0103091.2472030.2376090.377436-1.387024-0.054952-0.2264870.1782280.507757-0.287924-0.631418-1.059647-0.6840931.965775-1.232622-0.208038-0.1083000.005274-0.190321-1.1755750.647376-0.2219290.0627230.061458123.500
42.0-1.1582330.8777371.5487180.403034-0.4071930.0959210.592941-0.2705330.8177390.753074-0.8228430.5381961.345852-1.1196700.175121-0.451449-0.237033-0.0381950.8034870.408542-0.0094310.798278-0.1374580.141267-0.2060100.5022920.2194220.21515369.990
52.0-0.4259660.9605231.141109-0.1682520.420987-0.0297280.4762010.260314-0.568671-0.3714071.3412620.359894-0.358091-0.1371340.5176170.401726-0.0581330.068653-0.0331940.084968-0.208254-0.559825-0.026398-0.371427-0.2327940.1059150.2538440.0810803.670
64.01.2296580.1410040.0453711.2026130.1918810.272708-0.0051590.0812130.464960-0.099254-1.416907-0.153826-0.7510630.1673720.050144-0.4435870.002821-0.611987-0.045575-0.219633-0.167716-0.270710-0.154104-0.7800550.750137-0.2572370.0345070.0051684.990
77.0-0.6442691.4179641.074380-0.4921990.9489340.4281181.120631-3.8078640.6153751.249376-0.6194680.2914741.757964-1.3238650.686133-0.076127-1.222127-0.3582220.324505-0.1567421.943465-1.0154550.057504-0.649709-0.415267-0.051634-1.206921-1.08533940.800
87.0-0.8942860.286157-0.113192-0.2715262.6695993.7218180.3701450.851084-0.392048-0.410430-0.705117-0.110452-0.2862540.074355-0.328783-0.210077-0.4997680.1187650.5703280.052736-0.073425-0.268092-0.2042331.0115920.373205-0.3841570.0117470.14240493.200
99.0-0.3382621.1195931.044367-0.2221870.499361-0.2467610.6515830.069539-0.736727-0.3668461.0176140.8363901.006844-0.4435230.1502190.739453-0.5409800.4766770.4517730.203711-0.246914-0.633753-0.120794-0.385050-0.0697330.0941990.2462190.0830763.680
\n", + "
" + ], + "text/plain": [ + " Time V1 V2 V3 V4 V5 V6 V7 \\\n", + "0 0.0 -1.359807 -0.072781 2.536347 1.378155 -0.338321 0.462388 0.239599 \n", + "1 0.0 1.191857 0.266151 0.166480 0.448154 0.060018 -0.082361 -0.078803 \n", + "2 1.0 -1.358354 -1.340163 1.773209 0.379780 -0.503198 1.800499 0.791461 \n", + "3 1.0 -0.966272 -0.185226 1.792993 -0.863291 -0.010309 1.247203 0.237609 \n", + "4 2.0 -1.158233 0.877737 1.548718 0.403034 -0.407193 0.095921 0.592941 \n", + "5 2.0 -0.425966 0.960523 1.141109 -0.168252 0.420987 -0.029728 0.476201 \n", + "6 4.0 1.229658 0.141004 0.045371 1.202613 0.191881 0.272708 -0.005159 \n", + "7 7.0 -0.644269 1.417964 1.074380 -0.492199 0.948934 0.428118 1.120631 \n", + "8 7.0 -0.894286 0.286157 -0.113192 -0.271526 2.669599 3.721818 0.370145 \n", + "9 9.0 -0.338262 1.119593 1.044367 -0.222187 0.499361 -0.246761 0.651583 \n", + "\n", + " V8 V9 V10 V11 V12 V13 V14 \\\n", + "0 0.098698 0.363787 0.090794 -0.551600 -0.617801 -0.991390 -0.311169 \n", + "1 0.085102 -0.255425 -0.166974 1.612727 1.065235 0.489095 -0.143772 \n", + "2 0.247676 -1.514654 0.207643 0.624501 0.066084 0.717293 -0.165946 \n", + "3 0.377436 -1.387024 -0.054952 -0.226487 0.178228 0.507757 -0.287924 \n", + "4 -0.270533 0.817739 0.753074 -0.822843 0.538196 1.345852 -1.119670 \n", + "5 0.260314 -0.568671 -0.371407 1.341262 0.359894 -0.358091 -0.137134 \n", + "6 0.081213 0.464960 -0.099254 -1.416907 -0.153826 -0.751063 0.167372 \n", + "7 -3.807864 0.615375 1.249376 -0.619468 0.291474 1.757964 -1.323865 \n", + "8 0.851084 -0.392048 -0.410430 -0.705117 -0.110452 -0.286254 0.074355 \n", + "9 0.069539 -0.736727 -0.366846 1.017614 0.836390 1.006844 -0.443523 \n", + "\n", + " V15 V16 V17 V18 V19 V20 V21 \\\n", + "0 1.468177 -0.470401 0.207971 0.025791 0.403993 0.251412 -0.018307 \n", + "1 0.635558 0.463917 -0.114805 -0.183361 -0.145783 -0.069083 -0.225775 \n", + "2 2.345865 -2.890083 1.109969 -0.121359 -2.261857 0.524980 0.247998 \n", + "3 -0.631418 -1.059647 -0.684093 1.965775 -1.232622 
-0.208038 -0.108300 \n", + "4 0.175121 -0.451449 -0.237033 -0.038195 0.803487 0.408542 -0.009431 \n", + "5 0.517617 0.401726 -0.058133 0.068653 -0.033194 0.084968 -0.208254 \n", + "6 0.050144 -0.443587 0.002821 -0.611987 -0.045575 -0.219633 -0.167716 \n", + "7 0.686133 -0.076127 -1.222127 -0.358222 0.324505 -0.156742 1.943465 \n", + "8 -0.328783 -0.210077 -0.499768 0.118765 0.570328 0.052736 -0.073425 \n", + "9 0.150219 0.739453 -0.540980 0.476677 0.451773 0.203711 -0.246914 \n", + "\n", + " V22 V23 V24 V25 V26 V27 V28 \\\n", + "0 0.277838 -0.110474 0.066928 0.128539 -0.189115 0.133558 -0.021053 \n", + "1 -0.638672 0.101288 -0.339846 0.167170 0.125895 -0.008983 0.014724 \n", + "2 0.771679 0.909412 -0.689281 -0.327642 -0.139097 -0.055353 -0.059752 \n", + "3 0.005274 -0.190321 -1.175575 0.647376 -0.221929 0.062723 0.061458 \n", + "4 0.798278 -0.137458 0.141267 -0.206010 0.502292 0.219422 0.215153 \n", + "5 -0.559825 -0.026398 -0.371427 -0.232794 0.105915 0.253844 0.081080 \n", + "6 -0.270710 -0.154104 -0.780055 0.750137 -0.257237 0.034507 0.005168 \n", + "7 -1.015455 0.057504 -0.649709 -0.415267 -0.051634 -1.206921 -1.085339 \n", + "8 -0.268092 -0.204233 1.011592 0.373205 -0.384157 0.011747 0.142404 \n", + "9 -0.633753 -0.120794 -0.385050 -0.069733 0.094199 0.246219 0.083076 \n", + "\n", + " Amount Class \n", + "0 149.62 0 \n", + "1 2.69 0 \n", + "2 378.66 0 \n", + "3 123.50 0 \n", + "4 69.99 0 \n", + "5 3.67 0 \n", + "6 4.99 0 \n", + "7 40.80 0 \n", + "8 93.20 0 \n", + "9 3.68 0 " + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head(10)" + ] + }, + { + "cell_type": "markdown", + "id": "4637cfc8", + "metadata": {}, + "source": [ + "Создайте датафрейм X из датафрейма df, исключив столбец Class." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "1457947a", + "metadata": {}, + "outputs": [], + "source": [ + "x = df.drop(['Class'], axis = 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "a98f6859", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TimeV1V2V3V4V5V6V7V8V9V10V11V12V13V14V15V16V17V18V19V20V21V22V23V24V25V26V27V28Amount
count284807.0000002.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+05284807.000000
mean94813.8595751.168375e-153.416908e-16-1.379537e-152.074095e-159.604066e-161.487313e-15-5.556467e-161.213481e-16-2.406331e-152.239053e-151.673327e-15-1.247012e-158.190001e-161.207294e-154.887456e-151.437716e-15-3.772171e-169.564149e-161.039917e-156.406204e-161.654067e-16-3.568593e-162.578648e-164.473266e-155.340915e-161.683437e-15-3.660091e-16-1.227390e-1688.349619
std47488.1459551.958696e+001.651309e+001.516255e+001.415869e+001.380247e+001.332271e+001.237094e+001.194353e+001.098632e+001.088850e+001.020713e+009.992014e-019.952742e-019.585956e-019.153160e-018.762529e-018.493371e-018.381762e-018.140405e-017.709250e-017.345240e-017.257016e-016.244603e-016.056471e-015.212781e-014.822270e-014.036325e-013.300833e-01250.120109
min0.000000-5.640751e+01-7.271573e+01-4.832559e+01-5.683171e+00-1.137433e+02-2.616051e+01-4.355724e+01-7.321672e+01-1.343407e+01-2.458826e+01-4.797473e+00-1.868371e+01-5.791881e+00-1.921433e+01-4.498945e+00-1.412985e+01-2.516280e+01-9.498746e+00-7.213527e+00-5.449772e+01-3.483038e+01-1.093314e+01-4.480774e+01-2.836627e+00-1.029540e+01-2.604551e+00-2.256568e+01-1.543008e+010.000000
25%54201.500000-9.203734e-01-5.985499e-01-8.903648e-01-8.486401e-01-6.915971e-01-7.682956e-01-5.540759e-01-2.086297e-01-6.430976e-01-5.354257e-01-7.624942e-01-4.055715e-01-6.485393e-01-4.255740e-01-5.828843e-01-4.680368e-01-4.837483e-01-4.988498e-01-4.562989e-01-2.117214e-01-2.283949e-01-5.423504e-01-1.618463e-01-3.545861e-01-3.171451e-01-3.269839e-01-7.083953e-02-5.295979e-025.600000
50%84692.0000001.810880e-026.548556e-021.798463e-01-1.984653e-02-5.433583e-02-2.741871e-014.010308e-022.235804e-02-5.142873e-02-9.291738e-02-3.275735e-021.400326e-01-1.356806e-025.060132e-024.807155e-026.641332e-02-6.567575e-02-3.636312e-033.734823e-03-6.248109e-02-2.945017e-026.781943e-03-1.119293e-024.097606e-021.659350e-02-5.213911e-021.342146e-031.124383e-0222.000000
75%139320.5000001.315642e+008.037239e-011.027196e+007.433413e-016.119264e-013.985649e-015.704361e-013.273459e-015.971390e-014.539234e-017.395934e-016.182380e-016.625050e-014.931498e-016.488208e-015.232963e-013.996750e-015.008067e-014.589494e-011.330408e-011.863772e-015.285536e-011.476421e-014.395266e-013.507156e-012.409522e-019.104512e-027.827995e-0277.165000
max172792.0000002.454930e+002.205773e+019.382558e+001.687534e+013.480167e+017.330163e+011.205895e+022.000721e+011.559499e+012.374514e+011.201891e+017.848392e+007.126883e+001.052677e+018.877742e+001.731511e+019.253526e+005.041069e+005.591971e+003.942090e+012.720284e+011.050309e+012.252841e+014.584549e+007.519589e+003.517346e+003.161220e+013.384781e+0125691.160000
\n", + "
" + ], + "text/plain": [ + " Time V1 V2 V3 V4 \\\n", + "count 284807.000000 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 \n", + "mean 94813.859575 1.168375e-15 3.416908e-16 -1.379537e-15 2.074095e-15 \n", + "std 47488.145955 1.958696e+00 1.651309e+00 1.516255e+00 1.415869e+00 \n", + "min 0.000000 -5.640751e+01 -7.271573e+01 -4.832559e+01 -5.683171e+00 \n", + "25% 54201.500000 -9.203734e-01 -5.985499e-01 -8.903648e-01 -8.486401e-01 \n", + "50% 84692.000000 1.810880e-02 6.548556e-02 1.798463e-01 -1.984653e-02 \n", + "75% 139320.500000 1.315642e+00 8.037239e-01 1.027196e+00 7.433413e-01 \n", + "max 172792.000000 2.454930e+00 2.205773e+01 9.382558e+00 1.687534e+01 \n", + "\n", + " V5 V6 V7 V8 V9 \\\n", + "count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 \n", + "mean 9.604066e-16 1.487313e-15 -5.556467e-16 1.213481e-16 -2.406331e-15 \n", + "std 1.380247e+00 1.332271e+00 1.237094e+00 1.194353e+00 1.098632e+00 \n", + "min -1.137433e+02 -2.616051e+01 -4.355724e+01 -7.321672e+01 -1.343407e+01 \n", + "25% -6.915971e-01 -7.682956e-01 -5.540759e-01 -2.086297e-01 -6.430976e-01 \n", + "50% -5.433583e-02 -2.741871e-01 4.010308e-02 2.235804e-02 -5.142873e-02 \n", + "75% 6.119264e-01 3.985649e-01 5.704361e-01 3.273459e-01 5.971390e-01 \n", + "max 3.480167e+01 7.330163e+01 1.205895e+02 2.000721e+01 1.559499e+01 \n", + "\n", + " V10 V11 V12 V13 V14 \\\n", + "count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 \n", + "mean 2.239053e-15 1.673327e-15 -1.247012e-15 8.190001e-16 1.207294e-15 \n", + "std 1.088850e+00 1.020713e+00 9.992014e-01 9.952742e-01 9.585956e-01 \n", + "min -2.458826e+01 -4.797473e+00 -1.868371e+01 -5.791881e+00 -1.921433e+01 \n", + "25% -5.354257e-01 -7.624942e-01 -4.055715e-01 -6.485393e-01 -4.255740e-01 \n", + "50% -9.291738e-02 -3.275735e-02 1.400326e-01 -1.356806e-02 5.060132e-02 \n", + "75% 4.539234e-01 7.395934e-01 6.182380e-01 6.625050e-01 4.931498e-01 \n", + "max 2.374514e+01 1.201891e+01 7.848392e+00 
7.126883e+00 1.052677e+01 \n", + "\n", + " V15 V16 V17 V18 V19 \\\n", + "count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 \n", + "mean 4.887456e-15 1.437716e-15 -3.772171e-16 9.564149e-16 1.039917e-15 \n", + "std 9.153160e-01 8.762529e-01 8.493371e-01 8.381762e-01 8.140405e-01 \n", + "min -4.498945e+00 -1.412985e+01 -2.516280e+01 -9.498746e+00 -7.213527e+00 \n", + "25% -5.828843e-01 -4.680368e-01 -4.837483e-01 -4.988498e-01 -4.562989e-01 \n", + "50% 4.807155e-02 6.641332e-02 -6.567575e-02 -3.636312e-03 3.734823e-03 \n", + "75% 6.488208e-01 5.232963e-01 3.996750e-01 5.008067e-01 4.589494e-01 \n", + "max 8.877742e+00 1.731511e+01 9.253526e+00 5.041069e+00 5.591971e+00 \n", + "\n", + " V20 V21 V22 V23 V24 \\\n", + "count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 \n", + "mean 6.406204e-16 1.654067e-16 -3.568593e-16 2.578648e-16 4.473266e-15 \n", + "std 7.709250e-01 7.345240e-01 7.257016e-01 6.244603e-01 6.056471e-01 \n", + "min -5.449772e+01 -3.483038e+01 -1.093314e+01 -4.480774e+01 -2.836627e+00 \n", + "25% -2.117214e-01 -2.283949e-01 -5.423504e-01 -1.618463e-01 -3.545861e-01 \n", + "50% -6.248109e-02 -2.945017e-02 6.781943e-03 -1.119293e-02 4.097606e-02 \n", + "75% 1.330408e-01 1.863772e-01 5.285536e-01 1.476421e-01 4.395266e-01 \n", + "max 3.942090e+01 2.720284e+01 1.050309e+01 2.252841e+01 4.584549e+00 \n", + "\n", + " V25 V26 V27 V28 Amount \n", + "count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 284807.000000 \n", + "mean 5.340915e-16 1.683437e-15 -3.660091e-16 -1.227390e-16 88.349619 \n", + "std 5.212781e-01 4.822270e-01 4.036325e-01 3.300833e-01 250.120109 \n", + "min -1.029540e+01 -2.604551e+00 -2.256568e+01 -1.543008e+01 0.000000 \n", + "25% -3.171451e-01 -3.269839e-01 -7.083953e-02 -5.295979e-02 5.600000 \n", + "50% 1.659350e-02 -5.213911e-02 1.342146e-03 1.124383e-02 22.000000 \n", + "75% 3.507156e-01 2.409522e-01 9.104512e-02 7.827995e-02 77.165000 \n", + "max 7.519589e+00 3.517346e+00 3.161220e+01 
3.384781e+01 25691.160000 " + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x.describe()" + ] + }, + { + "cell_type": "markdown", + "id": "034d2fd7", + "metadata": {}, + "source": [ + "Создайте объект Series под названием y из столбца Class." + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "ae733d3b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pandas.core.series.Series" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(df['Class'])" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "567dea79", + "metadata": {}, + "outputs": [], + "source": [ + "y = df['Class']" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "a535e52d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pandas.core.series.Series" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(y)" + ] + }, + { + "cell_type": "markdown", + "id": "7cdc19cf", + "metadata": {}, + "source": [ + "Разбейте X и y на тренировочный и тестовый наборы данных при помощи функции train_test_split, используя аргументы: test_size=0.3, random_state=100, stratify=y.\n", + "\n", + "У вас должны получиться объекты X_train, X_test, y_train и y_test." + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "b6876e1d", + "metadata": {}, + "outputs": [], + "source": [ + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.3, random_state = 100, stratify = y)" + ] + }, + { + "cell_type": "markdown", + "id": "f1d467ff", + "metadata": {}, + "source": [ + "Просмотрите информацию о их форме." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "19181cba", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(199364, 30)" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x_train.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "39dfce0d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(85443, 30)" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x_test.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "3d6c948b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(199364,)" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_train.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "879bac5e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(85443,)" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_test.shape" + ] + }, + { + "cell_type": "markdown", + "id": "d3fd9970", + "metadata": {}, + "source": [ + "Для поиска по сетке параметров задайте такие параметры:\n", + "\n", + "parameters = [{'n_estimators': [10, 15],\n", + "\n", + "'max_features': np.arange(3, 5),\n", + "\n", + "'max_depth': np.arange(4, 7)}]" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "c3eee1ef", + "metadata": {}, + "outputs": [], + "source": [ + "parameters = [{'n_estimators': [10, 15],\n", + " 'max_features': np.arange(3, 5),\n", + " 'max_depth': np.arange(4, 7)}]" + ] + }, + { + "cell_type": "markdown", + "id": "bd4eb9e3", + "metadata": {}, + "source": [ + "Создайте модель GridSearchCV со следующими аргументами:\n", + "\n", + "estimator=RandomForestClassifier(random_state=100),\n", + "\n", + "param_grid=parameters,\n", + "\n", + 
"scoring='roc_auc',\n", + "\n", + "cv=3." + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "id": "ef708920", + "metadata": {}, + "outputs": [], + "source": [ + "clf = GridSearchCV( estimator = RandomForestClassifier(random_state = 100), \n", + " param_grid = parameters,\n", + " scoring = 'roc_auc',\n", + " cv = 3)" + ] + }, + { + "cell_type": "markdown", + "id": "83498585", + "metadata": {}, + "source": [ + "Обучите модель на тренировочном наборе данных (может занять несколько минут)." + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "15ae214e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "GridSearchCV(cv=3, estimator=RandomForestClassifier(random_state=100),\n", + " param_grid=[{'max_depth': array([4, 5, 6]),\n", + " 'max_features': array([3, 4]),\n", + " 'n_estimators': [10, 15]}],\n", + " scoring='roc_auc')" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "clf.fit(x_train, y_train)" + ] + }, + { + "cell_type": "markdown", + "id": "af25d4e4", + "metadata": {}, + "source": [ + "Просмотрите параметры лучшей модели с помощью атрибута best_params_." + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "fd2acffd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'max_depth': 6, 'max_features': 3, 'n_estimators': 15}" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "clf.best_params_" + ] + }, + { + "cell_type": "markdown", + "id": "f824c005", + "metadata": {}, + "source": [ + "Предскажите вероятности классов с помощью полученнной модели и метода predict_proba." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "cbaef3e2", + "metadata": {}, + "outputs": [], + "source": [ + "y_pred_proba = clf.predict_proba(x_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "1216945d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(85443, 2)" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_pred_proba.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "id": "3c8982ef", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[9.99070828e-01, 9.29171738e-04],\n", + " [9.99704794e-01, 2.95206364e-04],\n", + " [9.99717846e-01, 2.82154033e-04],\n", + " ...,\n", + " [9.99717846e-01, 2.82154033e-04],\n", + " [9.99317795e-01, 6.82204754e-04],\n", + " [9.87539019e-01, 1.24609813e-02]])" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_pred_proba" + ] + }, + { + "cell_type": "markdown", + "id": "1d4cabbd", + "metadata": {}, + "source": [ + "Из полученного результата (массив Numpy) выберите столбец с индексом 1 (вероятность класса 1) и запишите в массив y_pred_proba." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 67, + "id": "80070310", + "metadata": {}, + "outputs": [], + "source": [ + "y_pred_proba = y_pred_proba[:, 1]" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "id": "546b38e2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(85443,)" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_pred_proba.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "id": "569d8dae", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0.00092917, 0.00029521, 0.00028215, ..., 0.00028215, 0.0006822 ,\n", + " 0.01246098])" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_pred_proba" + ] + }, + { + "cell_type": "markdown", + "id": "b36472b1", + "metadata": {}, + "source": [ + "Из модуля sklearn.metrics импортируйте метрику roc_auc_score." + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "id": "ccafc28f", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.metrics import roc_auc_score" + ] + }, + { + "cell_type": "markdown", + "id": "6630030f", + "metadata": {}, + "source": [ + "Вычислите AUC на тестовых данных и сравните с результатом, полученным на тренировочных данных, используя в качестве аргументов массивы y_test и y_pred_proba." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 71, + "id": "44a72323", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9462664156037156" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "roc_auc_score(y_test, y_pred_proba)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fd57c08d", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/data/x_test.pkl b/data/x_test.pkl new file mode 100644 index 0000000000000000000000000000000000000000..f8d590e4837f23c4d0c56a10f96d1e630b0c2dd7 GIT binary patch literal 17920 zcma)Ed0>K= z)mDp&E~F@GX)Q%#O@g47rd6$?{LVcyb6@1SKi~Y}IQQN6=AJn-pZUzpc`vK-qI<6> z^3Up$VoDxsO7k8wDK*|ZA=NZ7-e#>HWJ)&$%m3%v+Re1huz6W4#ZDZTYO{Kjy?uOg zdVFfKDJji+qAA%lE>a|1SlP6%S*xHcZZ#m@*X~b7DqP`gl`ns%ff_7n>SCW>RumdTPd) zbn$j;WqG5`T2(Ad-jZtzQL=4)D#Y2|vW3~aLMnvR%C?ysS}TuDpPCY{y~Sy8H{08D z@YRae3e#<&p`inwh(GyHh?;JTds|yWr9NT(2Z`H6;L>-%UvkmfyUZl(Ddn*T-PS-M`G#+7tEl;(YCUO?k?y8b53 z_tHF{=5=UnM5712evjtsXgo{fV0wK&%}>z$51MbLInX?l#zY!V(zulFbE0twT|Y+i z{xp`-b$=R<(Dfi1htrrtueYRmFpa0_I!%?D^dEP+-iYS$G&>X#OpYV`v;iuP>mvndXOSoI%%b&`8ju zETQ=mx^E86yV3X?T|YweAR0%~^|xu(G24%|E8GK3#u>=7VYMM%TB~d?}5gG;XKYFVpxXUEfA?cN%Zg^*c2Gi^c~u zen_up(7Zj3z3KX^G=4?bm(%D**I%c3f0{R^`D7YxG;X2S!)WYI*Z0uqMxzUj?diVW zG(SnBjjoTP`7#=J(sfsw7t#0yUEfWk&Fo=y8vhhz_bdn5ih$LSCD{UM``FSoRP{%6 zjUQ<<_Y_Fj3qRVa-^Ai8ukT=oQT*j!T_C|qSe{4r&7p;A(7dE;4>!6)g7#Ns7*y-E zeAucEmFd0)IA*27vZdOe56<%}(BQMaMXxnz1qH)nTW`C%ndAh)+LT^3&V1xf?=Mw< 
z{wMtMy5;B9Q)-VtIf}9+)%@=5>6g_Wmj=&Ye8&~#Y>kFF*~3qMd-*?lAIa&KgL2%vz~5dY z-q~ZafYgVJAG|rXLjAl9yWW}$E#<9zL6;*xKl!Y>_nvrTZ#R_V-yFObuD;53`PiDO}0yQisXV?hRuc9bYzwUmzoUU&Nj=p-VL>0>0*TTmgKj&^FJaS7r zQ}0hFy006K$KE74T~J>x7SJBs;}u`H=C<&=i~sG^_0~=-_uhtfx||MgH7fY&pL1%Z z$l9I0Y)1RGItT*`>YazC%NEK2(`2Ay7z}F+-MnCT-E&r=S_p!V*-2VhWlsAF&7U2gOPRpt_S$RU2 zBkk>SfG!8ub6Itl3v#2+iGsF!az2ha^zYMlFk~9(EdYcuf7Un90#ZL--C*XcnRg@F z^7Awq#kc|?%&scbHnv)j)cHJ#vB=WxEei=V)UaXBfR#M|J>{Nv}S-oLj=&vTyprzq4mv zySVoUlG6^RNuX#>?^l%j8XW8^R1?2V&vmQhhJHA87<{I|!KlVh%SmB7@CISEUL!hK z$Zvh&^bZreWL*l@KexR9hz}PQU7|jCT}Y~5kM4_s6(4%;eDu}>dOm~wafw7fn$a8j zX4dTyex316QH=bjr34^aPr(3q_w|P_rPOBscJv>Kw{0vSVe~UE7&-ToL%TZI?OOV8 zb~Al_!^djKb4mU1m9Osp_e&$R+uSg4Tl7nHb1|=v8Dy%4~GX3=ptqj-0aRK8W<4!bu;Ii(! z?_HhfeU9sqdJyqc;!8_E=pNVU<;wZ{==r;V7G-OkX`;(T{Jfyb_;`=x4D^AOzkj?x zWQPmgM>tnFf$Yss#{(_Qc#UWbp7$>en3A-OYu^_7dgOC=EmGV1R@>b4Y$JTmoG7$|JLTl~q|y5V zaQ|mbq4m88V^-|0o?UH=E=R8Od|O?Pgtf?Gb#qhUv9XO&PVi93zqPVw$6pw)OY!}6 zC+l)#9DBop`w-;`XD;+S)zY=8{<-DtT95+~c}i4_L&QT)MnTWeYR9=qcA6tr5ja{kR8=@WxqCk-V$Ln#FBkipE6y$@RevtQGvnDew z+jYGe;iz2J>%YRIdUJ0+QW@MrY(P*RLf8f=0n%j^@7H4No`2@7EO4-an4bW zrvv`rJJh6xzl6^tD~V-VJDFRaXckE@R^EspU?^UN^ATA9QaoeLVjNF$QcY59$T@JKa+|Fr1~t&%LSt-Hnd@t8#ULKj?@h^G=?mU=?s-_XHltlfW%@Lv}+Pj<`KHKb4>J zUMA%~9`J@V96KL1!W|jr-=VnBJq!kaZi}BYrV7~)+2J#ANBV_l05m;W|9uN=#PwtW z@hZFh$+#16Q5~oOcRbY(a)-h=jaW^oPIAa@@+XkJ*?EXumxl)oZE(H*)c0&CCqIhh zaGgU@9>@Q4#kA(E`h%9Hy0pO0DNzA&g7=^IDM`0$i8p2o$TCHZs^Dc>e>*mY?Vug< zyS}6cVQ+}jXVn9m8H^?#lF%CtuhED-*G)*y!VG#2`+Cr>+CyON0ZYa|{8+aG^q-of z4@KelBWdX;grhk_;K<03gH^xd{FMBsFh%ztX*aXTe^$1H9dU0I^eV9P!t(J{;-TQz z%&Wu_6$PSUqD&+o)AKpvj+C?B0+K&9^@SsGad#(Ad_I4}{FHDItWE1C zy_Ml_GHxdke~a;htXcCK1}ynX*PD_5Jf*jU#?SR1ssD$B*F}xtpU*dUX%}yd*XTFJ zD#~H|7zVp%H0w4li1)`)ZybN5y$$ezx3}MKpj@6x>+z#J&x!P*D9n4Moepe_`#f+6 z>U{*;Gx1S9ZhNEM34Fsmus-blwfDl=^>&dQmxrZ zlh;4Bp1JK|h5v`!y59egzByz)oyJzziOaRS+O zC-lSFWN#zt!IUMzsW9PxFH;D)D5{}-AgePB4`*X{&a{R4~z43aGFZzMs z@V>+92Ysi@K|E@yuLt=-q1t`h!700%6TY1s3a^N$Fra{Ol-FY+Jm`XPZYS3dJOKT@ 
zNY@+2Ietz>K|d{4k+(D>I~V|%N0gJ3l|s+g9`74#Asnj?7e<|M`*e&k-@*D`v6?1< zo9mYSVe`CmX61K&9!I;D_1FGfe|rJ6s`7|UoHsl29ju#NLmVw+`-*|3?X`-CF|U$* z{2R)%7W+U<^7w$(cbFeUkiF>qLXmhg=&UNspK$~;d-q@;l-mpLXRKkx&_74)#>P8?o@XPvta&{ z;RlaJ_SD8VpX7AMdUH1GLs2lEomXYL;7oSC^%eMS(U;DXZ?PQmxBSVdx2^uVT?-Y7 zKTlZpLWRV{g1IOsE0W~c{dS3<@A-!cRGBwEZc6?U069NKe7|8V`x!kKP~0x@MV#El z{e}84F0aC>=E3J_J&J;P1lyIOL`Z^NhYep0?5@?fkd7{@tjXLV93UuX+59uMcbODOS^hq_$0YncxZ zX^i^68wl@2Kpg?@0f{paIYaee7BX~=3=UQi5)ZIy~uaCKN17l z?kWo70m&)Ddl^Tizq)n>xxdN0pN@FI_1;IKd_Aa`a7Xliz8ErLS>_5}w9OAW!;`WgMIJQ+3JasvQ2U%246#FCf|25;a3v3xC z3~n>$&5nAL`!D)TGZ*y{5a)`|@$WGBC!=tM%R1w{5%H@v)-9d+2??KOX7|C)YMepVt2y^2-S9bG*avR}}6yieiW$2!!KV{_xYB6Bi3Rl=IWVbij2J z?q_9E+-?LK-RU;K&U=mOZ^%ojZ&nfva@%yCNc&W#4=C^NO!?2;)|g*(>O;@rXm9MN z&9sKb&+IQTP+v2HQ~!`29QWrbJXriP9NOFa)A1KM&r%d4UiB2e7V$$XV+M@kIv?Ae zomWY`nMXWigBR5`h#&kvd7~^Rjc-nV4A0FQWnIO}eVN+u$;(l%ezCi=q29P&h4@PG zXMZ#t5@Bv_=>6yFP0DK!0B@|Fv&ZE&IdLQ9i znLfJo&#VlPy!U=rI9(jBSjRCxFwTQz9y6Ei8vf@z80%6k;qcA|QHwrkW~evKYgRSB ztjfM{W+U7mQRlmN?##@zU}Jle&$Wr;_)E~{QJ6ZZJ;#LziWhbsJiysW6QMSd1dDJ~ z@FuNlc{rsNJs;&+TljN*kcWI}!Pm1|LDmkKRovFzPbiN&$SX<+N6CIgzK!QOcz=xj z2XU)AT2`p8$5YoM_v>Y!&Xw}HrICO*&iMk_fx_$YL!PkDLUFS#OzS^p z+k#E!%gJ%m^>K2LR_HCH_(T2Q)k7fOy*S#nE9=*hSIPSHY|881TfmhBLEuz-oG!07 zj&oQ)AUOgLWc)v19OopyU7Lh?@B|-7d~0``3BR1u*JD(_MttRdR?u_%w2W_9k8~sb zeL!-0(s>(ieSf$=_vZhgoF=9Jexb`T>MtNpvAy*L zQ4ASBDaD!AP4=$WDSUAMaxI8E-CCo+mD>Bvg z6HE1omtfW8BIwu1o`)Fe4S6=%K~glxc+|m(*5jx*wga>)FHoDki&-T0# z`!VNL?58si8341=f^C0SA-_?$o-_sh^N1hv_aCQFzp)*d2KRUr(A}6Pa=#JJSvl)G zQTAzn=DNQl?#O-aFoo(;O(Cr9>bJKP73=FU(i_SldlPtF^4SYQl(F8hj>!338~7$~ zewETGtw_&~JOuOCD{7`Bq>M)~u)DTaf#N!(<9g)&6i-2(R>u>xV^bgeOnp1WQ4Yr+ z*_WBk`i+4q^%|)A&l|^W8GjD-L^+)UK^*oGC%=r(IWQkZJfQq1JPLMw?<2Tn1jF4}4zEY%!>jqcP7L9d@p`1*astUu1K^YDAKt0BaFcEaM)eSB z?{&Ce5{&24wzK^^)CQp}R#CtI>p}I>}@N@glGH%@EdT&d( znYO**!IP$->91H^QAVbxX+~L_HDD^R<9alkctgYZvj}tU#<|^kP3z6qf^E z3lBR_bxYcxfn8yk)(~FEc0qiH`9X;f+Tq$M#8;Zbg}_?_*ZAA}enxsjeob*Mco_EQ 
zIvVRu?z2}V%(Dh`g`fwwZ{~Nu_FTP5-gBMu%h5xhTX&KBILShJTN~&hP6b?CV9$e% z_<`g-JE<=t`lAvSH*!8iajr~%Tk?WsbWU|>1L%ke!3j$$5#^ffo zMzTwCrLFN*#avu!sUwZHG8wZ*9a^1yD{J_3H61PiQLBn@V z4JVB~tFOnXKKhq-tY&>v;s@dAcRRUH+XA1vOnp?=!vg6Xaf&Sw#v7_E3 z?&L8K@u2hZzv*&}@EZHuRF6*@21Vl7)2!phcrEK4f2U&oZI%yQyvk(wU`#P@{+Y9OQ8U1?11233%w&?R88}m7?moc8_+4C;0TZ$$_pLw@9 zp8CQ18o@7Q?4i$Bc;D>bE0Fjl&UMQ8BkbUbIIaB4=tHD8<30w~#c4e)!k^p6ka*BH zll-S0WKY)!57&{5$4sVf}v+xLZcTo~Q& zXxE&V;QlZ^#}Mx`&g*6TN%O&a#L6y^^mE7RL&Ep!_J;e`T9=FUi6T{&DH>8dT^|9x zU%pFUL9mP2{@8!) z^S@Pk+vMVUM4ZEYBc7xv zifqrNpHnx9_Cj1;>X&Tof#1id$9)O9zm(5yP@gG>>mlw?DebTO-woQK>&*yvq<^oZ zb5G7ua9cY*x{UXS^=8LwsW*{tBfiv!nSC~dZSBi>v!YH9f$J3xHn zehKk|GUvFYe=3pGPx6L&>$m29Ka}fY#{GJHF6#TW7y@^jpO_STfb${dA@+W~#L+JN zeVN+y{f%FBy~()rjPvB*9})*?(&$`GNfaD;qn|iZ!Tv)$q|AA8xt{bXh@-uG(7APc zJjL@59KRF=@#zAd!;hdm%ikAn7Qfgy{~WJ_?4a;9>SOpIKTOM{Izuyf`S9JsJDd5O zp`sYY1@E^g&MU>HpQbBpK%8uJ5*1Jk1^ulIsn>x%4Nw^!@-X2h>$-@YgN zPj9T_>vc%B2Rnb0dHo(b-&?sQMcY;b!Uj1>weBy~&L+Lye+Y#B{(gNz!azD5Wet%&wpK~BP z7|q`!=!A8|id;Xa4}q%==B}Nvp7a(${dMkFDhl?03RKMJxWAx+)MF~=+k^*oGcbO+ z+rOWMb>-GzOI)?!fU_M(W&C-@xpn#Vxeus5+9s0n^>TIPGW~k#r(3C?^`I+$?{aUD zjtAxQdhP?_xhLw�-PJssGlwvBS8YDEau}bUNqI13brRmCJRcH{&`vo(~~@5FSqZ z$nlZ!n(f+tu7>rq&^NPF%T<5tcKtNI+2g4^xBMpiOEe4@Ur(C*pK&~uc}d-L;yd23 z{?x*}1F7~tOnJQ_U$};KmsM0(9vF>vzHEwbN;lMdneT;4y)`G?$@Yh?5!Sn1*Zi)p z$H-6de7v2HHXeGm$#j!({E_id@I>jaMgE{F8wag8U#QD5${Uf#5|7G{fYXs$<$>|Z zF>ln|R4HWKsRvzlFRNH7oOz{VT;RMB;}7|%IOov+gRC}ta-2Z>IOQ37lf2hChU%6+ zAi5Ey*#*zvAIY;T@b?#@0nc4AZaD6bw1a-k6MaB_6Fi~JH^I$;Pycj>{2q9%kR>_d zo8ZCno8Tt#4e(m#I^vf_%r52zW>@XELCo%De;K5u_FEt37tC$UZOvZhcINix4(5*L zPG)a&XS0vlSNp{d{yMp7hRs^V$J@7yw=e!kib?#ciOpQYS|v{V6_hmFNO`%+6jM@$ z_ClDon)nr!Sn*n#%~~xkW4!nslVtJV%4tdC$7nxA5~76E3aOwyO@=8WP5w4WtoW4@ z@sljtub}YHv&2q}PdCY*MHwmHYf4WSKfIznNo+!rX`KA8t0^gIQf6B0*o+kM$)@!9 S*d$Y2d{UZiPFhBs_x}M=Kguuw literal 0 HcmV?d00001 diff --git a/data/x_train.pkl b/data/x_train.pkl new file mode 100644 index 
0000000000000000000000000000000000000000..a96911ebdf170f74a84cc94be88e78eb277eebdf GIT binary patch literal 40546 zcma)FXJ8dY*Is%Nq?Z7JKtc!*LK>ZL553of6cs~&gdjbI1W=4fQ3wJOQBV{t*iccF zB9;IuAVm-xMF=h6OEDG@6!e>&^PJsymo;C0aG1S&@9fN+-ZrP)Q*qB5`Y*S}w4^EH zlF~!QPEAV=Nl8nZoSc_isaH})QXl>Q6?rWm$$Ka-D7S3lJY{3#oVImXrVMiN~pj$;+)XC3Et$nf#cOGbd$CNJ>jfnyKbXOiLa+bxL|hTISdc z_4V9x`o_H63Tj#UmKAvejrn;!OO44}m^UIXXke*8?JkFf3+7_n7`e^Kwh|P~RUE9n(D~FIoL| zZrLg3whzf%pq?+aYHq_jZIxP8a@vHkYPb4N$!H&0Y==^7+dj86hK z!1W4%mvMa(#?1lO0RDi_XJDKL_zqw`V0GNL4C73U9|jx;SO@R~?%Rp+5{#!~+!Nyo z7{7+`Zx}BETn4xUpWg(W4EQoWUykv8fX@J)1PsM}(HP$Vd^4Wvs+~?*cBt=UXxEhH+(#b1)u{@o>OEz$kqFD#ph#ZVea&_ys_j2F5!8=i~ZR zjDs=$8RPC4H^g`(#=io72>3cae*xnvfc}7u0N((70`N(EZxrBjr-;R zzK82A0k`7%LcoK#-T>pjFt_Jp!S!!3egoq-F&+aLfa~9590k}G z&=2q~+}9Uy53cXRxFN=mV_b-FS&aPvU&QsdFkXalH^7B}Ie=$yUqy`jVEip0;d(H} zcVqk&eFgXpU<-VH5b!Ttzl!l8z_x(*16Bpx zi~H9Bwgx;3xE%L=f^j>H%K%Q0>wxod9l-bm|G5YlhWlQ^co@b90Gk26jn69pz6m%5 z@Lt>(jqxeKNL)|AxFX;$xLy^o3$Djw{3ymX0N3MsKa76`T!8E07#9M10Dr^h?Exp^ zdN{@hFn$~3k1_rjU#31ywt8iD-S`2 z$7BRN{m-)JJ@{o~dQ|1_Uz%2mWbAv-xGtxL{^;4-c>bl~TYYHtD)pBznBMFbckjEN z`nazm!~BU9Vg79jrxE5C)@76WG4+eJsoI$FmDhZqZ_Q)=TH|s7);tZ{{uogHk<*?b zW2(n@dLO^*H-f6qYX4@p@5|vnnQt~7*l+%YG0e-R+HdzgJ*##QMZaAXQty|~t$AKH zr+=xTHP6hS0>dh9IN`bapr_saAJ(N5^RJNT`8ECYyRV-$&+|XO`o+Kwr#*T*Kd#5mIVg(scK=-i_f=;YnvQWh z-rqLaynn+RQ2#YKfja}+TEDCJ-zVG}FWq@9=)R?Auy6JFdj4?w^daAl<9k)WeH|Gd zn#bo85lMPq?#d$l*iFM}c357mE3Il;zw12@@8d;}i49Ah_Q8Fz6gp^q@p`uN|-CXH9~D^R6HO~zjR zbC~OTR@5&Wdv}{cPsI(5?isheF78X9<#Yb(Jni6hJfCyl^!ZoIrr9RK{n?&A?z}ta z^0l|E_2}n#yNorj{{1eeJR0qutB?I3PI(LG-*cpEW!xwDmTKYkb#Zr003po~OZ&-7gPX zx7}J#6TP8Lt@SV-6nHeg@0Rrpq%rHhi~PA`dE6(@m5uWQzCE3d&s$NM%P&R8b=z&t z!{_E}&7;}Ql2ab$3qJfifdgrE#fHz{-%{`t^At^Ix>*%r(WyS4Sq-Uk{67~$6E9+( zNZMtR%g$=nJj`#-F`Y(%AEXQ=&4%{;gYRLUoN3ncYre5S))zou8z)Ows$4QZ@G;Tu zy#~DhM@G`|n{yuc=y&h|!^scyKJUunbGp`oYF}yf@~xK(N?gx-YZL?8hx0CorP8ez z);&6;G@jqN9*qZz@9_E7BhA0EZ2s)c4~`!EBl_$sXTjg?3DgF=dm8caO0y(o}NvEhFZy6hUPt(m~`V?|5e*|g0A=q7>YJKOaY+^krn6}T^G3AHN_W8K! 
zJB$zY_<8$`V7Y%-&Zj<})7E!1Ubo65#;XweFy+?+?K=ow8;09F4}6rNM4k`RX_b+T z*Hug7_nq@-J}^-5B7hqIy?Vp&YVTU-qwz38_?BVl^$#!b?44mUfCi91M2+h{F!;B- zD`6g-Pfi;6btvimo+|oI9qRh%g1;+;Z031#`asUzh3^}N$;J-;aol5~LGR6tc%DI> zY18aqHx9cbdI{z!!1F6Q)$19b4L(|)+B9wQ`=r>9v7SiiskWZTeELVv@3+6n{wc5l z@NF>P7bkcPxm_?3>j~%eyaPS)Qe(=h-X;2CSCNZ`;f>d(`qAaoCFlpWn7^gR)5E43 z^4K+3yw>!l^ifSe1EMH6+2>4rhd^teyzzSXK+Om|N;^;|R15bdP_GG7i{|hB74tax zwalaC=9upE@JDfP4Z30TP49V_4-|UzI&0f{RqN|d|F~k|jS}Z!`s+x^OJ+UR=MP)& z`1d?T9+p!VJyo<|t8ejx!^`jcyXT+6N1f}@^5xkqoL3m_eKWnvp8MaybM=D!6Fe&N zd}WHcXHR%8{Pp@TUk?V~S=EIe`~CR$kEF{!f)4VhTEDC5L+LyGoYm?56{Z6B9p*6% zwqJzqiabXKn;J&}&vO8H=od>bzTe@-&U1o4kmt%?;C&8be3>nFi9ap-_~h+hPBw7G z1C0fT3O(l!H9LOu?*{xkfpIkV%b*>c=w;ydh|Na zwzKACIz8+8Q>ix@wLp(X0*%{0YT%vLpO40e|7Eg0mlRC>v;iphJNb^tQ&W1s+6VI}eTVT<=m+OkOrC1Fvpfy! z38X(aHEG#w*B2HZ=zUG{x7Nq<@QjD&!8wi|L4Vyh`SZWSz2w|9*{2AWqaVpQgr18~ zl(4mx^}F8tCia|Z)%VYZy}cohc69O|F#FM4cs?f{2!GSRmk>%b<6moc;I2+3&a>WB z6Mq#wD1idBB673Dd9-}~yfcOVQ1y7-5|)gL&Fx9(iuV&~BHfj(b=Rfp6r5QKq?} zLxj)6eq5Wu{LLqV`P#GDtcOH3prx-=8#V0IA z6aRtm3DG+unGVylF>hP)D7Vw6!R~Tyv}SCD9#aeX@nxaQ1iFwLbfxMiw_VT2F#j&6 zvplUFLSyC?rJk)Q`01n{k=tyy%6TNvr+1r}D*gi}{pkDGB;!20&?iR=zxaLLD|oK1 z@MDEo_$PV@+b7W9_79+;R4Zv$bXD93zEdE05YGDUM$xPMs7<$W`Pt<~&U>$?2w#P@F#@@m!I#tf#=B~;I-JDhGDAjo^~fanv8ydo|xN<255x=`(*&H zmEV%(sq%xF3tBCDd_($j{e7oLg}jLMbizC$_YK4JT0YhDxJPlFDy82PJ)|!sRygQ0 ze080FpU<-C7)LOkJplc!a$`!Ya=zF0%Hp?lr&IP<0S|Kfl}M*rpPHBjy!I#M{8SpJ z&1dxa-NDXMi&{KsJn)+0;7=@#=Xc@(+Z|J2Z~N1}d5QnKFj(|GC%%dNG0eYwKc?@Q zaiskg1LYi@^Jsq9H=FhBrIjg08|?5W=EuQa5dFe1o>6Xt{4bBPeOf5`TL|5IA;eoDh>R}YdyByeol$EwA9lcwomJWJ_gX93Q^CFY$E=6p(D{p!!7+y`$;d=qv3y) z>NmNxO)uVZTl7)B4*9coBze9#@xi+;E4jM+oZq-l!zNYB$A}?&a z-Z1~N!FM{gri92fYpOgkm)DaM3%{jJw|c);9rd)(1P%VSKazTMyLfa%XJ6n8^n*j< z9}VI7#hYRmH=(iHUio&})-Bk#?(mby^BD%)*~dNX?}nb35>DTJJ~=Yy58-dvw`HOq zgtDD|6Z&;rQ+jH3)S=3`vVLzp-Z0iGaUD?pG~^HT>%d;LU~kNh*TxB+Iq^W_^%D|j z3LsP4YrEe~zqaj0=6j%@{8&=oQjrYNL!9$yK9DJXu~t+rvPS!^2XLP{I&ZudeN_A3 zl4YObX?Zm>p!FB_+c}Tm8|#S?v~JzQa~Ib+g8RMgMw{=LZvPpd2tNy>PUZm_;x}>f 
z9nF8A$-;S8qu*xCJ2JQ4Y0Ts9C)fK{yD{^(gjjm}hr}5}8+-BVVt#T>H@!v9`O=vu z&kp%%wD_-`>k&Ix`$wnv!j2kF9Cs2taIQ!1SCaT$YEo#c*J~Xb=YBpc?+anyiyt|G zf|S*`ex}F|_w!*tbyxCV*XF{A2i^Ib_{rH%D)bXa4f5~x>ps0S=+rrnmfOm1 z1%16T9l8F_hRBwCv2X73N9$D=#QurqeQN}K7Jamsou&0&r5|WLdQbq}e&Fuv`wzdw z^UwhJDTKc$6UK~BUpng1?%wY(&ma%eLoYd>`g}gaY(K2<9rnL9V!U~!gPf;>zx7ek z*>AGXdoO?BH4pO%p+Cc5{q~ec>pjE7E*VNZw(jG7SN0s+PhlKC&(FrVE)D+eiRg>~ z(cj$pwYKB$Zp8E#IFhC}tXj~fx;wwt=UEE=x4brVIBr==@aoN2k9*vV?MCQ{>xZ#l zOvO_T{U>}!^u!Q8N40Of-$C?MaHV0<`>geQ;aibM?0Uw}rucpg__f4;ocN~qFK7nx z59-jR*suEk+)(_zhQaf^m(mY3zm1c9gI-l#>^a+S$+q)USM>&v0|?}`GNlZ z$6&9fM^f7kHtS<;DXPC2;TAsS66`P-V^8?XG+3s%0d}mWU^hA-5 z&hrs`V?SRzy64)qV~e8f^YHeMik#y(py)RVG(-oU#ZGeKo7Q)Ol35R1A4;a1=FbOZ zpPl$7{sYaAo)^4~r+nr04*ten{>Xat_wJ9NVeQXX{`uF^*15Z_hx6cSF(17gN9TWS z^7Yh2;iJy=Xgx>8owa_uFp9?cSAXYHmajFB_xW(X)_LUbKu=r{&2haO?)sZvM@T&U zy6tFpjfuasG{8sy!~S9Y0D633`x5OR%@4X{@jT`Hsq?H`C)ax<-_kJjzOL_Nt&jZ? zXFS@^HwouR@svI5mEfL@BySLQaL#z98x=Rx`bPlrVN*N7&-bOpM>QUP+{K!Q^{KNQ zpGm|yD!+#J(Clj_`>5=}iH4n=<(Ju@`-#_aF=ew*IfA4S_ z|LuhbT3@&Ha(*rneKef;&O_p-2&BhUTI)w+g|9pLwa^bkv0D?6e;{_F6W;`{^*K%K zOy|Gtq%!TqZ`lp{iP-Ok!Euh`I3LJUpWb9l`n13J z3!+OrpMTF&ozgzL=fTQ3%XuEpVAioA>rS+KOgxaDQ$Qin?--CNK1EDiTUEwZ-EEyekAsf)`z_S|J#4~ku-ge z5kEyU>UXpM2SY1}e&m!tI<527GoFIMS{}5<`MlkV4xXCV zqiiFIpSs5vSdWwZon9rz7Yy@H<%ei!_|v)p2O6iA6a6{{@j;O*hQa%F4E#;%KH8B^ zr)b?AzXLig$iTQgg~K;tQ;g2wsn* z9Gw6r`{u+0jc*+#k1LpTzO%>H4>TR`7JH${qxHE}r#(8~?IHMGeB#*NyeRtyxuEm| ze*RD^Psq@J&4C}6fp6}9f3~*~KUH~Ldb{6}^?*+A;P+=gT{s;p^H%<{r#`3I<*d!czq1~5Vjt|d85ZrH`NGsNf+jCS=cYB z`&cXV6T$nZ@*r6+t3o-WpJ@E)me+aTls}Q>H0V+7ZH?U@Ze3FTglDpzQ0iE^(nvE9 zwoLLtL8k`n?ZQyz18W7ZgXp7`%c{-l7VEliVrQ}bhJ1>fvBddm?)**gTGLUxF#2jp z`M!Gx3V(E-kHmx7PDLCeco^ryjuU;E{Y=MKs~d(UIXgZbRWzY{~- zmXt55yGQ&x&Uy6jE4j`7w?Ml1Lcy5cRqeb0H+!4)Em`+q_#Z?+0A3ffw`D!7*R-W~ z?pakd@V4NaVYtmxWR@RRf*v2(oBrCT60Ay#z3M&>%b@@o_vG?T8(Rq8IQLEBW^bsh z=rPs7FGt6dPVbG9Iu7T)>EF*0d!Ygy`Zo4*+k1u2c+X=PyzUED9i`?&H%F2VGc^_a z$7Py0E2WeU$O1Hho46ybisd3z@*nmh|c94wLT9TV?GV^Br5S()ryh3OxK=u(OU1 
zCY43oWk?OdYw(@Xv%nwQvVN9`c-Ek%G~k1wyT_J)!a>$ z+x=GNk^3h>&P4J&_lp0zF7^9+biuYbn?HN|DMasOzYFkS)d1?7cC*?~9i)!MiEsKm zduC&w1L=hsHzvN9;^)doCGUyzHlV-NisAg@E`kTnc?|tmh4T=LLP@(v+l##*cp!Lc z>mhocImok9er2;*Ku;(!*Ke56Gxn`m{*Lxr{%+TA zx4d}$$uCmRSo_9!;KVoW7u(>+bQnDX^%xRgaN?V$_r{1X>?}it%RTk8uiEy48^7lK zH2D3i#L|L}D)H%k!2@|Nn_ugBp2&nhu`LDl%5NI;x?R_%!SE64z{kd}SN2z0XJTRCS!}F`UneXvhWe2RhG3|BkY=*w0a&>&>=A z%K8m&KFWF4=RI7O)juLihU#L zPa%BXDHlYqVt;ruTKo9M(?30R$iizke!%soqDRG3GZoG%cuwqPth1P1qWR7v+03s) z!)SQQql3cJ7Vtbd1F?S=9vNJ(`-4aK-S`UmVuhYN=XF|sOFOSq&zI>(s`xhkwL0Oh z=Og}jowxOo$j=1sn{bc2US*hn)P?b{N)(OW*l=^3FYP*4_AA?V7yAzi5TAn{KWHSc z$Ewql^AY)pddx1+6GMWjlq#=Z^sxJSq`uJ`-&jv|$2Y+PEmv;!pu;nljC!$wdtQLZ zZO-?X_;x((G?80Qyw-G6CY|YLXaoiH9vA<4LtAdU;Tz|PKo2<@M`l}#dlY}v=OA`t zF?`eVRMqY3`KpCb_g}{DA9a_zel2=D=er1hR{p9KQqVrHrxrMufZX<_5#N?oB^eUWa?j_}d4o@g9@3NadJagu4)YZ2gQ2!Y6+i+TZTRGTId-)TK7~+Z$-!_-xR_8bd>0$^*H~a-ZATU|E>4h_4RDuBW`vimTsA4{bRrf45!|!<(SHU zW4)>!W$rmJtxPvt&bh_W84o1hIH<%pt+q=OC7-7O4Ia>a%hQn(&uRzy6**}boKJJY z!_NhNnKP88MCNRv9wK*v*Q2LEZiI5a(F@}5ZAKlv{(MgXlP_tqZ{fuU+`)#u@O3cc9qhQ+0ME zHglav(?JUAg9Btg6R3)EQw;Afd8*ENgnn2*=s;hbHVMeK8(ELQeyhXhI)?(pgQf0J z)xDc-Jw}L~1v)L3r>fKx5-+M%|7?WR|0JEpey6DyRcS7|${F7V*KT3ioV zwY}@U@$+2Z{GHyAb1ME5uNJEt`I{9K1u{N#;9^pg#{R zu@9ioDbrsa((RreUdsEX@?8{Zsz0%F41@bL9_4zBL#W&J>F&kfARlsskE*;>QxR)! 
z=WU$7Qt{Pt4?}*a8NByR>_)SD;*=+SSx-L_&++#cL~nKS9lef+vUt9aL#f)+jj{$* zu=CQ~>}~D;zX*T)&FB*Q^J)EMgVa$rqxpF+#Qc@c@71KL4gPGgHD2oUU^m`DF0fq(Ju#wpiFly*P4$h?e5|M$ zC4LZo`sqK#f9+n^rt39T9=^u=ggDZ1?>?fxfqshBwP}Bc%75eh_+YHZmUC|U0iPS> z!swCYqpHz=-68sq*ul2nMgPuXv4g|u52YoxYiHZ1eBO9IL-^=v5AU@2Mx)SArt^#@(Z=bvt8cRnKV1!o+C&;5)?=M6q1`!fvr%3gTw9ml9Bbm~hVOnR$r z&0_~Ge!zI;YvCLF5l?xT??_%iZ)#yWieDG~1@u!S`hlId!SvXYj!&+)`cmYRJkNjh zr_gr&BB|qVU1EObJ*rb7=2qYoZm@t5xa+mGxm z7sNhgf4t;5#F1)dry2%`Kg78n(GRpgA})luU!h%x%=imBQFtx!67J^!yT1JJ68n1S z=Y1uM^|?VUsOql1%7q~FIPqHg(6Zkjc|z$iG_R`iT%I>@K4LFS!?+dyey;e*{n=la zgLq?!`SIen>{DWXyzCp(Ut{_(IIw+_IMMfWHGpHzt7u4mHx z?+*3o9H)i9O08#+88-TldUQ=cD&K|mQKkQ$`0&|RJAVZFahD66#|8iBZ^KCErTj-9 zXE`6vD``Mmo*OuQvOsmO5uC= zB>Zs1jyJmPoAw)R!+KO*j-meqf1LBkdNhC89m4$rR6iB|o)ZrwUrgsYbb}w6qUqT} z-&Zg8bI%vkeENOSM;p;Yx-pK_1G@W1^?VEDc@jufE0(JnBYXpNI!)}X5Vq%AfljYR z(R*f^>ZBtUA9a&IoR=wh-Sz)1e>DGDRiCbzZ7Ks>x$YasbI))enPaGPO&CPmRW@PK zN{N@a^HF~8HuTI(yMLbi=L%kr>K~`~wYIG<=)CE5$2^?3f%C~9OpBg4_iK~1W_UjF z!`b~*H9oD7_>UjmW0tY3fnO}gy?&1K*2O;UTVnm3#^=dm2iK>b|CFuQ`hj!S`MAkB z_8;Qh{YJvBcb9Y8zmtzTqxk{ohc9}IHy zU+@p5Q@4M)|AX*#ydKrhnDImWPJ9sMO8vG%uu3F3Q zxBv0XFb$?3!(8!A{Cu33CiId}V&4dj2hZYscDJK0r3Nd}D0ZfkA25G9=ixj^@mKZZ z`ryX|&s^}jFave2!8BL3%f4RN!gW1-F6ZHQvH1bpjpFBa&ZGSYJ7k_9`lhv+ktKOT zh-2I#|C;quiK7gHejw`s9u&*J*7}Un4>X;YYfTq-{HQ9yq~6TiUu78LuVTIfzoie6 zZWk~feEJUlD&g0xAJn408$N&4*dui9JRioF3&;xq-HKL^$q!4INi>D^%yS7%vvF#KZGr{kR4vAk3Lds&|PQ22&3OZNM}VDSUD zJQUXZp&#r=T|uVc11zRK5I^`R8{rUuetR{vf~&me-H=si!QN0lD4x{ z-#q5a{G^|rT*Jkk$E z>v5f-Ul$IbUw$8uu%M&(H$^`XJ1d;yb5p?&Duhzmf9~D;#1h+2&hbV&?}78krEj>@ z1A0oVcNTo(eeGRlwypHV^FhuQ>nEi7fbuJI{J9FL;OX+2=MGzV;FeFp_6hJ{ zP;ZV0R~5NcpXV!5H1|F{+8 zn$!cj*PCfQ{)*@aVI|g^>2p#2M|qxFz-#gQyT>(oUC?_Y`ji;glzn5qQ;$@&nR>H| z)KPl(sW0+Ke^};J$47zsGkN88hzL+R+*#zpW8zPn* z*JS%N68Yr7xBqzmg7(KspZc73CBA<_pU-OKMXUF0sp>-27r1|r z$e(DoqYsO|Bl?KgS>dejs=j%ePK*3Wb(cQTr%cIt(Yjsm{o;2`;C#?Jg8w+5V*W(E zo;i{q(vVgiNmiX(ZTZ9U-1c8F{uOXvsqwO&c+R&x<~2{TJ~Db=RDG7FqwG*xa#w{O 
z@8zHlOy#LCU5b8T=LK+IddO4Nr@o}T0O32@FLJ_mI zp0wfv{G255IlF%^|6ViN{;g^R7$|h%q#w~oxqqV6H^w7xLC(jy9=%@$=o|iJ80m2E zP&+=S`RE<`Ik28khxu=C9QsO$-V3}g*3Uuzj;deK`&})ZrnVZ9G^y!+Yv0`X8`F{C z$52vDJt`bb5Pk)Wc2FHUx~j*8b~AQZ`{t(i>bkaNqVFjEtAse9 z%){s1oXXtWJigIE;k)j6Y2rWN{fK8jQf-M(0}qP%$u*y@G!^nak`C^Ce#VWz-04)} zr`!(%exwB>X_R_P%9V-kc&+))i5TXmFNM*=gTH%iVe0>|Z|?qUe$SXM)8pDWu9sgY z^Emma=94S4`Tm?h%6{%e|83v&;IZMPQ}Orec@EZR`q>bT_x1_>IO$aH+u?B3U9@38 z^a`;robw1?>;B5eYB7I18jJT$x#P8#+xh8u{$RR2*sL&-eyV~8BKPe)6pqV)@5~&* z{p~*zIfwl#u=_#k^HK3r=EHt8|K0h16`I@mux|D#%SFV4H_3Yyg^%KVirJ@v*IM4q z@~8KeJLuRO?s|ySqia8(de4J8LiHD0Vm-RHORm8#>DY>*Rr+7%bm?E^F6Ww?ftkC+ zp8NmHIhN0L=gr|1Yvhqo>gV^(Z#|u-@RLQ|{}5_}q$-q_S5$FDF{} z@$VI*Q&|tsQ~@jGwm_2lkYG+7vlVI-lJo=j$@#EJ%-Gq<^C`s_gW#o9X*T&D!0pby-W6o zuwSYrHQUwdrqPsf3Tin^*?&QCrxAg9~wsUD|S?^ z^EUB3Iqv;XSpP--+s)xrNhb~pUz2!^{hk!TH@!cneYk$(Gto1hcrA6+obQc&`z;!H zAoal=d0oZcQ>^8Vs>fhIN*lWH`rX@nrku3)&5a-IFkcUt4*r%hnDbpqVZWqqsu(}e z^sekiJx^v!(y!r&5qr*^k8(br_;;F=SjVFIl6oJD$Y=eYm}851Jt{6^=s$T*yC0YM z{dv88={K`IUteF`k8>LK5Yvl~-H({@rVZzZoV9;fU$@>PZTG>@^|bBbM8|<=Q|ABKo8sBD!U#t!JM$MUe_BGQ$@?Z_ zJ^$g~VLL|hP264KHIEULXQuQ zhCi}2@Y)%d5x&FrTr-xZ{UjddU;nTP#0OpXUSOCvQkgPaXe#yvNqx z-24<=_fZFV(Q&A!m2(AuyMv!X@w@o)Etf_RUZ9jxdh@sX8#$FKSZFS3cfRFYsH7A22_z4}7)zZD_x8JgzH$d`bN_ z4D&Bn&=DURNxW|9=>u`@CjuD~!FD1OsNbgHid2e48n%M8e*q?e{vev`= zNbDLruaoIl{2{TFcfEY#GgX8S0v7XAXuMJVyfvM^)|#5G$bD{plpSYsliS=+NcdX} zncd&oH;{S?na}q3YWqj|4_MAA`D?oUBgK#F&JVaQ6nULKBjJY3J+MXs7?DR z`PN^#r!4;F#&9sap`#0Sme8Gi=#Qd z9n~wE#IASp15JmwGf`L3jwX(Nv+Uwsw)}Ck8+G3LHxe(2Cw^b8*xTR-#q35c-!=+A zXh};Hx4u&39;cPOG_DVpyz>N7U2)V4fCS%M)}#7@vYx2)_-s0wcbE7ZoOH_i!bOjM z{xs+z(J@qCy-wn#n(lhM{@z5?GsU&!emzU=I4%4CB(K2kf6aXi!AI3wR!i!CEqK88 zTz%612h~?c^H=rHzxim9NBWfL&!tX0B^K`obnjE5_5V*YVK+9VGCgCr%o_5Zh1YB! 
z+xsZ}95@eB?BGGP>3``T)cH&7Ch_k~;pbKT9Q1nLle#uP(rN3ZMc?yYk70;@&Gnkl zM=!@vU9;g}Pj~%VpMRO@OsAbypDeA1*!BYFKREq6Tt_T&n^12hdLr<;n14s>@s*Gl z5Ymjy@WQQyUt9a;7XM*CHsYGA{F(kXiJwB~$IeI6{N`XL+b_9Mq+a&1IVi%`KmUyf zhQa4`&cksGu^UG+{@ifiH;red+w(aEhM_Lj#shtRcj#Zmaa7NVASV+&04XM~-H!t&X+^QAAf^g(C75Jn-Y4SHCh@Kvm*dOG7_Lfe2Fj2P%ABELk>I_?pylvD6B{LFZL~GI(|3I${S?* ztH=50BLeZclfQ}IMbpnb!qMC(is3 z#t+Etgp%Jg#Op?%(;m$!FST)Q>6DInk3Y6BZ$w_uz%r@bhYT4q;2-s;{~2gxh=I5pQ@b^m%tf~f0A0MSYY@QmJ z+9EY5wPk9n)YhrNsclk2Qro76riLY}-%O1dlDWYAv4`?<%ZG-9MTUgssb9=3o0OfL zo|mfL{W@mS)UgxO^G4~*#W9%^CS^>RqW)VhebR)n z$$2aC1{wpa4lHFpO=ePNx|%7sY+}aDX?eMo#wBGWrKBZIPFDXUjcNs}f`%}P%kmpM)Sa#BWe;-sW8$&=Fa9!t+06Y_ujgR_AE literal 0 HcmV?d00001 diff --git a/data/y_test.pkl b/data/y_test.pkl new file mode 100644 index 0000000000000000000000000000000000000000..f4b88d3af41b3c120bcbb1e45c171932eb668330 GIT binary patch literal 3224 zcmZvfU1%It6vuaC65A%FB(_b1B@)}xB(~cmn~#kqW}A=Fb^=`qHa(TM-n*7lmqCK@kipB3MB!C>DGu(twJh*uMBu@67$(m=xzBzxkhY z?z!jQxqCS&l)ds&Wx#*js%axTVc4PZshk-~<&2Ce+|2`qV;u3{&xpq7#GGhwOB0!s zIpNl=51-6BW-e=_?NG+Z8Yj)1E!@h!^wjv1$CMWS@P;ute%Ho<6Xr9f9ZDLuoNh}Z zYtAHMS!t64tO~bGzxxwU$YO-MBb(1m&#E#r`Lr`>C9v*(^ zmVEp_@m@zHm&Rn~1bQGD67h? zUbmjLz=}jFHUcNGHFLG|(*Jc|8u^boom?V^7`-+))KZ-%d9 zA4d11e}t#7pGJR%UO{hzJ7FFEo9MUTRXBqG3-ovBztHcXqv#p<1iS)^#FfC~*uO!? 
z;GfvT@G|xRcmgiquSXw&-(yDxDtM{Iz7uW2O6=F+NAOIK0;g^{)BxSdISGTJE zrq1rQ`u+8%TGaSr2mTOjg5+5ZGfwMP=_c`D{ zU2FM#S?FcFUT=iBHtbF8M+81d9gQ2~mOEMBKB)J-jkxW~UW<+%CEq^$yQx>t+p?zB z{_SC2y+8W<>+{$}9(_Jqs9%5Iz0{?@59hY<0OR!ck1?+vzlS_JUY|40Z=ivAeIE4r z*WZ`@FVP-;@}=e8L0&E-!U>AdF{ zbT`TFJ|TUwgWr_QPs;u}D__g(wAb-^9kkardz0pkyzMu@3E4f%e-*s$T>smU$e51d vx9eWFY&eemhrp{OkxCmU{nr{Jot~Pp6BGGqnb~m6MA}H2XJbgxDiWw#*Mf!8crlFb8&BMVtDsXtu3c{)sSlcnb{3T!u!HWPsm8B z?iL2);l07WxU#7R(M4vl{O@!;qJpu^74h`=z zxn|K7)~;QOG0R2K{C9CiEEkPV@)ptKoI<3Ccf)3n?!_ z$HWgjAAXba6YvM{8p)3)jN^wDY20N1sH$f<6H6#J(Q( z!zvy!kzGK_ze65_1n>RppU?(;2g@|K_}rpcq!#?piiK` z0I$Tp27N2sjeQUL33w3u1iB9W9=Zp;3VlBMefS`}i}G91i{W~>8r}`R4sW2{5d0nX zAoRh%!hO{H3j87VYWM*5!|*xmXQBU%z70JO?Lt2VN3g#M@5BBg`fl_+=nyQ!{#*1$ z*Z@o6BI<2}Kga$P^eXgG^fY=Nx)gpJ`}fgTqr2c?n1OFnZvlEM`b8MPUWYyt{S830wl7q5ijEEqobXPrXOc z&FHysOziL=cHnxHf8K#!>fMe$2Ym{j4ezCVKD-CU;YHLt5B)myV;@8>fPcfj1a@G* z5dBs3GI$dEcJ%x35cVeYH0*{SP~HM}V{byALf?yi1pNs5L-h0TA?$a-cGw6nq~4#< z|AZ^x2FiZ{Z^OP^{KLnvA3&GGTi^w-2>Ri1+Fb>2grk%Xqf_V$(9fcGz~5m1HF^sD zF?u=rVYm%`37$#4i(w8%D7T_(%{gPUZtb1_b8C00Yx>TRy6T%no2-<5O|R6~_3)4t zZ9ILo_SsLaGvLtI95|I%&wwv*b5=d>R_F`po;T=vxd3{(F3)bn-UOQq?Hzyl7V0+# zBF?j&{_1JpNc|4z*Ky>fU)r7AOuJV6__g0m6Ph@Dz34XlSHfCYg@3Q686fU<>@~2C zxcgv>_B-vPyp{gF^j8f(%{aP=w;j8Wei(1I7Jn_o<-uQv_P>~MOf!F(HsaU-`?SAg z4}QDpmvOsW=)aEsw`jkG^$lQ`aW~_K^~;uOe}mg}J97*1b<=-2^&8QC?eAoO`SBLw zblFeFPdu%#hH=Y$60hvXTvuU!oNY7BKA2`-4${v6^OBLcWuDd*#wYvPiNocg-*tug z(Ef=l*GBvj7yV^x0=oT1=9zUG=)qqN<7%K@HM$;MfnWR&cquO>PUhd&KwRuES3vt8 z+(f@!+Mk+Fm;D4-#|l|r+4q#YWFIu?`fB|8yu0zox<%xCb{6{4=U4p5Jji~kWW9T- z&w0w1(T~h$3*%w^bIp|ZF(0L{N&Cr)KlW9ok$B~tNIaaMitV&N-Ji}g#52Hrx$yh) zX5x|g5WkW~w&Jf+_nRZH`DC9np8RUo%}?C5jH?Da<5783^6RX9n347EqkVf}oG#}P zJM%Lj>nX4gD@2iOHZ&`QAYmzq^zdHZUK2YyNoew1+v%lT4-psGclg_?Y z`C9Lv)jFO`FY_wxBwsO}i0pfLFVk@rX6%Yft^$%pd3l6@xmZw>pQhq%aN>b)xGT(g1sA%DA1-)Ho^agL^&h?_i@lY4@f zeq{Z~M{1uqc_b_GNj^Nif1UME`9iNt9dXHfTiy?HzPayZWnRP|_pz+(Te+Wf;g|fU z-p9H>&Y{vS=aKv_`9jxtB7ojNEHQ 
z%X?n#Ax#12Y8{zzwT?E={v2*!tow`9)%uVXZL=@dhPqHMiGD(TtdA~>E|0FXKetES zGe5SkQ=iYH<@Sejxr7`0EwiN2Z#C_{^F^CgBW(#4PM!_T;jgEUIp2PoEEJhAf`_bKgH5AgVCal7K8_c$7jUi_gIKIzb6 zcI-LyrDpmbGhQpItc#^Bry$o+UcXUuNQJ~jxA{NL8r0O2-u3?_qGB*|v9X{n_3Jvu z)-y~a54Z>6O+H3AhZ~Xn|@L^hg`u6+R z5XF{j7iZ```iE|dn56AAIz$)kpd~6WCN|;{O+&1SYhuIeDK?kYyy5i`x5RBP12GBR z!aln0-DLZIvc0en(9BWvTsQL`F*pv#-M1hcfAQF@lE)a`k&jlhrlct=g0Ych?gy2& zd|$q91C0rSGnye8)_dgCysY9Df@w&t7_22*`c7COVH)5DNLlY0Hfd(`GFZ+eWLz3N IGEJ8J1!HzSHUIzs literal 0 HcmV?d00001 From cd8d36c3a4961245677e1d9b9b366cfc8ffda591 Mon Sep 17 00:00:00 2001 From: SergeyZ06 Date: Mon, 10 May 2021 13:07:10 +0300 Subject: [PATCH 2/3] Supplementary tasks have been solved --- 5th_task.ipynb | 2372 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 2372 insertions(+) create mode 100644 5th_task.ipynb diff --git a/5th_task.ipynb b/5th_task.ipynb new file mode 100644 index 0000000..a1d2b4f --- /dev/null +++ b/5th_task.ipynb @@ -0,0 +1,2372 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "cdbabd90", + "metadata": {}, + "source": [ + "### *Дополнительные задания:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "5b166c46", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "id": "4dc0a817", + "metadata": {}, + "source": [ + "1). Загрузите датасет Wine из встроенных датасетов sklearn.datasets с помощью функции load_wine в переменную data." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "82aa9de9", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.datasets import load_wine" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "07c48f45", + "metadata": {}, + "outputs": [], + "source": [ + "data = load_wine()" + ] + }, + { + "cell_type": "markdown", + "id": "aa8ff8b5", + "metadata": {}, + "source": [ + "2). Полученный датасет не является датафреймом. 
Это структура данных, имеющая ключи аналогично словарю. Просмотрите тип данных этой структуры данных и создайте список data_keys, содержащий ее ключи." + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "06bff2ae", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "sklearn.utils.Bunch" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "c581a8dc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names'])" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data_keys = data.keys()\n", + "data_keys" + ] + }, + { + "cell_type": "markdown", + "id": "7fde60f3", + "metadata": {}, + "source": [ + "3). Просмотрите данные, описание и названия признаков в датасете. Описание нужно вывести в виде привычного, аккуратно оформленного текста, без обозначений переноса строки, но с самими переносами и т.д." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "2c2d7986", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1.423e+01, 1.710e+00, 2.430e+00, ..., 1.040e+00, 3.920e+00,\n", + " 1.065e+03],\n", + " [1.320e+01, 1.780e+00, 2.140e+00, ..., 1.050e+00, 3.400e+00,\n", + " 1.050e+03],\n", + " [1.316e+01, 2.360e+00, 2.670e+00, ..., 1.030e+00, 3.170e+00,\n", + " 1.185e+03],\n", + " ...,\n", + " [1.327e+01, 4.280e+00, 2.260e+00, ..., 5.900e-01, 1.560e+00,\n", + " 8.350e+02],\n", + " [1.317e+01, 2.590e+00, 2.370e+00, ..., 6.000e-01, 1.620e+00,\n", + " 8.400e+02],\n", + " [1.413e+01, 4.100e+00, 2.740e+00, ..., 6.100e-01, 1.600e+00,\n", + " 5.600e+02]])" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.data" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "3eb94690", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ".. 
_wine_dataset:\n", + "\n", + "Wine recognition dataset\n", + "------------------------\n", + "\n", + "**Data Set Characteristics:**\n", + "\n", + " :Number of Instances: 178 (50 in each of three classes)\n", + " :Number of Attributes: 13 numeric, predictive attributes and the class\n", + " :Attribute Information:\n", + " \t\t- Alcohol\n", + " \t\t- Malic acid\n", + " \t\t- Ash\n", + "\t\t- Alcalinity of ash \n", + " \t\t- Magnesium\n", + "\t\t- Total phenols\n", + " \t\t- Flavanoids\n", + " \t\t- Nonflavanoid phenols\n", + " \t\t- Proanthocyanins\n", + "\t\t- Color intensity\n", + " \t\t- Hue\n", + " \t\t- OD280/OD315 of diluted wines\n", + " \t\t- Proline\n", + "\n", + " - class:\n", + " - class_0\n", + " - class_1\n", + " - class_2\n", + "\t\t\n", + " :Summary Statistics:\n", + " \n", + " ============================= ==== ===== ======= =====\n", + " Min Max Mean SD\n", + " ============================= ==== ===== ======= =====\n", + " Alcohol: 11.0 14.8 13.0 0.8\n", + " Malic Acid: 0.74 5.80 2.34 1.12\n", + " Ash: 1.36 3.23 2.36 0.27\n", + " Alcalinity of Ash: 10.6 30.0 19.5 3.3\n", + " Magnesium: 70.0 162.0 99.7 14.3\n", + " Total Phenols: 0.98 3.88 2.29 0.63\n", + " Flavanoids: 0.34 5.08 2.03 1.00\n", + " Nonflavanoid Phenols: 0.13 0.66 0.36 0.12\n", + " Proanthocyanins: 0.41 3.58 1.59 0.57\n", + " Colour Intensity: 1.3 13.0 5.1 2.3\n", + " Hue: 0.48 1.71 0.96 0.23\n", + " OD280/OD315 of diluted wines: 1.27 4.00 2.61 0.71\n", + " Proline: 278 1680 746 315\n", + " ============================= ==== ===== ======= =====\n", + "\n", + " :Missing Attribute Values: None\n", + " :Class Distribution: class_0 (59), class_1 (71), class_2 (48)\n", + " :Creator: R.A. 
Fisher\n", + " :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)\n", + " :Date: July, 1988\n", + "\n", + "This is a copy of UCI ML Wine recognition datasets.\n", + "https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data\n", + "\n", + "The data is the results of a chemical analysis of wines grown in the same\n", + "region in Italy by three different cultivators. There are thirteen different\n", + "measurements taken for different constituents found in the three types of\n", + "wine.\n", + "\n", + "Original Owners: \n", + "\n", + "Forina, M. et al, PARVUS - \n", + "An Extendible Package for Data Exploration, Classification and Correlation. \n", + "Institute of Pharmaceutical and Food Analysis and Technologies,\n", + "Via Brigata Salerno, 16147 Genoa, Italy.\n", + "\n", + "Citation:\n", + "\n", + "Lichman, M. (2013). UCI Machine Learning Repository\n", + "[https://archive.ics.uci.edu/ml]. Irvine, CA: University of California,\n", + "School of Information and Computer Science. \n", + "\n", + ".. topic:: References\n", + "\n", + " (1) S. Aeberhard, D. Coomans and O. de Vel, \n", + " Comparison of Classifiers in High Dimensional Settings, \n", + " Tech. Rep. no. 92-02, (1992), Dept. of Computer Science and Dept. of \n", + " Mathematics and Statistics, James Cook University of North Queensland. \n", + " (Also submitted to Technometrics). \n", + "\n", + " The data was used with many others for comparing various \n", + " classifiers. The classes are separable, though only RDA \n", + " has achieved 100% correct classification. \n", + " (RDA : 100%, QDA 99.4%, LDA 98.9%, 1NN 96.1% (z-transformed data)) \n", + " (All results using the leave-one-out technique) \n", + "\n", + " (2) S. Aeberhard, D. Coomans and O. de Vel, \n", + " \"THE CLASSIFICATION PERFORMANCE OF RDA\" \n", + " Tech. Rep. no. 92-01, (1992), Dept. of Computer Science and Dept. of \n", + " Mathematics and Statistics, James Cook University of North Queensland. 
\n", + " (Also submitted to Journal of Chemometrics).\n", + "\n" + ] + } + ], + "source": [ + "for line in data.DESCR.split('\\n'):\n", + " print(line)" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "8442a37e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['alcohol',\n", + " 'malic_acid',\n", + " 'ash',\n", + " 'alcalinity_of_ash',\n", + " 'magnesium',\n", + " 'total_phenols',\n", + " 'flavanoids',\n", + " 'nonflavanoid_phenols',\n", + " 'proanthocyanins',\n", + " 'color_intensity',\n", + " 'hue',\n", + " 'od280/od315_of_diluted_wines',\n", + " 'proline']" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.feature_names" + ] + }, + { + "cell_type": "markdown", + "id": "7e607194", + "metadata": {}, + "source": [ + "4). Сколько классов содержит целевая переменная датасета? Выве\n", + "дите названия классов." + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "1b9a5e70", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['class_0', 'class_1', 'class_2'], dtype='\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + 
" \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
alcoholmalic_acidashalcalinity_of_ashmagnesiumtotal_phenolsflavanoidsnonflavanoid_phenolsproanthocyaninscolor_intensityhueod280/od315_of_diluted_winesproline
014.231.712.4315.6127.02.803.060.282.295.641.043.921065.0
113.201.782.1411.2100.02.652.760.261.284.381.053.401050.0
213.162.362.6718.6101.02.803.240.302.815.681.033.171185.0
314.371.952.5016.8113.03.853.490.242.187.800.863.451480.0
413.242.592.8721.0118.02.802.690.391.824.321.042.93735.0
\n", + "" + ], + "text/plain": [ + " alcohol malic_acid ash alcalinity_of_ash magnesium total_phenols \\\n", + "0 14.23 1.71 2.43 15.6 127.0 2.80 \n", + "1 13.20 1.78 2.14 11.2 100.0 2.65 \n", + "2 13.16 2.36 2.67 18.6 101.0 2.80 \n", + "3 14.37 1.95 2.50 16.8 113.0 3.85 \n", + "4 13.24 2.59 2.87 21.0 118.0 2.80 \n", + "\n", + " flavanoids nonflavanoid_phenols proanthocyanins color_intensity hue \\\n", + "0 3.06 0.28 2.29 5.64 1.04 \n", + "1 2.76 0.26 1.28 4.38 1.05 \n", + "2 3.24 0.30 2.81 5.68 1.03 \n", + "3 3.49 0.24 2.18 7.80 0.86 \n", + "4 2.69 0.39 1.82 4.32 1.04 \n", + "\n", + " od280/od315_of_diluted_wines proline \n", + "0 3.92 1065.0 \n", + "1 3.40 1050.0 \n", + "2 3.17 1185.0 \n", + "3 3.45 1480.0 \n", + "4 2.93 735.0 " + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x.head()" + ] + }, + { + "cell_type": "markdown", + "id": "38a4a1cf", + "metadata": {}, + "source": [ + "6). Выясните размер датафрейма X и установите, имеются ли в нем пропущенные значения." + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "213659ec", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(178, 13)" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "1a8083ca", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
alcoholmalic_acidashalcalinity_of_ashmagnesiumtotal_phenolsflavanoidsnonflavanoid_phenolsproanthocyaninscolor_intensityhueod280/od315_of_diluted_winesproline
count178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000
mean13.0006182.3363482.36651719.49494499.7415732.2951122.0292700.3618541.5908995.0580900.9574492.611685746.893258
std0.8118271.1171460.2743443.33956414.2824840.6258510.9988590.1244530.5723592.3182860.2285720.709990314.907474
min11.0300000.7400001.36000010.60000070.0000000.9800000.3400000.1300000.4100001.2800000.4800001.270000278.000000
25%12.3625001.6025002.21000017.20000088.0000001.7425001.2050000.2700001.2500003.2200000.7825001.937500500.500000
50%13.0500001.8650002.36000019.50000098.0000002.3550002.1350000.3400001.5550004.6900000.9650002.780000673.500000
75%13.6775003.0825002.55750021.500000107.0000002.8000002.8750000.4375001.9500006.2000001.1200003.170000985.000000
max14.8300005.8000003.23000030.000000162.0000003.8800005.0800000.6600003.58000013.0000001.7100004.0000001680.000000
\n", + "
" + ], + "text/plain": [ + " alcohol malic_acid ash alcalinity_of_ash magnesium \\\n", + "count 178.000000 178.000000 178.000000 178.000000 178.000000 \n", + "mean 13.000618 2.336348 2.366517 19.494944 99.741573 \n", + "std 0.811827 1.117146 0.274344 3.339564 14.282484 \n", + "min 11.030000 0.740000 1.360000 10.600000 70.000000 \n", + "25% 12.362500 1.602500 2.210000 17.200000 88.000000 \n", + "50% 13.050000 1.865000 2.360000 19.500000 98.000000 \n", + "75% 13.677500 3.082500 2.557500 21.500000 107.000000 \n", + "max 14.830000 5.800000 3.230000 30.000000 162.000000 \n", + "\n", + " total_phenols flavanoids nonflavanoid_phenols proanthocyanins \\\n", + "count 178.000000 178.000000 178.000000 178.000000 \n", + "mean 2.295112 2.029270 0.361854 1.590899 \n", + "std 0.625851 0.998859 0.124453 0.572359 \n", + "min 0.980000 0.340000 0.130000 0.410000 \n", + "25% 1.742500 1.205000 0.270000 1.250000 \n", + "50% 2.355000 2.135000 0.340000 1.555000 \n", + "75% 2.800000 2.875000 0.437500 1.950000 \n", + "max 3.880000 5.080000 0.660000 3.580000 \n", + "\n", + " color_intensity hue od280/od315_of_diluted_wines proline \n", + "count 178.000000 178.000000 178.000000 178.000000 \n", + "mean 5.058090 0.957449 2.611685 746.893258 \n", + "std 2.318286 0.228572 0.709990 314.907474 \n", + "min 1.280000 0.480000 1.270000 278.000000 \n", + "25% 3.220000 0.782500 1.937500 500.500000 \n", + "50% 4.690000 0.965000 2.780000 673.500000 \n", + "75% 6.200000 1.120000 3.170000 985.000000 \n", + "max 13.000000 1.710000 4.000000 1680.000000 " + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "f5c9e3a7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 178 entries, 0 to 177\n", + "Data columns (total 13 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- 
----- \n", + " 0 alcohol 178 non-null float64\n", + " 1 malic_acid 178 non-null float64\n", + " 2 ash 178 non-null float64\n", + " 3 alcalinity_of_ash 178 non-null float64\n", + " 4 magnesium 178 non-null float64\n", + " 5 total_phenols 178 non-null float64\n", + " 6 flavanoids 178 non-null float64\n", + " 7 nonflavanoid_phenols 178 non-null float64\n", + " 8 proanthocyanins 178 non-null float64\n", + " 9 color_intensity 178 non-null float64\n", + " 10 hue 178 non-null float64\n", + " 11 od280/od315_of_diluted_wines 178 non-null float64\n", + " 12 proline 178 non-null float64\n", + "dtypes: float64(13)\n", + "memory usage: 18.2 KB\n" + ] + } + ], + "source": [ + "x.info()" + ] + }, + { + "cell_type": "markdown", + "id": "dfa52f83", + "metadata": {}, + "source": [ + "7). Добавьте в датафрейм поле с классами вин в виде чисел, имеющих тип данных numpy.int64. Название поля - 'target'." + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "id": "8a03f527", + "metadata": {}, + "outputs": [], + "source": [ + "x['target'] = pd.Series(data = data.target, dtype = 'int64')" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "id": "0e5a5b85", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 178 entries, 0 to 177\n", + "Data columns (total 14 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 alcohol 178 non-null float64\n", + " 1 malic_acid 178 non-null float64\n", + " 2 ash 178 non-null float64\n", + " 3 alcalinity_of_ash 178 non-null float64\n", + " 4 magnesium 178 non-null float64\n", + " 5 total_phenols 178 non-null float64\n", + " 6 flavanoids 178 non-null float64\n", + " 7 nonflavanoid_phenols 178 non-null float64\n", + " 8 proanthocyanins 178 non-null float64\n", + " 9 color_intensity 178 non-null float64\n", + " 10 hue 178 non-null float64\n", + " 11 od280/od315_of_diluted_wines 178 non-null float64\n", + " 
12 proline 178 non-null float64\n", + " 13 target 178 non-null int64 \n", + "dtypes: float64(13), int64(1)\n", + "memory usage: 19.6 KB\n" + ] + } + ], + "source": [ + "x.info()" + ] + }, + { + "cell_type": "markdown", + "id": "4b1f744b", + "metadata": {}, + "source": [ + "8). Постройте матрицу корреляций для всех полей X. Дайте полученному датафрейму название X_corr." + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "id": "567874ea", + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import seaborn as sn" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "id": "bb009194", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
alcoholmalic_acidashalcalinity_of_ashmagnesiumtotal_phenolsflavanoidsnonflavanoid_phenolsproanthocyaninscolor_intensityhueod280/od315_of_diluted_winesprolinetarget
alcohol1.000.000.00-0.310.000.000.000.000.000.550.000.000.64-0.33
malic_acid0.001.000.000.000.00-0.34-0.410.000.000.00-0.56-0.370.000.44
ash0.000.001.000.440.000.000.000.000.000.000.000.000.000.00
alcalinity_of_ash-0.310.000.441.000.00-0.32-0.350.360.000.000.000.00-0.440.52
magnesium0.000.000.000.001.000.000.000.000.000.000.000.000.390.00
total_phenols0.00-0.340.00-0.320.001.000.86-0.450.610.000.430.700.50-0.72
flavanoids0.00-0.410.00-0.350.000.861.00-0.540.650.000.540.790.49-0.85
nonflavanoid_phenols0.000.000.000.360.00-0.45-0.541.00-0.370.000.00-0.50-0.310.49
proanthocyanins0.000.000.000.000.000.610.65-0.371.000.000.300.520.33-0.50
color_intensity0.550.000.000.000.000.000.000.000.001.00-0.52-0.430.320.00
hue0.00-0.560.000.000.000.430.540.000.30-0.521.000.570.00-0.62
od280/od315_of_diluted_wines0.00-0.370.000.000.000.700.79-0.500.52-0.430.571.000.31-0.79
proline0.640.000.00-0.440.390.500.49-0.310.330.320.000.311.00-0.63
target-0.330.440.000.520.00-0.72-0.850.49-0.500.00-0.62-0.79-0.631.00
\n", + "
" + ], + "text/plain": [ + " alcohol malic_acid ash alcalinity_of_ash \\\n", + "alcohol 1.00 0.00 0.00 -0.31 \n", + "malic_acid 0.00 1.00 0.00 0.00 \n", + "ash 0.00 0.00 1.00 0.44 \n", + "alcalinity_of_ash -0.31 0.00 0.44 1.00 \n", + "magnesium 0.00 0.00 0.00 0.00 \n", + "total_phenols 0.00 -0.34 0.00 -0.32 \n", + "flavanoids 0.00 -0.41 0.00 -0.35 \n", + "nonflavanoid_phenols 0.00 0.00 0.00 0.36 \n", + "proanthocyanins 0.00 0.00 0.00 0.00 \n", + "color_intensity 0.55 0.00 0.00 0.00 \n", + "hue 0.00 -0.56 0.00 0.00 \n", + "od280/od315_of_diluted_wines 0.00 -0.37 0.00 0.00 \n", + "proline 0.64 0.00 0.00 -0.44 \n", + "target -0.33 0.44 0.00 0.52 \n", + "\n", + " magnesium total_phenols flavanoids \\\n", + "alcohol 0.00 0.00 0.00 \n", + "malic_acid 0.00 -0.34 -0.41 \n", + "ash 0.00 0.00 0.00 \n", + "alcalinity_of_ash 0.00 -0.32 -0.35 \n", + "magnesium 1.00 0.00 0.00 \n", + "total_phenols 0.00 1.00 0.86 \n", + "flavanoids 0.00 0.86 1.00 \n", + "nonflavanoid_phenols 0.00 -0.45 -0.54 \n", + "proanthocyanins 0.00 0.61 0.65 \n", + "color_intensity 0.00 0.00 0.00 \n", + "hue 0.00 0.43 0.54 \n", + "od280/od315_of_diluted_wines 0.00 0.70 0.79 \n", + "proline 0.39 0.50 0.49 \n", + "target 0.00 -0.72 -0.85 \n", + "\n", + " nonflavanoid_phenols proanthocyanins \\\n", + "alcohol 0.00 0.00 \n", + "malic_acid 0.00 0.00 \n", + "ash 0.00 0.00 \n", + "alcalinity_of_ash 0.36 0.00 \n", + "magnesium 0.00 0.00 \n", + "total_phenols -0.45 0.61 \n", + "flavanoids -0.54 0.65 \n", + "nonflavanoid_phenols 1.00 -0.37 \n", + "proanthocyanins -0.37 1.00 \n", + "color_intensity 0.00 0.00 \n", + "hue 0.00 0.30 \n", + "od280/od315_of_diluted_wines -0.50 0.52 \n", + "proline -0.31 0.33 \n", + "target 0.49 -0.50 \n", + "\n", + " color_intensity hue \\\n", + "alcohol 0.55 0.00 \n", + "malic_acid 0.00 -0.56 \n", + "ash 0.00 0.00 \n", + "alcalinity_of_ash 0.00 0.00 \n", + "magnesium 0.00 0.00 \n", + "total_phenols 0.00 0.43 \n", + "flavanoids 0.00 0.54 \n", + "nonflavanoid_phenols 0.00 0.00 \n", + 
"proanthocyanins 0.00 0.30 \n", + "color_intensity 1.00 -0.52 \n", + "hue -0.52 1.00 \n", + "od280/od315_of_diluted_wines -0.43 0.57 \n", + "proline 0.32 0.00 \n", + "target 0.00 -0.62 \n", + "\n", + " od280/od315_of_diluted_wines proline target \n", + "alcohol 0.00 0.64 -0.33 \n", + "malic_acid -0.37 0.00 0.44 \n", + "ash 0.00 0.00 0.00 \n", + "alcalinity_of_ash 0.00 -0.44 0.52 \n", + "magnesium 0.00 0.39 0.00 \n", + "total_phenols 0.70 0.50 -0.72 \n", + "flavanoids 0.79 0.49 -0.85 \n", + "nonflavanoid_phenols -0.50 -0.31 0.49 \n", + "proanthocyanins 0.52 0.33 -0.50 \n", + "color_intensity -0.43 0.32 0.00 \n", + "hue 0.57 0.00 -0.62 \n", + "od280/od315_of_diluted_wines 1.00 0.31 -0.79 \n", + "proline 0.31 1.00 -0.63 \n", + "target -0.79 -0.63 1.00 " + ] + }, + "execution_count": 102, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x_corr = x.corr()\n", + "x_corr = np.round(x_corr, 2)\n", + "x_corr[np.abs(x_corr) < 0.3] = 0\n", + "\n", + "x_corr" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "id": "73472f2b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 103, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize = (15, 10))\n", + "\n", + "sn.set(font_scale = 1.4)\n", + "\n", + "sn.heatmap(x_corr, annot = True, cmap = 'coolwarm', linewidth = 0.5)" + ] + }, + { + "cell_type": "markdown", + "id": "0023496e", + "metadata": {}, + "source": [ + "9). Создайте список high_corr из признаков, корреляция которых с полем target по абсолютному значению превышает 0.5 (причем, само поле target не должно входить в этот список)." + ] + }, + { + "cell_type": "code", + "execution_count": 114, + "id": "bf1eef64", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "alcalinity_of_ash 0.52\n", + "total_phenols -0.72\n", + "flavanoids -0.85\n", + "hue -0.62\n", + "od280/od315_of_diluted_wines -0.79\n", + "proline -0.63\n", + "Name: target, dtype: float64" + ] + }, + "execution_count": 114, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "high_corr = x_corr['target'][(np.abs(x_corr['target']) > 0.5) & (x_corr['target'].keys() != 'target')]\n", + "high_corr" + ] + }, + { + "cell_type": "markdown", + "id": "d9935f3c", + "metadata": {}, + "source": [ + "10). Удалите из датафрейма X поле с целевой переменной. Для всех признаков, названия которых содержатся в списке high_corr, вычислите квадрат их значений и добавьте в датафрейм X соответствующие поля с суффиксом '_2', добавленного к первоначальному названию признака. Итоговый датафрейм должен содержать все поля, которые, были в нем изначально, а также поля с признаками из списка high_corr, возведенными в квадрат. Выведите описание полей датафрейма X с помощью метода describe." + ] + }, + { + "cell_type": "code", + "execution_count": 157, + "id": "d5ff48b7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
alcoholmalic_acidashalcalinity_of_ashmagnesiumtotal_phenolsflavanoidsnonflavanoid_phenolsproanthocyaninscolor_intensityhueod280/od315_of_diluted_winesproline
014.231.712.4315.6127.02.803.060.282.295.641.043.921065.0
113.201.782.1411.2100.02.652.760.261.284.381.053.401050.0
213.162.362.6718.6101.02.803.240.302.815.681.033.171185.0
314.371.952.5016.8113.03.853.490.242.187.800.863.451480.0
413.242.592.8721.0118.02.802.690.391.824.321.042.93735.0
\n", + "
" + ], + "text/plain": [ + " alcohol malic_acid ash alcalinity_of_ash magnesium total_phenols \\\n", + "0 14.23 1.71 2.43 15.6 127.0 2.80 \n", + "1 13.20 1.78 2.14 11.2 100.0 2.65 \n", + "2 13.16 2.36 2.67 18.6 101.0 2.80 \n", + "3 14.37 1.95 2.50 16.8 113.0 3.85 \n", + "4 13.24 2.59 2.87 21.0 118.0 2.80 \n", + "\n", + " flavanoids nonflavanoid_phenols proanthocyanins color_intensity hue \\\n", + "0 3.06 0.28 2.29 5.64 1.04 \n", + "1 2.76 0.26 1.28 4.38 1.05 \n", + "2 3.24 0.30 2.81 5.68 1.03 \n", + "3 3.49 0.24 2.18 7.80 0.86 \n", + "4 2.69 0.39 1.82 4.32 1.04 \n", + "\n", + " od280/od315_of_diluted_wines proline \n", + "0 3.92 1065.0 \n", + "1 3.40 1050.0 \n", + "2 3.17 1185.0 \n", + "3 3.45 1480.0 \n", + "4 2.93 735.0 " + ] + }, + "execution_count": 157, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x = x.drop(labels = ['target'], axis = 1)\n", + "x.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 158, + "id": "6d8da08a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
alcalinity_of_ash_2total_phenols_2flavanoids_2hue_2od280/od315_of_diluted_wines_2proline_2
0243.367.84009.36361.081615.36641134225.0
1125.447.02257.61761.102511.56001102500.0
2345.967.840010.49761.060910.04891404225.0
3282.2414.822512.18010.739611.90252190400.0
4441.007.84007.23611.08168.5849540225.0
.....................
173420.252.82240.37210.40963.0276547600.0
174529.003.24000.56250.49002.4336562500.0
175400.002.52810.47610.34812.4336697225.0
176400.002.72250.46240.36002.6244705600.0
177600.254.20250.57760.37212.5600313600.0
\n", + "

178 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " alcalinity_of_ash_2 total_phenols_2 flavanoids_2 hue_2 \\\n", + "0 243.36 7.8400 9.3636 1.0816 \n", + "1 125.44 7.0225 7.6176 1.1025 \n", + "2 345.96 7.8400 10.4976 1.0609 \n", + "3 282.24 14.8225 12.1801 0.7396 \n", + "4 441.00 7.8400 7.2361 1.0816 \n", + ".. ... ... ... ... \n", + "173 420.25 2.8224 0.3721 0.4096 \n", + "174 529.00 3.2400 0.5625 0.4900 \n", + "175 400.00 2.5281 0.4761 0.3481 \n", + "176 400.00 2.7225 0.4624 0.3600 \n", + "177 600.25 4.2025 0.5776 0.3721 \n", + "\n", + " od280/od315_of_diluted_wines_2 proline_2 \n", + "0 15.3664 1134225.0 \n", + "1 11.5600 1102500.0 \n", + "2 10.0489 1404225.0 \n", + "3 11.9025 2190400.0 \n", + "4 8.5849 540225.0 \n", + ".. ... ... \n", + "173 3.0276 547600.0 \n", + "174 2.4336 562500.0 \n", + "175 2.4336 697225.0 \n", + "176 2.6244 705600.0 \n", + "177 2.5600 313600.0 \n", + "\n", + "[178 rows x 6 columns]" + ] + }, + "execution_count": 158, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x_2 = x[high_corr.keys()] ** 2\n", + "x_2 = pd.DataFrame(data = x_2.values, columns = list(high_corr.keys() + '_2'))\n", + "x_2" + ] + }, + { + "cell_type": "code", + "execution_count": 160, + "id": "73e17206", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
alcoholmalic_acidashalcalinity_of_ashmagnesiumtotal_phenolsflavanoidsnonflavanoid_phenolsproanthocyaninscolor_intensityhueod280/od315_of_diluted_winesprolinealcalinity_of_ash_2total_phenols_2flavanoids_2hue_2od280/od315_of_diluted_wines_2proline_2
014.231.712.4315.6127.02.803.060.282.295.641.043.921065.0243.367.84009.36361.081615.36641134225.0
113.201.782.1411.2100.02.652.760.261.284.381.053.401050.0125.447.02257.61761.102511.56001102500.0
213.162.362.6718.6101.02.803.240.302.815.681.033.171185.0345.967.840010.49761.060910.04891404225.0
314.371.952.5016.8113.03.853.490.242.187.800.863.451480.0282.2414.822512.18010.739611.90252190400.0
413.242.592.8721.0118.02.802.690.391.824.321.042.93735.0441.007.84007.23611.08168.5849540225.0
............................................................
17313.715.652.4520.595.01.680.610.521.067.700.641.74740.0420.252.82240.37210.40963.0276547600.0
17413.403.912.4823.0102.01.800.750.431.417.300.701.56750.0529.003.24000.56250.49002.4336562500.0
17513.274.282.2620.0120.01.590.690.431.3510.200.591.56835.0400.002.52810.47610.34812.4336697225.0
17613.172.592.3720.0120.01.650.680.531.469.300.601.62840.0400.002.72250.46240.36002.6244705600.0
17714.134.102.7424.596.02.050.760.561.359.200.611.60560.0600.254.20250.57760.37212.5600313600.0
\n", + "

178 rows × 19 columns

\n", + "
" + ], + "text/plain": [ + " alcohol malic_acid ash alcalinity_of_ash magnesium total_phenols \\\n", + "0 14.23 1.71 2.43 15.6 127.0 2.80 \n", + "1 13.20 1.78 2.14 11.2 100.0 2.65 \n", + "2 13.16 2.36 2.67 18.6 101.0 2.80 \n", + "3 14.37 1.95 2.50 16.8 113.0 3.85 \n", + "4 13.24 2.59 2.87 21.0 118.0 2.80 \n", + ".. ... ... ... ... ... ... \n", + "173 13.71 5.65 2.45 20.5 95.0 1.68 \n", + "174 13.40 3.91 2.48 23.0 102.0 1.80 \n", + "175 13.27 4.28 2.26 20.0 120.0 1.59 \n", + "176 13.17 2.59 2.37 20.0 120.0 1.65 \n", + "177 14.13 4.10 2.74 24.5 96.0 2.05 \n", + "\n", + " flavanoids nonflavanoid_phenols proanthocyanins color_intensity hue \\\n", + "0 3.06 0.28 2.29 5.64 1.04 \n", + "1 2.76 0.26 1.28 4.38 1.05 \n", + "2 3.24 0.30 2.81 5.68 1.03 \n", + "3 3.49 0.24 2.18 7.80 0.86 \n", + "4 2.69 0.39 1.82 4.32 1.04 \n", + ".. ... ... ... ... ... \n", + "173 0.61 0.52 1.06 7.70 0.64 \n", + "174 0.75 0.43 1.41 7.30 0.70 \n", + "175 0.69 0.43 1.35 10.20 0.59 \n", + "176 0.68 0.53 1.46 9.30 0.60 \n", + "177 0.76 0.56 1.35 9.20 0.61 \n", + "\n", + " od280/od315_of_diluted_wines proline alcalinity_of_ash_2 \\\n", + "0 3.92 1065.0 243.36 \n", + "1 3.40 1050.0 125.44 \n", + "2 3.17 1185.0 345.96 \n", + "3 3.45 1480.0 282.24 \n", + "4 2.93 735.0 441.00 \n", + ".. ... ... ... \n", + "173 1.74 740.0 420.25 \n", + "174 1.56 750.0 529.00 \n", + "175 1.56 835.0 400.00 \n", + "176 1.62 840.0 400.00 \n", + "177 1.60 560.0 600.25 \n", + "\n", + " total_phenols_2 flavanoids_2 hue_2 od280/od315_of_diluted_wines_2 \\\n", + "0 7.8400 9.3636 1.0816 15.3664 \n", + "1 7.0225 7.6176 1.1025 11.5600 \n", + "2 7.8400 10.4976 1.0609 10.0489 \n", + "3 14.8225 12.1801 0.7396 11.9025 \n", + "4 7.8400 7.2361 1.0816 8.5849 \n", + ".. ... ... ... ... 
\n", + "173 2.8224 0.3721 0.4096 3.0276 \n", + "174 3.2400 0.5625 0.4900 2.4336 \n", + "175 2.5281 0.4761 0.3481 2.4336 \n", + "176 2.7225 0.4624 0.3600 2.6244 \n", + "177 4.2025 0.5776 0.3721 2.5600 \n", + "\n", + " proline_2 \n", + "0 1134225.0 \n", + "1 1102500.0 \n", + "2 1404225.0 \n", + "3 2190400.0 \n", + "4 540225.0 \n", + ".. ... \n", + "173 547600.0 \n", + "174 562500.0 \n", + "175 697225.0 \n", + "176 705600.0 \n", + "177 313600.0 \n", + "\n", + "[178 rows x 19 columns]" + ] + }, + "execution_count": 160, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x = pd.concat([x, x_2], axis = 1)\n", + "x" + ] + }, + { + "cell_type": "code", + "execution_count": 161, + "id": "a087aca7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
alcoholmalic_acidashalcalinity_of_ashmagnesiumtotal_phenolsflavanoidsnonflavanoid_phenolsproanthocyaninscolor_intensityhueod280/od315_of_diluted_winesprolinealcalinity_of_ash_2total_phenols_2flavanoids_2hue_2od280/od315_of_diluted_wines_2proline_2
count178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.0000001.780000e+02
mean13.0006182.3363482.36651719.49494499.7415732.2951122.0292700.3618541.5908995.0580900.9574492.611685746.893258391.1428655.6570305.1100490.9686617.3221556.564591e+05
std0.8118271.1171460.2743443.33956414.2824840.6258510.9988590.1244530.5723592.3182860.2285720.709990314.907474133.6717752.9362944.2114410.4437983.5843165.558591e+05
min11.0300000.7400001.36000010.60000070.0000000.9800000.3400000.1300000.4100001.2800000.4800001.270000278.000000112.3600000.9604000.1156000.2304001.6129007.728400e+04
25%12.3625001.6025002.21000017.20000088.0000001.7425001.2050000.2700001.2500003.2200000.7825001.937500500.500000295.8400003.0363251.4521000.6123253.7540752.505010e+05
50%13.0500001.8650002.36000019.50000098.0000002.3550002.1350000.3400001.5550004.6900000.9650002.780000673.500000380.2500005.5460504.5582500.9312507.7284004.536045e+05
75%13.6775003.0825002.55750021.500000107.0000002.8000002.8750000.4375001.9500006.2000001.1200003.170000985.000000462.2500007.8400008.2657001.25440010.0489009.702250e+05
max14.8300005.8000003.23000030.000000162.0000003.8800005.0800000.6600003.58000013.0000001.7100004.0000001680.000000900.00000015.05440025.8064002.92410016.0000002.822400e+06
\n", + "
" + ], + "text/plain": [ + " alcohol malic_acid ash alcalinity_of_ash magnesium \\\n", + "count 178.000000 178.000000 178.000000 178.000000 178.000000 \n", + "mean 13.000618 2.336348 2.366517 19.494944 99.741573 \n", + "std 0.811827 1.117146 0.274344 3.339564 14.282484 \n", + "min 11.030000 0.740000 1.360000 10.600000 70.000000 \n", + "25% 12.362500 1.602500 2.210000 17.200000 88.000000 \n", + "50% 13.050000 1.865000 2.360000 19.500000 98.000000 \n", + "75% 13.677500 3.082500 2.557500 21.500000 107.000000 \n", + "max 14.830000 5.800000 3.230000 30.000000 162.000000 \n", + "\n", + " total_phenols flavanoids nonflavanoid_phenols proanthocyanins \\\n", + "count 178.000000 178.000000 178.000000 178.000000 \n", + "mean 2.295112 2.029270 0.361854 1.590899 \n", + "std 0.625851 0.998859 0.124453 0.572359 \n", + "min 0.980000 0.340000 0.130000 0.410000 \n", + "25% 1.742500 1.205000 0.270000 1.250000 \n", + "50% 2.355000 2.135000 0.340000 1.555000 \n", + "75% 2.800000 2.875000 0.437500 1.950000 \n", + "max 3.880000 5.080000 0.660000 3.580000 \n", + "\n", + " color_intensity hue od280/od315_of_diluted_wines proline \\\n", + "count 178.000000 178.000000 178.000000 178.000000 \n", + "mean 5.058090 0.957449 2.611685 746.893258 \n", + "std 2.318286 0.228572 0.709990 314.907474 \n", + "min 1.280000 0.480000 1.270000 278.000000 \n", + "25% 3.220000 0.782500 1.937500 500.500000 \n", + "50% 4.690000 0.965000 2.780000 673.500000 \n", + "75% 6.200000 1.120000 3.170000 985.000000 \n", + "max 13.000000 1.710000 4.000000 1680.000000 \n", + "\n", + " alcalinity_of_ash_2 total_phenols_2 flavanoids_2 hue_2 \\\n", + "count 178.000000 178.000000 178.000000 178.000000 \n", + "mean 391.142865 5.657030 5.110049 0.968661 \n", + "std 133.671775 2.936294 4.211441 0.443798 \n", + "min 112.360000 0.960400 0.115600 0.230400 \n", + "25% 295.840000 3.036325 1.452100 0.612325 \n", + "50% 380.250000 5.546050 4.558250 0.931250 \n", + "75% 462.250000 7.840000 8.265700 1.254400 \n", + "max 900.000000 
15.054400 25.806400 2.924100 \n", + "\n", + " od280/od315_of_diluted_wines_2 proline_2 \n", + "count 178.000000 1.780000e+02 \n", + "mean 7.322155 6.564591e+05 \n", + "std 3.584316 5.558591e+05 \n", + "min 1.612900 7.728400e+04 \n", + "25% 3.754075 2.505010e+05 \n", + "50% 7.728400 4.536045e+05 \n", + "75% 10.048900 9.702250e+05 \n", + "max 16.000000 2.822400e+06 " + ] + }, + "execution_count": 161, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "478bc498", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 679cb454270be3a54a91b4dfcc7a7c402b5e6cc7 Mon Sep 17 00:00:00 2001 From: SergeyZ06 Date: Sat, 15 May 2021 20:31:29 +0300 Subject: [PATCH 3/3] All files have been placed into folder "6th hometask" in order not to mix up this task with other ones --- 1th_task.ipynb => 6th hometask/1th_task.ipynb | 0 2th_task.ipynb => 6th hometask/2th_task.ipynb | 0 3th_task.ipynb => 6th hometask/3th_task.ipynb | 0 4th_task.ipynb => 6th hometask/4th_task.ipynb | 0 5th_task.ipynb => 6th hometask/5th_task.ipynb | 0 {data => 6th hometask/data}/x_test.pkl | Bin {data => 6th hometask/data}/x_train.pkl | Bin {data => 6th hometask/data}/y_test.pkl | Bin {data => 6th hometask/data}/y_train.pkl | Bin {score => 6th hometask/score}/scores.pkl | Bin 10 files changed, 0 insertions(+), 0 deletions(-) rename 1th_task.ipynb => 6th hometask/1th_task.ipynb (100%) rename 2th_task.ipynb => 6th hometask/2th_task.ipynb (100%) rename 3th_task.ipynb => 6th 
hometask/3th_task.ipynb (100%) rename 4th_task.ipynb => 6th hometask/4th_task.ipynb (100%) rename 5th_task.ipynb => 6th hometask/5th_task.ipynb (100%) rename {data => 6th hometask/data}/x_test.pkl (100%) rename {data => 6th hometask/data}/x_train.pkl (100%) rename {data => 6th hometask/data}/y_test.pkl (100%) rename {data => 6th hometask/data}/y_train.pkl (100%) rename {score => 6th hometask/score}/scores.pkl (100%) diff --git a/1th_task.ipynb b/6th hometask/1th_task.ipynb similarity index 100% rename from 1th_task.ipynb rename to 6th hometask/1th_task.ipynb diff --git a/2th_task.ipynb b/6th hometask/2th_task.ipynb similarity index 100% rename from 2th_task.ipynb rename to 6th hometask/2th_task.ipynb diff --git a/3th_task.ipynb b/6th hometask/3th_task.ipynb similarity index 100% rename from 3th_task.ipynb rename to 6th hometask/3th_task.ipynb diff --git a/4th_task.ipynb b/6th hometask/4th_task.ipynb similarity index 100% rename from 4th_task.ipynb rename to 6th hometask/4th_task.ipynb diff --git a/5th_task.ipynb b/6th hometask/5th_task.ipynb similarity index 100% rename from 5th_task.ipynb rename to 6th hometask/5th_task.ipynb diff --git a/data/x_test.pkl b/6th hometask/data/x_test.pkl similarity index 100% rename from data/x_test.pkl rename to 6th hometask/data/x_test.pkl diff --git a/data/x_train.pkl b/6th hometask/data/x_train.pkl similarity index 100% rename from data/x_train.pkl rename to 6th hometask/data/x_train.pkl diff --git a/data/y_test.pkl b/6th hometask/data/y_test.pkl similarity index 100% rename from data/y_test.pkl rename to 6th hometask/data/y_test.pkl diff --git a/data/y_train.pkl b/6th hometask/data/y_train.pkl similarity index 100% rename from data/y_train.pkl rename to 6th hometask/data/y_train.pkl diff --git a/score/scores.pkl b/6th hometask/score/scores.pkl similarity index 100% rename from score/scores.pkl rename to 6th hometask/score/scores.pkl