From baf5015fa156b876ca4d3b74322146cb36e73113 Mon Sep 17 00:00:00 2001
From: Ananya Gupta <145869907+ananyag309@users.noreply.github.com>
Date: Wed, 7 Aug 2024 00:02:49 +0530
Subject: [PATCH] Add files via upload
---
.../Startup Profit Prediction/50_Startups.csv | 51 +
.../Startup Profit Prediction/README.md | 45 +
.../Startup_Profit_Prediction.ipynb | 3313 +++++++++++++++++
3 files changed, 3409 insertions(+)
create mode 100644 Finacial Domain/Indian Startup Funding analysis/Startup Profit Prediction/50_Startups.csv
create mode 100644 Finacial Domain/Indian Startup Funding analysis/Startup Profit Prediction/README.md
create mode 100644 Finacial Domain/Indian Startup Funding analysis/Startup Profit Prediction/Startup_Profit_Prediction.ipynb
diff --git a/Finacial Domain/Indian Startup Funding analysis/Startup Profit Prediction/50_Startups.csv b/Finacial Domain/Indian Startup Funding analysis/Startup Profit Prediction/50_Startups.csv
new file mode 100644
index 00000000..14ffb860
--- /dev/null
+++ b/Finacial Domain/Indian Startup Funding analysis/Startup Profit Prediction/50_Startups.csv
@@ -0,0 +1,51 @@
+R&D Spend,Administration,Marketing Spend,State,Profit
+165349.2,136897.8,471784.1,New York,192261.83
+162597.7,151377.59,443898.53,California,191792.06
+153441.51,101145.55,407934.54,Florida,191050.39
+144372.41,118671.85,383199.62,New York,182901.99
+142107.34,91391.77,366168.42,Florida,166187.94
+131876.9,99814.71,362861.36,New York,156991.12
+134615.46,147198.87,127716.82,California,156122.51
+130298.13,145530.06,323876.68,Florida,155752.6
+120542.52,148718.95,311613.29,New York,152211.77
+123334.88,108679.17,304981.62,California,149759.96
+101913.08,110594.11,229160.95,Florida,146121.95
+100671.96,91790.61,249744.55,California,144259.4
+93863.75,127320.38,249839.44,Florida,141585.52
+91992.39,135495.07,252664.93,California,134307.35
+119943.24,156547.42,256512.92,Florida,132602.65
+114523.61,122616.84,261776.23,New York,129917.04
+78013.11,121597.55,264346.06,California,126992.93
+94657.16,145077.58,282574.31,New York,125370.37
+91749.16,114175.79,294919.57,Florida,124266.9
+86419.7,153514.11,0,New York,122776.86
+76253.86,113867.3,298664.47,California,118474.03
+78389.47,153773.43,299737.29,New York,111313.02
+73994.56,122782.75,303319.26,Florida,110352.25
+67532.53,105751.03,304768.73,Florida,108733.99
+77044.01,99281.34,140574.81,New York,108552.04
+64664.71,139553.16,137962.62,California,107404.34
+75328.87,144135.98,134050.07,Florida,105733.54
+72107.6,127864.55,353183.81,New York,105008.31
+66051.52,182645.56,118148.2,Florida,103282.38
+65605.48,153032.06,107138.38,New York,101004.64
+61994.48,115641.28,91131.24,Florida,99937.59
+61136.38,152701.92,88218.23,New York,97483.56
+63408.86,129219.61,46085.25,California,97427.84
+55493.95,103057.49,214634.81,Florida,96778.92
+46426.07,157693.92,210797.67,California,96712.8
+46014.02,85047.44,205517.64,New York,96479.51
+28663.76,127056.21,201126.82,Florida,90708.19
+44069.95,51283.14,197029.42,California,89949.14
+20229.59,65947.93,185265.1,New York,81229.06
+38558.51,82982.09,174999.3,California,81005.76
+28754.33,118546.05,172795.67,California,78239.91
+27892.92,84710.77,164470.71,Florida,77798.83
+23640.93,96189.63,148001.11,California,71498.49
+15505.73,127382.3,35534.17,New York,69758.98
+22177.74,154806.14,28334.72,California,65200.33
+1000.23,124153.04,1903.93,New York,64926.08
+1315.46,115816.21,297114.46,Florida,49490.75
+0,135426.92,0,California,42559.73
+542.05,51743.15,0,New York,35673.41
+0,116983.8,45173.06,California,14681.4
\ No newline at end of file
diff --git a/Finacial Domain/Indian Startup Funding analysis/Startup Profit Prediction/README.md b/Finacial Domain/Indian Startup Funding analysis/Startup Profit Prediction/README.md
new file mode 100644
index 00000000..a915cee9
--- /dev/null
+++ b/Finacial Domain/Indian Startup Funding analysis/Startup Profit Prediction/README.md
@@ -0,0 +1,45 @@
+## **Startup Profit Prediction**
+**GOAL**
+
+The goal of this project is to analyse and predict the profit of a startup from features such as 'R&D Spend', 'Administration', 'Marketing Spend', and 'State'.
+
+**DATASET**
+
+The dataset can be downloaded from https://www.kaggle.com/sonalisingh1411/startup50 (a copy, `50_Startups.csv`, is included in this folder).
+
+**WHAT I HAVE DONE**
+- Step 1: Data Exploration
+- Step 2: Data Preparation
+- Step 3: Data Training
+- Step 4: Model Creation
+- Step 5: Performance Check
+
+
+**MODELS USED**
+- Linear Regression
+- Lasso Regression
+- Ridge Regression
+
+**LIBRARIES NEEDED**
+- pandas
+- numpy
+- scikit-learn (imported as `sklearn`; used for data training, the regression models, and the performance check)
+
+**Accuracy of different models used**
+- By using Linear Regression model
+  ```text
+  Accuracy achieved : 94.87
+  ```
+- By using Lasso Regression model
+  ```text
+  Accuracy achieved : 94.87
+  ```
+- By using Ridge Regression model
+  ```text
+  Accuracy achieved : 94.87
+  ```
+
+**CONCLUSION**
+
+* All three regression algorithms used in this project perform equally well on the given dataset: each achieves the same accuracy (94.87).
+* Among the three, Ridge Regression has the lowest RMSE.
\ No newline at end of file
diff --git a/Finacial Domain/Indian Startup Funding analysis/Startup Profit Prediction/Startup_Profit_Prediction.ipynb b/Finacial Domain/Indian Startup Funding analysis/Startup Profit Prediction/Startup_Profit_Prediction.ipynb
new file mode 100644
index 00000000..1ced84f3
--- /dev/null
+++ b/Finacial Domain/Indian Startup Funding analysis/Startup Profit Prediction/Startup_Profit_Prediction.ipynb
@@ -0,0 +1,3313 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "name": "Startup_Profit_Prediction.ipynb",
+ "provenance": []
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## **Data Exploration**"
+ ],
+ "metadata": {
+ "id": "eDvsUsu5Id3m"
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "id": "ewqbUJv1AQaS"
+ },
+ "outputs": [],
+ "source": [
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "data=pd.read_csv('/content/50_Startups.csv')\n",
+ "data.head()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 206
+ },
+ "id": "OkMSu_Z7Aatz",
+ "outputId": "ca131c46-392a-4be0-ded0-467989be79de"
+ },
+ "execution_count": 3,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " R&D Spend | \n",
+ " Administration | \n",
+ " Marketing Spend | \n",
+ " State | \n",
+ " Profit | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 165349.20 | \n",
+ " 136897.80 | \n",
+ " 471784.10 | \n",
+ " New York | \n",
+ " 192261.83 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 162597.70 | \n",
+ " 151377.59 | \n",
+ " 443898.53 | \n",
+ " California | \n",
+ " 191792.06 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 153441.51 | \n",
+ " 101145.55 | \n",
+ " 407934.54 | \n",
+ " Florida | \n",
+ " 191050.39 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 144372.41 | \n",
+ " 118671.85 | \n",
+ " 383199.62 | \n",
+ " New York | \n",
+ " 182901.99 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 142107.34 | \n",
+ " 91391.77 | \n",
+ " 366168.42 | \n",
+ " Florida | \n",
+ " 166187.94 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " "
+ ],
+ "text/plain": [
+ " R&D Spend Administration Marketing Spend State Profit\n",
+ "0 165349.20 136897.80 471784.10 New York 192261.83\n",
+ "1 162597.70 151377.59 443898.53 California 191792.06\n",
+ "2 153441.51 101145.55 407934.54 Florida 191050.39\n",
+ "3 144372.41 118671.85 383199.62 New York 182901.99\n",
+ "4 142107.34 91391.77 366168.42 Florida 166187.94"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 3
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "data.shape"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "_A36EGFvAyfp",
+ "outputId": "92794687-844b-4f3a-b2be-9570561b6493"
+ },
+ "execution_count": 5,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "(50, 5)"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 5
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "data.columns"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "KR3zRsslBBVj",
+ "outputId": "34d84f79-ff79-41ca-b42e-23721ecebf16"
+ },
+ "execution_count": 7,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "Index(['R&D Spend', 'Administration', 'Marketing Spend', 'State', 'Profit'], dtype='object')"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 7
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "data.dtypes"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "CW0ecCtGBILi",
+ "outputId": "d3d47527-d1cf-4648-9152-f296839e9f04"
+ },
+ "execution_count": 8,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "R&D Spend float64\n",
+ "Administration float64\n",
+ "Marketing Spend float64\n",
+ "State object\n",
+ "Profit float64\n",
+ "dtype: object"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 8
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "data.info()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "60EREBWYCu1s",
+ "outputId": "842eabe5-b6c6-44ed-d684-f4f254e8a69c"
+ },
+ "execution_count": 15,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\n",
+ "RangeIndex: 50 entries, 0 to 49\n",
+ "Data columns (total 5 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 R&D Spend 50 non-null float64\n",
+ " 1 Administration 50 non-null float64\n",
+ " 2 Marketing Spend 50 non-null float64\n",
+ " 3 State 50 non-null object \n",
+ " 4 Profit 50 non-null float64\n",
+ "dtypes: float64(4), object(1)\n",
+ "memory usage: 2.1+ KB\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "data.describe()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 300
+ },
+ "id": "enn49lKCB7vw",
+ "outputId": "7687ffb7-2553-4f31-8c37-d68d692a0d89"
+ },
+ "execution_count": 9,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " R&D Spend | \n",
+ " Administration | \n",
+ " Marketing Spend | \n",
+ " Profit | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 50.000000 | \n",
+ " 50.000000 | \n",
+ " 50.000000 | \n",
+ " 50.000000 | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " 73721.615600 | \n",
+ " 121344.639600 | \n",
+ " 211025.097800 | \n",
+ " 112012.639200 | \n",
+ "
\n",
+ " \n",
+ " std | \n",
+ " 45902.256482 | \n",
+ " 28017.802755 | \n",
+ " 122290.310726 | \n",
+ " 40306.180338 | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " 0.000000 | \n",
+ " 51283.140000 | \n",
+ " 0.000000 | \n",
+ " 14681.400000 | \n",
+ "
\n",
+ " \n",
+ " 25% | \n",
+ " 39936.370000 | \n",
+ " 103730.875000 | \n",
+ " 129300.132500 | \n",
+ " 90138.902500 | \n",
+ "
\n",
+ " \n",
+ " 50% | \n",
+ " 73051.080000 | \n",
+ " 122699.795000 | \n",
+ " 212716.240000 | \n",
+ " 107978.190000 | \n",
+ "
\n",
+ " \n",
+ " 75% | \n",
+ " 101602.800000 | \n",
+ " 144842.180000 | \n",
+ " 299469.085000 | \n",
+ " 139765.977500 | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " 165349.200000 | \n",
+ " 182645.560000 | \n",
+ " 471784.100000 | \n",
+ " 192261.830000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " "
+ ],
+ "text/plain": [
+ " R&D Spend Administration Marketing Spend Profit\n",
+ "count 50.000000 50.000000 50.000000 50.000000\n",
+ "mean 73721.615600 121344.639600 211025.097800 112012.639200\n",
+ "std 45902.256482 28017.802755 122290.310726 40306.180338\n",
+ "min 0.000000 51283.140000 0.000000 14681.400000\n",
+ "25% 39936.370000 103730.875000 129300.132500 90138.902500\n",
+ "50% 73051.080000 122699.795000 212716.240000 107978.190000\n",
+ "75% 101602.800000 144842.180000 299469.085000 139765.977500\n",
+ "max 165349.200000 182645.560000 471784.100000 192261.830000"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 9
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## **Data Preparation**"
+ ],
+ "metadata": {
+ "id": "-L_2h_QtIkVd"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "data.isnull().sum() #to check for any null/missing values."
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "ec-HkrQpCAgY",
+ "outputId": "5fe2435f-7aff-4d86-ec0f-64c9b8f69e30"
+ },
+ "execution_count": 10,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "R&D Spend 0\n",
+ "Administration 0\n",
+ "Marketing Spend 0\n",
+ "State 0\n",
+ "Profit 0\n",
+ "dtype: int64"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 10
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "data.isnull().sum().sum()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "mWqKuJuhCFdl",
+ "outputId": "f1521180-11ac-4161-ac6e-99f4594ebf38"
+ },
+ "execution_count": 11,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "0"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 11
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "data['State'].nunique()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "8U_WW5jNCfxE",
+ "outputId": "5ef09369-8943-4321-dd73-2fa822dbeaec"
+ },
+ "execution_count": 13,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "3"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 13
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "data['State'].unique()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "Om248DvSCmMd",
+ "outputId": "66081326-6197-4cae-af09-58d72680756a"
+ },
+ "execution_count": 14,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "array(['New York', 'California', 'Florida'], dtype=object)"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 14
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# converting State column which is object datatype to int data type.\n",
+ "columns=['State']\n",
+ "data1=data[columns]\n",
+ "dummies=pd.get_dummies(data1,columns=['State'])\n",
+ "dummies"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000
+ },
+ "id": "td3HIRArCowD",
+ "outputId": "30ff0355-55b2-4cbe-e918-b94131f23619"
+ },
+ "execution_count": 18,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " State_California | \n",
+ " State_Florida | \n",
+ " State_New York | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 24 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 28 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 29 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 30 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 31 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 32 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 33 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 34 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 35 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 36 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 37 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 38 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 39 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 40 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 41 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 42 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 43 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 44 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 45 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 46 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 47 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 48 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 49 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " "
+ ],
+ "text/plain": [
+ " State_California State_Florida State_New York\n",
+ "0 0 0 1\n",
+ "1 1 0 0\n",
+ "2 0 1 0\n",
+ "3 0 0 1\n",
+ "4 0 1 0\n",
+ "5 0 0 1\n",
+ "6 1 0 0\n",
+ "7 0 1 0\n",
+ "8 0 0 1\n",
+ "9 1 0 0\n",
+ "10 0 1 0\n",
+ "11 1 0 0\n",
+ "12 0 1 0\n",
+ "13 1 0 0\n",
+ "14 0 1 0\n",
+ "15 0 0 1\n",
+ "16 1 0 0\n",
+ "17 0 0 1\n",
+ "18 0 1 0\n",
+ "19 0 0 1\n",
+ "20 1 0 0\n",
+ "21 0 0 1\n",
+ "22 0 1 0\n",
+ "23 0 1 0\n",
+ "24 0 0 1\n",
+ "25 1 0 0\n",
+ "26 0 1 0\n",
+ "27 0 0 1\n",
+ "28 0 1 0\n",
+ "29 0 0 1\n",
+ "30 0 1 0\n",
+ "31 0 0 1\n",
+ "32 1 0 0\n",
+ "33 0 1 0\n",
+ "34 1 0 0\n",
+ "35 0 0 1\n",
+ "36 0 1 0\n",
+ "37 1 0 0\n",
+ "38 0 0 1\n",
+ "39 1 0 0\n",
+ "40 1 0 0\n",
+ "41 0 1 0\n",
+ "42 1 0 0\n",
+ "43 0 0 1\n",
+ "44 1 0 0\n",
+ "45 0 0 1\n",
+ "46 0 1 0\n",
+ "47 1 0 0\n",
+ "48 0 0 1\n",
+ "49 1 0 0"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 18
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "mergeddata= pd.concat([data,dummies],axis='columns')\n",
+ "mergeddata"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000
+ },
+ "id": "-4AyiyUYFy9M",
+ "outputId": "e6edd2d4-b0f5-47b2-b9db-9db7ffbfba1d"
+ },
+ "execution_count": 19,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " R&D Spend | \n",
+ " Administration | \n",
+ " Marketing Spend | \n",
+ " State | \n",
+ " Profit | \n",
+ " State_California | \n",
+ " State_Florida | \n",
+ " State_New York | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 165349.20 | \n",
+ " 136897.80 | \n",
+ " 471784.10 | \n",
+ " New York | \n",
+ " 192261.83 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 162597.70 | \n",
+ " 151377.59 | \n",
+ " 443898.53 | \n",
+ " California | \n",
+ " 191792.06 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 153441.51 | \n",
+ " 101145.55 | \n",
+ " 407934.54 | \n",
+ " Florida | \n",
+ " 191050.39 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 144372.41 | \n",
+ " 118671.85 | \n",
+ " 383199.62 | \n",
+ " New York | \n",
+ " 182901.99 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 142107.34 | \n",
+ " 91391.77 | \n",
+ " 366168.42 | \n",
+ " Florida | \n",
+ " 166187.94 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 131876.90 | \n",
+ " 99814.71 | \n",
+ " 362861.36 | \n",
+ " New York | \n",
+ " 156991.12 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 134615.46 | \n",
+ " 147198.87 | \n",
+ " 127716.82 | \n",
+ " California | \n",
+ " 156122.51 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 130298.13 | \n",
+ " 145530.06 | \n",
+ " 323876.68 | \n",
+ " Florida | \n",
+ " 155752.60 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 120542.52 | \n",
+ " 148718.95 | \n",
+ " 311613.29 | \n",
+ " New York | \n",
+ " 152211.77 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 123334.88 | \n",
+ " 108679.17 | \n",
+ " 304981.62 | \n",
+ " California | \n",
+ " 149759.96 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " 101913.08 | \n",
+ " 110594.11 | \n",
+ " 229160.95 | \n",
+ " Florida | \n",
+ " 146121.95 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " 100671.96 | \n",
+ " 91790.61 | \n",
+ " 249744.55 | \n",
+ " California | \n",
+ " 144259.40 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " 93863.75 | \n",
+ " 127320.38 | \n",
+ " 249839.44 | \n",
+ " Florida | \n",
+ " 141585.52 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " 91992.39 | \n",
+ " 135495.07 | \n",
+ " 252664.93 | \n",
+ " California | \n",
+ " 134307.35 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " 119943.24 | \n",
+ " 156547.42 | \n",
+ " 256512.92 | \n",
+ " Florida | \n",
+ " 132602.65 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " 114523.61 | \n",
+ " 122616.84 | \n",
+ " 261776.23 | \n",
+ " New York | \n",
+ " 129917.04 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " 78013.11 | \n",
+ " 121597.55 | \n",
+ " 264346.06 | \n",
+ " California | \n",
+ " 126992.93 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " 94657.16 | \n",
+ " 145077.58 | \n",
+ " 282574.31 | \n",
+ " New York | \n",
+ " 125370.37 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " 91749.16 | \n",
+ " 114175.79 | \n",
+ " 294919.57 | \n",
+ " Florida | \n",
+ " 124266.90 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " 86419.70 | \n",
+ " 153514.11 | \n",
+ " 0.00 | \n",
+ " New York | \n",
+ " 122776.86 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " 76253.86 | \n",
+ " 113867.30 | \n",
+ " 298664.47 | \n",
+ " California | \n",
+ " 118474.03 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " 78389.47 | \n",
+ " 153773.43 | \n",
+ " 299737.29 | \n",
+ " New York | \n",
+ " 111313.02 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " 73994.56 | \n",
+ " 122782.75 | \n",
+ " 303319.26 | \n",
+ " Florida | \n",
+ " 110352.25 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " 67532.53 | \n",
+ " 105751.03 | \n",
+ " 304768.73 | \n",
+ " Florida | \n",
+ " 108733.99 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 24 | \n",
+ " 77044.01 | \n",
+ " 99281.34 | \n",
+ " 140574.81 | \n",
+ " New York | \n",
+ " 108552.04 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " 64664.71 | \n",
+ " 139553.16 | \n",
+ " 137962.62 | \n",
+ " California | \n",
+ " 107404.34 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " 75328.87 | \n",
+ " 144135.98 | \n",
+ " 134050.07 | \n",
+ " Florida | \n",
+ " 105733.54 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " 72107.60 | \n",
+ " 127864.55 | \n",
+ " 353183.81 | \n",
+ " New York | \n",
+ " 105008.31 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 28 | \n",
+ " 66051.52 | \n",
+ " 182645.56 | \n",
+ " 118148.20 | \n",
+ " Florida | \n",
+ " 103282.38 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 29 | \n",
+ " 65605.48 | \n",
+ " 153032.06 | \n",
+ " 107138.38 | \n",
+ " New York | \n",
+ " 101004.64 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 30 | \n",
+ " 61994.48 | \n",
+ " 115641.28 | \n",
+ " 91131.24 | \n",
+ " Florida | \n",
+ " 99937.59 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 31 | \n",
+ " 61136.38 | \n",
+ " 152701.92 | \n",
+ " 88218.23 | \n",
+ " New York | \n",
+ " 97483.56 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 32 | \n",
+ " 63408.86 | \n",
+ " 129219.61 | \n",
+ " 46085.25 | \n",
+ " California | \n",
+ " 97427.84 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 33 | \n",
+ " 55493.95 | \n",
+ " 103057.49 | \n",
+ " 214634.81 | \n",
+ " Florida | \n",
+ " 96778.92 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 34 | \n",
+ " 46426.07 | \n",
+ " 157693.92 | \n",
+ " 210797.67 | \n",
+ " California | \n",
+ " 96712.80 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 35 | \n",
+ " 46014.02 | \n",
+ " 85047.44 | \n",
+ " 205517.64 | \n",
+ " New York | \n",
+ " 96479.51 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 36 | \n",
+ " 28663.76 | \n",
+ " 127056.21 | \n",
+ " 201126.82 | \n",
+ " Florida | \n",
+ " 90708.19 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 37 | \n",
+ " 44069.95 | \n",
+ " 51283.14 | \n",
+ " 197029.42 | \n",
+ " California | \n",
+ " 89949.14 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 38 | \n",
+ " 20229.59 | \n",
+ " 65947.93 | \n",
+ " 185265.10 | \n",
+ " New York | \n",
+ " 81229.06 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 39 | \n",
+ " 38558.51 | \n",
+ " 82982.09 | \n",
+ " 174999.30 | \n",
+ " California | \n",
+ " 81005.76 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 40 | \n",
+ " 28754.33 | \n",
+ " 118546.05 | \n",
+ " 172795.67 | \n",
+ " California | \n",
+ " 78239.91 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 41 | \n",
+ " 27892.92 | \n",
+ " 84710.77 | \n",
+ " 164470.71 | \n",
+ " Florida | \n",
+ " 77798.83 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 42 | \n",
+ " 23640.93 | \n",
+ " 96189.63 | \n",
+ " 148001.11 | \n",
+ " California | \n",
+ " 71498.49 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 43 | \n",
+ " 15505.73 | \n",
+ " 127382.30 | \n",
+ " 35534.17 | \n",
+ " New York | \n",
+ " 69758.98 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 44 | \n",
+ " 22177.74 | \n",
+ " 154806.14 | \n",
+ " 28334.72 | \n",
+ " California | \n",
+ " 65200.33 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 45 | \n",
+ " 1000.23 | \n",
+ " 124153.04 | \n",
+ " 1903.93 | \n",
+ " New York | \n",
+ " 64926.08 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 46 | \n",
+ " 1315.46 | \n",
+ " 115816.21 | \n",
+ " 297114.46 | \n",
+ " Florida | \n",
+ " 49490.75 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 47 | \n",
+ " 0.00 | \n",
+ " 135426.92 | \n",
+ " 0.00 | \n",
+ " California | \n",
+ " 42559.73 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 48 | \n",
+ " 542.05 | \n",
+ " 51743.15 | \n",
+ " 0.00 | \n",
+ " New York | \n",
+ " 35673.41 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 49 | \n",
+ " 0.00 | \n",
+ " 116983.80 | \n",
+ " 45173.06 | \n",
+ " California | \n",
+ " 14681.40 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " "
+ ],
+ "text/plain": [
+ " R&D Spend Administration Marketing Spend State Profit \\\n",
+ "0 165349.20 136897.80 471784.10 New York 192261.83 \n",
+ "1 162597.70 151377.59 443898.53 California 191792.06 \n",
+ "2 153441.51 101145.55 407934.54 Florida 191050.39 \n",
+ "3 144372.41 118671.85 383199.62 New York 182901.99 \n",
+ "4 142107.34 91391.77 366168.42 Florida 166187.94 \n",
+ "5 131876.90 99814.71 362861.36 New York 156991.12 \n",
+ "6 134615.46 147198.87 127716.82 California 156122.51 \n",
+ "7 130298.13 145530.06 323876.68 Florida 155752.60 \n",
+ "8 120542.52 148718.95 311613.29 New York 152211.77 \n",
+ "9 123334.88 108679.17 304981.62 California 149759.96 \n",
+ "10 101913.08 110594.11 229160.95 Florida 146121.95 \n",
+ "11 100671.96 91790.61 249744.55 California 144259.40 \n",
+ "12 93863.75 127320.38 249839.44 Florida 141585.52 \n",
+ "13 91992.39 135495.07 252664.93 California 134307.35 \n",
+ "14 119943.24 156547.42 256512.92 Florida 132602.65 \n",
+ "15 114523.61 122616.84 261776.23 New York 129917.04 \n",
+ "16 78013.11 121597.55 264346.06 California 126992.93 \n",
+ "17 94657.16 145077.58 282574.31 New York 125370.37 \n",
+ "18 91749.16 114175.79 294919.57 Florida 124266.90 \n",
+ "19 86419.70 153514.11 0.00 New York 122776.86 \n",
+ "20 76253.86 113867.30 298664.47 California 118474.03 \n",
+ "21 78389.47 153773.43 299737.29 New York 111313.02 \n",
+ "22 73994.56 122782.75 303319.26 Florida 110352.25 \n",
+ "23 67532.53 105751.03 304768.73 Florida 108733.99 \n",
+ "24 77044.01 99281.34 140574.81 New York 108552.04 \n",
+ "25 64664.71 139553.16 137962.62 California 107404.34 \n",
+ "26 75328.87 144135.98 134050.07 Florida 105733.54 \n",
+ "27 72107.60 127864.55 353183.81 New York 105008.31 \n",
+ "28 66051.52 182645.56 118148.20 Florida 103282.38 \n",
+ "29 65605.48 153032.06 107138.38 New York 101004.64 \n",
+ "30 61994.48 115641.28 91131.24 Florida 99937.59 \n",
+ "31 61136.38 152701.92 88218.23 New York 97483.56 \n",
+ "32 63408.86 129219.61 46085.25 California 97427.84 \n",
+ "33 55493.95 103057.49 214634.81 Florida 96778.92 \n",
+ "34 46426.07 157693.92 210797.67 California 96712.80 \n",
+ "35 46014.02 85047.44 205517.64 New York 96479.51 \n",
+ "36 28663.76 127056.21 201126.82 Florida 90708.19 \n",
+ "37 44069.95 51283.14 197029.42 California 89949.14 \n",
+ "38 20229.59 65947.93 185265.10 New York 81229.06 \n",
+ "39 38558.51 82982.09 174999.30 California 81005.76 \n",
+ "40 28754.33 118546.05 172795.67 California 78239.91 \n",
+ "41 27892.92 84710.77 164470.71 Florida 77798.83 \n",
+ "42 23640.93 96189.63 148001.11 California 71498.49 \n",
+ "43 15505.73 127382.30 35534.17 New York 69758.98 \n",
+ "44 22177.74 154806.14 28334.72 California 65200.33 \n",
+ "45 1000.23 124153.04 1903.93 New York 64926.08 \n",
+ "46 1315.46 115816.21 297114.46 Florida 49490.75 \n",
+ "47 0.00 135426.92 0.00 California 42559.73 \n",
+ "48 542.05 51743.15 0.00 New York 35673.41 \n",
+ "49 0.00 116983.80 45173.06 California 14681.40 \n",
+ "\n",
+ " State_California State_Florida State_New York \n",
+ "0 0 0 1 \n",
+ "1 1 0 0 \n",
+ "2 0 1 0 \n",
+ "3 0 0 1 \n",
+ "4 0 1 0 \n",
+ "5 0 0 1 \n",
+ "6 1 0 0 \n",
+ "7 0 1 0 \n",
+ "8 0 0 1 \n",
+ "9 1 0 0 \n",
+ "10 0 1 0 \n",
+ "11 1 0 0 \n",
+ "12 0 1 0 \n",
+ "13 1 0 0 \n",
+ "14 0 1 0 \n",
+ "15 0 0 1 \n",
+ "16 1 0 0 \n",
+ "17 0 0 1 \n",
+ "18 0 1 0 \n",
+ "19 0 0 1 \n",
+ "20 1 0 0 \n",
+ "21 0 0 1 \n",
+ "22 0 1 0 \n",
+ "23 0 1 0 \n",
+ "24 0 0 1 \n",
+ "25 1 0 0 \n",
+ "26 0 1 0 \n",
+ "27 0 0 1 \n",
+ "28 0 1 0 \n",
+ "29 0 0 1 \n",
+ "30 0 1 0 \n",
+ "31 0 0 1 \n",
+ "32 1 0 0 \n",
+ "33 0 1 0 \n",
+ "34 1 0 0 \n",
+ "35 0 0 1 \n",
+ "36 0 1 0 \n",
+ "37 1 0 0 \n",
+ "38 0 0 1 \n",
+ "39 1 0 0 \n",
+ "40 1 0 0 \n",
+ "41 0 1 0 \n",
+ "42 1 0 0 \n",
+ "43 0 0 1 \n",
+ "44 1 0 0 \n",
+ "45 0 0 1 \n",
+ "46 0 1 0 \n",
+ "47 1 0 0 \n",
+ "48 0 0 1 \n",
+ "49 1 0 0 "
+ ]
+ },
+ "metadata": {},
+ "execution_count": 19
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "newdata = mergeddata.drop(columns=['State'])  # remove the text State column; the State_* indicator columns stay\n",
+ "newdata"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000
+ },
+ "id": "DGDE3wKqF9nh",
+ "outputId": "e179dea2-61f2-42a6-f28a-695f21055a82"
+ },
+ "execution_count": 23,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " R&D Spend | \n",
+ " Administration | \n",
+ " Marketing Spend | \n",
+ " Profit | \n",
+ " State_California | \n",
+ " State_Florida | \n",
+ " State_New York | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 165349.20 | \n",
+ " 136897.80 | \n",
+ " 471784.10 | \n",
+ " 192261.83 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 162597.70 | \n",
+ " 151377.59 | \n",
+ " 443898.53 | \n",
+ " 191792.06 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 153441.51 | \n",
+ " 101145.55 | \n",
+ " 407934.54 | \n",
+ " 191050.39 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 144372.41 | \n",
+ " 118671.85 | \n",
+ " 383199.62 | \n",
+ " 182901.99 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 142107.34 | \n",
+ " 91391.77 | \n",
+ " 366168.42 | \n",
+ " 166187.94 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 131876.90 | \n",
+ " 99814.71 | \n",
+ " 362861.36 | \n",
+ " 156991.12 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 134615.46 | \n",
+ " 147198.87 | \n",
+ " 127716.82 | \n",
+ " 156122.51 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 130298.13 | \n",
+ " 145530.06 | \n",
+ " 323876.68 | \n",
+ " 155752.60 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 120542.52 | \n",
+ " 148718.95 | \n",
+ " 311613.29 | \n",
+ " 152211.77 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 123334.88 | \n",
+ " 108679.17 | \n",
+ " 304981.62 | \n",
+ " 149759.96 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " 101913.08 | \n",
+ " 110594.11 | \n",
+ " 229160.95 | \n",
+ " 146121.95 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " 100671.96 | \n",
+ " 91790.61 | \n",
+ " 249744.55 | \n",
+ " 144259.40 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " 93863.75 | \n",
+ " 127320.38 | \n",
+ " 249839.44 | \n",
+ " 141585.52 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " 91992.39 | \n",
+ " 135495.07 | \n",
+ " 252664.93 | \n",
+ " 134307.35 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " 119943.24 | \n",
+ " 156547.42 | \n",
+ " 256512.92 | \n",
+ " 132602.65 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " 114523.61 | \n",
+ " 122616.84 | \n",
+ " 261776.23 | \n",
+ " 129917.04 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " 78013.11 | \n",
+ " 121597.55 | \n",
+ " 264346.06 | \n",
+ " 126992.93 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " 94657.16 | \n",
+ " 145077.58 | \n",
+ " 282574.31 | \n",
+ " 125370.37 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " 91749.16 | \n",
+ " 114175.79 | \n",
+ " 294919.57 | \n",
+ " 124266.90 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " 86419.70 | \n",
+ " 153514.11 | \n",
+ " 0.00 | \n",
+ " 122776.86 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " 76253.86 | \n",
+ " 113867.30 | \n",
+ " 298664.47 | \n",
+ " 118474.03 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " 78389.47 | \n",
+ " 153773.43 | \n",
+ " 299737.29 | \n",
+ " 111313.02 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " 73994.56 | \n",
+ " 122782.75 | \n",
+ " 303319.26 | \n",
+ " 110352.25 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " 67532.53 | \n",
+ " 105751.03 | \n",
+ " 304768.73 | \n",
+ " 108733.99 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 24 | \n",
+ " 77044.01 | \n",
+ " 99281.34 | \n",
+ " 140574.81 | \n",
+ " 108552.04 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " 64664.71 | \n",
+ " 139553.16 | \n",
+ " 137962.62 | \n",
+ " 107404.34 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " 75328.87 | \n",
+ " 144135.98 | \n",
+ " 134050.07 | \n",
+ " 105733.54 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " 72107.60 | \n",
+ " 127864.55 | \n",
+ " 353183.81 | \n",
+ " 105008.31 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 28 | \n",
+ " 66051.52 | \n",
+ " 182645.56 | \n",
+ " 118148.20 | \n",
+ " 103282.38 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 29 | \n",
+ " 65605.48 | \n",
+ " 153032.06 | \n",
+ " 107138.38 | \n",
+ " 101004.64 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 30 | \n",
+ " 61994.48 | \n",
+ " 115641.28 | \n",
+ " 91131.24 | \n",
+ " 99937.59 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 31 | \n",
+ " 61136.38 | \n",
+ " 152701.92 | \n",
+ " 88218.23 | \n",
+ " 97483.56 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 32 | \n",
+ " 63408.86 | \n",
+ " 129219.61 | \n",
+ " 46085.25 | \n",
+ " 97427.84 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 33 | \n",
+ " 55493.95 | \n",
+ " 103057.49 | \n",
+ " 214634.81 | \n",
+ " 96778.92 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 34 | \n",
+ " 46426.07 | \n",
+ " 157693.92 | \n",
+ " 210797.67 | \n",
+ " 96712.80 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 35 | \n",
+ " 46014.02 | \n",
+ " 85047.44 | \n",
+ " 205517.64 | \n",
+ " 96479.51 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 36 | \n",
+ " 28663.76 | \n",
+ " 127056.21 | \n",
+ " 201126.82 | \n",
+ " 90708.19 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 37 | \n",
+ " 44069.95 | \n",
+ " 51283.14 | \n",
+ " 197029.42 | \n",
+ " 89949.14 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 38 | \n",
+ " 20229.59 | \n",
+ " 65947.93 | \n",
+ " 185265.10 | \n",
+ " 81229.06 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 39 | \n",
+ " 38558.51 | \n",
+ " 82982.09 | \n",
+ " 174999.30 | \n",
+ " 81005.76 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 40 | \n",
+ " 28754.33 | \n",
+ " 118546.05 | \n",
+ " 172795.67 | \n",
+ " 78239.91 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 41 | \n",
+ " 27892.92 | \n",
+ " 84710.77 | \n",
+ " 164470.71 | \n",
+ " 77798.83 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 42 | \n",
+ " 23640.93 | \n",
+ " 96189.63 | \n",
+ " 148001.11 | \n",
+ " 71498.49 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 43 | \n",
+ " 15505.73 | \n",
+ " 127382.30 | \n",
+ " 35534.17 | \n",
+ " 69758.98 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 44 | \n",
+ " 22177.74 | \n",
+ " 154806.14 | \n",
+ " 28334.72 | \n",
+ " 65200.33 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 45 | \n",
+ " 1000.23 | \n",
+ " 124153.04 | \n",
+ " 1903.93 | \n",
+ " 64926.08 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 46 | \n",
+ " 1315.46 | \n",
+ " 115816.21 | \n",
+ " 297114.46 | \n",
+ " 49490.75 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 47 | \n",
+ " 0.00 | \n",
+ " 135426.92 | \n",
+ " 0.00 | \n",
+ " 42559.73 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 48 | \n",
+ " 542.05 | \n",
+ " 51743.15 | \n",
+ " 0.00 | \n",
+ " 35673.41 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 49 | \n",
+ " 0.00 | \n",
+ " 116983.80 | \n",
+ " 45173.06 | \n",
+ " 14681.40 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " "
+ ],
+ "text/plain": [
+ " R&D Spend Administration Marketing Spend Profit State_California \\\n",
+ "0 165349.20 136897.80 471784.10 192261.83 0 \n",
+ "1 162597.70 151377.59 443898.53 191792.06 1 \n",
+ "2 153441.51 101145.55 407934.54 191050.39 0 \n",
+ "3 144372.41 118671.85 383199.62 182901.99 0 \n",
+ "4 142107.34 91391.77 366168.42 166187.94 0 \n",
+ "5 131876.90 99814.71 362861.36 156991.12 0 \n",
+ "6 134615.46 147198.87 127716.82 156122.51 1 \n",
+ "7 130298.13 145530.06 323876.68 155752.60 0 \n",
+ "8 120542.52 148718.95 311613.29 152211.77 0 \n",
+ "9 123334.88 108679.17 304981.62 149759.96 1 \n",
+ "10 101913.08 110594.11 229160.95 146121.95 0 \n",
+ "11 100671.96 91790.61 249744.55 144259.40 1 \n",
+ "12 93863.75 127320.38 249839.44 141585.52 0 \n",
+ "13 91992.39 135495.07 252664.93 134307.35 1 \n",
+ "14 119943.24 156547.42 256512.92 132602.65 0 \n",
+ "15 114523.61 122616.84 261776.23 129917.04 0 \n",
+ "16 78013.11 121597.55 264346.06 126992.93 1 \n",
+ "17 94657.16 145077.58 282574.31 125370.37 0 \n",
+ "18 91749.16 114175.79 294919.57 124266.90 0 \n",
+ "19 86419.70 153514.11 0.00 122776.86 0 \n",
+ "20 76253.86 113867.30 298664.47 118474.03 1 \n",
+ "21 78389.47 153773.43 299737.29 111313.02 0 \n",
+ "22 73994.56 122782.75 303319.26 110352.25 0 \n",
+ "23 67532.53 105751.03 304768.73 108733.99 0 \n",
+ "24 77044.01 99281.34 140574.81 108552.04 0 \n",
+ "25 64664.71 139553.16 137962.62 107404.34 1 \n",
+ "26 75328.87 144135.98 134050.07 105733.54 0 \n",
+ "27 72107.60 127864.55 353183.81 105008.31 0 \n",
+ "28 66051.52 182645.56 118148.20 103282.38 0 \n",
+ "29 65605.48 153032.06 107138.38 101004.64 0 \n",
+ "30 61994.48 115641.28 91131.24 99937.59 0 \n",
+ "31 61136.38 152701.92 88218.23 97483.56 0 \n",
+ "32 63408.86 129219.61 46085.25 97427.84 1 \n",
+ "33 55493.95 103057.49 214634.81 96778.92 0 \n",
+ "34 46426.07 157693.92 210797.67 96712.80 1 \n",
+ "35 46014.02 85047.44 205517.64 96479.51 0 \n",
+ "36 28663.76 127056.21 201126.82 90708.19 0 \n",
+ "37 44069.95 51283.14 197029.42 89949.14 1 \n",
+ "38 20229.59 65947.93 185265.10 81229.06 0 \n",
+ "39 38558.51 82982.09 174999.30 81005.76 1 \n",
+ "40 28754.33 118546.05 172795.67 78239.91 1 \n",
+ "41 27892.92 84710.77 164470.71 77798.83 0 \n",
+ "42 23640.93 96189.63 148001.11 71498.49 1 \n",
+ "43 15505.73 127382.30 35534.17 69758.98 0 \n",
+ "44 22177.74 154806.14 28334.72 65200.33 1 \n",
+ "45 1000.23 124153.04 1903.93 64926.08 0 \n",
+ "46 1315.46 115816.21 297114.46 49490.75 0 \n",
+ "47 0.00 135426.92 0.00 42559.73 1 \n",
+ "48 542.05 51743.15 0.00 35673.41 0 \n",
+ "49 0.00 116983.80 45173.06 14681.40 1 \n",
+ "\n",
+ " State_Florida State_New York \n",
+ "0 0 1 \n",
+ "1 0 0 \n",
+ "2 1 0 \n",
+ "3 0 1 \n",
+ "4 1 0 \n",
+ "5 0 1 \n",
+ "6 0 0 \n",
+ "7 1 0 \n",
+ "8 0 1 \n",
+ "9 0 0 \n",
+ "10 1 0 \n",
+ "11 0 0 \n",
+ "12 1 0 \n",
+ "13 0 0 \n",
+ "14 1 0 \n",
+ "15 0 1 \n",
+ "16 0 0 \n",
+ "17 0 1 \n",
+ "18 1 0 \n",
+ "19 0 1 \n",
+ "20 0 0 \n",
+ "21 0 1 \n",
+ "22 1 0 \n",
+ "23 1 0 \n",
+ "24 0 1 \n",
+ "25 0 0 \n",
+ "26 1 0 \n",
+ "27 0 1 \n",
+ "28 1 0 \n",
+ "29 0 1 \n",
+ "30 1 0 \n",
+ "31 0 1 \n",
+ "32 0 0 \n",
+ "33 1 0 \n",
+ "34 0 0 \n",
+ "35 0 1 \n",
+ "36 1 0 \n",
+ "37 0 0 \n",
+ "38 0 1 \n",
+ "39 0 0 \n",
+ "40 0 0 \n",
+ "41 1 0 \n",
+ "42 0 0 \n",
+ "43 0 1 \n",
+ "44 0 0 \n",
+ "45 0 1 \n",
+ "46 1 0 \n",
+ "47 0 0 \n",
+ "48 0 1 \n",
+ "49 0 0 "
+ ]
+ },
+ "metadata": {},
+ "execution_count": 23
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#Cast every column to float so the whole frame shares one numeric dtype; astype(int) would cut off the decimal part of the spend and profit values.\n",
+ "prepareddata=newdata.astype(float)\n",
+ "prepareddata.head()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 206
+ },
+ "id": "pMCvSC7OCIVo",
+ "outputId": "d5a7d6a9-c7b5-456a-878b-8ab71ef0c9f5"
+ },
+ "execution_count": 24,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " R&D Spend | \n",
+ " Administration | \n",
+ " Marketing Spend | \n",
+ " Profit | \n",
+ " State_California | \n",
+ " State_Florida | \n",
+ " State_New York | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 165349 | \n",
+ " 136897 | \n",
+ " 471784 | \n",
+ " 192261 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 162597 | \n",
+ " 151377 | \n",
+ " 443898 | \n",
+ " 191792 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 153441 | \n",
+ " 101145 | \n",
+ " 407934 | \n",
+ " 191050 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 144372 | \n",
+ " 118671 | \n",
+ " 383199 | \n",
+ " 182901 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 142107 | \n",
+ " 91391 | \n",
+ " 366168 | \n",
+ " 166187 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " "
+ ],
+ "text/plain": [
+ " R&D Spend Administration Marketing Spend Profit State_California \\\n",
+ "0 165349 136897 471784 192261 0 \n",
+ "1 162597 151377 443898 191792 1 \n",
+ "2 153441 101145 407934 191050 0 \n",
+ "3 144372 118671 383199 182901 0 \n",
+ "4 142107 91391 366168 166187 0 \n",
+ "\n",
+ " State_Florida State_New York \n",
+ "0 0 1 \n",
+ "1 0 0 \n",
+ "2 1 0 \n",
+ "3 0 1 \n",
+ "4 1 0 "
+ ]
+ },
+ "metadata": {},
+ "execution_count": 24
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "prepareddata.columns"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "OwVA8SgXC977",
+ "outputId": "54fdd233-b40a-4b6b-c889-1a3eaf83608f"
+ },
+ "execution_count": 27,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "Index(['R&D Spend', 'Administration', 'Marketing Spend', 'Profit',\n",
+ " 'State_California', 'State_Florida', 'State_New York'],\n",
+ " dtype='object')"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 27
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "prepareddata.info()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "2sDm8iXbGUY7",
+ "outputId": "e3d7cc94-84bc-49cc-b7a2-ef73bdfcb608"
+ },
+ "execution_count": 28,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\n",
+ "RangeIndex: 50 entries, 0 to 49\n",
+ "Data columns (total 7 columns):\n",
+ " # Column Non-Null Count Dtype\n",
+ "--- ------ -------------- -----\n",
+ " 0 R&D Spend 50 non-null int64\n",
+ " 1 Administration 50 non-null int64\n",
+ " 2 Marketing Spend 50 non-null int64\n",
+ " 3 Profit 50 non-null int64\n",
+ " 4 State_California 50 non-null int64\n",
+ " 5 State_Florida 50 non-null int64\n",
+ " 6 State_New York 50 non-null int64\n",
+ "dtypes: int64(7)\n",
+ "memory usage: 2.9 KB\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## **Training Our Data**\n",
+ "\n"
+ ],
+ "metadata": {
+ "id": "P41kwXp4JBH5"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# train_test_split (used in the next cell) lives in sklearn.model_selection\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "# x holds the feature columns and y holds the target column (Profit).\n",
+ "feature_columns=['R&D Spend', 'Administration', 'Marketing Spend','State_California', 'State_Florida', 'State_New York']\n",
+ "x,y=prepareddata[feature_columns],prepareddata['Profit']"
+ ],
+ "metadata": {
+ "id": "MJa5GnxPCZaE"
+ },
+ "execution_count": 29,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Split data into training data and testing data; random_state fixes the shuffle so the same split is produced on every run\n",
+ "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)\n",
+ "#Ratio used for splitting training and testing data is 8:2 respectively"
+ ],
+ "metadata": {
+ "id": "fDQcGTJXDAOX"
+ },
+ "execution_count": 30,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## **Model Creation**"
+ ],
+ "metadata": {
+ "id": "l6vk_SdZGwPv"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "### Linear Regression"
+ ],
+ "metadata": {
+ "id": "vD9AVlk4GsMm"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Linear regression estimator from scikit-learn\n",
+ "from sklearn.linear_model import LinearRegression\n",
+ "reg1 = LinearRegression(fit_intercept=True)  # fit_intercept=True is the library default, written out explicitly"
+ ],
+ "metadata": {
+ "id": "bc5Dk-TbDEAm"
+ },
+ "execution_count": 31,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Train the linear regression model (reg1) on the training split.\n",
+ "reg1.fit(x_train, y_train)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "-ntbFAnND5Yc",
+ "outputId": "aab48579-aad5-4dd2-ee8f-cd2ff0978c98"
+ },
+ "execution_count": 32,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "LinearRegression()"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 32
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Predict the target for the test-split features with the fitted linear regression model.\n",
+ "pred1 = reg1.predict(x_test)"
+ ],
+ "metadata": {
+ "id": "w11EfqAYD8Ae"
+ },
+ "execution_count": 33,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "pred1"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "mrGObL-WD-9x",
+ "outputId": "02a3e327-4c5f-464d-9595-4eec1ad37cb1"
+ },
+ "execution_count": 34,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "array([ 98484.89210481, 44026.08455385, 157308.22696881, 164609.14315337,\n",
+ " 151943.20929796, 56973.06829596, 84210.34971486, 116881.35386378,\n",
+ " 184161.16574319, 129863.57642281])"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 34
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "### Lasso Regression"
+ ],
+ "metadata": {
+ "id": "7sbXj2IRG7Sy"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Lasso (L1-regularized) regression estimator from scikit-learn\n",
+ "from sklearn.linear_model import Lasso\n",
+ "reg2 = Lasso(alpha=1.0)  # alpha=1.0 is the library default, written out explicitly"
+ ],
+ "metadata": {
+ "id": "QccbhNhLEEtq"
+ },
+ "execution_count": 35,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Train the Lasso model (reg2) on the training split.\n",
+ "reg2.fit(x_train, y_train)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "qYNgs9leEFkb",
+ "outputId": "0564c43b-2bf9-4efb-8850-e1fe79d42191"
+ },
+ "execution_count": 36,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "Lasso()"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 36
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Predict the target for the test-split features with the fitted Lasso model.\n",
+ "pred2 = reg2.predict(x_test)"
+ ],
+ "metadata": {
+ "id": "ul8NvlADEHxP"
+ },
+ "execution_count": 37,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "pred2"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "3BxHt-RPEKGM",
+ "outputId": "df944e4e-8f51-4b9b-97ca-c1c20c9caa9a"
+ },
+ "execution_count": 38,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "array([ 98481.63268423, 44030.60188057, 157304.38045446, 164612.05593362,\n",
+ " 151947.12473384, 56977.5034078 , 84207.08582321, 116886.10440022,\n",
+ " 184159.91077455, 129867.79195108])"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 38
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "### Ridge Regression"
+ ],
+ "metadata": {
+ "id": "6TjhWEG0HCQ7"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Ridge (L2-regularized) regression estimator from scikit-learn\n",
+ "from sklearn.linear_model import Ridge\n",
+ "reg3 = Ridge(alpha=1.0)  # alpha=1.0 is the library default, written out explicitly"
+ ],
+ "metadata": {
+ "id": "eiuI077XEMF2"
+ },
+ "execution_count": 39,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Train the Ridge model (reg3) on the training split.\n",
+ "reg3.fit(x_train, y_train)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "Do_IVkEKEO9i",
+ "outputId": "df33f62c-5e0f-4dca-be69-876662879aa2"
+ },
+ "execution_count": 40,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "Ridge()"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 40
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Predict the target for the test-split features with the fitted Ridge model.\n",
+ "pred3= reg3.predict(x_test)\n"
+ ],
+ "metadata": {
+ "id": "DL4ooUm3ETQn"
+ },
+ "execution_count": 41,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "pred3"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "hQ_UYvYvEYfs",
+ "outputId": "2451ab9f-0d32-4a56-c2c9-06efc590f7db"
+ },
+ "execution_count": 42,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "array([ 98402.18973791, 44116.59986663, 157204.03565538, 164655.26881854,\n",
+ " 152009.91085477, 57060.68872438, 84122.19767986, 116963.88574768,\n",
+ " 184158.12254095, 129936.98380266])"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 42
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## **Performance Check**"
+ ],
+ "metadata": {
+ "id": "AqY21cEQHNqg"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import numpy as np\n",
+ "from sklearn.metrics import mean_squared_error\n",
+ "# RMSE and R2 are both computed on the held-out test split; a score taken on the training split says nothing about unseen data.\n",
+ "print(\"Model\\t\\t\\t RootMeanSquareError \\t\\t R2 score on test data\")\n",
+ "for model_name, model, predictions in ((\"Linear Regression\", reg1, pred1), (\"Lasso Regression\", reg2, pred2), (\"Ridge Regression\", reg3, pred3)):\n",
+ "    print(\"{} \\t\\t {:.4f} \\t \\t\\t {:.4f}\".format(model_name, np.sqrt(mean_squared_error(y_test, predictions)), model.score(x_test, y_test)))"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "k1b5N9rlEZVB",
+ "outputId": "d5ccb24d-e358-4437-cf7e-ab7d690e9466"
+ },
+ "execution_count": 43,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Model\t\t\t RootMeanSquareError \t\t Accuracy of the model\n",
+ "Linear Regression \t\t 9085.1958 \t \t\t 0.9487\n",
+ "Lasso Regression \t\t 9083.8880 \t \t\t 0.9487\n",
+ "Ridge Regression \t\t 9052.1743 \t \t\t 0.9487\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "#### **Conclusion**\n",
+ "* All three regression algorithms used in this project (Linear, Lasso and Ridge) perform almost identically on the given dataset.\n",
+ "* Ridge Regression has the lowest RMSE of the three, though only by a small margin."
+ ],
+ "metadata": {
+ "id": "I4INbffZHWFz"
+ }
+ }
+ ]
+}
\ No newline at end of file