From baf5015fa156b876ca4d3b74322146cb36e73113 Mon Sep 17 00:00:00 2001 From: Ananya Gupta <145869907+ananyag309@users.noreply.github.com> Date: Wed, 7 Aug 2024 00:02:49 +0530 Subject: [PATCH] Add files via upload --- .../Startup Profit Prediction/50_Startups.csv | 51 + .../Startup Profit Prediction/README.md | 45 + .../Startup_Profit_Prediction.ipynb | 3313 +++++++++++++++++ 3 files changed, 3409 insertions(+) create mode 100644 Finacial Domain/Indian Startup Funding analysis/Startup Profit Prediction/50_Startups.csv create mode 100644 Finacial Domain/Indian Startup Funding analysis/Startup Profit Prediction/README.md create mode 100644 Finacial Domain/Indian Startup Funding analysis/Startup Profit Prediction/Startup_Profit_Prediction.ipynb diff --git a/Finacial Domain/Indian Startup Funding analysis/Startup Profit Prediction/50_Startups.csv b/Finacial Domain/Indian Startup Funding analysis/Startup Profit Prediction/50_Startups.csv new file mode 100644 index 00000000..14ffb860 --- /dev/null +++ b/Finacial Domain/Indian Startup Funding analysis/Startup Profit Prediction/50_Startups.csv @@ -0,0 +1,51 @@ +R&D Spend,Administration,Marketing Spend,State,Profit +165349.2,136897.8,471784.1,New York,192261.83 +162597.7,151377.59,443898.53,California,191792.06 +153441.51,101145.55,407934.54,Florida,191050.39 +144372.41,118671.85,383199.62,New York,182901.99 +142107.34,91391.77,366168.42,Florida,166187.94 +131876.9,99814.71,362861.36,New York,156991.12 +134615.46,147198.87,127716.82,California,156122.51 +130298.13,145530.06,323876.68,Florida,155752.6 +120542.52,148718.95,311613.29,New York,152211.77 +123334.88,108679.17,304981.62,California,149759.96 +101913.08,110594.11,229160.95,Florida,146121.95 +100671.96,91790.61,249744.55,California,144259.4 +93863.75,127320.38,249839.44,Florida,141585.52 +91992.39,135495.07,252664.93,California,134307.35 +119943.24,156547.42,256512.92,Florida,132602.65 +114523.61,122616.84,261776.23,New York,129917.04 
+78013.11,121597.55,264346.06,California,126992.93 +94657.16,145077.58,282574.31,New York,125370.37 +91749.16,114175.79,294919.57,Florida,124266.9 +86419.7,153514.11,0,New York,122776.86 +76253.86,113867.3,298664.47,California,118474.03 +78389.47,153773.43,299737.29,New York,111313.02 +73994.56,122782.75,303319.26,Florida,110352.25 +67532.53,105751.03,304768.73,Florida,108733.99 +77044.01,99281.34,140574.81,New York,108552.04 +64664.71,139553.16,137962.62,California,107404.34 +75328.87,144135.98,134050.07,Florida,105733.54 +72107.6,127864.55,353183.81,New York,105008.31 +66051.52,182645.56,118148.2,Florida,103282.38 +65605.48,153032.06,107138.38,New York,101004.64 +61994.48,115641.28,91131.24,Florida,99937.59 +61136.38,152701.92,88218.23,New York,97483.56 +63408.86,129219.61,46085.25,California,97427.84 +55493.95,103057.49,214634.81,Florida,96778.92 +46426.07,157693.92,210797.67,California,96712.8 +46014.02,85047.44,205517.64,New York,96479.51 +28663.76,127056.21,201126.82,Florida,90708.19 +44069.95,51283.14,197029.42,California,89949.14 +20229.59,65947.93,185265.1,New York,81229.06 +38558.51,82982.09,174999.3,California,81005.76 +28754.33,118546.05,172795.67,California,78239.91 +27892.92,84710.77,164470.71,Florida,77798.83 +23640.93,96189.63,148001.11,California,71498.49 +15505.73,127382.3,35534.17,New York,69758.98 +22177.74,154806.14,28334.72,California,65200.33 +1000.23,124153.04,1903.93,New York,64926.08 +1315.46,115816.21,297114.46,Florida,49490.75 +0,135426.92,0,California,42559.73 +542.05,51743.15,0,New York,35673.41 +0,116983.8,45173.06,California,14681.4 \ No newline at end of file diff --git a/Finacial Domain/Indian Startup Funding analysis/Startup Profit Prediction/README.md b/Finacial Domain/Indian Startup Funding analysis/Startup Profit Prediction/README.md new file mode 100644 index 00000000..a915cee9 --- /dev/null +++ b/Finacial Domain/Indian Startup Funding analysis/Startup Profit Prediction/README.md @@ -0,0 +1,45 @@ +## **Startup Profit 
Prediction** +**GOAL** + +The goal of this project is to analyse and predict the profit of a startup from features such as 'R&D Spend', 'Administration', 'Marketing Spend', 'State', etc. + +**DATASET** + +Dataset can be downloaded from https://www.kaggle.com/sonalisingh1411/startup50 + +**WHAT I HAVE DONE** +- Step 1: Data Exploration +- Step 2: Data Preparation +- Step 3: Data Training +- Step 4: Model Creation +- Step 5: Performance Check + + +**MODELS USED** +- Linear Regression +- Lasso Regression +- Ridge Regression + +**LIBRARIES NEEDED** +- pandas +- numpy +- sklearn (For data training, importing models and performance check) + +**Accuracy of different models used** +- By using Linear Regression model + ```python + Accuracy achieved : 94.87 + ``` + - By using Lasso Regression model + ```python + Accuracy achieved : 94.87 + ``` + - By using Ridge Regression model + ```python + Accuracy achieved : 94.87 + ``` + +**CONCLUSION** + +* All 3 regression algorithms used in this project perform equally well on the given dataset. 
+* RMSE for Ridge Regression is least \ No newline at end of file diff --git a/Finacial Domain/Indian Startup Funding analysis/Startup Profit Prediction/Startup_Profit_Prediction.ipynb b/Finacial Domain/Indian Startup Funding analysis/Startup Profit Prediction/Startup_Profit_Prediction.ipynb new file mode 100644 index 00000000..1ced84f3 --- /dev/null +++ b/Finacial Domain/Indian Startup Funding analysis/Startup Profit Prediction/Startup_Profit_Prediction.ipynb @@ -0,0 +1,3313 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "Startup_Profit_Prediction.ipynb", + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "## **Data Exploration**" + ], + "metadata": { + "id": "eDvsUsu5Id3m" + } + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "ewqbUJv1AQaS" + }, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "source": [ + "data=pd.read_csv('/content/50_Startups.csv')\n", + "data.head()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "OkMSu_Z7Aatz", + "outputId": "ca131c46-392a-4be0-ded0-467989be79de" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
R&D SpendAdministrationMarketing SpendStateProfit
0165349.20136897.80471784.10New York192261.83
1162597.70151377.59443898.53California191792.06
2153441.51101145.55407934.54Florida191050.39
3144372.41118671.85383199.62New York182901.99
4142107.3491391.77366168.42Florida166187.94
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ], + "text/plain": [ + " R&D Spend Administration Marketing Spend State Profit\n", + "0 165349.20 136897.80 471784.10 New York 192261.83\n", + "1 162597.70 151377.59 443898.53 California 191792.06\n", + "2 153441.51 101145.55 407934.54 Florida 191050.39\n", + "3 144372.41 118671.85 383199.62 New York 182901.99\n", + "4 142107.34 91391.77 366168.42 Florida 166187.94" + ] + }, + "metadata": {}, + "execution_count": 3 + } + ] + }, + { + "cell_type": "code", + "source": [ + "data.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "_A36EGFvAyfp", + "outputId": "92794687-844b-4f3a-b2be-9570561b6493" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(50, 5)" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "source": [ + "data.columns" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "KR3zRsslBBVj", + "outputId": "34d84f79-ff79-41ca-b42e-23721ecebf16" + }, + "execution_count": 7, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Index(['R&D Spend', 'Administration', 'Marketing Spend', 'State', 'Profit'], dtype='object')" + ] + }, + "metadata": {}, + "execution_count": 7 + } + ] + }, + { + "cell_type": "code", + "source": [ + "data.dtypes" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "CW0ecCtGBILi", + "outputId": "d3d47527-d1cf-4648-9152-f296839e9f04" + }, + "execution_count": 8, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "R&D Spend float64\n", + "Administration float64\n", + "Marketing Spend float64\n", + "State object\n", + "Profit float64\n", + "dtype: object" + ] + }, + "metadata": {}, + "execution_count": 8 + } + ] + }, + { + "cell_type": "code", + "source": [ + "data.info()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + 
}, + "id": "60EREBWYCu1s", + "outputId": "842eabe5-b6c6-44ed-d684-f4f254e8a69c" + }, + "execution_count": 15, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "RangeIndex: 50 entries, 0 to 49\n", + "Data columns (total 5 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 R&D Spend 50 non-null float64\n", + " 1 Administration 50 non-null float64\n", + " 2 Marketing Spend 50 non-null float64\n", + " 3 State 50 non-null object \n", + " 4 Profit 50 non-null float64\n", + "dtypes: float64(4), object(1)\n", + "memory usage: 2.1+ KB\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "data.describe()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 300 + }, + "id": "enn49lKCB7vw", + "outputId": "7687ffb7-2553-4f31-8c37-d68d692a0d89" + }, + "execution_count": 9, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
R&D SpendAdministrationMarketing SpendProfit
count50.00000050.00000050.00000050.000000
mean73721.615600121344.639600211025.097800112012.639200
std45902.25648228017.802755122290.31072640306.180338
min0.00000051283.1400000.00000014681.400000
25%39936.370000103730.875000129300.13250090138.902500
50%73051.080000122699.795000212716.240000107978.190000
75%101602.800000144842.180000299469.085000139765.977500
max165349.200000182645.560000471784.100000192261.830000
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ], + "text/plain": [ + " R&D Spend Administration Marketing Spend Profit\n", + "count 50.000000 50.000000 50.000000 50.000000\n", + "mean 73721.615600 121344.639600 211025.097800 112012.639200\n", + "std 45902.256482 28017.802755 122290.310726 40306.180338\n", + "min 0.000000 51283.140000 0.000000 14681.400000\n", + "25% 39936.370000 103730.875000 129300.132500 90138.902500\n", + "50% 73051.080000 122699.795000 212716.240000 107978.190000\n", + "75% 101602.800000 144842.180000 299469.085000 139765.977500\n", + "max 165349.200000 182645.560000 471784.100000 192261.830000" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## **Data Preparation**" + ], + "metadata": { + "id": "-L_2h_QtIkVd" + } + }, + { + "cell_type": "code", + "source": [ + "data.isnull().sum() #to check for any null/missing values." + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ec-HkrQpCAgY", + "outputId": "5fe2435f-7aff-4d86-ec0f-64c9b8f69e30" + }, + "execution_count": 10, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "R&D Spend 0\n", + "Administration 0\n", + "Marketing Spend 0\n", + "State 0\n", + "Profit 0\n", + "dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ] + }, + { + "cell_type": "code", + "source": [ + "data.isnull().sum().sum()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mWqKuJuhCFdl", + "outputId": "f1521180-11ac-4161-ac6e-99f4594ebf38" + }, + "execution_count": 11, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "code", + "source": [ + "data['State'].nunique()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "8U_WW5jNCfxE", + "outputId": "5ef09369-8943-4321-dd73-2fa822dbeaec" + }, + "execution_count": 13, 
+ "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "3" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ] + }, + { + "cell_type": "code", + "source": [ + "data['State'].unique()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Om248DvSCmMd", + "outputId": "66081326-6197-4cae-af09-58d72680756a" + }, + "execution_count": 14, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array(['New York', 'California', 'Florida'], dtype=object)" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# converting State column which is object datatype to int data type.\n", + "columns=['State']\n", + "data1=data[columns]\n", + "dummies=pd.get_dummies(data1,columns=['State'])\n", + "dummies" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "td3HIRArCowD", + "outputId": "30ff0355-55b2-4cbe-e918-b94131f23619" + }, + "execution_count": 18, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
State_CaliforniaState_FloridaState_New York
0001
1100
2010
3001
4010
5001
6100
7010
8001
9100
10010
11100
12010
13100
14010
15001
16100
17001
18010
19001
20100
21001
22010
23010
24001
25100
26010
27001
28010
29001
30010
31001
32100
33010
34100
35001
36010
37100
38001
39100
40100
41010
42100
43001
44100
45001
46010
47100
48001
49100
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ], + "text/plain": [ + " State_California State_Florida State_New York\n", + "0 0 0 1\n", + "1 1 0 0\n", + "2 0 1 0\n", + "3 0 0 1\n", + "4 0 1 0\n", + "5 0 0 1\n", + "6 1 0 0\n", + "7 0 1 0\n", + "8 0 0 1\n", + "9 1 0 0\n", + "10 0 1 0\n", + "11 1 0 0\n", + "12 0 1 0\n", + "13 1 0 0\n", + "14 0 1 0\n", + "15 0 0 1\n", + "16 1 0 0\n", + "17 0 0 1\n", + "18 0 1 0\n", + "19 0 0 1\n", + "20 1 0 0\n", + "21 0 0 1\n", + "22 0 1 0\n", + "23 0 1 0\n", + "24 0 0 1\n", + "25 1 0 0\n", + "26 0 1 0\n", + "27 0 0 1\n", + "28 0 1 0\n", + "29 0 0 1\n", + "30 0 1 0\n", + "31 0 0 1\n", + "32 1 0 0\n", + "33 0 1 0\n", + "34 1 0 0\n", + "35 0 0 1\n", + "36 0 1 0\n", + "37 1 0 0\n", + "38 0 0 1\n", + "39 1 0 0\n", + "40 1 0 0\n", + "41 0 1 0\n", + "42 1 0 0\n", + "43 0 0 1\n", + "44 1 0 0\n", + "45 0 0 1\n", + "46 0 1 0\n", + "47 1 0 0\n", + "48 0 0 1\n", + "49 1 0 0" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ] + }, + { + "cell_type": "code", + "source": [ + "mergeddata= pd.concat([data,dummies],axis='columns')\n", + "mergeddata" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "-4AyiyUYFy9M", + "outputId": "e6edd2d4-b0f5-47b2-b9db-9db7ffbfba1d" + }, + "execution_count": 19, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
R&D SpendAdministrationMarketing SpendStateProfitState_CaliforniaState_FloridaState_New York
0165349.20136897.80471784.10New York192261.83001
1162597.70151377.59443898.53California191792.06100
2153441.51101145.55407934.54Florida191050.39010
3144372.41118671.85383199.62New York182901.99001
4142107.3491391.77366168.42Florida166187.94010
5131876.9099814.71362861.36New York156991.12001
6134615.46147198.87127716.82California156122.51100
7130298.13145530.06323876.68Florida155752.60010
8120542.52148718.95311613.29New York152211.77001
9123334.88108679.17304981.62California149759.96100
10101913.08110594.11229160.95Florida146121.95010
11100671.9691790.61249744.55California144259.40100
1293863.75127320.38249839.44Florida141585.52010
1391992.39135495.07252664.93California134307.35100
14119943.24156547.42256512.92Florida132602.65010
15114523.61122616.84261776.23New York129917.04001
1678013.11121597.55264346.06California126992.93100
1794657.16145077.58282574.31New York125370.37001
1891749.16114175.79294919.57Florida124266.90010
1986419.70153514.110.00New York122776.86001
2076253.86113867.30298664.47California118474.03100
2178389.47153773.43299737.29New York111313.02001
2273994.56122782.75303319.26Florida110352.25010
2367532.53105751.03304768.73Florida108733.99010
2477044.0199281.34140574.81New York108552.04001
2564664.71139553.16137962.62California107404.34100
2675328.87144135.98134050.07Florida105733.54010
2772107.60127864.55353183.81New York105008.31001
2866051.52182645.56118148.20Florida103282.38010
2965605.48153032.06107138.38New York101004.64001
3061994.48115641.2891131.24Florida99937.59010
3161136.38152701.9288218.23New York97483.56001
3263408.86129219.6146085.25California97427.84100
3355493.95103057.49214634.81Florida96778.92010
3446426.07157693.92210797.67California96712.80100
3546014.0285047.44205517.64New York96479.51001
3628663.76127056.21201126.82Florida90708.19010
3744069.9551283.14197029.42California89949.14100
3820229.5965947.93185265.10New York81229.06001
3938558.5182982.09174999.30California81005.76100
4028754.33118546.05172795.67California78239.91100
4127892.9284710.77164470.71Florida77798.83010
4223640.9396189.63148001.11California71498.49100
4315505.73127382.3035534.17New York69758.98001
4422177.74154806.1428334.72California65200.33100
451000.23124153.041903.93New York64926.08001
461315.46115816.21297114.46Florida49490.75010
470.00135426.920.00California42559.73100
48542.0551743.150.00New York35673.41001
490.00116983.8045173.06California14681.40100
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ], + "text/plain": [ + " R&D Spend Administration Marketing Spend State Profit \\\n", + "0 165349.20 136897.80 471784.10 New York 192261.83 \n", + "1 162597.70 151377.59 443898.53 California 191792.06 \n", + "2 153441.51 101145.55 407934.54 Florida 191050.39 \n", + "3 144372.41 118671.85 383199.62 New York 182901.99 \n", + "4 142107.34 91391.77 366168.42 Florida 166187.94 \n", + "5 131876.90 99814.71 362861.36 New York 156991.12 \n", + "6 134615.46 147198.87 127716.82 California 156122.51 \n", + "7 130298.13 145530.06 323876.68 Florida 155752.60 \n", + "8 120542.52 148718.95 311613.29 New York 152211.77 \n", + "9 123334.88 108679.17 304981.62 California 149759.96 \n", + "10 101913.08 110594.11 229160.95 Florida 146121.95 \n", + "11 100671.96 91790.61 249744.55 California 144259.40 \n", + "12 93863.75 127320.38 249839.44 Florida 141585.52 \n", + "13 91992.39 135495.07 252664.93 California 134307.35 \n", + "14 119943.24 156547.42 256512.92 Florida 132602.65 \n", + "15 114523.61 122616.84 261776.23 New York 129917.04 \n", + "16 78013.11 121597.55 264346.06 California 126992.93 \n", + "17 94657.16 145077.58 282574.31 New York 125370.37 \n", + "18 91749.16 114175.79 294919.57 Florida 124266.90 \n", + "19 86419.70 153514.11 0.00 New York 122776.86 \n", + "20 76253.86 113867.30 298664.47 California 118474.03 \n", + "21 78389.47 153773.43 299737.29 New York 111313.02 \n", + "22 73994.56 122782.75 303319.26 Florida 110352.25 \n", + "23 67532.53 105751.03 304768.73 Florida 108733.99 \n", + "24 77044.01 99281.34 140574.81 New York 108552.04 \n", + "25 64664.71 139553.16 137962.62 California 107404.34 \n", + "26 75328.87 144135.98 134050.07 Florida 105733.54 \n", + "27 72107.60 127864.55 353183.81 New York 105008.31 \n", + "28 66051.52 182645.56 118148.20 Florida 103282.38 \n", + "29 65605.48 153032.06 107138.38 New York 101004.64 \n", + "30 61994.48 115641.28 91131.24 Florida 99937.59 \n", + "31 61136.38 152701.92 88218.23 New York 97483.56 \n", + "32 63408.86 
129219.61 46085.25 California 97427.84 \n", + "33 55493.95 103057.49 214634.81 Florida 96778.92 \n", + "34 46426.07 157693.92 210797.67 California 96712.80 \n", + "35 46014.02 85047.44 205517.64 New York 96479.51 \n", + "36 28663.76 127056.21 201126.82 Florida 90708.19 \n", + "37 44069.95 51283.14 197029.42 California 89949.14 \n", + "38 20229.59 65947.93 185265.10 New York 81229.06 \n", + "39 38558.51 82982.09 174999.30 California 81005.76 \n", + "40 28754.33 118546.05 172795.67 California 78239.91 \n", + "41 27892.92 84710.77 164470.71 Florida 77798.83 \n", + "42 23640.93 96189.63 148001.11 California 71498.49 \n", + "43 15505.73 127382.30 35534.17 New York 69758.98 \n", + "44 22177.74 154806.14 28334.72 California 65200.33 \n", + "45 1000.23 124153.04 1903.93 New York 64926.08 \n", + "46 1315.46 115816.21 297114.46 Florida 49490.75 \n", + "47 0.00 135426.92 0.00 California 42559.73 \n", + "48 542.05 51743.15 0.00 New York 35673.41 \n", + "49 0.00 116983.80 45173.06 California 14681.40 \n", + "\n", + " State_California State_Florida State_New York \n", + "0 0 0 1 \n", + "1 1 0 0 \n", + "2 0 1 0 \n", + "3 0 0 1 \n", + "4 0 1 0 \n", + "5 0 0 1 \n", + "6 1 0 0 \n", + "7 0 1 0 \n", + "8 0 0 1 \n", + "9 1 0 0 \n", + "10 0 1 0 \n", + "11 1 0 0 \n", + "12 0 1 0 \n", + "13 1 0 0 \n", + "14 0 1 0 \n", + "15 0 0 1 \n", + "16 1 0 0 \n", + "17 0 0 1 \n", + "18 0 1 0 \n", + "19 0 0 1 \n", + "20 1 0 0 \n", + "21 0 0 1 \n", + "22 0 1 0 \n", + "23 0 1 0 \n", + "24 0 0 1 \n", + "25 1 0 0 \n", + "26 0 1 0 \n", + "27 0 0 1 \n", + "28 0 1 0 \n", + "29 0 0 1 \n", + "30 0 1 0 \n", + "31 0 0 1 \n", + "32 1 0 0 \n", + "33 0 1 0 \n", + "34 1 0 0 \n", + "35 0 0 1 \n", + "36 0 1 0 \n", + "37 1 0 0 \n", + "38 0 0 1 \n", + "39 1 0 0 \n", + "40 1 0 0 \n", + "41 0 1 0 \n", + "42 1 0 0 \n", + "43 0 0 1 \n", + "44 1 0 0 \n", + "45 0 0 1 \n", + "46 0 1 0 \n", + "47 1 0 0 \n", + "48 0 0 1 \n", + "49 1 0 0 " + ] + }, + "metadata": {}, + "execution_count": 19 + } + ] + }, + { + "cell_type": "code", 
+ "source": [ + "newdata=mergeddata.drop(['State'],axis='columns')\n", + "newdata" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "DGDE3wKqF9nh", + "outputId": "e179dea2-61f2-42a6-f28a-695f21055a82" + }, + "execution_count": 23, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
R&D SpendAdministrationMarketing SpendProfitState_CaliforniaState_FloridaState_New York
0165349.20136897.80471784.10192261.83001
1162597.70151377.59443898.53191792.06100
2153441.51101145.55407934.54191050.39010
3144372.41118671.85383199.62182901.99001
4142107.3491391.77366168.42166187.94010
5131876.9099814.71362861.36156991.12001
6134615.46147198.87127716.82156122.51100
7130298.13145530.06323876.68155752.60010
8120542.52148718.95311613.29152211.77001
9123334.88108679.17304981.62149759.96100
10101913.08110594.11229160.95146121.95010
11100671.9691790.61249744.55144259.40100
1293863.75127320.38249839.44141585.52010
1391992.39135495.07252664.93134307.35100
14119943.24156547.42256512.92132602.65010
15114523.61122616.84261776.23129917.04001
1678013.11121597.55264346.06126992.93100
1794657.16145077.58282574.31125370.37001
1891749.16114175.79294919.57124266.90010
1986419.70153514.110.00122776.86001
2076253.86113867.30298664.47118474.03100
2178389.47153773.43299737.29111313.02001
2273994.56122782.75303319.26110352.25010
2367532.53105751.03304768.73108733.99010
2477044.0199281.34140574.81108552.04001
2564664.71139553.16137962.62107404.34100
2675328.87144135.98134050.07105733.54010
2772107.60127864.55353183.81105008.31001
2866051.52182645.56118148.20103282.38010
2965605.48153032.06107138.38101004.64001
3061994.48115641.2891131.2499937.59010
3161136.38152701.9288218.2397483.56001
3263408.86129219.6146085.2597427.84100
3355493.95103057.49214634.8196778.92010
3446426.07157693.92210797.6796712.80100
3546014.0285047.44205517.6496479.51001
3628663.76127056.21201126.8290708.19010
3744069.9551283.14197029.4289949.14100
3820229.5965947.93185265.1081229.06001
3938558.5182982.09174999.3081005.76100
4028754.33118546.05172795.6778239.91100
4127892.9284710.77164470.7177798.83010
4223640.9396189.63148001.1171498.49100
4315505.73127382.3035534.1769758.98001
4422177.74154806.1428334.7265200.33100
451000.23124153.041903.9364926.08001
461315.46115816.21297114.4649490.75010
470.00135426.920.0042559.73100
48542.0551743.150.0035673.41001
490.00116983.8045173.0614681.40100
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ], + "text/plain": [ + " R&D Spend Administration Marketing Spend Profit State_California \\\n", + "0 165349.20 136897.80 471784.10 192261.83 0 \n", + "1 162597.70 151377.59 443898.53 191792.06 1 \n", + "2 153441.51 101145.55 407934.54 191050.39 0 \n", + "3 144372.41 118671.85 383199.62 182901.99 0 \n", + "4 142107.34 91391.77 366168.42 166187.94 0 \n", + "5 131876.90 99814.71 362861.36 156991.12 0 \n", + "6 134615.46 147198.87 127716.82 156122.51 1 \n", + "7 130298.13 145530.06 323876.68 155752.60 0 \n", + "8 120542.52 148718.95 311613.29 152211.77 0 \n", + "9 123334.88 108679.17 304981.62 149759.96 1 \n", + "10 101913.08 110594.11 229160.95 146121.95 0 \n", + "11 100671.96 91790.61 249744.55 144259.40 1 \n", + "12 93863.75 127320.38 249839.44 141585.52 0 \n", + "13 91992.39 135495.07 252664.93 134307.35 1 \n", + "14 119943.24 156547.42 256512.92 132602.65 0 \n", + "15 114523.61 122616.84 261776.23 129917.04 0 \n", + "16 78013.11 121597.55 264346.06 126992.93 1 \n", + "17 94657.16 145077.58 282574.31 125370.37 0 \n", + "18 91749.16 114175.79 294919.57 124266.90 0 \n", + "19 86419.70 153514.11 0.00 122776.86 0 \n", + "20 76253.86 113867.30 298664.47 118474.03 1 \n", + "21 78389.47 153773.43 299737.29 111313.02 0 \n", + "22 73994.56 122782.75 303319.26 110352.25 0 \n", + "23 67532.53 105751.03 304768.73 108733.99 0 \n", + "24 77044.01 99281.34 140574.81 108552.04 0 \n", + "25 64664.71 139553.16 137962.62 107404.34 1 \n", + "26 75328.87 144135.98 134050.07 105733.54 0 \n", + "27 72107.60 127864.55 353183.81 105008.31 0 \n", + "28 66051.52 182645.56 118148.20 103282.38 0 \n", + "29 65605.48 153032.06 107138.38 101004.64 0 \n", + "30 61994.48 115641.28 91131.24 99937.59 0 \n", + "31 61136.38 152701.92 88218.23 97483.56 0 \n", + "32 63408.86 129219.61 46085.25 97427.84 1 \n", + "33 55493.95 103057.49 214634.81 96778.92 0 \n", + "34 46426.07 157693.92 210797.67 96712.80 1 \n", + "35 46014.02 85047.44 205517.64 96479.51 0 \n", + "36 28663.76 127056.21 
201126.82 90708.19 0 \n", + "37 44069.95 51283.14 197029.42 89949.14 1 \n", + "38 20229.59 65947.93 185265.10 81229.06 0 \n", + "39 38558.51 82982.09 174999.30 81005.76 1 \n", + "40 28754.33 118546.05 172795.67 78239.91 1 \n", + "41 27892.92 84710.77 164470.71 77798.83 0 \n", + "42 23640.93 96189.63 148001.11 71498.49 1 \n", + "43 15505.73 127382.30 35534.17 69758.98 0 \n", + "44 22177.74 154806.14 28334.72 65200.33 1 \n", + "45 1000.23 124153.04 1903.93 64926.08 0 \n", + "46 1315.46 115816.21 297114.46 49490.75 0 \n", + "47 0.00 135426.92 0.00 42559.73 1 \n", + "48 542.05 51743.15 0.00 35673.41 0 \n", + "49 0.00 116983.80 45173.06 14681.40 1 \n", + "\n", + " State_Florida State_New York \n", + "0 0 1 \n", + "1 0 0 \n", + "2 1 0 \n", + "3 0 1 \n", + "4 1 0 \n", + "5 0 1 \n", + "6 0 0 \n", + "7 1 0 \n", + "8 0 1 \n", + "9 0 0 \n", + "10 1 0 \n", + "11 0 0 \n", + "12 1 0 \n", + "13 0 0 \n", + "14 1 0 \n", + "15 0 1 \n", + "16 0 0 \n", + "17 0 1 \n", + "18 1 0 \n", + "19 0 1 \n", + "20 0 0 \n", + "21 0 1 \n", + "22 1 0 \n", + "23 1 0 \n", + "24 0 1 \n", + "25 0 0 \n", + "26 1 0 \n", + "27 0 1 \n", + "28 1 0 \n", + "29 0 1 \n", + "30 1 0 \n", + "31 0 1 \n", + "32 0 0 \n", + "33 1 0 \n", + "34 0 0 \n", + "35 0 1 \n", + "36 1 0 \n", + "37 0 0 \n", + "38 0 1 \n", + "39 0 0 \n", + "40 0 0 \n", + "41 1 0 \n", + "42 0 0 \n", + "43 0 1 \n", + "44 0 0 \n", + "45 0 1 \n", + "46 1 0 \n", + "47 0 0 \n", + "48 0 1 \n", + "49 0 0 " + ] + }, + "metadata": {}, + "execution_count": 23 + } + ] + }, + { + "cell_type": "code", + "source": [ + "#converting data into int datatype to avoid errors below.\n", + "prepareddata=newdata.astype(int)\n", + "prepareddata.head()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "pMCvSC7OCIVo", + "outputId": "d5a7d6a9-c7b5-456a-878b-8ab71ef0c9f5" + }, + "execution_count": 24, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
R&D SpendAdministrationMarketing SpendProfitState_CaliforniaState_FloridaState_New York
0165349136897471784192261001
1162597151377443898191792100
2153441101145407934191050010
3144372118671383199182901001
414210791391366168166187010
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ], + "text/plain": [ + " R&D Spend Administration Marketing Spend Profit State_California \\\n", + "0 165349 136897 471784 192261 0 \n", + "1 162597 151377 443898 191792 1 \n", + "2 153441 101145 407934 191050 0 \n", + "3 144372 118671 383199 182901 0 \n", + "4 142107 91391 366168 166187 0 \n", + "\n", + " State_Florida State_New York \n", + "0 0 1 \n", + "1 0 0 \n", + "2 1 0 \n", + "3 0 1 \n", + "4 1 0 " + ] + }, + "metadata": {}, + "execution_count": 24 + } + ] + }, + { + "cell_type": "code", + "source": [ + "prepareddata.columns" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "OwVA8SgXC977", + "outputId": "54fdd233-b40a-4b6b-c889-1a3eaf83608f" + }, + "execution_count": 27, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Index(['R&D Spend', 'Administration', 'Marketing Spend', 'Profit',\n", + " 'State_California', 'State_Florida', 'State_New York'],\n", + " dtype='object')" + ] + }, + "metadata": {}, + "execution_count": 27 + } + ] + }, + { + "cell_type": "code", + "source": [ + "prepareddata.info()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "2sDm8iXbGUY7", + "outputId": "e3d7cc94-84bc-49cc-b7a2-ef73bdfcb608" + }, + "execution_count": 28, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "RangeIndex: 50 entries, 0 to 49\n", + "Data columns (total 7 columns):\n", + " # Column Non-Null Count Dtype\n", + "--- ------ -------------- -----\n", + " 0 R&D Spend 50 non-null int64\n", + " 1 Administration 50 non-null int64\n", + " 2 Marketing Spend 50 non-null int64\n", + " 3 Profit 50 non-null int64\n", + " 4 State_California 50 non-null int64\n", + " 5 State_Florida 50 non-null int64\n", + " 6 State_New York 50 non-null int64\n", + "dtypes: int64(7)\n", + "memory usage: 2.9 KB\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## **Training Our Data**\n", + "\n" + ], + "metadata": { 
+ "id": "P41kwXp4JBH5" + } + }, + { + "cell_type": "code", + "source": [ + "# Import train_test_split from sklearn.model_selection\n", + "from sklearn.model_selection import train_test_split\n", + "# Here, x holds the feature columns and y holds the target column (Profit).\n", + "x=prepareddata[['R&D Spend', 'Administration', 'Marketing Spend','State_California', 'State_Florida', 'State_New York']] \n", + "y=prepareddata['Profit']" + ], + "metadata": { + "id": "MJa5GnxPCZaE" + }, + "execution_count": 29, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Split data into training data and testing data\n", + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2) \n", + "# test_size=0.2 gives an 80:20 split between training and testing data" + ], + "metadata": { + "id": "fDQcGTJXDAOX" + }, + "execution_count": 30, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## **Model Creation**" + ], + "metadata": { + "id": "l6vk_SdZGwPv" + } + }, + { + "cell_type": "markdown", + "source": [ + "### Linear Regression" + ], + "metadata": { + "id": "vD9AVlk4GsMm" + } + }, + { + "cell_type": "code", + "source": [ + "# Importing linear regression model\n", + "from sklearn.linear_model import LinearRegression \n", + "reg1 = LinearRegression()" + ], + "metadata": { + "id": "bc5Dk-TbDEAm" + }, + "execution_count": 31, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Fitting data into the model.\n", + "reg1.fit(x_train, y_train)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "-ntbFAnND5Yc", + "outputId": "aab48579-aad5-4dd2-ee8f-cd2ff0978c98" + }, + "execution_count": 32, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "LinearRegression()" + ] + }, + "metadata": {}, + "execution_count": 32 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Making predictions \n", + "pred1 = reg1.predict(x_test)" + ], + "metadata": { + "id":
"w11EfqAYD8Ae" + }, + "execution_count": 33, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "pred1" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mrGObL-WD-9x", + "outputId": "02a3e327-4c5f-464d-9595-4eec1ad37cb1" + }, + "execution_count": 34, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([ 98484.89210481, 44026.08455385, 157308.22696881, 164609.14315337,\n", + " 151943.20929796, 56973.06829596, 84210.34971486, 116881.35386378,\n", + " 184161.16574319, 129863.57642281])" + ] + }, + "metadata": {}, + "execution_count": 34 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Lasso Regression" + ], + "metadata": { + "id": "7sbXj2IRG7Sy" + } + }, + { + "cell_type": "code", + "source": [ + "# Importing model\n", + "from sklearn.linear_model import Lasso\n", + "reg2 = Lasso()" + ], + "metadata": { + "id": "QccbhNhLEEtq" + }, + "execution_count": 35, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Fitting data into the model.\n", + "reg2.fit(x_train, y_train)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "qYNgs9leEFkb", + "outputId": "0564c43b-2bf9-4efb-8850-e1fe79d42191" + }, + "execution_count": 36, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Lasso()" + ] + }, + "metadata": {}, + "execution_count": 36 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Making predictions \n", + "pred2 = reg2.predict(x_test)" + ], + "metadata": { + "id": "ul8NvlADEHxP" + }, + "execution_count": 37, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "pred2" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "3BxHt-RPEKGM", + "outputId": "df944e4e-8f51-4b9b-97ca-c1c20c9caa9a" + }, + "execution_count": 38, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([ 98481.63268423, 
44030.60188057, 157304.38045446, 164612.05593362,\n", + " 151947.12473384, 56977.5034078 , 84207.08582321, 116886.10440022,\n", + " 184159.91077455, 129867.79195108])" + ] + }, + "metadata": {}, + "execution_count": 38 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Ridge Regression" + ], + "metadata": { + "id": "6TjhWEG0HCQ7" + } + }, + { + "cell_type": "code", + "source": [ + "# Importing model\n", + "from sklearn.linear_model import Ridge\n", + "reg3 = Ridge()" + ], + "metadata": { + "id": "eiuI077XEMF2" + }, + "execution_count": 39, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Fitting data into the model.\n", + "reg3.fit(x_train, y_train)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Do_IVkEKEO9i", + "outputId": "df33f62c-5e0f-4dca-be69-876662879aa2" + }, + "execution_count": 40, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Ridge()" + ] + }, + "metadata": {}, + "execution_count": 40 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Making predictions \n", + "pred3= reg3.predict(x_test)\n" + ], + "metadata": { + "id": "DL4ooUm3ETQn" + }, + "execution_count": 41, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "pred3" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hQ_UYvYvEYfs", + "outputId": "2451ab9f-0d32-4a56-c2c9-06efc590f7db" + }, + "execution_count": 42, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([ 98402.18973791, 44116.59986663, 157204.03565538, 164655.26881854,\n", + " 152009.91085477, 57060.68872438, 84122.19767986, 116963.88574768,\n", + " 184158.12254095, 129936.98380266])" + ] + }, + "metadata": {}, + "execution_count": 42 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## **Performance Check**" + ], + "metadata": { + "id": "AqY21cEQHNqg" + } + }, + { + "cell_type": "code", + "source": [ + "import numpy 
as np\n", + "from sklearn.metrics import mean_squared_error\n", + "print(\"Model\\t\\t\\t RootMeanSquareError \\t\\t Accuracy of the model\") \n", + "print(\"\"\"Linear Regression \\t\\t {:.4f} \\t \\t\\t {:.4f}\"\"\".format( np.sqrt(mean_squared_error(y_test, pred1)), reg1.score(x_train,y_train)))\n", + "print(\"\"\"Lasso Regression \\t\\t {:.4f} \\t \\t\\t {:.4f}\"\"\".format( np.sqrt(mean_squared_error(y_test, pred2)), reg2.score(x_train,y_train)))\n", + "print(\"\"\"Ridge Regression \\t\\t {:.4f} \\t \\t\\t {:.4f}\"\"\".format( np.sqrt(mean_squared_error(y_test, pred3)), reg3.score(x_train,y_train)))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "k1b5N9rlEZVB", + "outputId": "d5ccb24d-e358-4437-cf7e-ab7d690e9466" + }, + "execution_count": 43, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Model\t\t\t RootMeanSquareError \t\t Accuracy of the model\n", + "Linear Regression \t\t 9085.1958 \t \t\t 0.9487\n", + "Lasso Regression \t\t 9083.8880 \t \t\t 0.9487\n", + "Ridge Regression \t\t 9052.1743 \t \t\t 0.9487\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "#### **Conclusion**\n", + "* All three regression models used in this project (Linear, Lasso and Ridge) perform almost identically on the given dataset: each reports a score of 0.9487, which is the R-squared value computed on the training split rather than a classification accuracy.\n", + "* Ridge Regression has the lowest test-set RMSE (9052.17), but its margin over Lasso (9083.89) and Linear Regression (9085.20) is small." + ], + "metadata": { + "id": "I4INbffZHWFz" + } + } + ] +} \ No newline at end of file