diff --git a/TechChallange.ipynb b/TechChallange.ipynb
new file mode 100644
index 0000000..9975750
--- /dev/null
+++ b/TechChallange.ipynb
@@ -0,0 +1,735 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "id": "9712600f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import seaborn as sns\n",
+ "import matplotlib.pyplot as plt\n",
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "94aefe46",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data = ('data/measurements.csv')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "e4ddf56b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.read_csv(data)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "69a6679e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " distance | \n",
+ " consume | \n",
+ " speed | \n",
+ " temp_inside | \n",
+ " temp_outside | \n",
+ " specials | \n",
+ " gas_type | \n",
+ " AC | \n",
+ " rain | \n",
+ " sun | \n",
+ " refill liters | \n",
+ " refill gas | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 28 | \n",
+ " 5 | \n",
+ " 26 | \n",
+ " 21,5 | \n",
+ " 12 | \n",
+ " NaN | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 45 | \n",
+ " E10 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 12 | \n",
+ " 4,2 | \n",
+ " 30 | \n",
+ " 21,5 | \n",
+ " 13 | \n",
+ " NaN | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 11,2 | \n",
+ " 5,5 | \n",
+ " 38 | \n",
+ " 21,5 | \n",
+ " 15 | \n",
+ " NaN | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 12,9 | \n",
+ " 3,9 | \n",
+ " 36 | \n",
+ " 21,5 | \n",
+ " 14 | \n",
+ " NaN | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 18,5 | \n",
+ " 4,5 | \n",
+ " 46 | \n",
+ " 21,5 | \n",
+ " 15 | \n",
+ " NaN | \n",
+ " E10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " distance consume speed temp_inside temp_outside specials gas_type AC \\\n",
+ "0 28 5 26 21,5 12 NaN E10 0 \n",
+ "1 12 4,2 30 21,5 13 NaN E10 0 \n",
+ "2 11,2 5,5 38 21,5 15 NaN E10 0 \n",
+ "3 12,9 3,9 36 21,5 14 NaN E10 0 \n",
+ "4 18,5 4,5 46 21,5 15 NaN E10 0 \n",
+ "\n",
+ " rain sun refill liters refill gas \n",
+ "0 0 0 45 E10 \n",
+ "1 0 0 NaN NaN \n",
+ "2 0 0 NaN NaN \n",
+ "3 0 0 NaN NaN \n",
+ "4 0 0 NaN NaN "
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "548f1460",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " speed | \n",
+ " temp_outside | \n",
+ " AC | \n",
+ " rain | \n",
+ " sun | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 388.000000 | \n",
+ " 388.000000 | \n",
+ " 388.000000 | \n",
+ " 388.000000 | \n",
+ " 388.000000 | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " 41.927835 | \n",
+ " 11.358247 | \n",
+ " 0.077320 | \n",
+ " 0.123711 | \n",
+ " 0.082474 | \n",
+ "
\n",
+ " \n",
+ " std | \n",
+ " 13.598524 | \n",
+ " 6.991542 | \n",
+ " 0.267443 | \n",
+ " 0.329677 | \n",
+ " 0.275441 | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " 14.000000 | \n",
+ " -5.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " 25% | \n",
+ " 32.750000 | \n",
+ " 7.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " 50% | \n",
+ " 40.500000 | \n",
+ " 10.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " 75% | \n",
+ " 50.000000 | \n",
+ " 16.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " 90.000000 | \n",
+ " 31.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " speed temp_outside AC rain sun\n",
+ "count 388.000000 388.000000 388.000000 388.000000 388.000000\n",
+ "mean 41.927835 11.358247 0.077320 0.123711 0.082474\n",
+ "std 13.598524 6.991542 0.267443 0.329677 0.275441\n",
+ "min 14.000000 -5.000000 0.000000 0.000000 0.000000\n",
+ "25% 32.750000 7.000000 0.000000 0.000000 0.000000\n",
+ "50% 40.500000 10.000000 0.000000 0.000000 0.000000\n",
+ "75% 50.000000 16.000000 0.000000 0.000000 0.000000\n",
+ "max 90.000000 31.000000 1.000000 1.000000 1.000000"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.describe()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "1b6d4f28",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(388, 12)"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "ef497cc5",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['speed', 'temp_outside', 'AC', 'rain', 'sun']"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "numeric_cols = df.select_dtypes(include=\"number\").columns.tolist()\n",
+ "numeric_cols"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "35ce9772",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['distance',\n",
+ " 'consume',\n",
+ " 'temp_inside',\n",
+ " 'specials',\n",
+ " 'gas_type',\n",
+ " 'refill liters',\n",
+ " 'refill gas']"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "categorical_cols = df.select_dtypes(exclude=\"number\").columns.tolist()\n",
+ "categorical_cols"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "dfdef80a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "distance 0\n",
+ "consume 0\n",
+ "speed 0\n",
+ "temp_inside 12\n",
+ "temp_outside 0\n",
+ "specials 295\n",
+ "gas_type 0\n",
+ "AC 0\n",
+ "rain 0\n",
+ "sun 0\n",
+ "refill liters 375\n",
+ "refill gas 375\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.isnull().sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "dacd29e1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# drop specials column\n",
+ "df.drop(\"specials\", axis=1, inplace=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "1c139961",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 21\n",
+ "1 21\n",
+ "2 21\n",
+ "3 21\n",
+ "4 21\n",
+ " ..\n",
+ "383 24\n",
+ "384 25\n",
+ "385 25\n",
+ "386 25\n",
+ "387 25\n",
+ "Name: temp_inside, Length: 388, dtype: object"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# truncate temp_inside strings to their first two characters (e.g. \"21,5\" -> \"21\"), dropping the decimal part; the column stays dtype object\n",
+ "df[\"temp_inside\"] = df[\"temp_inside\"].map('{:.2}'.format)\n",
+ "df[\"temp_inside\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "60058be5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# convert temp_inside to numeric, fill the missing values with the column mean, then cast to int\n",
+ "df[\"temp_inside\"] = pd.to_numeric(df[\"temp_inside\"],errors=\"coerce\").fillna(df[\"temp_inside\"].astype(float).mean()).astype(int)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "54db1037",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 21\n",
+ "1 21\n",
+ "2 21\n",
+ "3 21\n",
+ "4 21\n",
+ " ..\n",
+ "383 24\n",
+ "384 25\n",
+ "385 25\n",
+ "386 25\n",
+ "387 25\n",
+ "Name: temp_inside, Length: 388, dtype: int32"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df[\"temp_inside\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "0a0b36fe",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# replace the decimal comma \",\" with \".\" in distance, consume and refill liters\n",
+ "df[\"distance\"]=df[\"distance\"].str.replace(',', '.')\n",
+ "df[\"consume\"]=df[\"consume\"].str.replace(',', '.')\n",
+ "df[\"refill liters\"]=df[\"refill liters\"].str.replace(',', '.')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "610110c7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# convert to float\n",
+ "df[\"distance\"]=df[\"distance\"].astype(float)\n",
+ "df[\"consume\"]=df[\"consume\"].astype(float)\n",
+ "df[\"refill liters\"]=df[\"refill liters\"].astype(float)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "35429a16",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ "