From 68d5c09bd7b8e73230f69dfed142ab1aa15c13c4 Mon Sep 17 00:00:00 2001
From: B L A C K F U R Y <93466451+div-hacks@users.noreply.github.com>
Date: Sat, 14 Oct 2023 12:44:51 +0530
Subject: [PATCH] pandas dataframe exercises
---
pandas/dataframe2.ipynb | 30 ++
pandas/dataframes1.ipynb | 976 +++++++++++++++++++++++++++++++++++++++
2 files changed, 1006 insertions(+)
create mode 100644 pandas/dataframe2.ipynb
create mode 100644 pandas/dataframes1.ipynb
diff --git a/pandas/dataframe2.ipynb b/pandas/dataframe2.ipynb
new file mode 100644
index 0000000..6b6ba06
--- /dev/null
+++ b/pandas/dataframe2.ipynb
@@ -0,0 +1,30 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "venv_algo_bulls",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "name": "python",
+ "version": "3.9.6 (tags/v3.9.6:db3ff76, Jun 28 2021, 15:26:21) [MSC v.1929 64 bit (AMD64)]"
+ },
+ "orig_nbformat": 4,
+ "vscode": {
+ "interpreter": {
+ "hash": "addff6cb5044c101bf79387aa2799290f36a7993ca657c8e96968a170b3cd553"
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/pandas/dataframes1.ipynb b/pandas/dataframes1.ipynb
new file mode 100644
index 0000000..d17c6c3
--- /dev/null
+++ b/pandas/dataframes1.ipynb
@@ -0,0 +1,976 @@
+{
+ "cells": [
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Pandas - DataFrames"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from numpy.random import randn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "np.random.seed(101)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.DataFrame(data=randn(5,4), index=['a','b','c','d','e'], columns=['w', 'x', 'y', 'z'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
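+ "<div>\n",
+ "<style scoped>\n",
+ "    .dataframe tbody tr th:only-of-type {\n",
+ "        vertical-align: middle;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe tbody tr th {\n",
+ "        vertical-align: top;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe thead th {\n",
+ "        text-align: right;\n",
+ "    }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ "    <tr style=\"text-align: right;\">\n",
+ "      <th></th>\n",
+ "      <th>w</th>\n",
+ "      <th>x</th>\n",
+ "      <th>y</th>\n",
+ "      <th>z</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "    <tr>\n",
+ "      <th>a</th>\n",
+ "      <td>2.706850</td>\n",
+ "      <td>0.628133</td>\n",
+ "      <td>0.907969</td>\n",
+ "      <td>0.503826</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>b</th>\n",
+ "      <td>0.651118</td>\n",
+ "      <td>-0.319318</td>\n",
+ "      <td>-0.848077</td>\n",
+ "      <td>0.605965</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>c</th>\n",
+ "      <td>-2.018168</td>\n",
+ "      <td>0.740122</td>\n",
+ "      <td>0.528813</td>\n",
+ "      <td>-0.589001</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>d</th>\n",
+ "      <td>0.188695</td>\n",
+ "      <td>-0.758872</td>\n",
+ "      <td>-0.933237</td>\n",
+ "      <td>0.955057</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>e</th>\n",
+ "      <td>0.190794</td>\n",
+ "      <td>1.978757</td>\n",
+ "      <td>2.605967</td>\n",
+ "      <td>0.683509</td>\n",
+ "    </tr>\n",
+ "  </tbody>\n",
+ "</table>\n",
+ "</div>"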
+ ],
+ "text/plain": [
+ " w x y z\n",
+ "a 2.706850 0.628133 0.907969 0.503826\n",
+ "b 0.651118 -0.319318 -0.848077 0.605965\n",
+ "c -2.018168 0.740122 0.528813 -0.589001\n",
+ "d 0.188695 -0.758872 -0.933237 0.955057\n",
+ "e 0.190794 1.978757 2.605967 0.683509"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "a 2.706850\n",
+ "b 0.651118\n",
+ "c -2.018168\n",
+ "d 0.188695\n",
+ "e 0.190794\n",
+ "Name: w, dtype: float64"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df['w']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "pandas.core.frame.DataFrame"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "type(df)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "a 2.706850\n",
+ "b 0.651118\n",
+ "c -2.018168\n",
+ "d 0.188695\n",
+ "e 0.190794\n",
+ "Name: w, dtype: float64"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.w  # avoid attribute access: it fails for column names that clash with DataFrame attributes/methods; prefer df['w']\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "a 0.628133\n",
+ "b -0.319318\n",
+ "c 0.740122\n",
+ "d -0.758872\n",
+ "e 1.978757\n",
+ "Name: x, dtype: float64"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df['x']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
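+ "<div>\n",
+ "<style scoped>\n",
+ "    .dataframe tbody tr th:only-of-type {\n",
+ "        vertical-align: middle;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe tbody tr th {\n",
+ "        vertical-align: top;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe thead th {\n",
+ "        text-align: right;\n",
+ "    }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ "    <tr style=\"text-align: right;\">\n",
+ "      <th></th>\n",
+ "      <th>w</th>\n",
+ "      <th>z</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "    <tr>\n",
+ "      <th>a</th>\n",
+ "      <td>2.706850</td>\n",
+ "      <td>0.503826</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>b</th>\n",
+ "      <td>0.651118</td>\n",
+ "      <td>0.605965</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>c</th>\n",
+ "      <td>-2.018168</td>\n",
+ "      <td>-0.589001</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>d</th>\n",
+ "      <td>0.188695</td>\n",
+ "      <td>0.955057</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>e</th>\n",
+ "      <td>0.190794</td>\n",
+ "      <td>0.683509</td>\n",
+ "    </tr>\n",
+ "  </tbody>\n",
+ "</table>\n",
+ "</div>"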
+ ],
+ "text/plain": [
+ " w z\n",
+ "a 2.706850 0.503826\n",
+ "b 0.651118 0.605965\n",
+ "c -2.018168 -0.589001\n",
+ "d 0.188695 0.955057\n",
+ "e 0.190794 0.683509"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df[['w','z']]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df['new'] = df['w'] + df['y']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
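+ "<div>\n",
+ "<style scoped>\n",
+ "    .dataframe tbody tr th:only-of-type {\n",
+ "        vertical-align: middle;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe tbody tr th {\n",
+ "        vertical-align: top;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe thead th {\n",
+ "        text-align: right;\n",
+ "    }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ "    <tr style=\"text-align: right;\">\n",
+ "      <th></th>\n",
+ "      <th>w</th>\n",
+ "      <th>x</th>\n",
+ "      <th>y</th>\n",
+ "      <th>z</th>\n",
+ "      <th>new</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "    <tr>\n",
+ "      <th>a</th>\n",
+ "      <td>2.706850</td>\n",
+ "      <td>0.628133</td>\n",
+ "      <td>0.907969</td>\n",
+ "      <td>0.503826</td>\n",
+ "      <td>3.614819</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>b</th>\n",
+ "      <td>0.651118</td>\n",
+ "      <td>-0.319318</td>\n",
+ "      <td>-0.848077</td>\n",
+ "      <td>0.605965</td>\n",
+ "      <td>-0.196959</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>c</th>\n",
+ "      <td>-2.018168</td>\n",
+ "      <td>0.740122</td>\n",
+ "      <td>0.528813</td>\n",
+ "      <td>-0.589001</td>\n",
+ "      <td>-1.489355</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>d</th>\n",
+ "      <td>0.188695</td>\n",
+ "      <td>-0.758872</td>\n",
+ "      <td>-0.933237</td>\n",
+ "      <td>0.955057</td>\n",
+ "      <td>-0.744542</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>e</th>\n",
+ "      <td>0.190794</td>\n",
+ "      <td>1.978757</td>\n",
+ "      <td>2.605967</td>\n",
+ "      <td>0.683509</td>\n",
+ "      <td>2.796762</td>\n",
+ "    </tr>\n",
+ "  </tbody>\n",
+ "</table>\n",
+ "</div>"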
+ ],
+ "text/plain": [
+ " w x y z new\n",
+ "a 2.706850 0.628133 0.907969 0.503826 3.614819\n",
+ "b 0.651118 -0.319318 -0.848077 0.605965 -0.196959\n",
+ "c -2.018168 0.740122 0.528813 -0.589001 -1.489355\n",
+ "d 0.188695 -0.758872 -0.933237 0.955057 -0.744542\n",
+ "e 0.190794 1.978757 2.605967 0.683509 2.796762"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
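+ "<div>\n",
+ "<style scoped>\n",
+ "    .dataframe tbody tr th:only-of-type {\n",
+ "        vertical-align: middle;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe tbody tr th {\n",
+ "        vertical-align: top;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe thead th {\n",
+ "        text-align: right;\n",
+ "    }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ "    <tr style=\"text-align: right;\">\n",
+ "      <th></th>\n",
+ "      <th>w</th>\n",
+ "      <th>x</th>\n",
+ "      <th>y</th>\n",
+ "      <th>z</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "    <tr>\n",
+ "      <th>a</th>\n",
+ "      <td>2.706850</td>\n",
+ "      <td>0.628133</td>\n",
+ "      <td>0.907969</td>\n",
+ "      <td>0.503826</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>b</th>\n",
+ "      <td>0.651118</td>\n",
+ "      <td>-0.319318</td>\n",
+ "      <td>-0.848077</td>\n",
+ "      <td>0.605965</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>c</th>\n",
+ "      <td>-2.018168</td>\n",
+ "      <td>0.740122</td>\n",
+ "      <td>0.528813</td>\n",
+ "      <td>-0.589001</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>d</th>\n",
+ "      <td>0.188695</td>\n",
+ "      <td>-0.758872</td>\n",
+ "      <td>-0.933237</td>\n",
+ "      <td>0.955057</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>e</th>\n",
+ "      <td>0.190794</td>\n",
+ "      <td>1.978757</td>\n",
+ "      <td>2.605967</td>\n",
+ "      <td>0.683509</td>\n",
+ "    </tr>\n",
+ "  </tbody>\n",
+ "</table>\n",
+ "</div>"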
+ ],
+ "text/plain": [
+ " w x y z\n",
+ "a 2.706850 0.628133 0.907969 0.503826\n",
+ "b 0.651118 -0.319318 -0.848077 0.605965\n",
+ "c -2.018168 0.740122 0.528813 -0.589001\n",
+ "d 0.188695 -0.758872 -0.933237 0.955057\n",
+ "e 0.190794 1.978757 2.605967 0.683509"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.drop('new', axis=1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
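+ "<div>\n",
+ "<style scoped>\n",
+ "    .dataframe tbody tr th:only-of-type {\n",
+ "        vertical-align: middle;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe tbody tr th {\n",
+ "        vertical-align: top;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe thead th {\n",
+ "        text-align: right;\n",
+ "    }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ "    <tr style=\"text-align: right;\">\n",
+ "      <th></th>\n",
+ "      <th>w</th>\n",
+ "      <th>x</th>\n",
+ "      <th>y</th>\n",
+ "      <th>z</th>\n",
+ "      <th>new</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "    <tr>\n",
+ "      <th>a</th>\n",
+ "      <td>2.706850</td>\n",
+ "      <td>0.628133</td>\n",
+ "      <td>0.907969</td>\n",
+ "      <td>0.503826</td>\n",
+ "      <td>3.614819</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>b</th>\n",
+ "      <td>0.651118</td>\n",
+ "      <td>-0.319318</td>\n",
+ "      <td>-0.848077</td>\n",
+ "      <td>0.605965</td>\n",
+ "      <td>-0.196959</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>c</th>\n",
+ "      <td>-2.018168</td>\n",
+ "      <td>0.740122</td>\n",
+ "      <td>0.528813</td>\n",
+ "      <td>-0.589001</td>\n",
+ "      <td>-1.489355</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>d</th>\n",
+ "      <td>0.188695</td>\n",
+ "      <td>-0.758872</td>\n",
+ "      <td>-0.933237</td>\n",
+ "      <td>0.955057</td>\n",
+ "      <td>-0.744542</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>e</th>\n",
+ "      <td>0.190794</td>\n",
+ "      <td>1.978757</td>\n",
+ "      <td>2.605967</td>\n",
+ "      <td>0.683509</td>\n",
+ "      <td>2.796762</td>\n",
+ "    </tr>\n",
+ "  </tbody>\n",
+ "</table>\n",
+ "</div>"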
+ ],
+ "text/plain": [
+ " w x y z new\n",
+ "a 2.706850 0.628133 0.907969 0.503826 3.614819\n",
+ "b 0.651118 -0.319318 -0.848077 0.605965 -0.196959\n",
+ "c -2.018168 0.740122 0.528813 -0.589001 -1.489355\n",
+ "d 0.188695 -0.758872 -0.933237 0.955057 -0.744542\n",
+ "e 0.190794 1.978757 2.605967 0.683509 2.796762"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df.drop('new', axis=1, inplace=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
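+ "<div>\n",
+ "<style scoped>\n",
+ "    .dataframe tbody tr th:only-of-type {\n",
+ "        vertical-align: middle;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe tbody tr th {\n",
+ "        vertical-align: top;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe thead th {\n",
+ "        text-align: right;\n",
+ "    }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ "    <tr style=\"text-align: right;\">\n",
+ "      <th></th>\n",
+ "      <th>w</th>\n",
+ "      <th>x</th>\n",
+ "      <th>y</th>\n",
+ "      <th>z</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "    <tr>\n",
+ "      <th>a</th>\n",
+ "      <td>2.706850</td>\n",
+ "      <td>0.628133</td>\n",
+ "      <td>0.907969</td>\n",
+ "      <td>0.503826</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>b</th>\n",
+ "      <td>0.651118</td>\n",
+ "      <td>-0.319318</td>\n",
+ "      <td>-0.848077</td>\n",
+ "      <td>0.605965</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>c</th>\n",
+ "      <td>-2.018168</td>\n",
+ "      <td>0.740122</td>\n",
+ "      <td>0.528813</td>\n",
+ "      <td>-0.589001</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>d</th>\n",
+ "      <td>0.188695</td>\n",
+ "      <td>-0.758872</td>\n",
+ "      <td>-0.933237</td>\n",
+ "      <td>0.955057</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>e</th>\n",
+ "      <td>0.190794</td>\n",
+ "      <td>1.978757</td>\n",
+ "      <td>2.605967</td>\n",
+ "      <td>0.683509</td>\n",
+ "    </tr>\n",
+ "  </tbody>\n",
+ "</table>\n",
+ "</div>"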
+ ],
+ "text/plain": [
+ " w x y z\n",
+ "a 2.706850 0.628133 0.907969 0.503826\n",
+ "b 0.651118 -0.319318 -0.848077 0.605965\n",
+ "c -2.018168 0.740122 0.528813 -0.589001\n",
+ "d 0.188695 -0.758872 -0.933237 0.955057\n",
+ "e 0.190794 1.978757 2.605967 0.683509"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(5, 4)"
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
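+ "<div>\n",
+ "<style scoped>\n",
+ "    .dataframe tbody tr th:only-of-type {\n",
+ "        vertical-align: middle;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe tbody tr th {\n",
+ "        vertical-align: top;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe thead th {\n",
+ "        text-align: right;\n",
+ "    }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ "    <tr style=\"text-align: right;\">\n",
+ "      <th></th>\n",
+ "      <th>z</th>\n",
+ "      <th>x</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "    <tr>\n",
+ "      <th>a</th>\n",
+ "      <td>0.503826</td>\n",
+ "      <td>0.628133</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>b</th>\n",
+ "      <td>0.605965</td>\n",
+ "      <td>-0.319318</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>c</th>\n",
+ "      <td>-0.589001</td>\n",
+ "      <td>0.740122</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>d</th>\n",
+ "      <td>0.955057</td>\n",
+ "      <td>-0.758872</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>e</th>\n",
+ "      <td>0.683509</td>\n",
+ "      <td>1.978757</td>\n",
+ "    </tr>\n",
+ "  </tbody>\n",
+ "</table>\n",
+ "</div>"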
+ ],
+ "text/plain": [
+ " z x\n",
+ "a 0.503826 0.628133\n",
+ "b 0.605965 -0.319318\n",
+ "c -0.589001 0.740122\n",
+ "d 0.955057 -0.758872\n",
+ "e 0.683509 1.978757"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df[['z','x']]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Two ways to select rows in pandas: by label with .loc[] and by integer position with .iloc[]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "w -2.018168\n",
+ "x 0.740122\n",
+ "y 0.528813\n",
+ "z -0.589001\n",
+ "Name: c, dtype: float64"
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.loc['c']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "w -2.018168\n",
+ "x 0.740122\n",
+ "y 0.528813\n",
+ "z -0.589001\n",
+ "Name: c, dtype: float64"
+ ]
+ },
+ "execution_count": 27,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.iloc[2]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "-0.8480769834036315"
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.loc['b','y']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
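+ "<div>\n",
+ "<style scoped>\n",
+ "    .dataframe tbody tr th:only-of-type {\n",
+ "        vertical-align: middle;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe tbody tr th {\n",
+ "        vertical-align: top;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe thead th {\n",
+ "        text-align: right;\n",
+ "    }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ "    <tr style=\"text-align: right;\">\n",
+ "      <th></th>\n",
+ "      <th>w</th>\n",
+ "      <th>y</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "    <tr>\n",
+ "      <th>a</th>\n",
+ "      <td>2.706850</td>\n",
+ "      <td>0.907969</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>b</th>\n",
+ "      <td>0.651118</td>\n",
+ "      <td>-0.848077</td>\n",
+ "    </tr>\n",
+ "  </tbody>\n",
+ "</table>\n",
+ "</div>"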
+ ],
+ "text/plain": [
+ " w y\n",
+ "a 2.706850 0.907969\n",
+ "b 0.651118 -0.848077"
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.loc[['a','b'],['w','y']]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "venv",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.6 (tags/v3.9.6:db3ff76, Jun 28 2021, 15:26:21) [MSC v.1929 64 bit (AMD64)]"
+ },
+ "orig_nbformat": 4,
+ "vscode": {
+ "interpreter": {
+ "hash": "83075fc33da6c4b78008558628b10ee9a4bc260aeeca96452177134413489e34"
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}