From 68d5c09bd7b8e73230f69dfed142ab1aa15c13c4 Mon Sep 17 00:00:00 2001 From: B L A C K F U R Y <93466451+div-hacks@users.noreply.github.com> Date: Sat, 14 Oct 2023 12:44:51 +0530 Subject: [PATCH] pandas dataframe exercises --- pandas/dataframe2.ipynb | 30 ++ pandas/dataframes1.ipynb | 976 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 1006 insertions(+) create mode 100644 pandas/dataframe2.ipynb create mode 100644 pandas/dataframes1.ipynb diff --git a/pandas/dataframe2.ipynb b/pandas/dataframe2.ipynb new file mode 100644 index 0000000..6b6ba06 --- /dev/null +++ b/pandas/dataframe2.ipynb @@ -0,0 +1,30 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv_algo_bulls", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.9.6 (tags/v3.9.6:db3ff76, Jun 28 2021, 15:26:21) [MSC v.1929 64 bit (AMD64)]" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "addff6cb5044c101bf79387aa2799290f36a7993ca657c8e96968a170b3cd553" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/pandas/dataframes1.ipynb b/pandas/dataframes1.ipynb new file mode 100644 index 0000000..d17c6c3 --- /dev/null +++ b/pandas/dataframes1.ipynb @@ -0,0 +1,976 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Pandas - DataFrames" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from numpy.random import randn" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(101)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame(data=randn(5,4), index=['a','b','c','d','e'], columns=['w', 'x', 'y', 'z'])" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
wxyz
a2.7068500.6281330.9079690.503826
b0.651118-0.319318-0.8480770.605965
c-2.0181680.7401220.528813-0.589001
d0.188695-0.758872-0.9332370.955057
e0.1907941.9787572.6059670.683509
\n", + "
" + ], + "text/plain": [ + " w x y z\n", + "a 2.706850 0.628133 0.907969 0.503826\n", + "b 0.651118 -0.319318 -0.848077 0.605965\n", + "c -2.018168 0.740122 0.528813 -0.589001\n", + "d 0.188695 -0.758872 -0.933237 0.955057\n", + "e 0.190794 1.978757 2.605967 0.683509" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "a 2.706850\n", + "b 0.651118\n", + "c -2.018168\n", + "d 0.188695\n", + "e 0.190794\n", + "Name: w, dtype: float64" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['w']" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pandas.core.frame.DataFrame" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "a 2.706850\n", + "b 0.651118\n", + "c -2.018168\n", + "d 0.188695\n", + "e 0.190794\n", + "Name: w, dtype: float64" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.w # don't use this syntax\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "a 0.628133\n", + "b -0.319318\n", + "c 0.740122\n", + "d -0.758872\n", + "e 1.978757\n", + "Name: x, dtype: float64" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['x']" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
wz
a2.7068500.503826
b0.6511180.605965
c-2.018168-0.589001
d0.1886950.955057
e0.1907940.683509
\n", + "
" + ], + "text/plain": [ + " w z\n", + "a 2.706850 0.503826\n", + "b 0.651118 0.605965\n", + "c -2.018168 -0.589001\n", + "d 0.188695 0.955057\n", + "e 0.190794 0.683509" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[['w','z']]" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "df['new'] = df['w'] + df['y']" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
wxyznew
a2.7068500.6281330.9079690.5038263.614819
b0.651118-0.319318-0.8480770.605965-0.196959
c-2.0181680.7401220.528813-0.589001-1.489355
d0.188695-0.758872-0.9332370.955057-0.744542
e0.1907941.9787572.6059670.6835092.796762
\n", + "
" + ], + "text/plain": [ + " w x y z new\n", + "a 2.706850 0.628133 0.907969 0.503826 3.614819\n", + "b 0.651118 -0.319318 -0.848077 0.605965 -0.196959\n", + "c -2.018168 0.740122 0.528813 -0.589001 -1.489355\n", + "d 0.188695 -0.758872 -0.933237 0.955057 -0.744542\n", + "e 0.190794 1.978757 2.605967 0.683509 2.796762" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
wxyz
a2.7068500.6281330.9079690.503826
b0.651118-0.319318-0.8480770.605965
c-2.0181680.7401220.528813-0.589001
d0.188695-0.758872-0.9332370.955057
e0.1907941.9787572.6059670.683509
\n", + "
" + ], + "text/plain": [ + " w x y z\n", + "a 2.706850 0.628133 0.907969 0.503826\n", + "b 0.651118 -0.319318 -0.848077 0.605965\n", + "c -2.018168 0.740122 0.528813 -0.589001\n", + "d 0.188695 -0.758872 -0.933237 0.955057\n", + "e 0.190794 1.978757 2.605967 0.683509" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.drop('new', axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
wxyznew
a2.7068500.6281330.9079690.5038263.614819
b0.651118-0.319318-0.8480770.605965-0.196959
c-2.0181680.7401220.528813-0.589001-1.489355
d0.188695-0.758872-0.9332370.955057-0.744542
e0.1907941.9787572.6059670.6835092.796762
\n", + "
" + ], + "text/plain": [ + " w x y z new\n", + "a 2.706850 0.628133 0.907969 0.503826 3.614819\n", + "b 0.651118 -0.319318 -0.848077 0.605965 -0.196959\n", + "c -2.018168 0.740122 0.528813 -0.589001 -1.489355\n", + "d 0.188695 -0.758872 -0.933237 0.955057 -0.744542\n", + "e 0.190794 1.978757 2.605967 0.683509 2.796762" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "df.drop('new', axis=1, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
wxyz
a2.7068500.6281330.9079690.503826
b0.651118-0.319318-0.8480770.605965
c-2.0181680.7401220.528813-0.589001
d0.188695-0.758872-0.9332370.955057
e0.1907941.9787572.6059670.683509
\n", + "
" + ], + "text/plain": [ + " w x y z\n", + "a 2.706850 0.628133 0.907969 0.503826\n", + "b 0.651118 -0.319318 -0.848077 0.605965\n", + "c -2.018168 0.740122 0.528813 -0.589001\n", + "d 0.188695 -0.758872 -0.933237 0.955057\n", + "e 0.190794 1.978757 2.605967 0.683509" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(5, 4)" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
zx
a0.5038260.628133
b0.605965-0.319318
c-0.5890010.740122
d0.955057-0.758872
e0.6835091.978757
\n", + "
" + ], + "text/plain": [ + " z x\n", + "a 0.503826 0.628133\n", + "b 0.605965 -0.319318\n", + "c -0.589001 0.740122\n", + "d 0.955057 -0.758872\n", + "e 0.683509 1.978757" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[['z','x']]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# two ways to select rows in pandas" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "w -2.018168\n", + "x 0.740122\n", + "y 0.528813\n", + "z -0.589001\n", + "Name: c, dtype: float64" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.loc['c']" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "w -2.018168\n", + "x 0.740122\n", + "y 0.528813\n", + "z -0.589001\n", + "Name: c, dtype: float64" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.iloc[2]" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "-0.8480769834036315" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.loc['b','y']" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
wy
a2.7068500.907969
b0.651118-0.848077
\n", + "
" + ], + "text/plain": [ + " w y\n", + "a 2.706850 0.907969\n", + "b 0.651118 -0.848077" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.loc[['a','b'],['w','y']]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6 (tags/v3.9.6:db3ff76, Jun 28 2021, 15:26:21) [MSC v.1929 64 bit (AMD64)]" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "83075fc33da6c4b78008558628b10ee9a4bc260aeeca96452177134413489e34" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}