diff --git a/.ipynb_checkpoints/Connect Python_MySQL-D-checkpoint.ipynb b/.ipynb_checkpoints/Connect Python_MySQL-D-checkpoint.ipynb index eab587d..90cf311 100644 --- a/.ipynb_checkpoints/Connect Python_MySQL-D-checkpoint.ipynb +++ b/.ipynb_checkpoints/Connect Python_MySQL-D-checkpoint.ipynb @@ -83,7 +83,7 @@ }, { "cell_type": "markdown", - "id": "a724f5bb", + "id": "fd2da802", "metadata": {}, "source": [ "Inspect the Data Base" @@ -100141,7 +100141,7 @@ { "cell_type": "code", "execution_count": 15, - "id": "c2c50951", + "id": "8b8e1dff", "metadata": {}, "outputs": [ { @@ -100279,7 +100279,7 @@ { "cell_type": "code", "execution_count": 17, - "id": "9e3c9f17", + "id": "f065e8a7", "metadata": {}, "outputs": [ { @@ -100416,7 +100416,7 @@ { "cell_type": "code", "execution_count": 18, - "id": "2f7a5664", + "id": "09884db8", "metadata": {}, "outputs": [ { @@ -100516,7 +100516,7 @@ { "cell_type": "code", "execution_count": 25, - "id": "6bc399a4", + "id": "0ca2cd8d", "metadata": {}, "outputs": [ { @@ -100622,7 +100622,7 @@ { "cell_type": "code", "execution_count": 28, - "id": "76e9c080", + "id": "457af3fd", "metadata": {}, "outputs": [ { @@ -100746,12 +100746,12 @@ }, { "cell_type": "code", - "execution_count": 67, - "id": "9bef36fe", + "execution_count": 69, + "id": "31611c8d", "metadata": {}, "outputs": [], "source": [ - "# query8 = \"SELECT * olist.order_items INNER JOIN olist.order_status_year_price ON order_id\"\n", + "# query8 = \"SELECT * olist.order_items JOIN olist.order_status_year_price ON order_item_id\"\n", "# res_df = pd.read_sql_query(query8, db_connection)\n", "# res_df.head()" ] @@ -100759,7 +100759,7 @@ { "cell_type": "code", "execution_count": 51, - "id": "0dac0a0b", + "id": "ca02e3f6", "metadata": {}, "outputs": [ { @@ -100865,7 +100865,7 @@ { "cell_type": "code", "execution_count": 54, - "id": "ad215e0a", + "id": "59fd5502", "metadata": {}, "outputs": [ { @@ -100995,7 +100995,7 @@ }, { "cell_type": "markdown", - "id": "818548a2", + "id": "8fc7b14a", "metadata": {}, "source": [ "### Data exploration" @@ -101004,7 +101004,7 @@ { "cell_type": "code", "execution_count": 21, - "id": "85806400", + "id": "2956ef12", "metadata": {}, "outputs": [ { @@ -101027,7 +101027,7 @@ { "cell_type": "code", "execution_count": 23, - "id": "a99a41f3", + "id": "cfac4819", "metadata": {}, "outputs": [ { @@ -101048,7 +101048,7 @@ { "cell_type": "code", "execution_count": 26, - "id": "6815d389", + "id": "dc2268f5", "metadata": {}, "outputs": [ { @@ -101069,7 +101069,7 @@ { "cell_type": "code", "execution_count": 29, - "id": "636434a7", + "id": "2fe8f647", "metadata": {}, "outputs": [ { @@ -101090,7 +101090,7 @@ { "cell_type": "code", "execution_count": 55, - "id": "432b4175", + "id": "d30a6203", "metadata": {}, "outputs": [ { @@ -101111,7 +101111,7 @@ { "cell_type": "code", "execution_count": 56, - "id": "9944eaeb", + "id": "471e0cfc", "metadata": {}, "outputs": [ { @@ -101132,7 +101132,7 @@ { "cell_type": "code", "execution_count": 35, - "id": "a692627a", + "id": "cda20df7", "metadata": {}, "outputs": [ { @@ -101153,7 +101153,7 @@ { "cell_type": "code", "execution_count": 38, - "id": "38383622", + "id": "b63018f5", "metadata": {}, "outputs": [ { @@ -101174,17 +101174,133 @@ { "cell_type": "code", "execution_count": null, - "id": "211b8568", + "id": "6f71fc7f", "metadata": {}, "outputs": [], "source": [ "# columns of interest: product_id\tproduct_category_name\tproduct_weight_g\tproduct_length_cm\tproduct_height_cm\tproduct_width_cm" ] }, + { + "cell_type": "code", + "execution_count": 77, + "id": "a796e709", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 32945.000000\n", + "mean 2276.748885\n", + "std 4282.225204\n", + "min 2.000000\n", + "25% 300.000000\n", + "50% 700.000000\n", + "75% 1900.000000\n", + "max 40425.000000\n", + "Name: product_weight_g, dtype: float64" + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "products_df['product_weight_g'].describe() #take median" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "id": "be8f7039", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 32949.000000\n", + "mean 30.815078\n", + "std 16.914458\n", + "min 7.000000\n", + "25% 18.000000\n", + "50% 25.000000\n", + "75% 38.000000\n", + "max 105.000000\n", + "Name: product_length_cm, dtype: float64" + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "products_df['product_length_cm'].describe() #take average" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "id": "0271cb2d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 32949.000000\n", + "mean 16.937661\n", + "std 13.637554\n", + "min 2.000000\n", + "25% 8.000000\n", + "50% 13.000000\n", + "75% 21.000000\n", + "max 105.000000\n", + "Name: product_height_cm, dtype: float64" + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "products_df['product_height_cm'].describe() #take average" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "id": "ac8d3124", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 32949.000000\n", + "mean 23.196728\n", + "std 12.079047\n", + "min 6.000000\n", + "25% 15.000000\n", + "50% 20.000000\n", + "75% 30.000000\n", + "max 118.000000\n", + "Name: product_width_cm, dtype: float64" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "products_df['product_width_cm'].describe() #take average" + ] + }, { "cell_type": "code", "execution_count": 36, - "id": "9c325836", + "id": "c098908b", "metadata": {}, "outputs": [ { @@ -101205,7 +101321,7 @@ { "cell_type": "code", "execution_count": 39, - "id": "9434e037", + "id": "09cd1275", "metadata": {}, "outputs": [ { @@ -101226,7 +101342,7 @@ { "cell_type": "code", "execution_count": null, - "id": "05b48544", + "id": "f94b5920", "metadata": {}, "outputs": [], "source": [ @@ -101235,7 +101351,7 @@ }, { "cell_type": "markdown", - "id": "ce588579", + "id": "ccf39e09", "metadata": {}, "source": [ "### Data cleaning pipeline" @@ -101244,7 +101360,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8132c302", + "id": "f69c99db", "metadata": {}, "outputs": [], "source": [ diff --git a/Connect Python_MySQL-D.ipynb b/Connect Python_MySQL-D.ipynb index eab587d..90cf311 100644 --- a/Connect Python_MySQL-D.ipynb +++ b/Connect Python_MySQL-D.ipynb @@ -83,7 +83,7 @@ }, { "cell_type": "markdown", - "id": "a724f5bb", + "id": "fd2da802", "metadata": {}, "source": [ "Inspect the Data Base" @@ -100141,7 +100141,7 @@ { "cell_type": "code", "execution_count": 15, - "id": "c2c50951", + "id": "8b8e1dff", "metadata": {}, "outputs": [ { @@ -100279,7 +100279,7 @@ { "cell_type": "code", "execution_count": 17, - "id": "9e3c9f17", + "id": "f065e8a7", "metadata": {}, "outputs": [ { @@ -100416,7 +100416,7 @@ { "cell_type": "code", "execution_count": 18, - "id": "2f7a5664", + "id": "09884db8", "metadata": {}, "outputs": [ { @@ -100516,7 +100516,7 @@ { "cell_type": "code", "execution_count": 25, - "id": "6bc399a4", + "id": "0ca2cd8d", "metadata": {}, "outputs": [ { @@ -100622,7 +100622,7 @@ { "cell_type": "code", "execution_count": 28, - "id": "76e9c080", + "id": "457af3fd", "metadata": {}, "outputs": [ { @@ -100746,12 +100746,12 @@ }, { "cell_type": "code", - "execution_count": 67, - "id": "9bef36fe", + "execution_count": 69, + "id": "31611c8d", "metadata": {}, "outputs": [], "source": [ - "# query8 = \"SELECT * olist.order_items INNER JOIN olist.order_status_year_price ON order_id\"\n", + "# query8 = \"SELECT * olist.order_items JOIN olist.order_status_year_price ON order_item_id\"\n", "# res_df = pd.read_sql_query(query8, db_connection)\n", "# res_df.head()" ] @@ -100759,7 +100759,7 @@ { "cell_type": "code", "execution_count": 51, - "id": "0dac0a0b", + "id": "ca02e3f6", "metadata": {}, "outputs": [ { @@ -100865,7 +100865,7 @@ { "cell_type": "code", "execution_count": 54, - "id": "ad215e0a", + "id": "59fd5502", "metadata": {}, "outputs": [ { @@ -100995,7 +100995,7 @@ }, { "cell_type": "markdown", - "id": "818548a2", + "id": "8fc7b14a", "metadata": {}, "source": [ "### Data exploration" @@ -101004,7 +101004,7 @@ { "cell_type": "code", "execution_count": 21, - "id": "85806400", + "id": "2956ef12", "metadata": {}, "outputs": [ { @@ -101027,7 +101027,7 @@ { "cell_type": "code", "execution_count": 23, - "id": "a99a41f3", + "id": "cfac4819", "metadata": {}, "outputs": [ { @@ -101048,7 +101048,7 @@ { "cell_type": "code", "execution_count": 26, - "id": "6815d389", + "id": "dc2268f5", "metadata": {}, "outputs": [ { @@ -101069,7 +101069,7 @@ { "cell_type": "code", "execution_count": 29, - "id": "636434a7", + "id": "2fe8f647", "metadata": {}, "outputs": [ { @@ -101090,7 +101090,7 @@ { "cell_type": "code", "execution_count": 55, - "id": "432b4175", + "id": "d30a6203", "metadata": {}, "outputs": [ { @@ -101111,7 +101111,7 @@ { "cell_type": "code", "execution_count": 56, - "id": "9944eaeb", + "id": "471e0cfc", "metadata": {}, "outputs": [ { @@ -101132,7 +101132,7 @@ { "cell_type": "code", "execution_count": 35, - "id": "a692627a", + "id": "cda20df7", "metadata": {}, "outputs": [ { @@ -101153,7 +101153,7 @@ { "cell_type": "code", "execution_count": 38, - "id": "38383622", + "id": "b63018f5", "metadata": {}, "outputs": [ { @@ -101174,17 +101174,133 @@ { "cell_type": "code", "execution_count": null, - "id": "211b8568", + "id": "6f71fc7f", "metadata": {}, "outputs": [], "source": [ "# columns of interest: product_id\tproduct_category_name\tproduct_weight_g\tproduct_length_cm\tproduct_height_cm\tproduct_width_cm" ] }, + { + "cell_type": "code", + "execution_count": 77, + "id": "a796e709", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 32945.000000\n", + "mean 2276.748885\n", + "std 4282.225204\n", + "min 2.000000\n", + "25% 300.000000\n", + "50% 700.000000\n", + "75% 1900.000000\n", + "max 40425.000000\n", + "Name: product_weight_g, dtype: float64" + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "products_df['product_weight_g'].describe() #take median" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "id": "be8f7039", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 32949.000000\n", + "mean 30.815078\n", + "std 16.914458\n", + "min 7.000000\n", + "25% 18.000000\n", + "50% 25.000000\n", + "75% 38.000000\n", + "max 105.000000\n", + "Name: product_length_cm, dtype: float64" + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "products_df['product_length_cm'].describe() #take average" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "id": "0271cb2d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 32949.000000\n", + "mean 16.937661\n", + "std 13.637554\n", + "min 2.000000\n", + "25% 8.000000\n", + "50% 13.000000\n", + "75% 21.000000\n", + "max 105.000000\n", + "Name: product_height_cm, dtype: float64" + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "products_df['product_height_cm'].describe() #take average" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "id": "ac8d3124", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 32949.000000\n", + "mean 23.196728\n", + "std 12.079047\n", + "min 6.000000\n", + "25% 15.000000\n", + "50% 20.000000\n", + "75% 30.000000\n", + "max 118.000000\n", + "Name: product_width_cm, dtype: float64" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "products_df['product_width_cm'].describe() #take average" + ] + }, { "cell_type": "code", "execution_count": 36, - "id": "9c325836", + "id": "c098908b", "metadata": {}, "outputs": [ { @@ -101205,7 +101321,7 @@ { "cell_type": "code", "execution_count": 39, - "id": "9434e037", + "id": "09cd1275", "metadata": {}, "outputs": [ { @@ -101226,7 +101342,7 @@ { "cell_type": "code", "execution_count": null, - "id": "05b48544", + "id": "f94b5920", "metadata": {}, "outputs": [], "source": [ @@ -101235,7 +101351,7 @@ }, { "cell_type": "markdown", - "id": "ce588579", + "id": "ccf39e09", "metadata": {}, "source": [ "### Data cleaning pipeline" @@ -101244,7 +101360,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8132c302", + "id": "f69c99db", "metadata": {}, "outputs": [], "source": [