Skip to content

Commit

Permalink
midterm project
Browse files Browse the repository at this point in the history
  • Loading branch information
plumeris committed Sep 6, 2021
1 parent bdc5ab7 commit 4ee6bb5
Show file tree
Hide file tree
Showing 2 changed files with 284 additions and 52 deletions.
168 changes: 142 additions & 26 deletions .ipynb_checkpoints/Connect Python_MySQL-D-checkpoint.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@
},
{
"cell_type": "markdown",
"id": "a724f5bb",
"id": "fd2da802",
"metadata": {},
"source": [
"Inspect the database"
Expand Down Expand Up @@ -100141,7 +100141,7 @@
{
"cell_type": "code",
"execution_count": 15,
"id": "c2c50951",
"id": "8b8e1dff",
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -100279,7 +100279,7 @@
{
"cell_type": "code",
"execution_count": 17,
"id": "9e3c9f17",
"id": "f065e8a7",
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -100416,7 +100416,7 @@
{
"cell_type": "code",
"execution_count": 18,
"id": "2f7a5664",
"id": "09884db8",
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -100516,7 +100516,7 @@
{
"cell_type": "code",
"execution_count": 25,
"id": "6bc399a4",
"id": "0ca2cd8d",
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -100622,7 +100622,7 @@
{
"cell_type": "code",
"execution_count": 28,
"id": "76e9c080",
"id": "457af3fd",
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -100746,20 +100746,20 @@
},
{
"cell_type": "code",
"execution_count": 67,
"id": "9bef36fe",
"execution_count": 69,
"id": "31611c8d",
"metadata": {},
"outputs": [],
"source": [
"# query8 = \"SELECT * olist.order_items INNER JOIN olist.order_status_year_price ON order_id\"\n",
"# query8 = \"SELECT * olist.order_items JOIN olist.order_status_year_price ON order_item_id\"\n",
"# res_df = pd.read_sql_query(query8, db_connection)\n",
"# res_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 51,
"id": "0dac0a0b",
"id": "ca02e3f6",
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -100865,7 +100865,7 @@
{
"cell_type": "code",
"execution_count": 54,
"id": "ad215e0a",
"id": "59fd5502",
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -100995,7 +100995,7 @@
},
{
"cell_type": "markdown",
"id": "818548a2",
"id": "8fc7b14a",
"metadata": {},
"source": [
"### Data exploration"
Expand All @@ -101004,7 +101004,7 @@
{
"cell_type": "code",
"execution_count": 21,
"id": "85806400",
"id": "2956ef12",
"metadata": {},
"outputs": [
{
Expand All @@ -101027,7 +101027,7 @@
{
"cell_type": "code",
"execution_count": 23,
"id": "a99a41f3",
"id": "cfac4819",
"metadata": {},
"outputs": [
{
Expand All @@ -101048,7 +101048,7 @@
{
"cell_type": "code",
"execution_count": 26,
"id": "6815d389",
"id": "dc2268f5",
"metadata": {},
"outputs": [
{
Expand All @@ -101069,7 +101069,7 @@
{
"cell_type": "code",
"execution_count": 29,
"id": "636434a7",
"id": "2fe8f647",
"metadata": {},
"outputs": [
{
Expand All @@ -101090,7 +101090,7 @@
{
"cell_type": "code",
"execution_count": 55,
"id": "432b4175",
"id": "d30a6203",
"metadata": {},
"outputs": [
{
Expand All @@ -101111,7 +101111,7 @@
{
"cell_type": "code",
"execution_count": 56,
"id": "9944eaeb",
"id": "471e0cfc",
"metadata": {},
"outputs": [
{
Expand All @@ -101132,7 +101132,7 @@
{
"cell_type": "code",
"execution_count": 35,
"id": "a692627a",
"id": "cda20df7",
"metadata": {},
"outputs": [
{
Expand All @@ -101153,7 +101153,7 @@
{
"cell_type": "code",
"execution_count": 38,
"id": "38383622",
"id": "b63018f5",
"metadata": {},
"outputs": [
{
Expand All @@ -101174,17 +101174,133 @@
{
"cell_type": "code",
"execution_count": null,
"id": "211b8568",
"id": "6f71fc7f",
"metadata": {},
"outputs": [],
"source": [
"# columns of interest: product_id, product_category_name, product_weight_g, product_length_cm, product_height_cm, product_width_cm"
]
},
{
"cell_type": "code",
"execution_count": 77,
"id": "a796e709",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 32945.000000\n",
"mean 2276.748885\n",
"std 4282.225204\n",
"min 2.000000\n",
"25% 300.000000\n",
"50% 700.000000\n",
"75% 1900.000000\n",
"max 40425.000000\n",
"Name: product_weight_g, dtype: float64"
]
},
"execution_count": 77,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"products_df['product_weight_g'].describe() #take median"
]
},
{
"cell_type": "code",
"execution_count": 78,
"id": "be8f7039",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 32949.000000\n",
"mean 30.815078\n",
"std 16.914458\n",
"min 7.000000\n",
"25% 18.000000\n",
"50% 25.000000\n",
"75% 38.000000\n",
"max 105.000000\n",
"Name: product_length_cm, dtype: float64"
]
},
"execution_count": 78,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"products_df['product_length_cm'].describe() #take average"
]
},
{
"cell_type": "code",
"execution_count": 79,
"id": "0271cb2d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 32949.000000\n",
"mean 16.937661\n",
"std 13.637554\n",
"min 2.000000\n",
"25% 8.000000\n",
"50% 13.000000\n",
"75% 21.000000\n",
"max 105.000000\n",
"Name: product_height_cm, dtype: float64"
]
},
"execution_count": 79,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"products_df['product_height_cm'].describe() #take average"
]
},
{
"cell_type": "code",
"execution_count": 80,
"id": "ac8d3124",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 32949.000000\n",
"mean 23.196728\n",
"std 12.079047\n",
"min 6.000000\n",
"25% 15.000000\n",
"50% 20.000000\n",
"75% 30.000000\n",
"max 118.000000\n",
"Name: product_width_cm, dtype: float64"
]
},
"execution_count": 80,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"products_df['product_width_cm'].describe() #take average"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "9c325836",
"id": "c098908b",
"metadata": {},
"outputs": [
{
Expand All @@ -101205,7 +101321,7 @@
{
"cell_type": "code",
"execution_count": 39,
"id": "9434e037",
"id": "09cd1275",
"metadata": {},
"outputs": [
{
Expand All @@ -101226,7 +101342,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "05b48544",
"id": "f94b5920",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -101235,7 +101351,7 @@
},
{
"cell_type": "markdown",
"id": "ce588579",
"id": "ccf39e09",
"metadata": {},
"source": [
"### Data cleaning pipeline"
Expand All @@ -101244,7 +101360,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "8132c302",
"id": "f69c99db",
"metadata": {},
"outputs": [],
"source": [
Expand Down
Loading

0 comments on commit 4ee6bb5

Please sign in to comment.