Skip to content

Commit

Permalink
cleaned dataframes
Browse files Browse the repository at this point in the history
  • Loading branch information
plumeris committed Sep 7, 2021
1 parent 9ee4d0e commit 468ccf7
Show file tree
Hide file tree
Showing 10 changed files with 1,559,618 additions and 17 deletions.
217 changes: 201 additions & 16 deletions Connect Python_MySQL-D.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -101611,7 +101611,7 @@
{
"cell_type": "code",
"execution_count": 34,
"id": "0242076f",
"id": "5a9a17a0",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -101625,7 +101625,7 @@
},
{
"cell_type": "markdown",
"id": "c8ba7767",
"id": "b2e22341",
"metadata": {},
"source": [
"----"
Expand Down Expand Up @@ -101666,18 +101666,18 @@
{
"cell_type": "code",
"execution_count": 38,
"id": "f9db7718",
"id": "a4891b1c",
"metadata": {},
"outputs": [],
"source": [
"# removing missing date values\n",
"orders_df.dropna(subset=[\"order_approved_at\",\"order_delivered_carrier_date\",\"order_delivered_customer_date\"],inplace=True)\n"
"orders_df.dropna(subset=[\"order_approved_at\",\"order_delivered_carrier_date\",\"order_delivered_customer_date\"],inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "fb80e79c",
"id": "d2bebb18",
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -101813,7 +101813,7 @@
{
"cell_type": "code",
"execution_count": 40,
"id": "546c884f",
"id": "76d164d9",
"metadata": {},
"outputs": [
{
Expand All @@ -101834,7 +101834,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "a8b5bbae",
"id": "9046afe9",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -101859,7 +101859,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "79230314",
"id": "f351dbe3",
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -101904,10 +101904,135 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 43,
"id": "cda6924f",
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>product_id</th>\n",
" <th>product_name_length</th>\n",
" <th>product_description_length</th>\n",
" <th>product_photos_qty</th>\n",
" <th>product_weight_g</th>\n",
" <th>product_length_cm</th>\n",
" <th>product_height_cm</th>\n",
" <th>product_width_cm</th>\n",
" <th>product_category</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1e9e8ef04dbcff4541ed26657ea517e5</td>\n",
" <td>40.0</td>\n",
" <td>287.0</td>\n",
" <td>1.0</td>\n",
" <td>225.0</td>\n",
" <td>16.0</td>\n",
" <td>10.0</td>\n",
" <td>14.0</td>\n",
" <td>perfumery\\r</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>6a2fb4dd53d2cdb88e0432f1284a004c</td>\n",
" <td>39.0</td>\n",
" <td>346.0</td>\n",
" <td>2.0</td>\n",
" <td>400.0</td>\n",
" <td>27.0</td>\n",
" <td>5.0</td>\n",
" <td>20.0</td>\n",
" <td>perfumery\\r</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0d009643171aee696f4733340bc2fdd0</td>\n",
" <td>52.0</td>\n",
" <td>150.0</td>\n",
" <td>1.0</td>\n",
" <td>422.0</td>\n",
" <td>21.0</td>\n",
" <td>16.0</td>\n",
" <td>18.0</td>\n",
" <td>perfumery\\r</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>b1eae565a61935e0011ee7682fef9dc9</td>\n",
" <td>49.0</td>\n",
" <td>460.0</td>\n",
" <td>2.0</td>\n",
" <td>267.0</td>\n",
" <td>17.0</td>\n",
" <td>13.0</td>\n",
" <td>17.0</td>\n",
" <td>perfumery\\r</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>8da90b37f0fb171b4877c124f965b1f6</td>\n",
" <td>56.0</td>\n",
" <td>733.0</td>\n",
" <td>3.0</td>\n",
" <td>377.0</td>\n",
" <td>18.0</td>\n",
" <td>13.0</td>\n",
" <td>15.0</td>\n",
" <td>perfumery\\r</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" product_id product_name_length \\\n",
"0 1e9e8ef04dbcff4541ed26657ea517e5 40.0 \n",
"1 6a2fb4dd53d2cdb88e0432f1284a004c 39.0 \n",
"2 0d009643171aee696f4733340bc2fdd0 52.0 \n",
"3 b1eae565a61935e0011ee7682fef9dc9 49.0 \n",
"4 8da90b37f0fb171b4877c124f965b1f6 56.0 \n",
"\n",
" product_description_length product_photos_qty product_weight_g \\\n",
"0 287.0 1.0 225.0 \n",
"1 346.0 2.0 400.0 \n",
"2 150.0 1.0 422.0 \n",
"3 460.0 2.0 267.0 \n",
"4 733.0 3.0 377.0 \n",
"\n",
" product_length_cm product_height_cm product_width_cm product_category \n",
"0 16.0 10.0 14.0 perfumery\\r \n",
"1 27.0 5.0 20.0 perfumery\\r \n",
"2 21.0 16.0 18.0 perfumery\\r \n",
"3 17.0 13.0 17.0 perfumery\\r \n",
"4 18.0 13.0 15.0 perfumery\\r "
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"trans_product_df = pd.merge(products_df, product_category_translation_df, on ='product_category_name')\n",
"trans_product_df.rename(columns={'product_category_name_english': 'product_category'}, inplace=True)\n",
Expand All @@ -101927,22 +102052,82 @@
},
{
"cell_type": "code",
"execution_count": null,
"id": "63ee4428",
"execution_count": 44,
"id": "afabc271",
"metadata": {},
"outputs": [],
"source": [
"translated_product_df.to_csv('translated_product_df.csv')"
"trans_product_df.to_csv('trans_product_df.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8917d0c2",
"execution_count": 41,
"id": "b8ddee1b",
"metadata": {},
"outputs": [],
"source": [
"orders_df.to_csv('orders_df.csv')"
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "1b2fe379",
"metadata": {},
"outputs": [],
"source": [
"customers_df.to_csv('customers_df.csv')"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "c101b6a6",
"metadata": {},
"outputs": [],
"source": [
"order_payments_df.to_csv('order_payments_df.csv')"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "1350a3cc",
"metadata": {},
"outputs": [],
"source": [
"order_items_df.to_csv('order_items_df.csv')"
]
},
{
"cell_type": "code",
"execution_count": 49,
"id": "1601184d",
"metadata": {},
"outputs": [],
"source": [
"order_customer_items_paid_df.to_csv('order_customer_items_paid_df.csv')"
]
},
{
"cell_type": "code",
"execution_count": 50,
"id": "5519a0f0",
"metadata": {},
"outputs": [],
"source": [
"geolocation_df.to_csv('geolocation_df.csv')"
]
},
{
"cell_type": "code",
"execution_count": 51,
"id": "37589342",
"metadata": {},
"outputs": [],
"source": [
"orders_df.to_csv('orders_df1.csv')"
"sellers_df.to_csv('sellers_df.csv')"
]
},
{
Expand Down
Loading

0 comments on commit 468ccf7

Please sign in to comment.