-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathZamoto Analysis.py
81 lines (51 loc) · 1.87 KB
/
Zamoto Analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# Zomato Analysis
# Import libraries
import pandas as pd
import numpy as np
# Zomato restaurant data: load, inspect, merge with country names, clean.
#
# The script reads the raw Zomato CSV and the country-code Excel sheet,
# prints a series of sanity checks for each, left-joins them on
# 'Country Code', drops columns that are not needed downstream, removes rows
# containing nulls and finally prints the per-country row counts and the
# cleaned frame.

# Input locations. Kept as constants so the machine-specific paths only have
# to be edited in one place.
ZOMATO_CSV = r"C:\Users\ritik\OneDrive\Documents\project data analysis\Power bi project\Zamoto project\Zamoto raw data\zomato.csv"
COUNTRY_XLSX = r"C:\Users\ritik\OneDrive\Documents\project data analysis\Power bi project\Zamoto project\Zamoto raw data\Country-Code.xlsx"

# Columns removed after the merge.
UNWANTED_COLUMNS = [
    'Restaurant ID',
    'Locality Verbose',
    'Country Code',
    'Longitude',
    'Latitude',
]

# ---------------------------------------------------------------------------
# Load the raw inputs
# ---------------------------------------------------------------------------
# NOTE(review): 'unicode_escape' also interprets backslash sequences found in
# the data; if the file is simply Latin-1 encoded, 'latin-1' may be the
# intended codec -- confirm against the file before changing.
df_data = pd.read_csv(ZOMATO_CSV, encoding='unicode_escape')
print(df_data)

df_country = pd.read_excel(COUNTRY_XLSX)
print(df_country)

# ---------------------------------------------------------------------------
# Inspect the Zomato data
# ---------------------------------------------------------------------------
# Column dtypes and non-null counts (info() prints its own report).
df_data.info()
# Null values per column.
print(df_data.isnull().sum())
# First 10 rows.
print(df_data.head(10))
# Summary statistics of the numeric columns.
print(df_data.describe())
# Number of fully duplicated rows.
print(df_data.duplicated().sum())
# Column names, dtypes and (rows, columns).
print(df_data.columns)
print(df_data.dtypes)
print(df_data.shape)

# ---------------------------------------------------------------------------
# Inspect the country-code data
# ---------------------------------------------------------------------------
print(df_country.head())
# info must be *called*; printing the attribute only shows the bound method.
df_country.info()
print(df_country.shape)
# Null values per column.
print(df_country.isnull().sum())
# Number of fully duplicated rows (printed so the check is visible when the
# file is run as a script rather than in a notebook cell).
print(df_country.duplicated().sum())

# ---------------------------------------------------------------------------
# Merge and clean
# ---------------------------------------------------------------------------
# Left join keeps every Zomato row; rows whose 'Country Code' has no match in
# the country sheet get nulls in the country columns.
df = pd.merge(df_data, df_country, on='Country Code', how='left')
print(df.head())

# Drop the columns that are not needed for the analysis.
df = df.drop(columns=UNWANTED_COLUMNS)
print(df.columns)

# Null values per column after merging.
print(df.isnull().sum())
# Remove every row that contains at least one null, then confirm none remain.
df = df.dropna()
print(df.isnull().sum())

# Number of rows per country.
print(df['Country'].value_counts())

# Export step, intentionally disabled; uncomment to write the cleaned data.
#df.to_csv(r"C:\Users\ritik\OneDrive\Documents\project data analysis\Power bi project\Zamoto project\zomato.csv")

# Render the whole cleaned frame; to_string must be called to get the text.
print(df.to_string())