# Pandas.py
# Pandas is a powerful Python library for data manipulation and analysis.
# It provides high-performance, easy-to-use data structures and data analysis tools.
# Let's dive into the basics:
# Import Pandas
import pandas as pd
# Series
# Build a Series from a plain Python list of the integers 1 through 5
# and show it.
data = list(range(1, 6))
s = pd.Series(data=data)
print(s)
# DataFrame
# Build a two-column table from a mapping of column name -> column values
# and show it.
data = dict(
    Name=['Ali', 'Mohammad', 'Zahra'],
    Age=[25, 31, 19],
)
df = pd.DataFrame(data=data)
print(df)
# Reading and Writing Data
# Pandas can read data from various file formats like CSV, Excel, JSON, etc.,
# and write data to these formats as well.

# 1. Round-trip a CSV file.
# Read a CSV file
df = pd.read_csv('data.csv')
# Write a DataFrame to a CSV file (index=False leaves the row labels out)
df.to_csv('output.csv', index=False)

# 2. Load a second data set and look at a few of its columns.
df = pd.read_csv('titanic.csv')
# Select every row of the columns at positions 1, 4 and 8. As a bare
# expression the selection is discarded when the file runs as a script,
# so print it to make it visible.
print(df.iloc[:, [1, 4, 8]])
# The full DataFrame (not just the selection above) is written, overwriting
# the output.csv produced in step 1.
df.to_csv('output.csv', index=False)
# Selecting data
# Select a single column by its label
name_column = df['Name']
print(name_column)
# Select the first row by integer position
first_row = df.iloc[0]
print(first_row)
# Select the rows that satisfy a condition, using a boolean mask
older_than_25 = df['Age'] > 25
print(df[older_than_25])
# Pandas - Analyzing DataFrames
# Get a quick overview by printing the first 10 rows of the DataFrame
print(df.head(10))
# Print the last 5 rows of the DataFrame (tail() defaults to 5 rows)
print(df.tail())
# Print information about the data. info() writes its report to stdout
# itself and returns None, so wrapping it in print() would add a stray
# "None" line after the report.
df.info()
# Pandas - Cleaning Data
# Pandas - Cleaning Empty Cells
df = pd.read_csv('data.csv')
# Drop every row that contains at least one empty cell, modifying df itself.
df.dropna(inplace=True)
print(df)
# Replace NULL values in the "Age" column with the number 50.
# The filled column is assigned back to df: calling fillna(inplace=True) on
# the df["Age"] selection is a chained in-place update that pandas does not
# guarantee will reach df (it is a no-op under Copy-on-Write).
# NOTE(review): the dropna() above already removed every row with an empty
# cell, so this only demonstrates the call; it also assumes data.csv has an
# "Age" column -- confirm against the actual file.
df["Age"] = df["Age"].fillna(50)
# info() prints its own report and returns None, so it is not wrapped in
# print().
df.info()
# Pandas - Fixing Wrong Data
# Replace value: cap every "Duration" above 12 at 12.
# A boolean mask updates all matching rows in one step instead of visiting
# the index label by label, and it also works when index labels repeat.
too_long = df["Duration"] > 12
df.loc[too_long, "Duration"] = 12

# Remove rows: drop every row whose "Duration" is above 12.
# NOTE(review): when both steps run back to back, the capping above leaves
# no value greater than 12, so nothing is dropped here; the two steps are
# alternative ways of handling the same wrong values.
too_long = df["Duration"] > 12
df.drop(df[too_long].index, inplace=True)
# Removing duplicates
# Take the "Age" column for the index labels 1 through 20.
row_labels = slice(1, 20)
column_labels = ["Age"]
dup_df = df.loc[row_labels, column_labels]
# dup_df.duplicated() would only flag the repeated rows; drop_duplicates()
# removes them, here directly on dup_df.
dup_df.drop_duplicates(inplace=True)
print(dup_df)
# Pandas - Data Correlations
# The corr() method calculates the relationship between each column in your data set
# NOTE(review): assumes df has "Age" and "Fare" columns -- confirm which
# CSV file df was loaded from at this point.
dup_df = df.loc[1:20, ['Age', 'Fare']]
# drop_duplicates() finds the repeated rows on its own, so a separate
# duplicated() call whose result is discarded is not needed.
dup_df.drop_duplicates(inplace=True)
# corr() returns the correlation matrix instead of printing it, so print
# the result to make it visible when the file runs as a script.
print(dup_df.corr())