-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlab1.py
39 lines (29 loc) · 1.14 KB
/
lab1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import pandas
# Load the automobile data set. The file is read with header=None, so pandas
# labels the columns 0..N-1 until the real names are assigned below.
path = "https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DA0101EN/auto.csv"
df = pandas.read_csv(path, header=None)
print("The first 5 rows of the dataframe")
print(df.head())
print("The last 10 rows of the dataframe")
print(df.tail(10))

# Create the list of column names and assign it to the dataframe.
headers = [
    "symboling", "normalized-losses", "make", "fuel-type", "aspiration",
    "num-of-doors", "body-style", "drive-wheels", "engine-location",
    "wheel-base", "length", "width", "height", "curb-weight", "engine-type",
    "num-of-cylinders", "engine-size", "fuel-system", "bore", "stroke",
    "compression-ratio", "horsepower", "peak-rpm", "city-mpg", "highway-mpg",
    "price",
]
df.columns = headers
print(df.head())

# Show the names of the columns.
print(df.columns)

# Drop rows with no price.
# NOTE(review): the UCI automobile data is documented as marking missing
# values with a literal "?", which dropna() does not treat as missing --
# confirm for this file. Those rows are filtered out explicitly first; the
# comparison is simply all-True if the column holds no "?" entries.
df = df[df["price"] != "?"]
# dropna() returns a new dataframe, so the result must be assigned back;
# calling it without assignment leaves df unchanged.
df = df.dropna(subset=["price"], axis=0)

# Save the cleaned data set (without the index column).
df.to_csv("automobile.csv", index=False)

# Show column types.
print(df.dtypes)

# Show summary statistics for the numeric columns.
print(df.describe())

# Describe all the columns in "df", including non-numeric ones.
print(df.describe(include="all"))
print(df[["price", "compression-ratio"]].describe(include="all"))

# df.info() writes its summary to stdout itself and returns None, so it is
# called directly; printing the bare attribute only shows the bound method.
df.info()