forked from Derrick-Sherrill/DerrickSherrill.com
-
Notifications
You must be signed in to change notification settings - Fork 0
/
pd_indexing.py
71 lines (51 loc) · 1.67 KB
/
pd_indexing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import numpy as np
import pandas as pd
"""The Script"""
# --- Build the DataFrame ---------------------------------------------------
# The workbook name is a relative path.
excel_file = 'Pandas_Workbook.xlsx'
df = pd.read_excel(excel_file)
print(df)
# No index_col was passed above, so pandas supplied the row index for us.

# --- Pull summary information out of the DataFrame -------------------------
# First five rows, then last five rows.
print(df.head(n=5))
print(df.tail(n=5))
# Row labels, column labels and per-column dtypes, one after another.
print(df.index, df.columns, df.dtypes, sep='\n')
# --- Indexing and selecting: columns ---------------------------------------
# A single column label yields a Series.
name_column = df['Name']
print(name_column)
print(type(name_column))
# A list of column labels yields a DataFrame built from those columns.
name_and_occupation = df[['Name', 'Occupation']]
print(name_and_occupation)
print(type(name_and_occupation))

# --- Reading a single cell -------------------------------------------------
# Chained indexing: pick the column, then index into the resulting Series.
# It works for reading a value, but it is not the recommended form.
occupation_column = df['Occupation']
print(occupation_column[0])
# Better: label indexing with .at[row_label, column_label] fetches the single
# value in one step and avoids the chained lookup.
print(df.at[0, 'Occupation'])
"""Here the row labels are index positions"""  # next step: change the row labels
# --- Non-integer row labels ------------------------------------------------
# Read the workbook again, this time using the 'Name' column as the index.
# Unique values are recommended for a DataFrame index.
df2 = pd.read_excel(excel_file, index_col='Name')
print(df2)

# Label-based scalar access: row label first, then column label.
adam_occupation = df2.at["Adam", "Occupation"]
print(adam_occupation)

# Integer-based scalar access: even though neither the row labels nor the
# column labels are numbers here, positions can still be used.
cell_by_position = df2.iat[2, 1]
print(cell_by_position)
# --- .loc: selection by label ----------------------------------------------
every_row = slice(None)  # what a bare ":" inside the brackets stands for

# Column labels given as a list: first a single column, then two columns.
for wanted_columns in (['Name'], ['Name', 'Occupation']):
    print(df.loc[every_row, wanted_columns])

# One row label, every column.
print(df.loc[1, every_row])

# Slices of labels on both axes.
print(df.loc[0:5, 'Name':'Occupation'])

# --- .loc: selection by boolean mask ---------------------------------------
# Whole operation in one expression: the comparison builds the mask inline.
print(df.loc[df['Identifier'] == True])

# Same idea in two steps: create the boolean Series, show it, then filter.
older_than_27 = df['Age'] > 27
print(older_than_27)
print(df.loc[older_than_27])

# Boolean list on the column axis: one flag per column, so this presumes the
# sheet has exactly four columns (the last one is dropped).
keep_column = [True, True, True, False]
print(df.loc[0:5, keep_column])
# --- .iloc: selection by integer position ----------------------------------
# Single value at row position 1, column position 1.
print(df.iloc[1, 1])

# Lists of row positions and column positions.
row_positions = [1, 2, 3, 4]
column_positions = [1, 2]
print(df.iloc[row_positions, column_positions])

# The same rows and columns as the lists above, expressed as slices
# (start, stop, step).
row_span = slice(1, 5, 1)
column_span = slice(1, 3, 1)
print(df.iloc[row_span, column_span])