-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathestimate.py
38 lines (28 loc) · 1.1 KB
/
estimate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
'''
estimate.py
This script estimates the number of tigers in the census data
Usage:
python estimate.py'''
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
# --- Training data ---------------------------------------------------------
# Read the labelled records and discard the columns that are not used as
# model inputs.
df = pd.read_csv('Train.csv').drop(columns=['Trail', 'Soil', 'Left'])

# 'Distinct' is the prediction target; every remaining column is a feature.
y = df['Distinct']
X = df.drop(columns='Distinct')

# Hold back 10% of the rows for evaluation; the fixed seed keeps the split
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=0
)

# --- Model -------------------------------------------------------------------
# Fit a 5-nearest-neighbour classifier on the training portion
# (fit() returns the estimator itself, so the call can be chained).
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# Score the classifier on the held-out rows.
y_pred = knn.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy of KNN on test data: ", test_accuracy)

# --- Census prediction -------------------------------------------------------
# NOTE(review): assumes Census.csv contains exactly the feature columns the
# model was trained on -- confirm against the data file.
new_data = pd.read_csv('Census.csv')
new_data['Distinct'] = knn.predict(new_data)

# The sum of the predicted 'Distinct' values is reported as the total number
# of tigers.
total_tigers = new_data['Distinct'].sum()
print("Total predicted number of Tigers is: ", total_tigers)