-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathapp.py
115 lines (83 loc) · 3.67 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import numpy as np
from flask import Flask, request, jsonify, render_template
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder
from sklearn.linear_model import LogisticRegression
import xgboost as xgb
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import make_pipeline
import pandas as pd
import os
import streamlit as st
# Load the data.
# The CSV location can be overridden with the PMTCT_DATA_PATH environment
# variable so the app is not tied to one developer's home directory; when the
# variable is unset the original path is used.
DATA_PATH = os.environ.get(
    'PMTCT_DATA_PATH',
    '/home/ashioyajotham/Downloads/PMTCT-Data-Behavior-Identification-and-Clean-Up-Automation/Data/data.csv',
)
df = pd.read_csv(DATA_PATH)

# Preprocess the data: the column to predict and the columns used as predictors.
target = "PMTCT"
features = ['facility', 'ward', 'sub_county', 'county', 'indicators',
            'khis_data', 'datim_value', 'period', 'Month']

# Optionally show the raw dataframe in the app.
if st.checkbox('Show dataframe'):
    st.write(df)

X = df[features]
y = df[target]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across Streamlit reruns.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=42)

# Logistic regression pipeline. The pipeline one-hot encodes internally, so it
# must always be given the raw (un-encoded) feature columns.
lr_model = make_pipeline(
    OneHotEncoder(handle_unknown="ignore"),
    LogisticRegression()
)
lr_model.fit(X_train, y_train)

# Encode the data with a standalone encoder for the random forest.
# Categories unseen during fitting are ignored at transform time.
encoder = OneHotEncoder(handle_unknown="ignore")
X_train_encoded = encoder.fit_transform(X_train)
X_test_encoded = encoder.transform(X_test)

# Fit the RF model on the pre-encoded features.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train_encoded, y_train)
# Page header and introduction.
st.title("HIV Testing")
st.write("This is a simple HIV Testing prediction web app to predict whether a facility reports PMTCT or not.")
st.write("Please fill in the required details.")
st.balloons()  # Adds a balloon animation

# Style: set the page background colour.
st.markdown(""" <style> .reportview-container { background: #F5F5F5; } </style> """, unsafe_allow_html=True)

# Create a text element and let the reader know the data is loading.
data_load_state = st.text('Loading data...')
# The CSV has already been read into `df` earlier in the script, so take a
# copy of that frame instead of parsing the same file from disk a second
# time on every rerun. (The whole file is loaded; no row limit is applied.)
data = df.copy()
# Notify the reader that the data was successfully loaded.
data_load_state.text("Loading data...done!")

# Classify the data
st.subheader('Classify the data')

# Create a selectbox for the classification model
classifier = st.selectbox('Select the classifier', ('Logistic Regression', 'Random Forest'))

# Variables for the user input features
facility = st.text_input("facility", "Enter facility")
ward = st.text_input("ward", "Enter ward")
# Create a button which, when clicked, predicts the class for the facility
# and ward entered above and reports how the chosen model performs on the
# held-out test set.
if st.button("Predict"):
    # Look up the records for the facility/ward the user typed in. Matching
    # ignores case and surrounding whitespace so small typing differences
    # still find the rows.
    facility_key = facility.strip().casefold()
    ward_key = ward.strip().casefold()
    facility_match = df['facility'].astype(str).str.strip().str.casefold() == facility_key
    ward_match = df['ward'].astype(str).str.strip().str.casefold() == ward_key
    selected_rows = df.loc[facility_match & ward_match, features]

    if selected_rows.empty:
        st.warning("No records found for the given facility and ward.")
    else:
        # Make prediction.
        # lr_model is a pipeline that one-hot encodes internally, so it takes
        # the raw feature columns; rf_model was fitted on the output of the
        # standalone encoder, so its inputs must be encoded first.
        if classifier == 'Logistic Regression':
            model = lr_model
            model_input = selected_rows
            evaluation_input = X_test
        else:  # 'Random Forest'
            model = rf_model
            model_input = encoder.transform(selected_rows)
            evaluation_input = X_test_encoded

        prediction = model.predict(model_input)
        prediction_proba = model.predict_proba(model_input)
        # Metrics are computed on the held-out split so the predictions line
        # up one-to-one with y_test.
        test_prediction = model.predict(evaluation_input)

        st.subheader('Prediction')
        st.write(prediction)
        st.subheader('Prediction Probability')
        st.write(prediction_proba)
        st.subheader('Confusion Matrix')
        st.write(confusion_matrix(y_test, test_prediction))
        st.subheader('Classification Report')
        # The report is pre-formatted text; st.text keeps its column alignment.
        st.text(classification_report(y_test, test_prediction))
        st.subheader('Accuracy Score')
        st.write(accuracy_score(y_test, test_prediction))

        # Return the prediction. Several rows can match one facility, so the
        # facility is reported as positive when any matching row is predicted 1.
        if (prediction == 1).any():
            st.write("The facility reports PMTCT")
        else:
            st.write("The facility does not report PMTCT")