# Provenance (recovered from the original git patch header):
#   commit:  30e363b6223eb09f97ad1ff181d54732fdbc9a3c
#   author:  ankit kumar suman <52414098+techboyankit@users.noreply.github.com>
#   date:    Sat, 30 Oct 2021 10:23:51 -0700
#   subject: Create Diabetics_Prediction (Assignment Solution)
"""Train a linear SVM on ``diabetes.csv`` and classify one sample record.

The script loads the dataset, prints a few exploratory summaries, splits the
rows into train/test partitions, standardizes the features, fits a linear
support vector classifier, reports train/test accuracy, and finally predicts
the outcome for one hard-coded sample.

The dataset must contain an ``Outcome`` column holding the class label; every
other column is used as a feature.
"""

import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

DATASET_PATH = 'diabetes.csv'
TARGET_COLUMN = 'Outcome'

# One record to classify at the end of the run.  The values must be listed in
# the same order as the feature columns of the CSV (all columns but Outcome).
SAMPLE_INPUT = (5, 166, 72, 19, 175, 25.8, 0.587, 51)


def load_dataset(path=DATASET_PATH):
    """Load the diabetes dataset from *path* into a pandas DataFrame."""
    return pd.read_csv(path)


def explore(dataset):
    """Print basic descriptive information about *dataset*.

    In a notebook these were bare expressions whose value was echoed by the
    cell; in a plain script they must be printed explicitly to be visible.
    """
    # first 5 rows of the dataset
    print(dataset.head())
    # number of rows and columns
    print(dataset.shape)
    # statistical measures of the data
    print(dataset.describe())
    # class balance and per-class feature means
    print(dataset[TARGET_COLUMN].value_counts())
    print(dataset.groupby(TARGET_COLUMN).mean())


def split_features_and_labels(dataset):
    """Return ``(features, labels)`` with the label column separated out."""
    features = dataset.drop(columns=TARGET_COLUMN)
    labels = dataset[TARGET_COLUMN]
    return features, labels


def train_and_evaluate(features, labels):
    """Fit the scaler and the linear SVM, printing train/test accuracy.

    The train/test split happens *before* standardization and the scaler is
    fitted on the training partition only, so no statistics of the held-out
    rows leak into training.

    Args:
        features: DataFrame of feature columns.
        labels: Series of class labels aligned with *features*.

    Returns:
        ``(scaler, classifier)`` -- the fitted ``StandardScaler`` and ``SVC``.
    """
    X_train, X_test, Y_train, Y_test = train_test_split(
        features, labels, test_size=0.2, stratify=labels, random_state=2
    )
    print(features.shape, X_train.shape, X_test.shape)

    # Data standardization (fit on training rows only, then reuse for test).
    scaler = StandardScaler()
    X_train_std = scaler.fit_transform(X_train)
    X_test_std = scaler.transform(X_test)
    print(X_train_std)

    # Training the support vector machine classifier.
    classifier = svm.SVC(kernel='linear')
    classifier.fit(X_train_std, Y_train)

    # Model evaluation: accuracy_score expects (y_true, y_pred).
    training_data_accuracy = accuracy_score(
        Y_train, classifier.predict(X_train_std)
    )
    print('Accuracy score of the training data : ', training_data_accuracy)

    test_data_accuracy = accuracy_score(Y_test, classifier.predict(X_test_std))
    print('Accuracy score of the test data : ', test_data_accuracy)

    return scaler, classifier


def predict_one(scaler, classifier, feature_names, values):
    """Classify a single record and return the predicted label.

    Args:
        scaler: Fitted ``StandardScaler``.
        classifier: Fitted classifier exposing ``predict``.
        feature_names: Column names the scaler was fitted with, in order.
        values: Raw feature values for one record, in the same order.

    Returns:
        The predicted class label for the record.

    Raises:
        ValueError: If *values* does not have one entry per feature.
    """
    # Reshape to (1, n_features): we are predicting for one instance.
    record = np.asarray(values, dtype=float).reshape(1, -1)
    if record.shape[1] != len(feature_names):
        raise ValueError(
            f'expected {len(feature_names)} feature values, '
            f'got {record.shape[1]}'
        )

    # The scaler was fitted on a DataFrame; passing named columns keeps
    # scikit-learn from warning about missing feature names.
    record_frame = pd.DataFrame(record, columns=list(feature_names))
    std_data = scaler.transform(record_frame)
    print(std_data)

    prediction = classifier.predict(std_data)
    print(prediction)
    return prediction[0]


def main():
    """Run the full load / explore / train / predict workflow."""
    diabetes_dataset = load_dataset()
    explore(diabetes_dataset)

    # Separating the data and labels.
    X, Y = split_features_and_labels(diabetes_dataset)
    print(X)
    print(Y)

    scaler, classifier = train_and_evaluate(X, Y)

    # Making a predictive system.
    label = predict_one(scaler, classifier, X.columns, SAMPLE_INPUT)
    if label == 0:
        print('The person is not diabetic')
    else:
        print('The person is diabetic')


if __name__ == '__main__':
    main()