From 30e363b6223eb09f97ad1ff181d54732fdbc9a3c Mon Sep 17 00:00:00 2001
From: ankit kumar suman <52414098+techboyankit@users.noreply.github.com>
Date: Sat, 30 Oct 2021 10:23:51 -0700
Subject: [PATCH] Create Diabetics_Prediction

Assignment Solution.
---
 Diabetics_Prediction | 80 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)
 create mode 100644 Diabetics_Prediction

diff --git a/Diabetics_Prediction b/Diabetics_Prediction
new file mode 100644
index 0000000..caf4f4c
--- /dev/null
+++ b/Diabetics_Prediction
@@ -0,0 +1,80 @@
+import numpy as np
+import pandas as pd
+from sklearn.preprocessing import StandardScaler
+from sklearn.model_selection import train_test_split
+from sklearn import svm
+from sklearn.metrics import accuracy_score
+
+# Load the diabetes dataset into a pandas DataFrame. The rest of the script
+# expects it to contain an 'Outcome' label column next to the feature columns.
+diabetes_dataset = pd.read_csv('diabetes.csv')
+
+# NOTE: the notebook-only help query `pd.read_csv?` was dropped; it is IPython
+# syntax and a SyntaxError when this file is run with the plain interpreter.
+
+# Bare expressions are only echoed by a notebook, so print them explicitly.
+print(diabetes_dataset.head())  # first 5 rows
+print(diabetes_dataset.shape)  # (rows, columns)
+print(diabetes_dataset.describe())  # per-column summary statistics
+
+# Number of rows per value of the label column.
+print(diabetes_dataset['Outcome'].value_counts())
+
+# Mean of every feature within each outcome group.
+print(diabetes_dataset.groupby('Outcome').mean())
+
+# Separate the label column from the feature columns.
+Y = diabetes_dataset['Outcome']
+X = diabetes_dataset.drop(columns='Outcome')
+
+# Show both so the feature/label separation can be checked by eye.
+print(X)
+print(Y)
+
+# Train Test Split
+# Split BEFORE scaling so the held-out rows never influence the scaler's
+# statistics; fitting on the full dataset leaks test-set information.
+X_train, X_test, Y_train, Y_test = train_test_split(
+    X, Y, test_size=0.2, stratify=Y, random_state=2)
+print(X.shape, X_train.shape, X_test.shape)
+
+# Data Standardization
+# Fit on the training rows only, then apply the same transform everywhere.
+scaler = StandardScaler()
+X_train = scaler.fit_transform(X_train)
+X_test = scaler.transform(X_test)
+print(X_train)
+
+# Training the Model
+
+# Support vector classifier with a linear kernel, fitted on the training
+# partition; fit() returns the estimator, so the two steps are chained.
+classifier = svm.SVC(kernel='linear').fit(X_train, Y_train)
+
+# Model Evaluation
+# Accuracy Score
+# accuracy_score is documented as accuracy_score(y_true, y_pred). Accuracy is
+# symmetric, so the numbers are unchanged, but the true labels are now passed
+# first to match the documented signature.
+X_train_prediction = classifier.predict(X_train)
+training_data_accuracy = accuracy_score(Y_train, X_train_prediction)
+print('Accuracy score of the training data : ', training_data_accuracy)
+X_test_prediction = classifier.predict(X_test)
+test_data_accuracy = accuracy_score(Y_test, X_test_prediction)
+print('Accuracy score of the test data : ', test_data_accuracy)
+
+# Making a Predictive System
+# One sample; assumes the values follow the training feature column order.
+input_data = (5, 166, 72, 19, 175, 25.8, 0.587, 51)
+# reshape to (1, n_features) as we are predicting for one instance
+input_data_reshaped = np.asarray(input_data).reshape(1, -1)
+# Re-attach the feature names: the scaler was fitted on a DataFrame, and
+# transforming an unnamed array makes scikit-learn emit a feature-name warning.
+feature_names = diabetes_dataset.columns.drop('Outcome')
+input_frame = pd.DataFrame(input_data_reshaped, columns=feature_names)
+std_data = scaler.transform(input_frame)
+print(std_data)
+prediction = classifier.predict(std_data)
+print(prediction)
+is_diabetic = prediction[0] != 0
+print('The person is diabetic' if is_diabetic else 'The person is not diabetic')