Diabetes Prediction Project (Colab notebook)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
# Load the dataset and preview the first rows
dataset=pd.read_csv('diabetes.csv')
dataset.head()
   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  DiabetesPedigreeFunction  Age  Outcome
1            1       85             66             29        0  26.6                     0.351   31        0
3            1       89             66             23       94  28.1                     0.167   21        0
dataset.columns
dataset.describe()
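describe() shows a minimum of 0 for Glucose, BloodPressure, SkinThickness, Insulin and BMI, which is physiologically impossible and really means "missing". The cell that converts those zeros to NaN is not included in this export; a minimal sketch of the likely step (the column list is an assumption inferred from the null counts printed below):
import numpy as np
# Treat zeros in these clinical columns as missing values
cols_with_zero_as_missing = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']
dataset[cols_with_zero_as_missing] = dataset[cols_with_zero_as_missing].replace(0, np.nan)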
dataset.isnull().sum()
Pregnancies 0
Glucose 5
BloodPressure 35
SkinThickness 227
Insulin 374
BMI 11
DiabetesPedigreeFunction 0
Age 0
Outcome 0
dtype: int64
print(dataset.head(10))
We need to replace the NaN values with the mean or median of each column: the mean for the roughly symmetric Glucose and BloodPressure distributions, and the median for the skewed SkinThickness, Insulin and BMI columns.
dataset['Glucose'] = dataset['Glucose'].fillna(dataset['Glucose'].mean())
dataset['BloodPressure'] = dataset['BloodPressure'].fillna(dataset['BloodPressure'].mean())
dataset['SkinThickness'] = dataset['SkinThickness'].fillna(dataset['SkinThickness'].median())
dataset['Insulin'] = dataset['Insulin'].fillna(dataset['Insulin'].median())
dataset['BMI'] = dataset['BMI'].fillna(dataset['BMI'].median())
print(dataset)
dataset.hist(figsize=(10,10))
plt.subplot(121)
sns.distplot(dataset['Insulin'])
plt.subplot(122)
dataset['Insulin'].plot.box(figsize=(16, 5))
plt.show()
plt.subplot(121)
sns.distplot(dataset['Glucose'])
plt.subplot(122)
dataset['Glucose'].plot.box(figsize=(16, 5))
plt.show()
plt.subplot(121)
sns.distplot(dataset['BMI'])
plt.subplot(122)
dataset['BMI'].plot.box(figsize=(16, 5))
plt.show()
plt.subplot(121)
sns.distplot(dataset['BloodPressure'])
plt.subplot(122)
dataset['BloodPressure'].plot.box(figsize=(16, 5))
plt.show()
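Note that sns.distplot is deprecated and has been removed from recent seaborn releases. On a newer seaborn the same distribution-plus-boxplot panels can be drawn as below (a sketch assuming a seaborn version that provides histplot, 0.11 or later; not the notebook's original code):
fig, axes = plt.subplots(1, 2, figsize=(16, 5))
sns.histplot(dataset['Insulin'], kde=True, ax=axes[0])  # histogram with KDE overlay, replaces distplot
sns.boxplot(x=dataset['Insulin'], ax=axes[1])           # horizontal box plot
plt.show()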
dataset.corr()
sns.heatmap(dataset.corr(), annot=True)
Feature Scaling
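The cells that split the data and scale the features are not included in this export. A minimal sketch of the assumed steps (the 576/192 train/test row counts in the confusion matrices below imply test_size=0.25; random_state=0 is an assumption):
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Separate features and target
X = dataset.drop('Outcome', axis=1).values
y = dataset['Outcome'].values

# Hold out 25% of the rows for testing (576 train / 192 test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

# Standardize the features; sc is reused later to transform new samples
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)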
X_train
X_test
y_train
array([0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0,
0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1,
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0,
1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1,
0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1,
0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0,
0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1,
0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0,
0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1,
1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0,
0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1,
0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0,
1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0,
1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0,
1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0,
0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1,
1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1,
1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1,
0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0,
1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1,
1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1,
0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0,
0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1,
1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0,
1, 0, 0, 0])
y_test
array([1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1,
1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1,
1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0,
1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,
0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0])
Logistic Regression
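The cell that imports and fits the logistic regression model (and the metric functions used throughout) is not shown; a sketch of the assumed code:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score

lr = LogisticRegression(random_state=0)
lr.fit(X_train, y_train)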
LogisticRegression(random_state=0)
lr_train = lr.predict(X_train)
print(confusion_matrix(y_train,lr_train))
print(accuracy_score(y_train,lr_train))
[[322 48]
[ 89 117]]
0.7621527777777778
lr_test= lr.predict(X_test)
print(confusion_matrix(y_test,lr_test))
print(accuracy_score(y_test,lr_test))
[[117 13]
[ 28 34]]
0.7864583333333334
K-Nearest Neighbors
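The KNN fitting cell is not shown; the repr below confirms n_neighbors=15, so the assumed code is:
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=15)
knn.fit(X_train, y_train)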
KNeighborsClassifier(n_neighbors=15)
knn_train = knn.predict(X_train)
print(confusion_matrix(y_train,knn_train))
print(accuracy_score(y_train,knn_train))
[[332 38]
[ 84 122]]
0.7881944444444444
knn_test= knn.predict(X_test)
print(confusion_matrix(y_test,knn_test))
print(accuracy_score(y_test,knn_test))
[[118 12]
[ 25 37]]
0.8072916666666666
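Decision Tree
The cell that builds the decision tree is not included in this export; a sketch of the assumed code (the hyperparameters are an assumption):
from sklearn.tree import DecisionTreeClassifier

dt = DecisionTreeClassifier(random_state=0)
dt.fit(X_train, y_train)
The perfect 1.0 training accuracy below is typical of an unpruned tree overfitting the training set; the test accuracy is the meaningful figure.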
dt_train=dt.predict(X_train)
confusion_matrix(y_train,dt_train)
accuracy_score(y_train,dt_train)
1.0
dt_pred=dt.predict(X_test)
confusion_matrix(y_test,dt_pred)
accuracy_score(y_test,dt_pred)
0.7135416666666666
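Random Forest
The import for RandomForestClassifier is not shown above; it comes from sklearn.ensemble:
from sklearn.ensemble import RandomForestClassifier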
rfc = RandomForestClassifier(n_estimators=200)
rfc.fit(X_train, y_train)
RandomForestClassifier(n_estimators=200)
rfc_train=rfc.predict(X_train)
print(confusion_matrix(y_train,rfc_train))
print(accuracy_score(y_train,rfc_train))
[[370 0]
[ 0 206]]
1.0
rfc_pred=rfc.predict(X_test)
print(confusion_matrix(y_test,rfc_pred))
print(accuracy_score(y_test,rfc_pred))
[[113 17]
[ 28 34]]
0.765625
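Support Vector Machine
The cell that fits the support vector classifier is not shown; the repr below confirms C=0.25 and random_state=0, so the assumed code is:
from sklearn.svm import SVC

svc = SVC(C=0.25, random_state=0)
svc.fit(X_train, y_train)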
SVC(C=0.25, random_state=0)
svc_train=svc.predict(X_train)
print(confusion_matrix(y_train,svc_train))
print(accuracy_score(y_train,svc_train))
[[343 27]
[ 89 117]]
0.7986111111111112
svc_pred=svc.predict(X_test)
print(confusion_matrix(y_test,svc_pred))
print(accuracy_score(y_test,svc_pred))
[[122 8]
[ 32 30]]
0.7916666666666666
Naive Bayes
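The cell that fits the Gaussian Naive Bayes model is not shown; a sketch of the assumed code:
from sklearn.naive_bayes import GaussianNB

nb = GaussianNB()
nb.fit(X_train, y_train)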
GaussianNB()
nb_train=nb.predict(X_train)
print(confusion_matrix(y_train,nb_train))
print(accuracy_score(y_train,nb_train))
[[305 65]
[ 81 125]]
0.7465277777777778
nb_test=nb.predict(X_test)
print(confusion_matrix(y_test,nb_test))
print(accuracy_score(y_test,nb_test))
[[113 17]
[ 27 35]]
0.7708333333333334
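The cell that printed the following side-by-side listing of predicted vs. actual test labels is not in the export. Which model's predictions were listed is unclear; knn_test is assumed here because the accuracy printed after the listing uses it. A sketch of the usual pattern:
# Column 0: predicted label, column 1: actual label
print(np.concatenate((knn_test.reshape(len(knn_test), 1),
                      y_test.reshape(len(y_test), 1)), axis=1))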
[[1 1]
[0 0]
[0 0]
[1 1]
[0 0]
[0 0]
[1 1]
[1 1]
[0 0]
[0 0]
[1 1]
[1 1]
[0 0]
[0 0]
[0 0]
[0 0]
[1 1]
[0 0]
[0 0]
[0 0]
[0 1]
[0 1]
[0 0]
[0 0]
[0 0]
[0 0]
[0 0]
[1 0]
[0 0]
[0 0]
[1 0]
[0 0]
[0 0]
[1 0]
[0 0]
[1 1]
[0 1]
[0 0]
[1 0]
[1 0]
[0 0]
[0 0]
[0 0]
[1 1]
[1 1]
[0 0]
[0 0]
[0 1]
[1 1]
[0 1]
[0 0]
[0 0]
[1 1]
[0 0]
[0 0]
[0 0]
[0 0]
[0 1]
print(confusion_matrix(y_test,knn_test))
acc=accuracy_score(y_test,knn_test)
print("Accuracy: {:.2f} %".format(acc*100))
[[118 12]
[ 25 37]]
Accuracy: 80.73 %
print("y_pred")
print((knn.predict([X_test[3]])))
print("y_true")
print(y_test[3])
y_pred
[1]
y_true
1
p=knn.predict(sc.transform([[0,137,40,35,168,43.1,2.228,33]]))
print(p)
if p == 0:
    print("Not Diabetic")
else:
    print("Diabetic")
[1]
Diabetic
import tensorflow as tf
tf.__version__
'2.15.0'
ann= tf.keras.models.Sequential()
ann.add(tf.keras.layers.Dense(units=16,activation='relu'))
ann.add(tf.keras.layers.Dense(units=16,activation='relu'))
ann.add(tf.keras.layers.Dense(units=8,activation='relu'))
ann.add(tf.keras.layers.Dense(units=1,activation='sigmoid'))
ann.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])
ann.fit(X_train,y_train,batch_size=32,epochs=100)
Epoch 1/100
18/18 [==============================] - 2s 4ms/step - loss: 0.7237 - accuracy: 0.4132
Epoch 2/100
18/18 [==============================] - 0s 4ms/step - loss: 0.6966 - accuracy: 0.5608
Epoch 3/100
18/18 [==============================] - 0s 8ms/step - loss: 0.6826 - accuracy: 0.6372
Epoch 4/100
18/18 [==============================] - 0s 5ms/step - loss: 0.6740 - accuracy: 0.6667
Epoch 5/100
18/18 [==============================] - 0s 5ms/step - loss: 0.6647 - accuracy: 0.6823
Epoch 6/100
18/18 [==============================] - 0s 11ms/step - loss: 0.6552 - accuracy: 0.6944
Epoch 7/100
18/18 [==============================] - 0s 7ms/step - loss: 0.6431 - accuracy: 0.6997
Epoch 8/100
18/18 [==============================] - 0s 7ms/step - loss: 0.6287 - accuracy: 0.7066
Epoch 9/100
18/18 [==============================] - 0s 8ms/step - loss: 0.6100 - accuracy: 0.7101
Epoch 10/100
18/18 [==============================] - 0s 5ms/step - loss: 0.5886 - accuracy: 0.7153
Epoch 11/100
18/18 [==============================] - 0s 6ms/step - loss: 0.5659 - accuracy: 0.7240
Epoch 12/100
18/18 [==============================] - 0s 9ms/step - loss: 0.5456 - accuracy: 0.7361
Epoch 13/100
18/18 [==============================] - 0s 12ms/step - loss: 0.5276 - accuracy: 0.7292
Epoch 14/100
18/18 [==============================] - 0s 8ms/step - loss: 0.5155 - accuracy: 0.7378
Epoch 15/100
18/18 [==============================] - 0s 4ms/step - loss: 0.5053 - accuracy: 0.7517
Epoch 16/100
18/18 [==============================] - 0s 6ms/step - loss: 0.4980 - accuracy: 0.7552
Epoch 17/100
18/18 [==============================] - 0s 5ms/step - loss: 0.4902 - accuracy: 0.7656
Epoch 18/100
18/18 [==============================] - 0s 3ms/step - loss: 0.4851 - accuracy: 0.7622
Epoch 19/100
18/18 [==============================] - 0s 7ms/step - loss: 0.4810 - accuracy: 0.7656
Epoch 20/100
18/18 [==============================] - 0s 16ms/step - loss: 0.4762 - accuracy: 0.7760
Epoch 21/100
18/18 [==============================] - 0s 8ms/step - loss: 0.4714 - accuracy: 0.7743
Epoch 22/100
18/18 [==============================] - 0s 8ms/step - loss: 0.4679 - accuracy: 0.7778
Epoch 23/100
18/18 [==============================] - 0s 7ms/step - loss: 0.4643 - accuracy: 0.7795
Epoch 24/100
18/18 [==============================] - 0s 6ms/step - loss: 0.4628 - accuracy: 0.7778
Epoch 25/100
18/18 [==============================] - 0s 7ms/step - loss: 0.4587 - accuracy: 0.7830
Epoch 26/100
18/18 [==============================] - 0s 8ms/step - loss: 0.4553 - accuracy: 0.7812
Epoch 27/100
18/18 [==============================] - 0s 11ms/step - loss: 0.4521 - accuracy: 0.7882
Epoch 28/100
18/18 [==============================] - 0s 6ms/step - loss: 0.4494 - accuracy: 0.7865
Epoch 29/100
18/18 [==============================] - 0s 8ms/step - loss: 0.4471 - accuracy: 0.7899
a_pred=ann.predict(X_test)
a_pred=(a_pred>0.5)
print(np.concatenate((a_pred.reshape(len(a_pred),1), y_test.reshape(len(y_test),1)),1))
a_train=ann.predict(X_train)
a_train=(a_train>0.5)
print(np.concatenate((a_train.reshape(len(a_train),1), y_train.reshape(len(y_train),1)),1))
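The cell that computed the training-set confusion matrix and accuracy shown below is not in the export; a sketch of the assumed code:
print(confusion_matrix(y_train, a_train))
acc = accuracy_score(y_train, a_train)
print("Accuracy: {:.2f} %".format(acc * 100))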
[[333 37]
[ 55 151]]
Accuracy: 84.03 %
cm=confusion_matrix(y_test,a_pred)
print(cm)
acc=accuracy_score(y_test,a_pred)
print("Accuracy: {:.2f} %".format(acc*100))
[[115 15]
[ 23 39]]
Accuracy: 80.21 %
b=(ann.predict(sc.transform([[0,137,40,35,168,43.1,2.228,33]]))>0.5)
if b == True:
    print("Diabetic")
else:
    print("Not Diabetic")