# Decision Tree and Random Forest Classification — kyphosis dataset
# Fix: pandas is used throughout but was never imported in this transcript.
import pandas as pd

# Load the kyphosis dataset: 81 post-surgery patients, binary target
# 'Kyphosis' (absent/present) plus three numeric features (Age, Number, Start).
df = pd.read_csv('kyphosis.csv')
df.head()
  Kyphosis  Age  Number  Start
0   absent   71       3      5
1   absent  158       3     14
2  present  128       4      5
3   absent    2       5      1
4   absent    1       4     15
In [4]: df.info()  # column dtypes and non-null counts — all 81 rows non-null, so no missing values to handle
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 81 entries, 0 to 80
Data columns (total 4 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Kyphosis 81 non-null object
1 Age 81 non-null int64
2 Number 81 non-null int64
3 Start 81 non-null int64
dtypes: int64(3), object(1)
memory usage: 2.7+ KB
# Fix: seaborn is used here but was never imported in this transcript.
import seaborn as sns

# Pairwise scatter plots and per-feature distributions, colored by the target;
# shows how Age/Number/Start relate to each other and separate the two classes.
sns.pairplot(df, hue='Kyphosis')
<seaborn.axisgrid.PairGrid at 0x2d05afdef70>
Out[5]:
from sklearn.model_selection import train_test_split

# Features: Age, Number, Start; target: Kyphosis (absent/present).
X = df.drop('Kyphosis', axis=1)
y = df['Kyphosis']

# Bug fix: train_test_split was imported but never called, yet
# X_train/X_test/y_train/y_test are used by the cells below.
# test_size=0.33 yields the 27-sample test set seen in the reports (81 * 0.33 -> 27);
# random_state pins the split so results are reproducible on re-run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
# Fix: DecisionTreeClassifier was never imported in this transcript.
from sklearn.tree import DecisionTreeClassifier

# Seed the tree so tie-breaking among equally good splits is deterministic.
dtree = DecisionTreeClassifier(random_state=42)
dtree.fit(X_train, y_train)
In [13]: prediction=dtree.predict(X_test)  # predicted Kyphosis labels for the held-out test set
# Fix: confusion_matrix and classification_report were never imported.
from sklearn.metrics import classification_report, confusion_matrix

# Evaluate the decision tree on the held-out test set.
print(confusion_matrix(y_test, prediction))
print('\n')
print(classification_report(y_test, prediction))
[[22  0]
 [ 3  2]]


              precision    recall  f1-score   support

      absent       0.88      1.00      0.94        22
     present       1.00      0.40      0.57         5

    accuracy                           0.89        27
   macro avg       0.94      0.70      0.75        27
weighted avg       0.90      0.89      0.87        27
# Fix: RandomForestClassifier was never imported in this transcript.
from sklearn.ensemble import RandomForestClassifier

# 200 trees; random_state pins the bootstrap sampling / feature selection
# so the forest (and its scores) are reproducible on re-run.
rfc = RandomForestClassifier(n_estimators=200, random_state=42)
rfc.fit(X_train, y_train)
In [19]: rfc_predict=rfc.predict(X_test)  # random-forest predictions on the same held-out test set
# Fix: these metric functions were never imported (import is idempotent
# if the earlier evaluation cell already brought them in).
from sklearn.metrics import classification_report, confusion_matrix

# Evaluate the random forest on the held-out test set.
print(confusion_matrix(y_test, rfc_predict))
print('\n')
print(classification_report(y_test, rfc_predict))
[[22  0]
 [ 4  1]]


              precision    recall  f1-score   support

      absent       0.85      1.00      0.92        22
     present       1.00      0.20      0.33         5

    accuracy                           0.85        27
   macro avg       0.92      0.60      0.62        27
weighted avg       0.87      0.85      0.81        27
In [ ]: