sumanca1485cap
sumanca1485cap
commercial_assets_value luxury_assets_value
bank_asset_value \
0 17600000 22700000
8000000
1 2200000 8800000
3300000
2 4500000 33300000
12800000
3 3300000 23300000
7900000
4 8200000 29400000
5000000
... ... ... ..
.
4264 500000 3300000
800000
4265 2900000 11000000
1900000
4266 12400000 18100000
7300000
4267 700000 14100000
5800000
4268 11800000 35700000
12000000
loan_status
0 Approved
1 Rejected
2 Rejected
3 Rejected
4 Rejected
... ...
4264 Rejected
4265 Approved
4266 Rejected
4267 Approved
4268 Approved
# Print the column labels of df1.
print(df1.columns)
False
loan_id False
no_of_dependents False
education False
self_employed False
income_annum False
loan_amount False
loan_term False
cibil_score False
residential_assets_value False
commercial_assets_value False
luxury_assets_value False
bank_asset_value False
loan_status False
dtype: bool
# Notebook-style bare expression: evaluates df1's column labels. A notebook
# displays the value of a cell's last expression; a plain script discards it.
df1.columns

# Report the result of the zero-value check held in `zero_cibil`.
# NOTE(review): the flag is named `zero_cibil`, yet both messages refer to the
# annual income column (spelled "anual") - confirm which column the flag was
# computed from. The message text is intentionally left unchanged here.
if zero_cibil:
    print("There are zero values in the anual income column.")
else:
    print("There are no zero values in the anual income column.")
# Strip surrounding whitespace from the column labels and from every
# object-dtype (string) column, so the exact-match replacements below are not
# defeated by stray padding.
df1.columns = df1.columns.str.strip()
df1 = df1.apply(lambda x: x.str.strip() if x.dtype == "object" else x)

# Encode the three categorical columns as 0/1 integers.
# Each mapping must stay on one logical line: a single-quoted string literal
# such as 'Not Graduate' cannot contain a raw line break.
df1['loan_status'] = df1['loan_status'].replace({'Approved': 1, 'Rejected': 0})
df1['education'] = df1['education'].replace({'Not Graduate': 0, 'Graduate': 1})
df1['self_employed'] = df1['self_employed'].replace({'No': 0, 'Yes': 1})

# Notebook-style bare expression: displays the encoded frame when run as a cell.
df1
commercial_assets_value luxury_assets_value
bank_asset_value \
0 17600000 22700000 8000000
loan_status
0 1
1 0
2 0
3 0
4 0
... ...
4264 0
4265 1
4266 0
4267 1
4268 1
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
'''
This is the correlation matrix of all numeric values in this data set.
Loan status is positively correlated only with CIBIL score and loan
amount.
'''
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
# Predictors: the loan_amount and cibil_score columns of df1.
x = df1.loc[:, ['loan_amount', 'cibil_score']]
# Target: the loan_status column of df1.
y = df1.loc[:, 'loan_status']
Equation:
loan_status = -0.7122471905071295 + 0.00 * loan_amount + 0.00 *
cibil_score
Mean Squared Error: 0.09640870084539979
R-squared: 0.5874846987599525
# Logistic Regression
# Imported here because the metric helpers used below are not imported in the
# visible part of the file (only mean_squared_error and r2_score are).
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# Fit on the training split and predict hard class labels for the test split.
# x_train / y_train / x_test / y_test are expected to exist already.
logistic_model = LogisticRegression()
logistic_model.fit(x_train, y_train)
y_pred_logistic = logistic_model.predict(x_test)

# Score the predictions against y_test. These names were printed below but
# never assigned in the visible cell, which would raise NameError.
# NOTE(review): computed with scikit-learn defaults (binary averaging,
# positive class = 1) - confirm this reproduces the originally reported numbers.
accuracy_logistic = accuracy_score(y_test, y_pred_logistic)
precision_logistic = precision_score(y_test, y_pred_logistic)
recall_logistic = recall_score(y_test, y_pred_logistic)
f1_logistic = f1_score(y_test, y_pred_logistic)

print("\nLogistic Regression:")
print("Accuracy:", accuracy_logistic)
print("Precision:", precision_logistic)
print("Recall:", recall_logistic)
print("F1 Score:", f1_logistic)
Logistic Regression:
Accuracy: 0.6932084309133489
Precision: 0.6856368563685636
Recall: 0.9440298507462687
F1 Score: 0.7943485086342229
# Naïve Bayes
# Imported here because the metric helpers used below are not imported in the
# visible part of the file (only mean_squared_error and r2_score are).
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# Fit on the training split and predict hard class labels for the test split.
# x_train / y_train / x_test / y_test are expected to exist already.
nb_model = GaussianNB()
nb_model.fit(x_train, y_train)
y_pred_nb = nb_model.predict(x_test)

# Score the predictions against y_test. These names were printed below but
# never assigned in the visible cell, which would raise NameError.
# NOTE(review): computed with scikit-learn defaults (binary averaging,
# positive class = 1) - confirm this reproduces the originally reported numbers.
accuracy_nb = accuracy_score(y_test, y_pred_nb)
precision_nb = precision_score(y_test, y_pred_nb)
recall_nb = recall_score(y_test, y_pred_nb)
f1_nb = f1_score(y_test, y_pred_nb)

print("\nNaïve Bayes:")
print("Accuracy:", accuracy_nb)
print("Precision:", precision_nb)
print("Recall:", recall_nb)
print("F1 Score:", f1_nb)
Naïve Bayes:
Accuracy: 0.7740046838407494
Precision: 0.7460545193687231
Recall: 0.9701492537313433
F1 Score: 0.8434712084347121
# ROC curve for the logistic-regression model.
# fpr_logistic, tpr_logistic and roc_auc_logistic must already be defined;
# they are not computed in the visible part of the file.
plt.figure(figsize=(6, 4))
# The label is kept on one logical line: a single-quoted f-string cannot
# contain a raw line break.
plt.plot(
    fpr_logistic,
    tpr_logistic,
    color='blue',
    label=f'Logistic Regression (AUC = {roc_auc_logistic:.2f})',
)
# Dashed diagonal from (0, 0) to (1, 1) as a visual reference line.
plt.plot([0, 1], [0, 1], color='gray', linestyle='--')
plt.title('ROC Curve')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc='lower right')
plt.show()
# ROC curve for the Naïve Bayes model.
# fpr_nb, tpr_nb and roc_auc_nb must already be defined; they are not
# computed in the visible part of the file.
plt.figure(figsize=(6, 4))
# The label is kept on one logical line: a single-quoted f-string cannot
# contain a raw line break.
plt.plot(
    fpr_nb,
    tpr_nb,
    color='green',
    label=f'Naïve Bayes (AUC = {roc_auc_nb:.2f})',
)
# Dashed diagonal from (0, 0) to (1, 1) as a visual reference line.
plt.plot([0, 1], [0, 1], color='gray', linestyle='--')
plt.title('ROC Curve')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc='lower right')
plt.show()