Loan ML Complete Guide
======================================
# Location of the loan CSV in S3.
bucket = "my-data-bucket"
key = "loan/loan_data.csv"
# Fetch the object, pull its whole body into memory as bytes, then decode the
# bytes as UTF-8 text and parse that text as CSV.
s3 = boto3.client("s3")
obj = s3.get_object(Bucket=bucket, Key=key)
raw_bytes = obj["Body"].read()
df = pd.read_csv(StringIO(raw_bytes.decode("utf-8")))
# For Parquet:
# df = pd.read_parquet(BytesIO(obj["Body"].read()))
------------------------------------------------------------
------------------------------------------------------------
------------------------------------------------------------
------------------------------------------------------------
------------------------------------------------------------
------------------------------------------------------------
SECTION 7 · POLYNOMIAL / INTERACTIONS
---------------------------------------
from sklearn.preprocessing import PolynomialFeatures
# Expand the numeric columns into every polynomial term up to degree 2 (the
# original columns, their squares and their pairwise products);
# include_bias=False leaves out the constant all-ones column.
poly = PolynomialFeatures(degree=2, include_bias=False)
# Pass index=df.index so df_poly keeps df's row labels. Without it the new
# frame gets a fresh 0..n-1 index and silently misaligns (NaN rows) when it is
# joined or concatenated back onto a df whose index is not the default range.
df_poly = pd.DataFrame(
    poly.fit_transform(df[num_cols]),
    columns=poly.get_feature_names_out(num_cols),
    index=df.index,
)
------------------------------------------------------------
------------------------------------------------------------
------------------------------------------------------------
------------------------------------------------------------
# Fit rf_model on the training split (rf_model, X_train and y_train are
# defined outside this excerpt).
rf_model.fit(X_train, y_train)
# Persist the fitted object to "rf_full_pipeline.pkl" with joblib.
# NOTE(review): the file name suggests rf_model is a full preprocessing + model
# pipeline rather than a bare estimator — confirm against its definition.
joblib.dump(rf_model, "rf_full_pipeline.pkl")
------------------------------------------------------------
------------------------------------------------------------
------------------------------------------------------------
------------------------------------------------------------
# Baseline classifiers, each fitted on the same training split (X_train and
# y_train are defined outside this excerpt). Every estimator that accepts a
# seed is given random_state=42 so that reruns produce the same models.
# 2 Decision Tree
from sklearn.tree import DecisionTreeClassifier
dt = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
# 3 Random Forest
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(n_estimators=500, random_state=42).fit(X_train, y_train)
# 4 Gradient Boosting
from sklearn.ensemble import GradientBoostingClassifier
# Seeded like the other models; without random_state the fitted model can
# differ from run to run.
gb = GradientBoostingClassifier(random_state=42).fit(X_train, y_train)
# 5 XGBoost
import xgboost as xgb
xgb_clf = xgb.XGBClassifier(random_state=42).fit(X_train, y_train)
# 6 LightGBM
import lightgbm as lgb
lgb_clf = lgb.LGBMClassifier(random_state=42).fit(X_train, y_train)
# 8 k-Nearest Neighbours
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=15).fit(X_train, y_train)
# 9 Naive Bayes
from sklearn.naive_bayes import GaussianNB
nb = GaussianNB().fit(X_train, y_train)
------------------------------------------------------------
END OF COOKBOOK