Unit 2
Steps (all eight algorithms, applied to the Iris dataset)

Imports
    K-Nearest Neighbors (KNN):    from sklearn.neighbors import KNeighborsClassifier
    Linear Regression:            from sklearn.linear_model import LinearRegression
    Logistic Regression:          from sklearn.linear_model import LogisticRegression
    Naive Bayes (Gaussian):       from sklearn.naive_bayes import GaussianNB
    Decision Tree:                from sklearn.tree import DecisionTreeClassifier
    Random Forest:                from sklearn.ensemble import RandomForestClassifier
    Gradient Descent:             from sklearn.linear_model import SGDClassifier
    Support Vector Machine (SVM): from sklearn.svm import SVC

Loading Dataset (identical for all eight)
    from sklearn.datasets import load_iris
    data = load_iris()

X, y Division (identical for all eight)
    X = data.data
    y = data.target

Train-Test Split (identical for all eight)
    from sklearn.model_selection import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

Initializing Algorithm
    knn = KNeighborsClassifier(n_neighbors=3)
    lr = LinearRegression()
    log_reg = LogisticRegression(max_iter=200)
    gnb = GaussianNB()
    dt = DecisionTreeClassifier(random_state=42)
    rf = RandomForestClassifier(n_estimators=100, random_state=42)
    sgd = SGDClassifier(max_iter=1000, tol=1e-3)
    svm = SVC(kernel='linear')

Fitting Model (same call for every model)
    knn.fit(X_train, y_train)
    # likewise for lr, log_reg, gnb, dt, rf, sgd, and svm

Predicting (same call for every model)
    y_pred = knn.predict(X_test)

Accuracy Calculation (same call for every model)
    accuracy = knn.score(X_test, y_test)
    print(f"KNN Accuracy: {accuracy}")
    # Note: for LinearRegression, .score() returns R^2 rather than classification
    # accuracy, hence the label: print(f"Linear Regression R^2: {accuracy}")
Complete Code

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LinearRegression, LogisticRegression, SGDClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

# Shared setup: load Iris and make one train/test split for all models
data = load_iris()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# K-Nearest Neighbors
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
accuracy = knn.score(X_test, y_test)
print(f"KNN Accuracy: {accuracy}")

# Linear Regression (.score() reports R^2, not accuracy)
lr = LinearRegression()
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)
accuracy = lr.score(X_test, y_test)
print(f"Linear Regression R^2: {accuracy}")

# Logistic Regression
log_reg = LogisticRegression(max_iter=200)
log_reg.fit(X_train, y_train)
y_pred = log_reg.predict(X_test)
accuracy = log_reg.score(X_test, y_test)
print(f"Logistic Regression Accuracy: {accuracy}")

# Gaussian Naive Bayes
gnb = GaussianNB()
gnb.fit(X_train, y_train)
y_pred = gnb.predict(X_test)
accuracy = gnb.score(X_test, y_test)
print(f"Naive Bayes Accuracy: {accuracy}")

# Decision Tree
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)
y_pred = dt.predict(X_test)
accuracy = dt.score(X_test, y_test)
print(f"Decision Tree Accuracy: {accuracy}")

# Random Forest
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)
accuracy = rf.score(X_test, y_test)
print(f"Random Forest Accuracy: {accuracy}")

# Stochastic Gradient Descent classifier
sgd = SGDClassifier(max_iter=1000, tol=1e-3)
sgd.fit(X_train, y_train)
y_pred = sgd.predict(X_test)
accuracy = sgd.score(X_test, y_test)
print(f"SGD Classifier Accuracy: {accuracy}")

# Support Vector Machine
svm = SVC(kernel='linear')
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)
accuracy = svm.score(X_test, y_test)
print(f"SVM Accuracy: {accuracy}")
• n_neighbors: Specifies the number of neighbors to consider. Increasing n_neighbors leads to a smoother decision boundary but might reduce model flexibility.
• weights: Determines how the neighbors influence the classification. Options include:
  - 'uniform': all neighbors have equal weight.
  - 'distance': neighbors are weighted by their distance, with closer points having a greater influence.
• algorithm: Chooses the algorithm used to compute nearest neighbors. Options include 'auto', 'ball_tree', 'kd_tree', and 'brute'.
Theoretical Analysis: The K-Nearest Neighbors (KNN) algorithm is a non-parametric, instance-based learning technique. It classifies a new data point based on the majority class of its 'k' nearest neighbors in the feature space. The algorithm is intuitive and simple to implement but can be computationally expensive on large datasets. The choice of 'k' significantly impacts its performance, and higher-dimensional data can lead to the "curse of dimensionality".
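As a minimal, illustrative sketch (not part of the original lab code), the snippet below reuses the Iris split from this unit to show how n_neighbors and weights interact; the k values chosen and the resulting accuracies are for demonstration only.

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

data = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.2, random_state=42)

# Compare weighting schemes across several neighborhood sizes
for weights in ("uniform", "distance"):
    for k in (1, 3, 7, 15):
        knn = KNeighborsClassifier(n_neighbors=k, weights=weights)
        knn.fit(X_train, y_train)
        print(f"k={k:2d}, weights={weights}: accuracy={knn.score(X_test, y_test):.3f}")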
• solver: Chooses the algorithm used for optimization. Examples include 'liblinear', 'saga', and 'newton-cg'.
• max_iter: Specifies the maximum number of iterations for the solver to converge.
Theoretical Analysis: Logistic Regression is used for binary classification tasks, predicting the probability of class membership using a sigmoid function. It is simple and effective for linearly separable datasets but can struggle with complex or non-linear decision boundaries.
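To make the sigmoid output concrete, here is a small sketch using the Breast Cancer data that appears later in this unit (the solver and max_iter values are illustrative choices, not tuned settings): predict_proba returns the modeled class probabilities, from which predict derives hard labels.

from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

data = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.2, random_state=42)

log_reg = LogisticRegression(solver="liblinear", max_iter=1000)
log_reg.fit(X_train, y_train)

# Each row holds [P(class 0), P(class 1)]; predict() thresholds these at 0.5
print(log_reg.predict_proba(X_test[:3]))
print(log_reg.predict(X_test[:3]))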
• var_smoothing: Adds a small value to the variances for numerical stability and to prevent zero variances.
Theoretical Analysis: Naive Bayes classifiers are based on Bayes' theorem, assuming independence between features. Despite this assumption, they perform surprisingly well in many applications, especially text classification. The Gaussian variant assumes normally distributed data, making it suitable for continuous input features.
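A quick, illustrative sweep over var_smoothing on the Iris split (the values are chosen arbitrarily for demonstration; 1e-9 is scikit-learn's default):

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

data = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.2, random_state=42)

# Larger smoothing widens every class variance, flattening the fitted Gaussians
for vs in (1e-9, 1e-3, 1.0):
    gnb = GaussianNB(var_smoothing=vs)
    gnb.fit(X_train, y_train)
    print(f"var_smoothing={vs}: accuracy={gnb.score(X_test, y_test):.3f}")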
• max_depth: Sets the maximum depth of the tree. Limiting the depth can prevent overfitting.
Theoretical Analysis: Decision Trees are versatile models capable of handling both regression and classification tasks. They recursively split the dataset into subsets based on feature values to minimize entropy or impurity. While easy to interpret, they are prone to overfitting and can create complex models that generalize poorly.
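The overfitting tendency is easy to see by comparing train and test scores at different depths. A hedged sketch on the Breast Cancer split (the depth values are illustrative):

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

data = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.2, random_state=42)

# max_depth=None lets the tree grow until leaves are pure (most prone to overfit)
for depth in (2, 4, None):
    dt = DecisionTreeClassifier(max_depth=depth, random_state=42)
    dt.fit(X_train, y_train)
    print(f"max_depth={depth}: train={dt.score(X_train, y_train):.3f}, "
          f"test={dt.score(X_test, y_test):.3f}")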
• bootstrap: If True, subsets of samples are drawn with replacement for training each tree.
Theoretical Analysis: Random Forest is an ensemble learning method that combines multiple decision trees to improve model performance and reduce overfitting. It offers robust results by averaging multiple tree predictions and using random feature subsets for training. However, it can be computationally intensive and less interpretable than
single trees.
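A small sketch contrasting bootstrap sampling with training each tree on the full dataset, again on the Breast Cancer split (parameter values are illustrative):

from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

data = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.2, random_state=42)

# bootstrap=True (the default) resamples with replacement per tree;
# bootstrap=False trains every tree on all samples, so trees differ
# only through their random feature subsets
for bootstrap in (True, False):
    rf = RandomForestClassifier(n_estimators=100, bootstrap=bootstrap, random_state=42)
    rf.fit(X_train, y_train)
    print(f"bootstrap={bootstrap}: accuracy={rf.score(X_test, y_test):.3f}")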
• learning_rate: Controls the step-size schedule during gradient descent. Options include 'constant', 'optimal', 'invscaling', and 'adaptive'.
• max_iter: Sets the maximum number of passes over the training data.
Theoretical Analysis: Gradient Descent is an optimization algorithm used for training models like linear and logistic regression. It iteratively updates model parameters in the direction of the negative gradient of the loss function. While it is efficient for large datasets, it can get stuck in local minima and may require careful tuning of learning rates.
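A sketch comparing learning-rate schedules for SGDRegressor on California Housing (note: fetch_california_housing downloads the data on first use; eta0 and the schedule list are illustrative). Standardizing the features first matters because gradient descent is sensitive to feature scale:

from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

data = fetch_california_housing()
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.2, random_state=42)

# Standardize features: unscaled inputs can make SGD diverge
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

for schedule in ("constant", "invscaling", "adaptive"):
    sgd = SGDRegressor(learning_rate=schedule, eta0=0.01,
                       max_iter=1000, tol=1e-3, random_state=42)
    sgd.fit(X_train, y_train)
    print(f"learning_rate={schedule}: R^2={sgd.score(X_test, y_test):.3f}")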
• C: Regularization parameter controlling the trade-off between fitting the training data closely and keeping the margin wide (a simpler model that tends to generalize better).
• gamma: Defines the influence of individual training examples on the decision boundary for kernels such as 'rbf'. A higher value results in a more flexible model.
Theoretical Analysis: Support Vector Machine (SVM) is a powerful classification algorithm that finds the hyperplane maximizing the margin between two classes. It is effective in high-dimensional spaces and can be used for non-linear classification with kernel tricks. However, it is sensitive to outliers and can be computationally expensive for
large datasets.
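How C and gamma trade off is easiest to see with an RBF kernel. A hedged sketch on the Iris split (the grids below are illustrative, not tuned recommendations):

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

data = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.2, random_state=42)

# Larger C and larger gamma both push toward a more flexible (riskier) fit
for C in (0.1, 1, 10):
    for gamma in (0.01, 0.1, 1):
        svm = SVC(kernel="rbf", C=C, gamma=gamma)
        svm.fit(X_train, y_train)
        print(f"C={C}, gamma={gamma}: accuracy={svm.score(X_test, y_test):.3f}")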
Conclusion
Understanding the significance of each algorithm’s function and its arguments is crucial for tuning models to achieve the best performance. Each argument allows users to control specific aspects of the model, influencing its behavior and outcomes on different datasets. Choosing appropriate values for these parameters can significantly enhance model
performance and generalization capabilities.
Steps (each algorithm paired with a dataset suited to its task)

Algorithm and dataset:
    K-Nearest Neighbors: Iris
    Linear Regression: California Housing
    Logistic Regression: Breast Cancer
    Naive Bayes: Iris
    Decision Tree: Breast Cancer
    Random Forest: Breast Cancer
    Gradient Descent (SGDRegressor): California Housing
    Support Vector Machine: Iris

Imports
    The model imports match the first table, except that Gradient Descent now uses the regression variant:
    from sklearn.linear_model import SGDRegressor

Loading Dataset
    from sklearn.datasets import load_iris                  # KNN, Naive Bayes, SVM
    from sklearn.datasets import fetch_california_housing   # Linear Regression, SGD Regressor
    from sklearn.datasets import load_breast_cancer         # Logistic Regression, Decision Tree, Random Forest
    data = load_iris()   # or fetch_california_housing() / load_breast_cancer() as above

X, y Division (identical for all eight)
    X = data.data
    y = data.target

Train-Test Split (identical for all eight)
    from sklearn.model_selection import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

Initializing Algorithm
    knn = KNeighborsClassifier(n_neighbors=3)
    lr = LinearRegression()
    log_reg = LogisticRegression(max_iter=200)
    gnb = GaussianNB()
    dt = DecisionTreeClassifier(random_state=42)
    rf = RandomForestClassifier(n_estimators=100, random_state=42)
    sgd = SGDRegressor(max_iter=1000, tol=1e-3)
    svm = SVC(kernel='linear')

Fitting Model (same call for every model)
    knn.fit(X_train, y_train)

Predicting (same call for every model)
    y_pred = knn.predict(X_test)

Accuracy Calculation
    accuracy = knn.score(X_test, y_test)
    print(f"KNN Accuracy: {accuracy}")
    # The regression models report R^2 from .score():
    # print(f"Linear Regression R^2: {accuracy}")
    # print(f"SGD Regressor R^2: {accuracy}")
Complete Code

from sklearn.datasets import load_iris, fetch_california_housing, load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LinearRegression, LogisticRegression, SGDRegressor
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

# Iris dataset: KNN, Naive Bayes, SVM (classification)
data = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.2, random_state=42)

knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
print(f"KNN Accuracy: {knn.score(X_test, y_test)}")

gnb = GaussianNB()
gnb.fit(X_train, y_train)
y_pred = gnb.predict(X_test)
print(f"Naive Bayes Accuracy: {gnb.score(X_test, y_test)}")

svm = SVC(kernel='linear')
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)
print(f"SVM Accuracy: {svm.score(X_test, y_test)}")

# California Housing dataset: Linear Regression, SGD Regressor (regression)
data = fetch_california_housing()
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.2, random_state=42)

lr = LinearRegression()
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)
print(f"Linear Regression R^2: {lr.score(X_test, y_test)}")

sgd = SGDRegressor(max_iter=1000, tol=1e-3)
sgd.fit(X_train, y_train)
y_pred = sgd.predict(X_test)
print(f"SGD Regressor R^2: {sgd.score(X_test, y_test)}")

# Breast Cancer dataset: Logistic Regression, Decision Tree, Random Forest (classification)
data = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.2, random_state=42)

log_reg = LogisticRegression(max_iter=200)
log_reg.fit(X_train, y_train)
y_pred = log_reg.predict(X_test)
print(f"Logistic Regression Accuracy: {log_reg.score(X_test, y_test)}")

dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)
y_pred = dt.predict(X_test)
print(f"Decision Tree Accuracy: {dt.score(X_test, y_test)}")

rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)
print(f"Random Forest Accuracy: {rf.score(X_test, y_test)}")
Steps with explicit evaluation metrics (same algorithm-to-dataset pairings as above)

This table repeats the previous pipeline unchanged (imports, dataset loading, X, y division, train-test split, initializing, fitting, predicting) and replaces the single .score() call with explicit metrics from sklearn.metrics.

Imports (in addition to the model imports above)
    Classification models (KNN, Naive Bayes, Decision Tree, Random Forest, SVM):
        from sklearn.metrics import accuracy_score, f1_score
    Logistic Regression:
        from sklearn.metrics import accuracy_score, f1_score, classification_report
    Regression models (Linear Regression, SGD Regressor):
        from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

Evaluation Metrics
    Classification models (shown for KNN; the other classifiers differ only in the printed label):
        accuracy = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average='weighted')
        print(f"KNN Accuracy: {accuracy}")
        print(f"KNN F1 Score: {f1}")
    Logistic Regression additionally prints a per-class summary:
        report = classification_report(y_test, y_pred)
        print(report)
    Regression models (shown for Linear Regression; SGD Regressor is analogous):
        mse = mean_squared_error(y_test, y_pred)
        mae = mean_absolute_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)
        print(f"Linear Regression MSE: {mse}")
        print(f"Linear Regression MAE: {mae}")
        print(f"Linear Regression R^2: {r2}")
Evaluation Metrics
Accuracy
Accuracy is the ratio of correctly predicted observations to the total observations. It is suitable for balanced datasets but can be misleading for imbalanced classes.
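In symbols, for a binary problem with true/false positives and negatives TP, TN, FP, FN:

    Accuracy = (TP + TN) / (TP + TN + FP + FN)

For example, 90 correct predictions on 100 test samples give an accuracy of 0.90.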
R-Squared (R²)
R² measures the proportion of variance in the target variable explained by the model. It typically ranges from 0 to 1, with values closer to 1 indicating better performance; it can also be negative when a model fits worse than simply predicting the mean.
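Concretely, this is the definition that scikit-learn's r2_score implements:

    R² = 1 - SS_res / SS_tot,  where SS_res = sum_i (y_i - yhat_i)^2 and SS_tot = sum_i (y_i - ybar)^2

so a model whose squared error exceeds that of always predicting the mean ybar scores below zero.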
F1 Score
F1 Score is the harmonic mean of precision and recall. It is more informative than accuracy for imbalanced datasets, as it considers both false positives and false negatives.
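With precision P and recall R:

    F1 = 2PR / (P + R)

For example, P = 0.8 and R = 0.5 give F1 = 0.8 / 1.3 ≈ 0.62, noticeably below the arithmetic mean of 0.65: the harmonic mean penalizes imbalance between precision and recall.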