1stTask.ipynb - Colab
1stTask.ipynb - Colab
ipynb - Colab
Group Members
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import (accuracy_score, classification_report,
confusion_matrix, precision_recall_curve,
PrecisionRecallDisplay)
from imblearn.over_sampling import SMOTE
from sklearn.utils import resample
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
# Keep the notebook output readable: FutureWarning messages are ignored
# process-wide from this point on.
warnings.filterwarnings(action="ignore", category=FutureWarning)
# NOTE(review): this is a fragment of a function body. The enclosing `def`
# header and the original indentation are not visible in this export; the
# trailing `return data` only makes sense inside a function — restore both
# before running.
# Numerical imputation: missing values in every numeric column are replaced
# with that column's mean.
num_cols = data.select_dtypes(include=['number']).columns
data[num_cols] = data[num_cols].fillna(data[num_cols].mean())
# Categorical imputation: every non-numeric column except
# 'covid19_test_results' (presumably the prediction target — confirm) is
# filled with its most frequent value.
cat_cols = data.select_dtypes(exclude=['number']).columns.drop('covid19_test_results')
for col in cat_cols:
# mode()[0] takes the first of the column's modes.
# NOTE(review): mode() is empty for a column that is entirely NaN, in which
# case [0] raises — confirm such columns cannot reach this point.
data[col] = data[col].fillna(data[col].mode()[0])
# Hand the imputed frame back to the caller.
return data
https://colab.research.google.com/drive/1W1rtHxuRmCvggLRwkGG5j0Ji0cJLpOKf#scrollTo=wjI5rECNSP2q&printMode=true 1/6
4/8/25, 7:44 PM 1stTask.ipynb - Colab
# Downsample majority: draw at most 5000 rows from `majority` without
# replacement. Sampling without replacement cannot return more rows than
# exist, so the request is clamped to len(majority); with fewer than 5000
# rows available the whole set is kept instead of raising ValueError.
# (Assumes len(majority) is its row count, e.g. a DataFrame — confirm.)
majority_down = resample(
    majority,
    replace=False,
    n_samples=min(5000, len(majority)),
    random_state=42,  # fixed seed keeps the drawn sample reproducible
)
# Main execution: run the load/preprocess step on the dataset file, then
# feed its result to the balancing step.
DATASET_PATH = 'coronavirusdataset.csv'
data = load_and_preprocess(DATASET_PATH)
balanced_data = balance_dataset(data)
# Prediction: hard labels for the test split.
y_pred = model.predict(X_test)

# Scores for the precision-recall curve: the second column of
# predict_proba's output when the model offers it (presumably the positive
# class — confirm), otherwise a list of zeros as long as y_test.
if hasattr(model, "predict_proba"):
    y_proba = model.predict_proba(X_test)[:, 1]
else:
    y_proba = [0] * len(y_test)

# Evaluation banner for this model.
print(f"\n{name} Evaluation:")
https://colab.research.google.com/drive/1W1rtHxuRmCvggLRwkGG5j0Ji0cJLpOKf#scrollTo=wjI5rECNSP2q&printMode=true 2/6
4/8/25, 7:44 PM 1stTask.ipynb - Colab
# Headline metrics: overall accuracy, then the per-class report.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
report = classification_report(
    y_test, y_pred, target_names=['Negative', 'Positive']
)
print(report)

# Confusion matrix rendered as an annotated heatmap with integer counts.
cm = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(6, 4))
heatmap_options = dict(
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=['Predicted Negative', 'Predicted Positive'],
    yticklabels=['Actual Negative', 'Actual Positive'],
)
sns.heatmap(cm, **heatmap_options)
plt.title(f"{name} Confusion Matrix")
plt.show()

# Precision-recall curve computed from the scores in y_proba.
precision, recall, _ = precision_recall_curve(y_test, y_proba)
disp = PrecisionRecallDisplay(precision=precision, recall=recall)
disp.plot()
plt.title(f"{name} Precision-Recall Curve")
plt.show()
# Horizontal bar chart of the values in `top_features`, sorted ascending
# before plotting. `top_features` is computed outside the visible fragment.
plt.figure(figsize=(10, 6))
ascending_importances = top_features.sort_values()
ascending_importances.plot.barh(color='darkgreen')
plt.title(f"{name} - Top 10 Features")
plt.xlabel("Importance Score")
plt.tight_layout()
plt.show()
https://colab.research.google.com/drive/1W1rtHxuRmCvggLRwkGG5j0Ji0cJLpOKf#scrollTo=wjI5rECNSP2q&printMode=true 3/6
4/8/25, 7:44 PM 1stTask.ipynb - Colab
https://colab.research.google.com/drive/1W1rtHxuRmCvggLRwkGG5j0Ji0cJLpOKf#scrollTo=wjI5rECNSP2q&printMode=true 4/6
4/8/25, 7:44 PM 1stTask.ipynb - Colab
https://colab.research.google.com/drive/1W1rtHxuRmCvggLRwkGG5j0Ji0cJLpOKf#scrollTo=wjI5rECNSP2q&printMode=true 5/6
4/8/25, 7:44 PM 1stTask.ipynb - Colab
https://colab.research.google.com/drive/1W1rtHxuRmCvggLRwkGG5j0Ji0cJLpOKf#scrollTo=wjI5rECNSP2q&printMode=true 6/6