ML Codes

# Common imports for all algorithms

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error, accuracy_score, confusion_matrix, classification_report
import seaborn as sns

# 1. Linear Regression
print("\n1. Linear Regression")

# Import necessary libraries


from sklearn.linear_model import LinearRegression

# Load the data


data = pd.read_csv("question.csv")
print("Data shape:", data.shape)
print("Columns:", data.columns)
print("First few rows:")
print(data.head())

# Define variables
X = data[['feature1', 'feature2']]
y = data['target']

# Preprocessing
# Handle missing values
imputer = SimpleImputer(strategy='mean')
X = pd.DataFrame(imputer.fit_transform(X), columns=X.columns)

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data


X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Apply the algorithm


lr_model = LinearRegression()
lr_model.fit(X_train, y_train)
lr_predictions = lr_model.predict(X_test)

# Visualizations
plt.figure(figsize=(10, 6))
plt.scatter(y_test, lr_predictions)
plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--', lw=2)
plt.xlabel('Actual')
plt.ylabel('Predicted')
plt.title('Linear Regression: Actual vs Predicted')
plt.savefig('linear_regression_results.png')
plt.close()
# Print results
mse = mean_squared_error(y_test, lr_predictions)
print(f"Linear Regression Mean Squared Error: {mse}")
print(f"Coefficients: {lr_model.coef_}")
print(f"Intercept: {lr_model.intercept_}")

# 2. Logistic Regression
print("\n2. Logistic Regression")

# Import necessary libraries


from sklearn.linear_model import LogisticRegression

# Load the data


data = pd.read_csv("question.csv")

# Define variables
X = data[['feature1', 'feature2', 'categorical_feature']]
y = data['binary_target']

# Preprocessing
# Handle missing values
imputer = SimpleImputer(strategy='most_frequent')
X = pd.DataFrame(imputer.fit_transform(X), columns=X.columns)

# Encode categorical variables


le = LabelEncoder()
X['categorical_feature'] = le.fit_transform(X['categorical_feature'])

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data


X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Apply the algorithm


log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)
log_reg_predictions = log_reg.predict(X_test)

# Visualizations
cm = confusion_matrix(y_test, log_reg_predictions)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d')
plt.title('Logistic Regression: Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.savefig('logistic_regression_confusion_matrix.png')
plt.close()

# Print results
accuracy = accuracy_score(y_test, log_reg_predictions)
print(f"Logistic Regression Accuracy: {accuracy}")
print("\nClassification Report:")
print(classification_report(y_test, log_reg_predictions))
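
# Optional: ROC AUC as an extra check (a sketch; assumes 'binary_target' really is
# binary and uses predict_proba, which LogisticRegression provides).
from sklearn.metrics import roc_auc_score
log_reg_probs = log_reg.predict_proba(X_test)[:, 1]
print(f"Logistic Regression ROC AUC: {roc_auc_score(y_test, log_reg_probs)}")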

# 3. Naive Bayes
print("\n3. Naive Bayes")

# Import necessary libraries


from sklearn.naive_bayes import GaussianNB

# Load the data


data = pd.read_csv("question.csv")

# Define variables
X = data[['feature1', 'feature2']]
y = data['categorical_target']

# Preprocessing
# Handle missing values
imputer = SimpleImputer(strategy='mean')
X = pd.DataFrame(imputer.fit_transform(X), columns=X.columns)

# Split the data (no scaling for Naive Bayes)


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Apply the algorithm


nb_model = GaussianNB()
nb_model.fit(X_train, y_train)
nb_predictions = nb_model.predict(X_test)

# Visualizations
cm = confusion_matrix(y_test, nb_predictions)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d')
plt.title('Naive Bayes: Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.savefig('naive_bayes_confusion_matrix.png')
plt.close()

# Print results
accuracy = accuracy_score(y_test, nb_predictions)
print(f"Naive Bayes Accuracy: {accuracy}")
print("\nClassification Report:")
print(classification_report(y_test, nb_predictions))

# 4. Decision Tree
print("\n4. Decision Tree")

# Import necessary libraries


from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor, plot_tree

# Load the data


data = pd.read_csv("question.csv")

# Classification
print("\nDecision Tree - Classification")

# Define variables
X = data[['feature1', 'feature2', 'categorical_feature']]
y = data['categorical_target']

# Preprocessing
# Handle missing values
imputer = SimpleImputer(strategy='most_frequent')
X = pd.DataFrame(imputer.fit_transform(X), columns=X.columns)

# Encode categorical variables


le = LabelEncoder()
X['categorical_feature'] = le.fit_transform(X['categorical_feature'])

# Split the data (no scaling for Decision Trees)


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Apply the algorithm


dt_classifier = DecisionTreeClassifier(criterion='entropy', max_depth=5)  # entropy criterion (ID3-style splits)
dt_classifier.fit(X_train, y_train)
dt_class_predictions = dt_classifier.predict(X_test)

# Visualizations
plt.figure(figsize=(20,10))
plot_tree(dt_classifier, feature_names=X.columns, class_names=[str(c) for c in dt_classifier.classes_],
          filled=True, rounded=True)
plt.title('Decision Tree Classifier')
plt.savefig('decision_tree_classifier.png')
plt.close()

# Print results
accuracy = accuracy_score(y_test, dt_class_predictions)
print(f"Decision Tree (Classification) Accuracy: {accuracy}")
print("\nClassification Report:")
print(classification_report(y_test, dt_class_predictions))
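
# Optional: feature importances from the fitted tree (a sketch; feature_importances_
# is a standard attribute of a fitted DecisionTreeClassifier).
importances = pd.Series(dt_classifier.feature_importances_, index=X.columns)
print("\nFeature importances:")
print(importances.sort_values(ascending=False))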

# Regression
print("\nDecision Tree - Regression")

# Define variables
X_reg = data[['feature1', 'feature2']]
y_reg = data['continuous_target']

# Preprocessing
# Handle missing values
imputer = SimpleImputer(strategy='mean')
X_reg = pd.DataFrame(imputer.fit_transform(X_reg), columns=X_reg.columns)

# Split the data


X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_reg, y_reg, test_size=0.2, random_state=42)

# Apply the algorithm
dt_regressor = DecisionTreeRegressor(max_depth=5) # CART
dt_regressor.fit(X_train_reg, y_train_reg)
dt_reg_predictions = dt_regressor.predict(X_test_reg)

# Visualizations
plt.figure(figsize=(10, 6))
plt.scatter(y_test_reg, dt_reg_predictions)
plt.plot([y_test_reg.min(), y_test_reg.max()], [y_test_reg.min(), y_test_reg.max()], 'r--', lw=2)
plt.xlabel('Actual')
plt.ylabel('Predicted')
plt.title('Decision Tree Regression: Actual vs Predicted')
plt.savefig('decision_tree_regression_results.png')
plt.close()

# Print results
mse = mean_squared_error(y_test_reg, dt_reg_predictions)
print(f"Decision Tree (Regression) Mean Squared Error: {mse}")

# 5. K-means Clustering
print("\n5. K-means Clustering")

# Import necessary libraries


from sklearn.cluster import KMeans

# Load the data


data = pd.read_csv("question.csv")

# Define variables
X = data[['feature1', 'feature2']]

# Preprocessing
# Handle missing values
imputer = SimpleImputer(strategy='mean')
X = pd.DataFrame(imputer.fit_transform(X), columns=X.columns)

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Apply the algorithm


# Elbow method
inertias = []
for k in range(1, 11):
    kmeans = KMeans(n_clusters=k, random_state=42)
    kmeans.fit(X_scaled)
    inertias.append(kmeans.inertia_)

# Visualizations
plt.figure(figsize=(10, 6))
plt.plot(range(1, 11), inertias, marker='o')
plt.title('Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('Inertia')
plt.savefig('kmeans_elbow_method.png')
plt.close()

# K-means with optimal k


optimal_k = 3 # Assume we found this from the elbow method
kmeans = KMeans(n_clusters=optimal_k, random_state=42)
kmeans_labels = kmeans.fit_predict(X_scaled)

# Visualize clusters
plt.figure(figsize=(10, 6))
scatter = plt.scatter(X_scaled[:, 0], X_scaled[:, 1], c=kmeans_labels, cmap='viridis')
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], color='red', marker='x',
            s=200, linewidths=3)
plt.title('K-means Clustering')
plt.colorbar(scatter)
plt.savefig('kmeans_clusters.png')
plt.close()

# Print results
print(f"K-means clustering completed with {optimal_k} clusters")
print(f"Cluster centers:\n{kmeans.cluster_centers_}")

# 6. Hierarchical Clustering
print("\n6. Hierarchical Clustering")

# Import necessary libraries


from sklearn.cluster import AgglomerativeClustering
from scipy.cluster.hierarchy import dendrogram, linkage

# Load the data


data = pd.read_csv("question.csv")

# Define variables
X = data[['feature1', 'feature2']]

# Preprocessing
# Handle missing values
imputer = SimpleImputer(strategy='mean')
X = pd.DataFrame(imputer.fit_transform(X), columns=X.columns)

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Apply the algorithm


hierarchical = AgglomerativeClustering(n_clusters=3)
hierarchical_labels = hierarchical.fit_predict(X_scaled)

# Visualizations
# Dendrogram
linked = linkage(X_scaled, method='ward')
plt.figure(figsize=(10, 7))
dendrogram(linked)
plt.title('Hierarchical Clustering Dendrogram')
plt.xlabel('Sample Index')
plt.ylabel('Distance')
plt.savefig('hierarchical_dendrogram.png')
plt.close()

# Scatter plot of clusters


plt.figure(figsize=(10, 7))
scatter = plt.scatter(X_scaled[:, 0], X_scaled[:, 1], c=hierarchical_labels, cmap='viridis')
plt.title('Hierarchical Clustering')
plt.xlabel('Feature 1 (scaled)')
plt.ylabel('Feature 2 (scaled)')
plt.colorbar(scatter)
plt.savefig('hierarchical_clusters.png')
plt.close()

# Print results
print(f"Hierarchical clustering completed with 3 clusters")
print(f"Cluster labels: {np.unique(hierarchical_labels)}")

print("\nAll tasks completed successfully!")


# ============================================================================
# Condensed version: the same algorithms combined in a single script
# ============================================================================
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.cluster import KMeans, AgglomerativeClustering
from sklearn.preprocessing import StandardScaler, Normalizer
from sklearn.impute import SimpleImputer
import seaborn as sns

# Load the data


data = pd.read_csv("question.csv")

# 1. Pre-processing
# Missing value imputation (mean imputation assumes the columns are numeric)
imputer = SimpleImputer(strategy='mean')
data_imputed = pd.DataFrame(imputer.fit_transform(data), columns=data.columns)

# Standardization
scaler = StandardScaler()
data_standardized = pd.DataFrame(scaler.fit_transform(data_imputed), columns=data_imputed.columns)

# Normalization
normalizer = Normalizer()
data_normalized = pd.DataFrame(normalizer.fit_transform(data_imputed), columns=data_imputed.columns)

# 2. Visualization
# Bar chart
plt.figure(figsize=(10, 6))
data['categorical_column'].value_counts().plot(kind='bar')
plt.title('Bar Chart')
plt.show()

# Scatter plot
plt.figure(figsize=(10, 6))
plt.scatter(data['x_column'], data['y_column'])
plt.title('Scatter Plot')
plt.xlabel('X')
plt.ylabel('Y')
plt.show()

# Heat map
plt.figure(figsize=(12, 10))
sns.heatmap(data.corr(numeric_only=True), annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap')
plt.show()

# Line graph
plt.figure(figsize=(10, 6))
plt.plot(data['x_column'], data['y_column'])
plt.title('Line Graph')
plt.xlabel('X')
plt.ylabel('Y')
plt.show()

# 3. Linear Regression
X = data[['feature1', 'feature2']]
y = data['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

lr_model = LinearRegression()
lr_model.fit(X_train, y_train)
lr_predictions = lr_model.predict(X_test)

# 4. Logistic Regression (the classifiers below assume the target column holds categorical/binary labels)
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)
log_reg_predictions = log_reg.predict(X_test)

# 5. Naive Bayes
nb_model = GaussianNB()
nb_model.fit(X_train, y_train)
nb_predictions = nb_model.predict(X_test)

# 6. Decision Tree (ID3/CART)


# For classification
dt_classifier = DecisionTreeClassifier(criterion='entropy')  # entropy criterion (ID3-style splits)
dt_classifier.fit(X_train, y_train)
dt_class_predictions = dt_classifier.predict(X_test)

# For regression
dt_regressor = DecisionTreeRegressor() # CART
dt_regressor.fit(X_train, y_train)
dt_reg_predictions = dt_regressor.predict(X_test)

# 7. K-means Clustering
# Elbow method
inertias = []
for k in range(1, 11):
    kmeans = KMeans(n_clusters=k, random_state=42)
    kmeans.fit(X)
    inertias.append(kmeans.inertia_)

plt.figure(figsize=(10, 6))
plt.plot(range(1, 11), inertias, marker='o')
plt.title('Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('Inertia')
plt.show()

# K-means with optimal k


optimal_k = 3 # Assume we found this from the elbow method
kmeans = KMeans(n_clusters=optimal_k, random_state=42)
kmeans_labels = kmeans.fit_predict(X)

# 8. Hierarchical Clustering
hierarchical = AgglomerativeClustering(n_clusters=3)
hierarchical_labels = hierarchical.fit_predict(X)

# Visualize hierarchical clustering


plt.figure(figsize=(10, 7))
plt.scatter(X.iloc[:, 0], X.iloc[:, 1], c=hierarchical_labels, cmap='viridis')
plt.title('Hierarchical Clustering')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.show()
