Final ML Programs 075005

Program 10

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.datasets import load_breast_cancer


from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

data = load_breast_cancer()
X = data.data

Y = data.target

scaler = StandardScaler()
X_scaler = scaler.fit_transform(X)

kmeans = KMeans(n_clusters=2,random_state=42)
y_kmeans = kmeans.fit_predict(X_scaler)

print("Accuracy SCore: ")


print(accuracy_score(Y,y_kmeans))
print("Classifica on Report...")
print(classifica on_report(Y,y_kmeans))
print("Confusion Mateisx...")
print(confusion_matrix(Y,y_kmeans))

pca = PCA(n_components=2)
x_pca = pca.fit_transform(X_scaler)

df = pd.DataFrame(x_pca,columns=['PC1','PC2'])
df['cluster']= y_kmeans
df['True Labels'] = Y

plt.figure(figsize=(8,6))
sns.scatterplot(data=df, x='PC1', y='PC2', hue='cluster',
                s=100, palette='Set1', alpha=0.7, edgecolor='black')
plt.title("K-Means Clustering")
plt.xlabel("Principal Component 1")
plt.ylabel("Principal Component 2")
plt.show()

plt.figure(figsize=(8,6))
sns.scatterplot(data=df, x='PC1', y='PC2', hue='True Labels',
                s=100, palette='coolwarm', alpha=0.7, edgecolor='black')
plt.title("Clustering on True Labels")
plt.xlabel("Principal Component 1")
plt.ylabel("Principal Component 2")
plt.show()

plt.figure(figsize=(8,6))
sns.scatterplot(data=df, x='PC1', y='PC2', hue='cluster',
                s=100, palette='Set1', alpha=0.9, edgecolor='black')
centers = pca.transform(kmeans.cluster_centers_)
plt.scatter(x=centers[:, 0], y=centers[:, 1], color='red', marker='X', s=200, label='centroids')
plt.title("K-Means Clusters with Centroids")
plt.xlabel("Principal Component 1")
plt.ylabel("Principal Component 2")
plt.legend(title='Cluster')
plt.show()
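A caveat on the scores printed above: k-means numbers its clusters arbitrarily, so cluster 0 need not correspond to target 0, and a raw accuracy_score can come out near zero even for a good clustering. A minimal sketch of one common fix, remapping each cluster to the majority true label before scoring (variable names here are my own):

import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score

data = load_breast_cancer()
X = StandardScaler().fit_transform(data.data)
Y = data.target

y_kmeans = KMeans(n_clusters=2, random_state=42, n_init=10).fit_predict(X)

# Map each cluster id to the most common true label inside that cluster
y_aligned = np.zeros_like(y_kmeans)
for cluster in np.unique(y_kmeans):
    mask = y_kmeans == cluster
    y_aligned[mask] = np.bincount(Y[mask]).argmax()

print("Aligned accuracy:", accuracy_score(Y, y_aligned))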

Program 9
import numpy as np

from sklearn.datasets import fetch_olivetti_faces


import matplotlib.pyplot as plt

from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

data = fetch_olivetti_faces(shuffle=True, random_state=42)


X = data.data
Y = data.target

X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.3,random_state=42)

gnb = GaussianNB()
gnb.fit(X_train,Y_train)

Y_pred = gnb.predict(X_test)
print(accuracy_score(Y_test,Y_pred))

print(confusion_matrix(Y_test,Y_pred))

print(classification_report(Y_test,Y_pred))

cross_val = cross_val_score(gnb,X,Y,cv=5,scoring='accuracy')
print(cross_val.mean())

fig, axes = plt.subplots(3, 5, figsize=(12, 6))

# Display test images (not training images) alongside their predicted labels
for ax, image, prediction, true_label in zip(axes.ravel(), X_test, Y_pred, Y_test):
    ax.imshow(image.reshape(64, 64), cmap=plt.cm.gray)
    ax.set_title(f"True: {true_label}, Pred: {prediction}")
    ax.axis('off')
plt.show()
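GaussianNB has essentially one tunable parameter, var_smoothing, which stabilizes the per-pixel variance estimates. A minimal sketch of tuning it with GridSearchCV on the same data; the log-spaced grid below is an arbitrary choice of mine, not part of the original program:

import numpy as np
from sklearn.datasets import fetch_olivetti_faces
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import GridSearchCV

data = fetch_olivetti_faces(shuffle=True, random_state=42)

# Grid of smoothing values is an assumption; adjust to taste
grid = GridSearchCV(GaussianNB(),
                    param_grid={'var_smoothing': np.logspace(-9, -1, 9)},
                    cv=5, scoring='accuracy')
grid.fit(data.data, data.target)

print("Best var_smoothing:", grid.best_params_['var_smoothing'])
print("Best CV accuracy:", grid.best_score_)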

Program 8
import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn import tree

data = load_breast_cancer()
X = data.data
Y = data.target

X_train, X_test, Y_train, Y_test = train_test_split(X,Y,test_size=0.2,random_state=42)

clf = DecisionTreeClassifier()
clf.fit(X_train,Y_train)

Y_pred = clf.predict(X_test)

print(accuracy_score(Y_test,Y_pred) * 100)

new_sample = np.array([X_test[10]])
predict = clf.predict(new_sample)

# In this dataset target 1 is benign and 0 is malignant
if predict == 1:
    prediction_class = 'Benign'
else:
    prediction_class = 'Malignant'

print("The Predicted Class Is:", prediction_class)

plt.figure(figsize=(30,50))
tree.plot_tree(clf, feature_names=data.feature_names,
               class_names=data.target_names, filled=True)
plt.show()
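With default settings the tree grows until its leaves are pure, which tends to overfit. A hedged sketch of cost-complexity pruning on the same split, comparing tree size and test accuracy across candidate ccp_alpha values (scoring candidates on the test set here is for illustration only; a separate validation split would be the proper procedure):

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

data = load_breast_cancer()
X_train, X_test, Y_train, Y_test = train_test_split(
    data.data, data.target, test_size=0.2, random_state=42)

# Candidate pruning strengths computed from the training data
path = DecisionTreeClassifier(random_state=42).cost_complexity_pruning_path(X_train, Y_train)

for alpha in path.ccp_alphas[::5]:  # subsample the path to keep the output short
    clf = DecisionTreeClassifier(random_state=42, ccp_alpha=alpha)
    clf.fit(X_train, Y_train)
    print(f"ccp_alpha={alpha:.4f}  leaves={clf.get_n_leaves()}  "
          f"test acc={clf.score(X_test, Y_test):.3f}")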

Program 7
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.datasets import fetch_california_housing


from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.pipeline import make_pipeline

def linear_reg():
    data = fetch_california_housing(as_frame=True)
    X = data.data[['AveRooms']]
    Y = data.target

    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

    model = LinearRegression()
    model.fit(X_train, Y_train)

    Y_predict = model.predict(X_test)

    print("Mean Squared Error")
    print(mean_squared_error(Y_test, Y_predict))

    print("R2 Score")
    print(r2_score(Y_test, Y_predict))

    plt.figure(figsize=(8,6))
    plt.scatter(X_test, Y_test, color='blue')
    plt.plot(X_test, Y_predict, color='red')
    plt.show()

def poly():
    url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'
    names = ['mpg', 'cylinders', 'displacement', 'horsepower', 'weight',
             'acceleration', 'model', 'origin', 'car name']

    data = pd.read_csv(url, sep=r'\s+', names=names, na_values='?')
    data = data.dropna()
    X = data['displacement'].values.reshape(-1, 1)
    Y = data['mpg'].values

    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

    model = make_pipeline(PolynomialFeatures(degree=2), StandardScaler(), LinearRegression())
    model.fit(X_train, Y_train)

    Y_predict = model.predict(X_test)

    print("Mean Squared Error")
    print(mean_squared_error(Y_test, Y_predict))

    print("R2 Score")
    print(r2_score(Y_test, Y_predict))

    plt.figure(figsize=(8,6))
    plt.scatter(X_test, Y_test, color='blue')
    # Sort by X so the fitted curve is drawn left to right instead of zig-zagging
    order = X_test[:, 0].argsort()
    plt.plot(X_test[order], Y_predict[order], color='red')
    plt.show()

if __name__ == '__main__':
    linear_reg()
    poly()
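The degree-2 choice in poly() is a guess. A small sketch comparing test R2 across a few degrees on the same displacement-vs-mpg data; the set of degrees tried is an arbitrary assumption of mine:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline

url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'
names = ['mpg', 'cylinders', 'displacement', 'horsepower', 'weight',
         'acceleration', 'model', 'origin', 'car name']
data = pd.read_csv(url, sep=r'\s+', names=names, na_values='?').dropna()

X = data['displacement'].values.reshape(-1, 1)
Y = data['mpg'].values
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Degrees 1-4 chosen arbitrarily for illustration
for degree in (1, 2, 3, 4):
    model = make_pipeline(PolynomialFeatures(degree=degree),
                          StandardScaler(), LinearRegression())
    model.fit(X_train, Y_train)
    print(f"degree={degree}  test R2={model.score(X_test, Y_test):.3f}")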

Program 6
import numpy as np
import matplotlib.pyplot as plt

def gaussian_kernel(x, xi, tau):
    # Weight decays with the squared distance between the query x and the point xi
    # (the sum must be of squared differences, not the square of the summed differences)
    return np.exp(-np.sum((x - xi)**2) / (2 * tau**2))

def local_weight_reg(x, X, y, tau):
    m = X.shape[0]
    weights = np.array([gaussian_kernel(x, X[i], tau) for i in range(m)])
    W = np.diag(weights)
    X_trans_W = X.T @ W
    # Weighted least squares solution: theta = (X^T W X)^(-1) X^T W y
    theta = np.linalg.inv(X_trans_W @ X) @ X_trans_W @ y
    return x @ theta

np.random.seed(42)

X = np.linspace(0,2*np.pi,100)
y = np.sin(X) + 0.1 *(np.random.randn(100))

X_bias = np.c_[np.ones(X.shape),X]

X_test = np.linspace(0,2*np.pi,200)
X_bias_test = np.c_[np.ones(X_test.shape),X_test]
tau =0.5

y_pred = np.array([local_weight_reg(x, X_bias, y, tau) for x in X_bias_test])

plt.figure(figsize=(8,6))
plt.scatter(X, y, color='red', label='train')
plt.plot(X_test, y_pred, color='blue', label='prediction')
plt.grid(alpha=0.2)
plt.legend()
plt.show()
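The bandwidth tau controls the bias-variance trade-off: a small tau chases the noise, while a large tau flattens the fit toward ordinary least squares. A minimal sketch, appended to the script above (it reuses local_weight_reg, X, y, X_bias, X_test, and X_bias_test), overlaying fits for a few arbitrarily chosen tau values:

# tau values below are an arbitrary illustration, not from the original program
plt.figure(figsize=(8, 6))
plt.scatter(X, y, color='gray', alpha=0.5, label='data')
for tau in (0.1, 0.5, 2.0):
    y_fit = np.array([local_weight_reg(x, X_bias, y, tau) for x in X_bias_test])
    plt.plot(X_test, y_fit, label=f'tau={tau}')
plt.legend()
plt.show()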

Program 5
import numpy as np

from sklearn.neighbors import KNeighborsClassifier


import matplotlib.pyplot as plt

data = np.random.rand(100)

labels = ['class1' if x<=0.5 else 'class2' for x in data[:50] ]


train_data = data[:50].reshape(-1,1)

train_labels = labels

test_data = data[50:].reshape(-1,1)

k_values = [1,2,3,4,5,20,30]

for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(train_data, train_labels)

    pred = knn.predict(test_data)

    for i, pr in enumerate(pred, start=51):
        print(f"Point x{i} = {test_data[i-51][0]:.2f} is {pr}")

    class1 = [test_data[i][0] for i in range(len(pred)) if pred[i] == 'class1']
    class2 = [test_data[i][0] for i in range(len(pred)) if pred[i] == 'class2']

    plt.figure(figsize=(8,6))
    plt.scatter(train_data, [0]*len(train_data),
                c=['blue' if label == 'class1' else 'red' for label in train_labels],
                marker='o', label='training')
    plt.scatter(class1, [1]*len(class1), c='blue', marker='X', label='class1 (test)')
    plt.scatter(class2, [1]*len(class2), c='red', marker='X', label='class2 (test)')
    plt.title(f'k={k}')
    plt.legend()
    plt.show()
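Because the test points obey the same x <= 0.5 threshold rule that generated the training labels, each k can also be scored numerically. A small sketch appended to the same script (it reuses data, k_values, train_data, train_labels, and test_data):

from sklearn.metrics import accuracy_score

# True classes of the test half follow the same rule used to label the training half
true_test_labels = ['class1' if x <= 0.5 else 'class2' for x in data[50:]]

for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(train_data, train_labels)
    acc = accuracy_score(true_test_labels, knn.predict(test_data))
    print(f"k={k}: accuracy = {acc:.2f}")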
Program 4

import pandas as pd

def find_s_algo(filepath):
    df = pd.read_csv(filepath)
    print("Training Data")
    print(df)

    attributes = df.columns[:-1]
    class_label = df.columns[-1]

    # Most specific hypothesis: nothing accepted until the first positive example
    hypothesis = None

    for index, row in df.iterrows():
        if row[class_label] == 'Yes':
            if hypothesis is None:
                # The first positive example becomes the initial hypothesis
                hypothesis = list(row[:-1])
            else:
                # Generalize every attribute that disagrees with this positive example
                for i in range(len(hypothesis)):
                    if hypothesis[i] != row.iloc[i]:
                        hypothesis[i] = '?'
            print(f'Step {index} Hypothesis: {hypothesis}')
    return hypothesis

filepath = r"C:\Users\priya\Downloads\enjoysport_training_data.csv"
a = find_s_algo(filepath)
print("Final")
print(a)

Training Data
Sky Temp Humidity Wind Water Forecast EnjoySport
0 Sunny Warm Normal Strong Warm Same Yes
1 Sunny Warm High Strong Warm Same Yes
2 Rainy Cold High Strong Warm Change No
3 Sunny Warm High Strong Cool Change Yes
4 Sunny Warm Normal Strong Warm Same Yes
Step 0 Hypothesis: ['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same']
Step 1 Hypothesis: ['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']
Step 3 Hypothesis: ['Sunny', 'Warm', '?', 'Strong', '?', '?']
Step 4 Hypothesis: ['Sunny', 'Warm', '?', 'Strong', '?', '?']
Final
['Sunny', 'Warm', '?', 'Strong', '?', '?']
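The script assumes a CSV at a user-specific path. A minimal sketch (the file name is my own choosing) that writes the same training data shown above and runs find_s_algo on it, so the program is reproducible anywhere:

import pandas as pd

rows = [
    ['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same', 'Yes'],
    ['Sunny', 'Warm', 'High', 'Strong', 'Warm', 'Same', 'Yes'],
    ['Rainy', 'Cold', 'High', 'Strong', 'Warm', 'Change', 'No'],
    ['Sunny', 'Warm', 'High', 'Strong', 'Cool', 'Change', 'Yes'],
    ['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same', 'Yes'],
]
cols = ['Sky', 'Temp', 'Humidity', 'Wind', 'Water', 'Forecast', 'EnjoySport']
pd.DataFrame(rows, columns=cols).to_csv('enjoysport_training_data.csv', index=False)

print(find_s_algo('enjoysport_training_data.csv'))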

Program 1
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing

data = fetch_california_housing(as_frame=True)
housing_df = data.frame
housing_df=housing_df.dropna()

numerical_features = housing_df.select_dtypes(include=[np.number]).columns

plt.figure(figsize=(15,10))
for i, feature in enumerate(numerical_features):
    plt.subplot(3, 3, i+1)
    sns.histplot(housing_df[feature], bins=30, kde=True, color='blue')
    plt.title(f'Distribution of {feature}')
plt.tight_layout()
plt.show()

plt.figure(figsize=(15,10))
for i, feature in enumerate(numerical_features):
    plt.subplot(3, 3, i+1)
    sns.boxplot(x=housing_df[feature], color='orange')
    plt.title(f'Box plot of {feature}')
plt.tight_layout()
plt.show()

outliers_sum = {}
for feature in numerical_features:
    Q1 = housing_df[feature].quantile(0.25)
    Q3 = housing_df[feature].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    outlier = housing_df[(housing_df[feature] < lower_bound) |
                         (housing_df[feature] > upper_bound)]
    outliers_sum[feature] = len(outlier)
    print(f"Feature {feature} has {len(outlier)} outliers")


Program 2
import numpy as np

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing

data = fetch_california_housing(as_frame=True)
housing_df = data.frame

comat = housing_df.corr()
plt.figure(figsize=(8,6))

sns.heatmap(comat,annot=True,cmap='coolwarm')
plt.show()

sns.pairplot(housing_df)
plt.show()
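Since the correlation matrix is symmetric, the upper triangle of the heatmap repeats the lower. A minimal sketch of one common refinement, masking the duplicate half (appended to the script above; it reuses comat):

# Hide the redundant upper triangle of the symmetric correlation matrix
mask = np.triu(np.ones_like(comat, dtype=bool))
plt.figure(figsize=(8, 6))
sns.heatmap(comat, mask=mask, annot=True, cmap='coolwarm')
plt.show()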

Program 3
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.datasets import load_iris

iris = load_iris()
data = iris.data
labels = iris.target
label_names = iris.target_names

pca = PCA(n_components=2)
x_pca = pca.fit_transform(data)

df = pd.DataFrame(x_pca, columns= ['PC1','PC2'])


df['Labels'] = labels

colors = ['r','g','b']

plt.figure(figsize=(8,6))
for i, label in enumerate(np.unique(labels)):
    plt.scatter(df[df['Labels'] == label]['PC1'],
                df[df['Labels'] == label]['PC2'],
                c=colors[i], label=label_names[label])
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.legend()
plt.show()
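It is worth knowing how much of the iris variance the two components actually retain; a one-line addition appended to the script above (it reuses the fitted pca object):

# Fraction of the total variance captured by each principal component
print("Explained variance ratio:", pca.explained_variance_ratio_)
print("Total retained:", pca.explained_variance_ratio_.sum())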
