Final ML Programs 075005
Program 10
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
data = load_breast_cancer()
X = data.data
Y = data.target
scaler = StandardScaler()
X_scaler = scaler.fit_transform(X)
kmeans = KMeans(n_clusters=2,random_state=42)
y_kmeans = kmeans.fit_predict(X_scaler)
pca = PCA(n_components=2)
x_pca = pca.fit_transform(X_scaler)
df = pd.DataFrame(x_pca,columns=['PC1','PC2'])
df['cluster']= y_kmeans
df['True Labels'] = Y
plt.figure(figsize=(8,6))
sns.scatterplot(data=df, x='PC1', y='PC2', hue='cluster', s=100, palette='Set1', alpha=0.7, edgecolor='black')
plt.title("Clustering on K-Means Cluster Algorithm")
plt.figure(figsize=(8,6))
sns.scatterplot(data=df, x='PC1', y='PC2', hue='True Labels', s=100, palette='coolwarm', alpha=0.7, edgecolor='black')
plt.title("Clustering on True Labels")
plt.xlabel("Principal Component 1")
plt.ylabel("Principal Component 2")
plt.show()
plt.figure(figsize=(8,6))
sns.scatterplot(data=df, x='PC1', y='PC2', hue='cluster', s=100, palette='Set1', alpha=0.9, edgecolor='black')
centers = pca.transform(kmeans.cluster_centers_)
plt.scatter(x=centers[:,0], y=centers[:,1], color='red', marker='X', s=200, label='centroids')
plt.title("Clustering on K-Means along with Centroids")
plt.xlabel("Principal Component 1")
plt.ylabel("Principal Component 2")
plt.legend()
plt.show()
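As a quick sanity check (not part of the original program), the agreement between the K-Means clusters and the diagnosis labels can be measured with the adjusted Rand index, and cluster quality without labels with the silhouette score; a minimal sketch reusing y_kmeans, Y and X_scaler from above:

from sklearn.metrics import adjusted_rand_score, silhouette_score
# ARI near 1.0 means the clusters match the true labels; near 0 means random assignment
print("Adjusted Rand Index:", adjusted_rand_score(Y, y_kmeans))
# silhouette uses only geometry, no labels: higher means tighter, better-separated clusters
print("Silhouette Score:", silhouette_score(X_scaler, y_kmeans))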
Program 9
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
data = load_breast_cancer()
X = data.data
Y = data.target
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=42)
gnb = GaussianNB()
gnb.fit(X_train,Y_train)
Y_pred = gnb.predict(X_test)
print(accuracy_score(Y_test,Y_pred))
print(confusion_matrix(Y_test,Y_pred))
print(classification_report(Y_test, Y_pred))
cross_val = cross_val_score(gnb,X,Y,cv=5,scoring='accuracy')
print(cross_val.mean())
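The confusion matrix printed above is easier to read as an annotated heatmap; a small optional sketch using seaborn:

import seaborn as sns
import matplotlib.pyplot as plt
# rows are the actual classes, columns the predicted classes
sns.heatmap(confusion_matrix(Y_test, Y_pred), annot=True, fmt='d', cmap='Blues',
            xticklabels=data.target_names, yticklabels=data.target_names)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()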
Program 8
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
from sklearn.metrics import accuracy_score
data = load_breast_cancer()
X = data.data
Y = data.target
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=42)
clf = DecisionTreeClassifier()
clf.fit(X_train, Y_train)
Y_pred = clf.predict(X_test)
print(accuracy_score(Y_test,Y_pred) * 100)
new_sample = np.array([X_test[10]])
predict = clf.predict(new_sample)
if predict[0] == 1:
    prediction_class = 'Benign'
else:
    prediction_class = 'Malignant'
print("Predicted class for the new sample:", prediction_class)
plt.figure(figsize=(30,50))
tree.plot_tree(clf, feature_names=data.feature_names, class_names=data.target_names, filled=True)
plt.show()
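An unpruned DecisionTreeClassifier can memorise the training split; a hedged sketch comparing a depth-limited tree on the same data (max_depth=3 is an arbitrary illustrative value, not a tuned one):

clf_small = DecisionTreeClassifier(max_depth=3, random_state=42)
clf_small.fit(X_train, Y_train)
# compare against the unpruned tree's test accuracy printed above
print("Depth-limited accuracy:", accuracy_score(Y_test, clf_small.predict(X_test)) * 100)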
Program 7
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error, r2_score

def linear_reg():
    data = fetch_california_housing(as_frame=True)
    X = data.data[['AveRooms']]
    Y = data.target
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=42)
    model = LinearRegression()
    model.fit(X_train, Y_train)
    Y_predict = model.predict(X_test)
    print("Mean Squared Error:", mean_squared_error(Y_test, Y_predict))
    print("R2 Score:", r2_score(Y_test, Y_predict))
    plt.figure(figsize=(8,6))
    plt.scatter(X_test, Y_test, color='blue', label='actual')
    plt.plot(X_test, Y_predict, color='red', label='predicted')
    plt.legend()
    plt.show()
def poly():
    # Auto MPG data from the UCI repository (assumed source, inferred from the column names)
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data"
    names = ['mpg', 'cylinders', 'displacement', 'horsepower', 'weight',
             'acceleration', 'model_year', 'origin', 'car_name']
    data = pd.read_csv(url, sep=r'\s+', names=names, na_values='?')
    data = data.dropna()
    X = data['displacement'].values.reshape(-1, 1)
    Y = data['mpg'].values
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=42)
    model = make_pipeline(PolynomialFeatures(degree=2), StandardScaler(), LinearRegression())
    model.fit(X_train, Y_train)
    Y_predict = model.predict(X_test)
    print("Mean Squared Error:", mean_squared_error(Y_test, Y_predict))
    print("R2 Score:", r2_score(Y_test, Y_predict))
    plt.figure(figsize=(8,6))
    plt.scatter(X_test, Y_test, color='blue', label='actual')
    # sort by X so the fitted curve draws as a smooth line
    order = X_test[:, 0].argsort()
    plt.plot(X_test[order], Y_predict[order], color='red', label='predicted')
    plt.legend()
    plt.show()
if __name__ == '__main__':
    linear_reg()
    poly()
Program 6
import numpy as np
import matplotlib.pyplot as plt
def locally_weighted_regression(x, X, y, tau):
    # weight each training point by a Gaussian kernel centred on the query point x
    m = X.shape[0]
    weights = np.exp(-np.sum((X - x) ** 2, axis=1) / (2 * tau ** 2))
    W = np.diag(weights)
    # closed-form weighted least squares: theta = (X^T W X)^-1 X^T W y
    X_trans_W = X.T @ W
    theta = np.linalg.pinv(X_trans_W @ X) @ (X_trans_W @ y)
    return x @ theta

np.random.seed(42)
X = np.linspace(0, 2*np.pi, 100)
y = np.sin(X) + 0.1 * np.random.randn(100)
X_bias = np.c_[np.ones(X.shape), X]
X_test = np.linspace(0, 2*np.pi, 200)
X_bias_test = np.c_[np.ones(X_test.shape), X_test]
tau = 0.5
y_pred = np.array([locally_weighted_regression(x, X_bias, y, tau) for x in X_bias_test])
plt.figure(figsize=(8,6))
plt.scatter(X, y, color='red', label='train')
plt.plot(X_test, y_pred, color='blue', label='LWR fit')
plt.grid(alpha=0.2)
plt.legend()
plt.show()
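tau is the kernel bandwidth: small values chase local noise, large values flatten toward ordinary least squares. A sketch overlaying a few bandwidths (the values are arbitrary, chosen only to show the effect):

plt.figure(figsize=(8,6))
plt.scatter(X, y, color='red', label='train')
for t in [0.1, 0.5, 2.0]:  # illustrative bandwidths, not tuned values
    preds = np.array([locally_weighted_regression(x, X_bias, y, t) for x in X_bias_test])
    plt.plot(X_test, preds, label=f'tau={t}')
plt.legend()
plt.show()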
Program 5
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
data = np.random.rand(100)
# labelling rule assumed for this exercise: class 1 if x <= 0.5, else class 2
labels = np.where(data[:50] <= 0.5, 1, 2)
train_data = data[:50].reshape(-1, 1)
train_labels = labels
test_data = data[50:].reshape(-1, 1)
k_values = [1,2,3,4,5,20,30]
for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(train_data, train_labels)
    pred = knn.predict(test_data)
    print(f"\nResults for k = {k}:")
    for i, pr in enumerate(pred, start=51):
        print(f"Point x{i} = {test_data[i-51][0]:.2f} is class {pr}")
plt.figure(figsize=(8,6))
# illustrative completion: test points coloured by the last k's predictions
plt.scatter(test_data[:, 0], np.zeros_like(test_data[:, 0]), c=pred, cmap='coolwarm')
plt.yticks([])
plt.xlabel('x')
plt.title(f'KNN predictions for k={k}')
plt.show()
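Because the test half is generated by the same threshold rule assumed for the training labels, per-k accuracy can also be reported; a sketch under that assumption:

from sklearn.metrics import accuracy_score
true_test = np.where(data[50:] <= 0.5, 1, 2)  # same labelling rule assumed as for training
for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k).fit(train_data, train_labels)
    print(f"k={k}: accuracy = {accuracy_score(true_test, knn.predict(test_data)):.2f}")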
Program 4
import pandas as pd

def find_s_algo(filepath):
    df = pd.read_csv(filepath)
    print("Training Data")
    print(df)
    attributes = df.columns[:-1]
    class_label = df.columns[-1]
    hypothesis = None
    for i, row in df.iterrows():
        if row[class_label] == 'Yes':  # FIND-S only learns from positive examples
            if hypothesis is None:
                hypothesis = list(row[attributes])  # start with the most specific hypothesis
            else:
                # generalise any attribute that disagrees with the new positive example
                hypothesis = [h if h == v else '?' for h, v in zip(hypothesis, row[attributes])]
            print(f"Step {i} Hypothesis : {hypothesis}")
    return hypothesis

filepath = r"C:\Users\priya\Downloads\enjoysport_training_data.csv"
a = find_s_algo(filepath)
print("Final Hypothesis")
print(a)
Training Data
Sky Temp Humidity Wind Water Forecast EnjoySport
0 Sunny Warm Normal Strong Warm Same Yes
1 Sunny Warm High Strong Warm Same Yes
2 Rainy Cold High Strong Warm Change No
3 Sunny Warm High Strong Cool Change Yes
4 Sunny Warm Normal Strong Warm Same Yes
Step 0 Hypothesis : ['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same']
Step 1 Hypothesis : ['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']
Step 3 Hypothesis : ['Sunny', 'Warm', '?', 'Strong', '?', '?']
Step 4 Hypothesis : ['Sunny', 'Warm', '?', 'Strong', '?', '?']
Final Hypothesis
['Sunny', 'Warm', '?', 'Strong', '?', '?']
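If the CSV is not available locally, the same function can be exercised on an in-memory copy of the training table above; a minimal sketch (the temporary file name is arbitrary):

import pandas as pd
rows = [['Sunny','Warm','Normal','Strong','Warm','Same','Yes'],
        ['Sunny','Warm','High','Strong','Warm','Same','Yes'],
        ['Rainy','Cold','High','Strong','Warm','Change','No'],
        ['Sunny','Warm','High','Strong','Cool','Change','Yes'],
        ['Sunny','Warm','Normal','Strong','Warm','Same','Yes']]
cols = ['Sky','Temp','Humidity','Wind','Water','Forecast','EnjoySport']
pd.DataFrame(rows, columns=cols).to_csv('enjoysport_tmp.csv', index=False)
print(find_s_algo('enjoysport_tmp.csv'))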
Program 1
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
data = fetch_california_housing(as_frame=True)
housing_df = data.frame
housing_df=housing_df.dropna()
numerical_features = housing_df.select_dtypes(include=[np.number]).columns
plt.figure(figsize=(15,10))
for i, features in enumerate(numerical_features):
    plt.subplot(3, 3, i+1)
    sns.histplot(housing_df[features], bins=30, kde=True, color='blue')
    plt.title(f'Distribution of {features}')
plt.tight_layout()
plt.show()
plt.figure(figsize=(15,10))
for i, features in enumerate(numerical_features):
    plt.subplot(3, 3, i+1)
    sns.boxplot(housing_df[features], color='orange')
    plt.title(f'Box plot of {features}')
plt.tight_layout()
plt.show()
outliers_sum = {}
for feature in numerical_features:
    Q1 = housing_df[feature].quantile(0.25)
    Q3 = housing_df[feature].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    outlier = housing_df[(housing_df[feature] < lower_bound) | (housing_df[feature] > upper_bound)]
    outliers_sum[feature] = len(outlier)
print("Outliers per feature (1.5*IQR rule):", outliers_sum)
Program 2
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
data = fetch_california_housing(as_frame=True)
housing_df = data.frame
comat = housing_df.corr()
plt.figure(figsize=(8,6))
sns.heatmap(comat,annot=True,cmap='coolwarm')
plt.show()
sns.pairplot(housing_df)
plt.show()
Program 3
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.datasets import load_iris
iris = load_iris()
data = iris.data
labels = iris.target
label_names = iris.target_names
pca = PCA(n_components=2)
x_pca = pca.fit_transform(data)
df = pd.DataFrame(x_pca, columns=['PC1', 'PC2'])
df['Labels'] = labels
colors = ['r', 'g', 'b']
plt.figure(figsize=(8,6))
for i, label in enumerate(np.unique(labels)):
    plt.scatter(df[df['Labels'] == label]['PC1'], df[df['Labels'] == label]['PC2'],
                c=colors[i], label=label_names[label])
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.legend()
plt.show()
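How much of the iris variance the two components actually retain can be checked directly from the fitted pca object; a short addition:

# fraction of total variance captured by each principal component
print("Explained variance ratio:", pca.explained_variance_ratio_)
print("Total retained:", pca.explained_variance_ratio_.sum())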