AIML Practical

This document presents Python implementations of fundamental machine learning algorithms: linear regression, support vector machines, decision trees, K-nearest neighbors, naive Bayes, dimensionality reduction, clustering, and random forests. Each section provides code for training a model, making predictions, and evaluating accuracy on a suitable dataset.


Aim 1: Implement linear regression using Python

import numpy as np
import matplotlib.pyplot as plt

def estimate_coef(x, y):
    # number of observations
    n = np.size(x)

    # means of the x and y vectors
    mean_x = np.mean(x)
    mean_y = np.mean(y)

    # cross-deviation and deviation about x
    SS_xy = np.sum(y*x) - n*mean_y*mean_x
    SS_xx = np.sum(x*x) - n*mean_x*mean_x

    # regression coefficients: slope b_1 and intercept b_0
    b_1 = SS_xy / SS_xx
    b_0 = mean_y - b_1*mean_x

    return (b_0, b_1)

def plot_regression_line(x, y, b):
    # plot the observed points
    plt.scatter(x, y, color="m", marker="o", s=30)

    # predicted response vector
    y_pred = b[0] + b[1]*x

    # plot the fitted regression line
    plt.plot(x, y_pred, color="g")

    plt.xlabel('x')
    plt.ylabel('y')
    plt.show()

def main():
    x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])
    b = estimate_coef(x, y)
    print("Estimated coefficients:\nb_0 = {}\nb_1 = {}".format(b[0], b[1]))
    plot_regression_line(x, y, b)

if __name__ == "__main__":
    main()
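
As a quick cross-check (not part of the original listing), NumPy's built-in least-squares fit should reproduce the closed-form coefficients computed above to floating-point precision:

# optional sanity check: np.polyfit returns the slope first, then the intercept
import numpy as np

x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])
b_1, b_0 = np.polyfit(x, y, deg=1)
print("polyfit check: b_0 = {}, b_1 = {}".format(b_0, b_1))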
Aim 2: Python Implementation of a Support Vector Machine

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.colors import ListedColormap
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# load the dataset: Age and Estimated Salary as features, purchase flag as label
dataset = pd.read_csv('Social_Network_Ad.csv')
X = dataset.iloc[:, [2, 3]].values
y = dataset.iloc[:, 4].values

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0)

# feature scaling
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# train an RBF-kernel SVM
classifier = SVC(kernel='rbf', random_state=0)
classifier.fit(X_train, y_train)

y_pred = classifier.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracyScore = accuracy_score(y_test, y_pred)
print("accuracy is", accuracyScore)

# visualise the decision boundary on the test set
X_set, y_set = X_test, y_test
X1, X2 = np.meshgrid(
    np.arange(start=X_set[:, 0].min() - 1, stop=X_set[:, 0].max() + 1, step=0.01),
    np.arange(start=X_set[:, 1].min() - 1, stop=X_set[:, 1].max() + 1, step=0.01))
plt.contourf(X1, X2,
             classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha=0.75, cmap=ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c=ListedColormap(('red', 'green'))(i), label=j)
plt.title('SVM (Test set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()
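
The script expects a local file named 'Social_Network_Ad.csv' with the two features in columns 2 and 3 and the class label in column 4. If that file is not at hand, a synthetic stand-in can be generated so the rest of the script runs unchanged; this is a sketch under that assumption, not part of the original practical:

# hypothetical stand-in for the CSV: a two-feature binary classification set
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=400, n_features=2, n_informative=2,
                           n_redundant=0, random_state=0)
# X plays the role of the two selected columns, y the purchase label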
Aim 3: Write a program to demonstrate the working of the decision
tree-based ID3 algorithm. Use an appropriate data set for building the
decision tree and apply this knowledge to classify a new sample.

import pandas as pd
import math
import numpy as np

data = pd.read_csv("3-dataset.csv")
features = [feat for feat in data]
features.remove("answer")

class Node:
    def __init__(self):
        self.children = []
        self.value = ""
        self.isLeaf = False
        self.pred = ""

def entropy(examples):
    # count positive and negative examples by the "answer" column
    pos = 0.0
    neg = 0.0
    for _, row in examples.iterrows():
        if row["answer"] == "yes":
            pos += 1
        else:
            neg += 1
    if pos == 0.0 or neg == 0.0:
        return 0.0
    else:
        p = pos / (pos + neg)
        n = neg / (pos + neg)
        return -(p * math.log(p, 2) + n * math.log(n, 2))

def info_gain(examples, attr):
    # information gain = entropy of the set minus the weighted
    # entropy of each subset produced by splitting on attr
    uniq = np.unique(examples[attr])
    gain = entropy(examples)
    for u in uniq:
        subdata = examples[examples[attr] == u]
        sub_e = entropy(subdata)
        gain -= (float(len(subdata)) / float(len(examples))) * sub_e
    return gain

def ID3(examples, attrs):
    root = Node()

    # choose the attribute with the highest information gain
    max_gain = 0
    max_feat = ""
    for feature in attrs:
        gain = info_gain(examples, feature)
        if gain > max_gain:
            max_gain = gain
            max_feat = feature
    root.value = max_feat

    # grow one branch per value of the chosen attribute
    uniq = np.unique(examples[max_feat])
    for u in uniq:
        subdata = examples[examples[max_feat] == u]
        if entropy(subdata) == 0.0:
            # pure subset: make a leaf holding the predicted label
            newNode = Node()
            newNode.isLeaf = True
            newNode.value = u
            newNode.pred = np.unique(subdata["answer"])
            root.children.append(newNode)
        else:
            # impure subset: recurse with the used attribute removed
            dummyNode = Node()
            dummyNode.value = u
            new_attrs = attrs.copy()
            new_attrs.remove(max_feat)
            child = ID3(subdata, new_attrs)
            dummyNode.children.append(child)
            root.children.append(dummyNode)

    return root

def printTree(root: Node, depth=0):
    for i in range(depth):
        print("\t", end="")
    print(root.value, end="")
    if root.isLeaf:
        print(" -> ", root.pred)
    print()
    for child in root.children:
        printTree(child, depth + 1)

def classify(root: Node, new):
    # follow the branch whose value matches the new example's attribute
    for child in root.children:
        if child.value == new[root.value]:
            if child.isLeaf:
                print("Predicted label for new example",
                      new, "is:", child.pred)
                return
            else:
                classify(child.children[0], new)

root = ID3(data, features)

print("Decision Tree is:")
printTree(root)
print("------------------")

new = {"outlook": "sunny", "temperature": "hot",
       "humidity": "normal", "wind": "strong"}
classify(root, new)
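
The program assumes '3-dataset.csv' has the four categorical feature columns used above plus an 'answer' column holding the yes/no labels. Assuming the classic play-tennis data, the first few rows would look like this (illustrative, not the actual file):

outlook,temperature,humidity,wind,answer
sunny,hot,high,weak,no
sunny,hot,high,strong,no
overcast,hot,high,weak,yes
rain,mild,high,weak,yes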
Aim 4: Write a program to implement K-Nearest Neighbors algorithm to
classify the iris data set. Print both correct and wrong predictions.
Java/Python ML library classes can be used for this problem.

import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, accuracy_score

iris = load_iris()
print(iris.keys())

df = pd.DataFrame(iris['data'])
print(df)
print(iris['target_names'])
print(iris['feature_names'])

X = df
y = iris['target']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# fit a 3-nearest-neighbour classifier
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

# classify a single new sample
x_new = np.array([[5, 2.9, 1, 0.2]])
prediction = knn.predict(x_new)
print(iris['target_names'][prediction])

# evaluate on the held-out test set
y_pred = knn.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)

print("correct prediction rate:", accuracy_score(y_test, y_pred))
print("wrong prediction rate:", 1 - accuracy_score(y_test, y_pred))
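
The aim asks for the correct and wrong predictions themselves, not only their rates. A short loop over the test set (a sketch reusing the y_test, y_pred, and iris objects defined above) prints each prediction with a CORRECT/WRONG flag:

# print every test-set prediction, flagged as correct or wrong
for idx, (actual, predicted) in enumerate(zip(y_test, y_pred)):
    status = "CORRECT" if actual == predicted else "WRONG"
    print(status, "- sample", idx,
          "predicted:", iris['target_names'][predicted],
          "actual:", iris['target_names'][actual])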


Aim 5: Write a program to implement the naive Bayesian classifier for a
sample training data set stored as a .CSV file. Compute the accuracy of the
classifier on a few test data sets.

import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

data = pd.read_csv('tennisdata.csv')

# features are every column except the last; the label is the last column
X = data.iloc[:, :-1].copy()
y = data.iloc[:, -1]

# encode each categorical feature column as integers
le_outlook = LabelEncoder()
X.Outlook = le_outlook.fit_transform(X.Outlook)

le_Temperature = LabelEncoder()
X.Temperature = le_Temperature.fit_transform(X.Temperature)

le_Humidity = LabelEncoder()
X.Humidity = le_Humidity.fit_transform(X.Humidity)

le_Windy = LabelEncoder()
X.Windy = le_Windy.fit_transform(X.Windy)

print("\nNow the train data is:\n", X.head())

le_PlayTennis = LabelEncoder()
y = le_PlayTennis.fit_transform(y)
print("\nNow the train output is:\n", y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

classifier = GaussianNB()
classifier.fit(X_train, y_train)

print("Accuracy is:", accuracy_score(y_test, classifier.predict(X_test)))
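
To classify a fresh observation, it must pass through the same fitted encoders before prediction. A minimal sketch, assuming a hypothetical new day whose attribute strings match the spellings actually used in tennisdata.csv:

# hypothetical new sample; each string must be a category the encoder saw during fit
import pandas as pd
new_day = pd.DataFrame({
    'Outlook': le_outlook.transform(['Sunny']),
    'Temperature': le_Temperature.transform(['Cool']),
    'Humidity': le_Humidity.transform(['Normal']),
    'Windy': le_Windy.transform(['f'])})
print("Prediction:", le_PlayTennis.inverse_transform(classifier.predict(new_day)))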
Aim 6: The probability that it is Friday and that a student is absent is 3%.
Since there are 5 school days in a week, the probability that it is Friday is
20%. What is the probability that a student is absent given that today is
Friday? Apply Bayes’ rule in python to get the result.

# Bayes' rule in ratio form: P(Absent | Friday) = P(Absent and Friday) / P(Friday)
pAF = 0.03
print("The probability that it is Friday and that a student is absent:", pAF)

pF = 0.2
print("The probability that it is Friday:", pF)

pResult = pAF / pF

print("The probability that a student is absent given that today is Friday:",
      pResult * 100, "%")
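
The same arithmetic generalizes to any pair of events: when the joint probability P(A and B) is known directly, the conditional is just the ratio used above. A small helper (a sketch, not in the original) makes the rule explicit:

def conditional_from_joint(p_joint, p_condition):
    # P(A | B) = P(A and B) / P(B)
    return p_joint / p_condition

print(conditional_from_joint(0.03, 0.2))  # 0.15, i.e. 15%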
Aim 7: Write a program to implement the naïve Bayesian classifier for a
sample training data set stored as a .CSV file. Compute the accuracy of the
classifier, considering a few test data sets.

import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

data = pd.read_csv('tennisdata.csv')

# features are every column except the last; the label is the last column
X = data.iloc[:, :-1].copy()
y = data.iloc[:, -1]

# encode each categorical feature column as integers
le_outlook = LabelEncoder()
X.Outlook = le_outlook.fit_transform(X.Outlook)

le_Temperature = LabelEncoder()
X.Temperature = le_Temperature.fit_transform(X.Temperature)

le_Humidity = LabelEncoder()
X.Humidity = le_Humidity.fit_transform(X.Humidity)

le_Windy = LabelEncoder()
X.Windy = le_Windy.fit_transform(X.Windy)

print("\nNow the train data is:\n", X.head())

le_PlayTennis = LabelEncoder()
y = le_PlayTennis.fit_transform(y)
print("\nNow the train output is:\n", y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

classifier = GaussianNB()
classifier.fit(X_train, y_train)

print("Accuracy is:", accuracy_score(y_test, classifier.predict(X_test)))
Aim 8: Implement and demonstrate a program for dimensionality
reduction.

from numpy import mean
from numpy import std
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression

# define dataset
X, y = make_classification(n_samples=1000, n_features=20, n_informative=10,
                           n_redundant=10, random_state=7)
# define the pipeline: project to 10 principal components, then classify
steps = [('pca', PCA(n_components=10)), ('m', LogisticRegression())]
model = Pipeline(steps=steps)
# evaluate model
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
n_scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1)
# report performance
print('Accuracy: %.3f (%.3f)' % (mean(n_scores), std(n_scores)))
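
To judge what the 10-component projection costs or buys, the same cross-validation protocol can be run on a logistic regression over all 20 original features; this baseline comparison is a sketch, not part of the original listing:

# baseline: identical CV protocol, but without the PCA step
baseline = LogisticRegression()
base_scores = cross_val_score(baseline, X, y, scoring='accuracy', cv=cv, n_jobs=-1)
print('Baseline accuracy: %.3f (%.3f)' % (mean(base_scores), std(base_scores)))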
Aim 9: Implement a clustering algorithm in Python

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

data = pd.read_csv('clustering.csv')
data.head()

# copy so the distance columns added below do not touch the source frame
X = data[["LoanAmount", "ApplicantIncome"]].copy()

plt.scatter(X["ApplicantIncome"], X["LoanAmount"], c='black')
plt.xlabel('Annual Income')
plt.ylabel('Loan Amount (In Thousands)')
plt.show()

# number of clusters
K = 3

# select K random observations as the initial centroids
Centroids = X.sample(n=K)
plt.scatter(X["ApplicantIncome"], X["LoanAmount"], c='black')
plt.scatter(Centroids["ApplicantIncome"], Centroids["LoanAmount"], c='red')
plt.xlabel('Annual Income')
plt.ylabel('Loan Amount (In Thousands)')
plt.show()

diff = 1
j = 0

while diff != 0:
    XD = X
    i = 1
    # step 1: Euclidean distance from every point to each centroid,
    # stored in columns 1..K of X
    for index1, row_c in Centroids.iterrows():
        ED = []
        for index2, row_d in XD.iterrows():
            d1 = (row_c["ApplicantIncome"] - row_d["ApplicantIncome"])**2
            d2 = (row_c["LoanAmount"] - row_d["LoanAmount"])**2
            d = np.sqrt(d1 + d2)
            ED.append(d)
        X[i] = ED
        i = i + 1

    # step 2: assign each point to its nearest centroid
    C = []
    for index, row in X.iterrows():
        min_dist = row[1]
        pos = 1
        for i in range(K):
            if row[i + 1] < min_dist:
                min_dist = row[i + 1]
                pos = i + 1
        C.append(pos)
    X["Cluster"] = C

    # step 3: recompute the centroids and check for convergence
    Centroids_new = X.groupby(["Cluster"]).mean()[["LoanAmount", "ApplicantIncome"]]
    if j == 0:
        diff = 1
        j = j + 1
    else:
        diff = (Centroids_new['LoanAmount'] - Centroids['LoanAmount']).sum() + \
               (Centroids_new['ApplicantIncome'] - Centroids['ApplicantIncome']).sum()
        print(diff)
    Centroids = X.groupby(["Cluster"]).mean()[["LoanAmount", "ApplicantIncome"]]

# plot the final clusters and centroids
color = ['blue', 'green', 'cyan']
for k in range(K):
    cluster_data = X[X["Cluster"] == k + 1]
    plt.scatter(cluster_data["ApplicantIncome"], cluster_data["LoanAmount"], c=color[k])
plt.scatter(Centroids["ApplicantIncome"], Centroids["LoanAmount"], c='red')
plt.xlabel('Income')
plt.ylabel('Loan Amount (In Thousands)')
plt.show()
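
scikit-learn's KMeans implements the same algorithm with a smarter (k-means++) initialization, so it makes a convenient cross-check for the scratch version above; a sketch on the same two columns:

# cross-check the hand-rolled loop against scikit-learn's KMeans
from sklearn.cluster import KMeans

km = KMeans(n_clusters=K, random_state=0, n_init=10)
labels = km.fit_predict(data[["LoanAmount", "ApplicantIncome"]])
print(km.cluster_centers_)  # compare against the final Centroids above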
Aim 10: Write a program to implement a Random Forest classifier

from numpy import mean
from numpy import std
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.ensemble import RandomForestClassifier

# define dataset
X, y = make_classification(n_samples=1000, n_features=20, n_informative=15,
                           n_redundant=5, random_state=3)
print(X.shape, y.shape)

# evaluate random forest algorithm for classification
model = RandomForestClassifier()
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
n_scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1,
                           error_score='raise')

# report performance
print('Accuracy: %.3f (%.3f)' % (mean(n_scores), std(n_scores)))

# fit the model on the whole dataset and make a single prediction
model = RandomForestClassifier()
model.fit(X, y)
row = [[-8.52381793, 5.24451077, -12.14967704, -2.92949242, 0.99314133,
        0.67326595, -0.38657932, 1.27955683, -0.60712621, 3.20807316,
        0.60504151, -1.38706415, 8.92444588, -7.43027595, -2.33653219,
        1.10358169, 0.21547782, 1.05057966, 0.6975331, 0.26076035]]
yhat = model.predict(row)
print('Predicted Class: %d' % yhat[0])
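
Beyond accuracy, a fitted forest exposes per-feature importance scores, which are worth a glance on this synthetic problem (15 informative features out of 20); a brief addition using the model fitted above:

# inspect which of the 20 features the forest relied on most
for i, importance in enumerate(model.feature_importances_):
    print('Feature %d: importance %.4f' % (i, importance))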
