AIML Practical
Aim 1: Python Implementation of Simple Linear Regression
import numpy as np
import matplotlib.pyplot as plt

def estimate_coef(x, y):
    # number of observations
    n = np.size(x)
    # means of the x and y vectors
    mean_x = np.mean(x)
    mean_y = np.mean(y)
    # cross-deviation and deviation about x
    SS_xy = np.sum(y * x) - n * mean_y * mean_x
    SS_xx = np.sum(x * x) - n * mean_x * mean_x
    # regression coefficients: intercept b_0 and slope b_1
    b_1 = SS_xy / SS_xx
    b_0 = mean_y - b_1 * mean_x
    return (b_0, b_1)

def plot_regression_line(x, y, b):
    # scatter the observations, then draw the fitted line
    plt.scatter(x, y, color="m", marker="o", s=30)
    y_pred = b[0] + b[1] * x
    plt.plot(x, y_pred, color="g")
    plt.xlabel('x')
    plt.ylabel('y')
    plt.show()

def main():
    x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])
    b = estimate_coef(x, y)
    print("this is value of b", type(b))
    print("Estimated coefficients:\nb_0 = {}\nb_1 = {}".format(b[0], b[1]))
    plot_regression_line(x, y, b)

if __name__ == "__main__":
    main()
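As a quick sanity check (not part of the original listing), NumPy's polyfit fits the same least-squares line; its slope and intercept should match b_1 and b_0 computed above.

# optional check: np.polyfit returns [slope, intercept] for degree 1
import numpy as np
x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])
slope, intercept = np.polyfit(x, y, 1)
print(intercept, slope)  # roughly 1.24 (b_0) and 1.17 (b_1)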
Aim 2: Python Implementation of Support Vector Machine
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score

dataset = pd.read_csv('Social_Network_Ad.csv')
X = dataset.iloc[:, [2, 3]].values
y = dataset.iloc[:, 4].values
# hold out a test set (a 75/25 split is assumed; the original omits this step)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)
# scale features with statistics learned from the training set only
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
# fit the SVM (a linear kernel is assumed; the original omits the classifier setup)
classifier = SVC(kernel='linear', random_state=0)
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracyScore = accuracy_score(y_test, y_pred)
print("accuracy is", accuracyScore)
Aim 3: Python Implementation of the ID3 Decision Tree Algorithm
import pandas as pd
import math
import numpy as np

data = pd.read_csv("3-dataset.csv")
# every column except the target "answer" is a candidate feature
features = [feat for feat in data]
features.remove("answer")
class Node:
    def __init__(self):
        self.children = []
        self.value = ""
        self.isLeaf = False
        self.pred = ""
def entropy(examples):
    # count the positive ("yes") and negative examples of the target
    pos = 0.0
    neg = 0.0
    for _, row in examples.iterrows():
        if row["answer"] == "yes":
            pos += 1
        else:
            neg += 1
    if pos == 0.0 or neg == 0.0:
        return 0.0
    else:
        p = pos / (pos + neg)
        n = neg / (pos + neg)
        return -(p * math.log(p, 2) + n * math.log(n, 2))
def info_gain(examples, attr):
    # gain = entropy(parent) - weighted entropy of each split on attr
    uniq = np.unique(examples[attr])
    gain = entropy(examples)
    for u in uniq:
        subdata = examples[examples[attr] == u]
        gain -= (float(len(subdata)) / float(len(examples))) * entropy(subdata)
    return gain

def ID3(examples, attrs):
    root = Node()
    # choose the attribute with the highest information gain
    max_gain = 0
    max_feat = ""
    for feature in attrs:
        gain = info_gain(examples, feature)
        if gain > max_gain:
            max_gain = gain
            max_feat = feature
    root.value = max_feat
    uniq = np.unique(examples[max_feat])
    for u in uniq:
        subdata = examples[examples[max_feat] == u]
        if entropy(subdata) == 0.0:
            # pure subset: make a leaf holding the predicted answer
            newNode = Node()
            newNode.isLeaf = True
            newNode.value = u
            newNode.pred = np.unique(subdata["answer"])
            root.children.append(newNode)
        else:
            # mixed subset: recurse on the remaining attributes
            dummyNode = Node()
            dummyNode.value = u
            new_attrs = attrs.copy()
            new_attrs.remove(max_feat)
            child = ID3(subdata, new_attrs)
            dummyNode.children.append(child)
            root.children.append(dummyNode)
    return root
def printTree(root: Node, depth=0):
    for i in range(depth):
        print("\t", end="")
    print(root.value, end="")
    if root.isLeaf:
        print(" -> ", root.pred)
    print()
    for child in root.children:
        printTree(child, depth + 1)
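A minimal driver for the functions above, reusing the data and features loaded at the start of this aim:

# build the tree from the full training data and display it
root = ID3(data, features)
printTree(root)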
Aim 4: Python Implementation of the K-Nearest Neighbors Classifier
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix

iris = load_iris()
print(iris.keys())
df = pd.DataFrame(iris['data'], columns=iris['feature_names'])
print(df)
print(iris['target_names'])

X = df
y = iris['target']
# hold out a test set (scikit-learn's default 75/25 split is assumed)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

# classify a single new flower measurement
x_new = np.array([[5, 2.9, 1, 0.2]])
prediction = knn.predict(x_new)
print(iris['target_names'][prediction])

y_pred = knn.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)
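A small optional extension: scanning several values of n_neighbors to see how k affects test accuracy (variable names follow the split above).

# optional: compare test accuracy for k = 1..10
from sklearn.metrics import accuracy_score
for k in range(1, 11):
    model = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)
    print(k, accuracy_score(y_test, model.predict(X_test)))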
Aim 6: Python program to compute a conditional probability using Bayes' rule
# P(absent and Friday)
pAF = 0.03
print("The probability that it is Friday and that a student is absent:", pAF)
# P(Friday)
pF = 0.2
print("The probability that it is Friday:", pF)
# conditional probability: P(absent | Friday) = P(absent and Friday) / P(Friday)
pResult = pAF / pF
print("The probability that a student is absent given that today is Friday:", pResult * 100, "%")
Aim 7: Write a program to implement the naïve Bayesian classifier for a sample training data set stored as a .CSV file. Compute the accuracy of the classifier, considering a few test data sets.
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

data = pd.read_csv('tennisdata.csv')
X = data.iloc[:, :-1]
y = data.iloc[:, -1]

# encode each categorical column as integers
le_outlook = LabelEncoder()
X.Outlook = le_outlook.fit_transform(X.Outlook)
le_Temperature = LabelEncoder()
X.Temperature = le_Temperature.fit_transform(X.Temperature)
le_Humidity = LabelEncoder()
X.Humidity = le_Humidity.fit_transform(X.Humidity)
le_Windy = LabelEncoder()
X.Windy = le_Windy.fit_transform(X.Windy)
le_PlayTennis = LabelEncoder()
y = le_PlayTennis.fit_transform(y)
print("\nNow the train output is\n", y)

# hold out a few rows as a test set (an 80/20 split is assumed; the original omits this step)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
classifier = GaussianNB()
classifier.fit(X_train, y_train)
print("Accuracy is:", accuracy_score(y_test, classifier.predict(X_test)))
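As a usage sketch (not in the original), the fitted encoders can translate one new observation before querying the classifier; the category strings below are assumptions about the labels present in tennisdata.csv.

# optional: classify one hypothetical day (category labels assumed to exist in the CSV)
import numpy as np
sample = np.array([[le_outlook.transform(['Sunny'])[0],
                    le_Temperature.transform(['Cool'])[0],
                    le_Humidity.transform(['High'])[0],
                    le_Windy.transform(['True'])[0]]])
print(le_PlayTennis.inverse_transform(classifier.predict(sample)))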
Aim 8: Python Implementation of K-Means Clustering
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

data = pd.read_csv('clustering.csv')
data.head()
X = data[["LoanAmount", "ApplicantIncome"]]

# visualise the raw data
plt.scatter(X["ApplicantIncome"], X["LoanAmount"], c='black')
plt.xlabel('Applicant Income')
plt.ylabel('Loan Amount (In Thousands)')
plt.show()

# number of clusters
K = 3
# initial centroids: K randomly chosen rows (the original's initialisation
# step is missing, so this is an assumed choice)
Centroids = X.sample(n=K)

diff = 1
j = 0
while diff != 0:
    XD = X
    i = 1
    # distance of every point from each current centroid
    for index1, row_c in Centroids.iterrows():
        ED = []
        for index2, row_d in XD.iterrows():
            d1 = (row_c["ApplicantIncome"] - row_d["ApplicantIncome"])**2
            d2 = (row_c["LoanAmount"] - row_d["LoanAmount"])**2
            ED.append(np.sqrt(d1 + d2))
        X[i] = ED
        i = i + 1

    # assign each point to its nearest centroid
    C = []
    for index, row in X.iterrows():
        min_dist = row[1]
        pos = 1
        for i in range(K):
            if row[i + 1] < min_dist:
                min_dist = row[i + 1]
                pos = i + 1
        C.append(pos)
    X["Cluster"] = C

    # recompute centroids as the mean of each cluster
    Centroids_new = X.groupby(["Cluster"]).mean()[["LoanAmount", "ApplicantIncome"]]
    if j == 0:
        diff = 1
        j = j + 1
    else:
        # stop once the centroids no longer move
        diff = (Centroids_new['LoanAmount'] - Centroids['LoanAmount']).sum() + \
               (Centroids_new['ApplicantIncome'] - Centroids['ApplicantIncome']).sum()
        print(diff)
    Centroids = X.groupby(["Cluster"]).mean()[["LoanAmount", "ApplicantIncome"]]

# plot the final clusters and centroids
color = ['blue', 'green', 'cyan']
for k in range(K):
    cluster_data = X[X["Cluster"] == k + 1]
    plt.scatter(cluster_data["ApplicantIncome"], cluster_data["LoanAmount"], c=color[k])
plt.scatter(Centroids["ApplicantIncome"], Centroids["LoanAmount"], c='red')
plt.xlabel('Income')
plt.ylabel('Loan Amount (In Thousands)')
plt.show()
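For comparison (an addition, not part of the original listing), scikit-learn's KMeans runs the same algorithm with k-means++ initialisation:

# optional cross-check with scikit-learn's KMeans
from sklearn.cluster import KMeans
km = KMeans(n_clusters=3, n_init=10, random_state=0)
labels = km.fit_predict(data[["ApplicantIncome", "LoanAmount"]])
print(km.cluster_centers_)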
Aim 10: Write a program to implement a Random Forest classifier
from numpy import mean, std
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, RepeatedStratifiedKFold
# define dataset
X, y = make_classification(n_samples=1000, n_features=20, n_informative=15, n_redundant=5, random_state=3)
print(X.shape, y.shape)
# evaluate with repeated stratified 10-fold CV (an assumed setup; the original omits the evaluation step)
model = RandomForestClassifier()
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
n_scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1)
# report performance
print('Accuracy: %.3f (%.3f)' % (mean(n_scores), std(n_scores)))
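Random forests also expose per-feature importances; a short optional follow-up, fitting the model once on the full synthetic data:

# optional: inspect which synthetic features the forest relies on
model.fit(X, y)
print(model.feature_importances_)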