Name: Dhruvil K Kotecha ID No.: 17CP024 Sub. Code: CP-402 Sub. Name: ADT Semester: 7 Year: 2020/21
Name: Dhruvil K Kotecha ID No.: 17CP024 Sub. Code: CP-402 Sub. Name: ADT Semester: 7 Year: 2020/21
LAB 2 ......................................................................................................................8
AIM: WRITE A PROGRAM TO APPLY DECISION TREE METHOD USING INFORMATION GAIN ON GIVEN DATASET; ............. 8
LAB 3 ....................................................................................................................12
AIM: WRITE A PROGRAM TO APPLY DECISION TREE METHOD USING GINI INDEX ON GIVEN DATASET;......................... 12
LAB 4 ....................................................................................................................14
AIM: WRITE A PROGRAM TO PERFORM NAIVE-BAYES CLASSIFICATION ALGORITHM .................................................... 14
LAB 5 ....................................................................................................................16
AIM: WRITE A PROGRAM TO PERFORM K-NN CLASSIFICATION USING MIN- MAX NORMALIZATION. ............................. 16
LAB 6 ....................................................................................................................18
AIM: SIMPLE K-NN CLASSIFICATION ALGORITHM .......................................................................................................... 18
LAB 7 ....................................................................................................................18
AIM: WRITE A PROGRAM TO PERFORM APRIORI ALGORITHM......................................................................................... 19
LAB 8 ....................................................................................................................22
AIM: WRITE A PROGRAM TO PERFORM FP GROWTH ALGORITHM ................................................................................... 22
LAB 9 ....................................................................................................................26
AIM: WRITE A PROGRAM TO PERFORM K-MEANS CLUSTERING ALGORITHM .................................................................. 26
LAB 10 ..................................................................................................................29
AIM: WRITE A PROGRAM TO PERFORM K-MEDOIDS ALGORITHM .................................................................................... 29
1 |P a g e
17CP024 CP402
Lab 1 30/7/2020
Program 1
Aim: Write a C++ program that fills an array of 100 integer/float values with random
numbers and finds the mean, median, mode, range, variance and standard deviation.
Code:
#include<iostream>
#include<math.h>
#include <bits/stdc++.h>
using namespace std;
/**
 * Reads a sample size n from standard input, fills an array with n
 * pseudo-random integers in [0, 99] and prints the mean, median, mode,
 * population variance, standard deviation and range of the sample.
 *
 * The generator is not seeded, so every run produces the same sample.
 *
 * @return 0 on success, 1 when n is missing or not a positive integer.
 */
int main()
{
    int n = 0;
    if (!(std::cin >> n) || n <= 0) {
        std::cerr << "n must be a positive integer" << std::endl;
        return 1;
    }

    // A vector releases its storage automatically (the raw new[] was never freed).
    std::vector<int> a(n);
    for (int i = 0; i < n; i++)
        a[i] = std::rand() % 100;

    long long sum = 0;
    for (int i = 0; i < n; i++)
        sum += a[i];
    // Convert before dividing; int / int would drop the fractional part.
    const float mean = static_cast<float>(sum) / n;

    std::sort(a.begin(), a.end());
    // For odd n both indices are the middle element; for even n they are the two middle ones.
    const float median = (a[(n - 1) / 2] + a[n / 2]) / 2.0f;
    std::cout << "mean=" << mean << std::endl;
    std::cout << "median=" << median << std::endl;

    // The array is sorted, so the extremes sit at the ends.
    const int lowest = a.front();
    const int highest = a.back();

    // Histogram over the value range: cnt[v] is the number of occurrences of v.
    std::vector<int> cnt(highest + 1, 0);
    for (int i = 0; i < n; i++)
        cnt[a[i]]++;
    // The mode is the VALUE with the highest count, i.e. the position of the
    // largest histogram entry (the smallest such value when several tie).
    const int mode = static_cast<int>(std::max_element(cnt.begin(), cnt.end()) - cnt.begin());
    std::cout << "mode=" << mode << std::endl;

    // Accumulator must start at zero before the squared deviations are added.
    float vari = 0.0f;
    for (int i = 0; i < n; i++)
        vari += (a[i] - mean) * (a[i] - mean);
    const float variance = vari / n;  // population variance (divides by n)
    const float sd = std::sqrt(variance);
    std::cout << "variance=" << variance << std::endl;
    std::cout << "standard deviation=" << sd << std::endl;
    std::cout << "range=" << highest - lowest;
    return 0;
}
2 |P a g e
17CP024 CP402
Output:
3 |P a g e
17CP024 CP402
Program 2
Code:
#include <iostream>
#include <fstream>
#include <cstdlib>
#include <vector>
#include <sstream>
#include <string>
#include <cmath>
using namespace std;
/// Summary statistics of one numeric CSV column.
struct ColumnStats {
    float mean;     ///< arithmetic mean
    float lowest;   ///< smallest value seen
    float highest;  ///< largest value seen
    float stddev;   ///< population standard deviation
    int scale;      ///< power of ten used for decimal scaling
};

/// Computes mean, extremes, standard deviation and decimal-scaling divisor of `values`.
static ColumnStats summarize(const std::vector<float>& values) {
    ColumnStats st = ColumnStats();
    st.scale = 1;
    if (values.empty()) {
        return st;
    }
    // Seed the extremes from the data instead of magic sentinels (1000 / 0),
    // which break for negative readings or values above 1000.
    st.lowest = values[0];
    st.highest = values[0];
    double total = 0.0;
    for (std::size_t i = 0; i < values.size(); ++i) {
        total += values[i];
        if (values[i] < st.lowest) st.lowest = values[i];
        if (values[i] > st.highest) st.highest = values[i];
    }
    st.mean = static_cast<float>(total / values.size());

    double squared = 0.0;
    for (std::size_t i = 0; i < values.size(); ++i) {
        const double delta = values[i] - st.mean;
        squared += delta * delta;
    }
    st.stddev = static_cast<float>(std::sqrt(squared / values.size()));

    // Decimal scaling: one factor of ten per digit of the largest magnitude.
    const float lowMag = std::fabs(st.lowest);
    const float highMag = std::fabs(st.highest);
    const float peak = lowMag > highMag ? lowMag : highMag;
    for (long whole = static_cast<long>(peak); whole > 0; whole /= 10) {
        st.scale *= 10;
    }
    return st;
}

/// Returns num / den, or 0 when den is zero (constant column).
static float safeDivide(float num, float den) {
    return den != 0.0f ? num / den : 0.0f;
}

/**
 * Reads columns 6 and 7 of the road-weather CSV and writes, for every data
 * row, the raw values followed by their min-max normalised, z-score
 * normalised and decimal-scaled forms to Practical.csv.
 *
 * Output columns: raw6, raw7, minmax6, minmax7, zscore6, zscore7, dec6, dec7.
 * The header row (first field "StationName") and rows with fewer than eight
 * fields are skipped. The file is read once and the number of rows is taken
 * from the data rather than hard-coded.
 *
 * @return 0 on success, 1 when a file cannot be opened.
 */
int main(){
    // The listing opened this file under two different spellings; the
    // working-directory-relative one is used throughout.
    const std::string inputPath = "road-weather-information-stations_Final.csv";
    const std::size_t firstCol = 6;
    const std::size_t secondCol = 7;

    std::ifstream fin(inputPath.c_str());
    if (!fin) {
        std::cerr << "cannot open " << inputPath << "\n";
        return 1;
    }
    std::ofstream fout("Practical.csv");
    if (!fout) {
        std::cerr << "cannot open Practical.csv for writing\n";
        return 1;
    }

    std::vector<std::string> rawFirst, rawSecond;  // original text, echoed to the output
    std::vector<float> first, second;              // parsed values
    std::string line, word;
    while (std::getline(fin, line)) {
        // Split the line on commas; the fields must be collected before any indexing.
        std::vector<std::string> row;
        std::stringstream s(line);
        while (std::getline(s, word, ',')) {
            row.push_back(word);
        }
        if (row.size() <= secondCol || row[0] == "StationName") {
            continue;
        }
        rawFirst.push_back(row[firstCol]);
        rawSecond.push_back(row[secondCol]);
        first.push_back(static_cast<float>(std::atof(row[firstCol].c_str())));
        second.push_back(static_cast<float>(std::atof(row[secondCol].c_str())));
    }

    const ColumnStats s1 = summarize(first);
    const ColumnStats s2 = summarize(second);
    const float dif1 = s1.highest - s1.lowest;
    const float dif2 = s2.highest - s2.lowest;

    for (std::size_t i = 0; i < first.size(); ++i) {
        fout << rawFirst[i] << ",";
        fout << rawSecond[i] << ",";
        fout << safeDivide(first[i] - s1.lowest, dif1) << ",";
        fout << safeDivide(second[i] - s2.lowest, dif2) << ",";
        fout << safeDivide(first[i] - s1.mean, s1.stddev) << ",";
        fout << safeDivide(second[i] - s2.mean, s2.stddev) << ",";
        fout << first[i] / s1.scale << ",";
        fout << second[i] / s2.scale << "\n";
    }
    return 0;
}
Output:
6 |P a g e
17CP024 CP402
7 |P a g e
17CP024 CP402
Lab 2 13/8/2020
Program
Aim: Write a program to apply the decision tree method using information gain on a
given dataset.
Code:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math
import copy
dataset = pd.read_csv('dataset3.csv')
X = dataset.iloc[:, 1:].values
print(X)
attribute = ['outlook', 'temp', 'humidity', 'wind']
class Node(object):
    """A single vertex of the decision tree.

    Every field starts as None and is filled in by the code that builds the
    tree: `value` receives an attribute name, `decision` the attribute value
    of the branch leading to this node ('Start' for the root), and `childs`
    the collection that child nodes are appended to.
    """

    def __init__(self):
        # None is immutable, so sharing it across the three fields is safe.
        self.value = self.decision = self.childs = None
x = yes/(yes+no)
y = no/(yes+no)
if x != 0 and y != 0:
entropy = -1 * (x*math.log2(x) + y*math.log2(y))
if x == 1:
ans = 1
if y == 1:
ans = 0
return entropy, ans
8 |P a g e
17CP024 CP402
for j in columns:
mydict = {}
idx = j
for i in rows:
key = data[i][idx]
if key not in mydict:
mydict[key] = 1
else:
mydict[key] = mydict[key] + 1
gain = entropy
# print(mydict)
for key in mydict:
yes = 0
no = 0
for k in rows:
if data[k][j] == key:
if data[k][-1] == 'Yes':
yes = yes + 1
else:
no = no + 1
# print(yes, no)
x = yes/(yes+no)
y = no/(yes+no)
# print(x, y)
if x != 0 and y != 0:
gain += (mydict[key] * (x*math.log2(x) + y*math.log2(y)))/14
# print(gain)
if gain > maxGain:
# print("hello")
maxGain = gain
retidx = j
root.value = attribute[idx]
mydict = {}
for i in rows:
key = data[i][idx]
if key not in mydict:
mydict[key] = 1
else:
mydict[key] += 1
newcolumns = copy.deepcopy(columns)
newcolumns.remove(idx)
for key in mydict:
newrows = []
for i in rows:
if data[i][idx] == key:
newrows.append(i)
print(newrows)
temp = buildTree(data, newrows, newcolumns)
temp.decision = key
root.childs.append(temp)
return root
def traverse(root):
    """Print `decision` then `value` of `root`, followed by the same for
    each child in order (pre-order walk of the tree)."""
    print(root.decision)
    print(root.value)
    for child in root.childs:
        traverse(child)
def calculate():
    """Build the decision tree over row indices 0-13 and column indices 0-3
    of the module-level `X`, label its root 'Start' and print it."""
    tree = buildTree(X, list(range(14)), list(range(4)))
    tree.decision = 'Start'
    traverse(tree)
10 | P a g e
17CP024 CP402
calculate()
11 | P a g e
17CP024 CP402
Lab 3 20/8/2020
Program
Aim: Write a program to apply the decision tree method using the Gini index on a
given dataset.
Code:
import pandas as pd
from collections import Counter
import math
gini_indexes = []
for set in sets:
index = 0
for s in set:
grp = data_frame.copy(deep=True)
grp = grp.set_index(attribute)
grp = grp.loc[s]
gini_d = (len(grp) / data_len) * (gi(grp[tr]))
index += gini_d
gini_indexes.append(index)
return min(gini_indexes)
if len(classes) == 1:
return list(classes.keys())[0]
else:
ginis = [sgi(data_frame, attr, tr) for attr in attributes]
print(ginis)
min_gini_index = ginis.index(min(ginis))
root_node = attributes[min_gini_index]
print(root_node)
tree = {root_node: {}}
attributes.remove(root_node)
return tree
def sub(list):
12 | P a g e
17CP024 CP402
subs = []
classes = Counter(c for c in list)
elements = [x for x in classes.keys()]
# Column name passed to ct() as its third argument; presumably the
# class-label column of the data set - TODO confirm against ct().
tr = "BUY COMPUTER"
# NOTE(review): `data` and `features` are not defined in the visible part of
# this listing; they must exist before this point for the script to run.
df = pd.DataFrame(data)
# Build the tree from the frame, the feature list and the target column, then show it.
decision_tree = ct(df, features, tr)
print(decision_tree)
Output:
13 | P a g e
17CP024 CP402
Lab 4 3/9/2020
Program
Code:
import pandas as pd
import numpy as np
# Naive Bayes on nb.csv: label-encode the four feature columns and the target,
# fit a Gaussian Naive Bayes model and classify one encoded sample.
# GaussianNB is used below, so it has to be imported alongside LabelEncoder.
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import LabelEncoder

dataset = pd.read_csv("nb.csv")
X = dataset.iloc[:, :-1].values
# Keep the target one-dimensional: LabelEncoder works on a 1-D array of
# labels, so the column must not be reshaped to (n, 1) first.
y = dataset.iloc[:, 4].values
print(y)

# Replace every categorical feature column by integer codes. The encoder is
# refitted per column, so only the mapping of the last column stays in it.
labelencoder_X = LabelEncoder()
for column in range(4):
    X[:, column] = labelencoder_X.fit_transform(X[:, column])

c1 = np.array(X[:, 0])
c2 = np.array(X[:, 1])
c3 = np.array(X[:, 2])
c4 = np.array(X[:, 3])
print(c1)
print(c2)
print(c3)
print(c4)

labelencoder_y = LabelEncoder()
y = labelencoder_y.fit_transform(y)

# Re-assemble the encoded columns into one row per sample.
features = zip(c1, c2, c3, c4)
k = list(features)
m = np.array(k)

model = GaussianNB()
model.fit(m, y)
# The query is given in encoded form, one code per feature column.
answer = model.predict([[0, 2, 1, 1]])
print(answer)
14 | P a g e
17CP024 CP402
Output:
15 | P a g e
17CP024 CP402
Lab 5
Program
Code:
# k-NN preparation with min-max normalisation: both features (age, loan) of
# the training samples and of the query are rescaled to [0, 1] before the
# Euclidean distance from the query to every sample is computed.
import math

# Feature 1: ages of the training samples.
arr = [25, 35, 45, 20, 35, 52, 23, 40, 60, 48, 33]
que = int(input("enter age = "))
mn = min(arr)
mx = max(arr)
d = mx - mn
# Min-max normalisation: (value - min) / (max - min).
ans = [(age - mn) / d for age in arr]
que = (que - mn) / d

# Feature 2: loan amounts. The last entry is 150000; in the listing it was
# broken across two lines, which is not valid Python.
arr2 = [40000, 60000, 80000, 20000, 120000, 18000, 95000, 62000, 100000, 220000, 150000]
# Class label of each training sample, aligned by position with arr / arr2.
arr3 = ["N", "N", "N", "N", "N", "N", "Y", "Y", "Y", "Y", "Y"]
que2 = int(input("enter Loan = "))
mn2 = min(arr2)
mx2 = max(arr2)
d2 = mx2 - mn2
ans2 = [(loan - mn2) / d2 for loan in arr2]
que2 = (que2 - mn2) / d2

# Vote counters; the voting step itself is not part of the visible listing.
No = 0
Yes = 0
# Euclidean distance in the normalised feature space, one entry per sample.
dist = [math.sqrt((a - que) ** 2 + (b - que2) ** 2) for a, b in zip(ans, ans2)]
Output:
17 | P a g e
17CP024 CP402
Lab 6
Aim: Simple k-NN classification algorithm
# Simple k-NN: classify the query (age 48, loan 142000) by majority vote of
# its k nearest training samples, using Euclidean distance on the raw values.
import math

# Feature 1: ages of the training samples.
ans = [25, 35, 45, 20, 35, 52, 23, 40, 60, 48, 33]
que = 48
# Feature 2: loan amounts. The last entry is 150000; in the listing it was
# broken across two lines, which is not valid Python.
ans2 = [40000, 60000, 80000, 20000, 120000, 18000, 95000, 62000, 100000, 220000, 150000]
que2 = 142000
# Class label of each training sample, aligned by position with ans / ans2.
arr3 = ["N", "N", "N", "N", "N", "N", "Y", "Y", "Y", "Y", "Y"]
No = 0
Yes = 0

dist = []
for i in range(len(ans)):
    an = math.sqrt((ans[i] - que) ** 2 + (ans2[i] - que2) ** 2)
    dist.append(an)

k = int(input("enter value of k = "))

# Rank the sample indices by distance instead of deleting entries from dist:
# deleting shifts the remaining positions, so they stop lining up with arr3
# and later neighbours would be given the wrong label. sorted() is stable, so
# equal distances keep their original order. Slicing caps k at the sample count.
nearest = sorted(range(len(dist)), key=lambda idx: dist[idx])[:k]
for rank, idx in enumerate(nearest):
    print("min number " + str(rank) + " is " + str(dist[idx]))
    label = arr3[idx]
    if label == "N":
        No = No + 1
    if label == "Y":
        Yes = Yes + 1

print("Yes is ", str(Yes))
print("No is ", str(No))
# A tie is reported as "No", as in the original listing.
if Yes > No:
    print("Answer is Yes")
else:
    print("Answer is No")
Output:
18 | P a g e
17CP024 CP402
Lab 7
Program
Code:
from itertools import chain, combinations
from collections import defaultdict
def subsets(arr):
    """Return an iterator over all non-empty combinations of `arr`,
    grouped by size from 1 up to the number of elements."""
    by_size = []
    for position, _ in enumerate(arr):
        by_size.append(combinations(arr, position + 1))
    return chain.from_iterable(by_size)
return _itemSet
def getItemSetTransactionList(data_iterator):
    """Turn raw records into frozenset transactions and collect the 1-itemsets.

    Returns a pair (itemSet, transactionList): itemSet is a set holding one
    single-element frozenset per distinct item, transactionList holds one
    frozenset per record, in input order.
    """
    transactionList = [frozenset(record) for record in data_iterator]
    itemSet = {
        frozenset([item])
        for transaction in transactionList
        for item in transaction
    }
    return itemSet, transactionList
def retsupport(item):
return float(freqSet[item])
toRetItems = []
for key, value in largeSet.items():
toRetItems.extend([(tuple(item), retsupport(item))
for item in value])
toRetRules = []
for key, value in list(largeSet.items())[1:]:
for item in value:
_subsets = map(frozenset, [x for x in subsets(item)])
for element in _subsets:
remain = item.difference(element)
if((len(element)+len(remain))==(k-2)):
if len(remain) > 0:
confidence = retsupport(item)/retsupport(element)
if confidence >= minConfidence:
toRetRules.append(((tuple(element),
tuple(remain)),
confidence))
return toRetItems, toRetRules
def printResults(rules):
    """Print a header followed by one line per rule, ordered by ascending
    confidence. Each entry of `rules` is ((antecedent, consequent), confidence)."""
    print("\n------------------------ RULES:")
    ordered = sorted(rules, key=lambda entry: entry[1])
    for (pre, post), confidence in ordered:
        print("Rule: %s ==> %s , %.3f" % (str(pre), str(post), confidence))
if __name__ == "__main__":
dataset = [
['bread', 'milk'],
['bread', 'diaper', 'beer', 'egg'],
['milk', 'diaper', 'beer', 'cola'],
['bread', 'milk', 'diaper', 'beer'],
['bread', 'milk', 'diaper', 'cola'],
]
20 | P a g e
17CP024 CP402
minSupport = 2
minConfidence = 0.60
Output:
21 | P a g e
17CP024 CP402
Lab 8
Program
Code:
import csv
import numpy as np
def Frequency(data, mfreq, combi_values):
    """Return the items of `combi_values` that occur in at least `mfreq`
    transactions, ordered by descending transaction count.

    data         -- iterable of transactions; each must support `in`
    mfreq        -- minimum number of transactions an item must appear in
    combi_values -- candidate items; not modified

    Items with the same count keep their relative order from `combi_values`.
    """
    # rep[j] = number of transactions containing combi_values[j].
    rep = [0] * len(combi_values)
    for transaction in data:
        for j, candidate in enumerate(combi_values):
            if candidate in transaction:
                rep[j] += 1

    # Drop candidates below the threshold (previously their indices were
    # collected but never used, and the result list was never created).
    kept = [j for j, count in enumerate(rep) if count >= mfreq]
    # Stable sort on the negated count: highest count first, ties in input order.
    kept.sort(key=lambda j: -rep[j])
    new_list = [combi_values[j] for j in kept]
    return new_list
def Unique(data):
    """Return the distinct cell values of `data` in first-seen order,
    leaving out the empty string."""
    seen = []
    for cell in (entry for row in data for entry in row):
        if cell not in seen:
            seen.append(cell)
    # Each value occurs at most once in `seen`, so filtering removes at most one entry.
    return [cell for cell in seen if cell != '']
#FPTREE:
def fptree(Data, mfreq, uni_values):
    """Print the frequent patterns of `Data` and return per-depth item counts.

    Data       -- iterable of transactions; each must support `in`
    mfreq      -- minimum count for an item or pattern to be reported
    uni_values -- distinct items occurring in `Data`

    Returns a list with one entry per depth; entry d is a list of
    [item, count] pairs counting how often `item` is the d-th frequent item
    of a transaction. The frequent patterns themselves are only printed.
    """
    # Step 1: frequent single items, as ordered by Frequency().
    fp1 = Frequency(Data, mfreq, uni_values)

    # Step 2: rewrite every transaction as its frequent items in fp1 order.
    freq_pattern = []
    maxl = 0
    for transaction in Data:
        entry = [item for item in fp1 if item in transaction]
        maxl = max(maxl, len(entry))
        freq_pattern.append(entry)

    # Step 3: count, per depth, how often each item appears at that position.
    ftree = [[] for _ in range(maxl)]
    for entry in freq_pattern:
        for depth, item in enumerate(entry):
            for node in ftree[depth]:
                if node[0] == item:
                    node[1] += 1
                    break
            else:
                # Item not yet recorded at this depth.
                ftree[depth].append([item, 1])

    # Step 4: for every frequent item except the first, count how often each
    # frequent item precedes it inside the ordered transactions.
    values = []
    for target in fp1[1:]:
        dic = dict.fromkeys(fp1, 0)
        for entry in freq_pattern:
            if target in entry:
                prefix = entry[:entry.index(target)]
                for item in dic:
                    if item in prefix:
                        dic[item] += 1
        values.append(dic)

    # Step 5: turn the prefix counts into frequent patterns. values[idx]
    # belongs to fp1[idx + 1]; the position is taken from enumerate because
    # looking the table up by equality returns the first equal table and
    # would attribute duplicate tables to the wrong item.
    frp = []
    for idx, counts in enumerate(values):
        target = fp1[idx + 1]
        ele = []
        rep = []
        for item, support in counts.items():
            if support >= mfreq:
                ele.append([item, target, support])
                rep.append(item)
        if len(rep) > 1:
            # Several frequent predecessors: count the transactions in which
            # all of them precede the target together.
            count = 0
            for entry in freq_pattern:
                if target in entry:
                    prefix = entry[:entry.index(target)]
                    if all(item in prefix for item in rep):
                        count += 1
            if count >= mfreq:
                rep.extend([target, count])
                ele.append(rep)
        frp.append(ele)

    print("\n")
    print("-------------------Frequent Pattern--------------------")
    for pattern in frp[::-1]:
        print(pattern)
    return ftree
# Driver: load the transactions, ask for the minimum frequency, run the
# FP-growth routine and print the returned structure level by level.
with open('Transactions.csv', 'r') as file:
    Data_Matrix = [row for row in csv.reader(file)]
res = []
fData = np.array(Data_Matrix)
min_freq = int(input("Enter Min. Frequency : "))
Values = Unique(fData)
ans = fptree(fData, min_freq, Values)
print("\n")
print("-------------------------FP Tree-------------------------")
for level, nodes in enumerate(ans):
    print("lEVEL :", level, nodes)
Output:
25 | P a g e
17CP024 CP402
Lab 9
Program
Code:
import math
# NOTE(review): `pd`, `numpy` and `plt` are used below but no import for them
# is visible in this listing; `k` and `centroids` are not defined here either.
# They must be provided before this point for the script to run.
data = pd.read_csv("kmedoids.csv")
# Names of the two columns used as plot coordinates.
X = "x1"
Y = "x2"
print(data[[X, Y]])
# Give the rows labelled 0 .. k-1 the cluster numbers 1 .. k.
for i in range(k):
    data.loc[i, 'cluster'] = i + 1
print(data)
print(centroids.loc[:, X:Y])
# random colors
# One random 3-component colour per cluster, shaped (1, 3).
colors = [numpy.random.random(3).reshape(1, -1) for i in range(k)]
# plot cluster data
plt.figure(figsize=(6, 6))
# One scatter call per cluster number, each drawn in that cluster's colour.
for i in range(1, k + 1):
    plt.scatter(data.loc[data['cluster'] == i, X], data.loc[data['cluster']
    == i, Y], s=100, c=colors[i - 1])
plt.xlabel(X)
plt.ylabel(Y)
plt.title('Visualization of raw data')
plt.show()
Output:
27 | P a g e
17CP024 CP402
28 | P a g e
17CP024 CP402
Lab 10
Program
Code:
from itertools import combinations
def distance(p1, p2):
    """Return the Manhattan distance between two points, using their first
    two coordinates."""
    return abs(p1[0] - p2[0]) + abs(p1[1] - p2[1])
dataset=[
[2,6],
[3,8],
[4,7],
[6,2],
[6,4],
[7,3],
[8,5],
[7,6],
[2, 6],
[3, 8],
[4, 7],
[6, 2],
[6, 4],
[7, 3],
[8, 5],
[7, 6]]
30 | P a g e