Big Data Practical
INDEX
1. Naïve Bayes
2. K-Means
3. K-Nearest Neighbor
4. Apriori Algorithm
5. DBSCAN
6. Decision Tree
7. Random Forest
8. Linear Regression
9. Support Vector Machine
1. Naïve Bayes
CODE
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Load the iris dataset and split into train/test sets
iris = load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train a Gaussian Naive Bayes classifier and predict on the test set
model = GaussianNB()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
OUTPUT:
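For reference, a minimal sketch of inspecting the model's per-class probabilities, assuming the fitted model, X_test, and iris objects from the code above (the choice of the first three test samples is arbitrary):

# Posterior probability of each class for the first three test samples
for row, pred in zip(model.predict_proba(X_test[:3]), model.predict(X_test[:3])):
    print(dict(zip(iris.target_names, row.round(3))), "->", iris.target_names[pred])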
2. K-Means
CODE
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

# Generating a small random dataset
X_small = np.array([[1, 2], [1.5, 1.8], [5, 8], [8, 8], [1, 0.6], [9, 11]])
kmeans_small = KMeans(n_clusters=2, n_init=10, random_state=42)
kmeans_small.fit(X_small)
print("Cluster centroids:")
print(kmeans_small.cluster_centers_)

# Plot each cluster's points and mark the centroids
for i in range(kmeans_small.n_clusters):
    cluster_samples = X_small[kmeans_small.predict(X_small) == i]
    plt.scatter(cluster_samples[:, 0], cluster_samples[:, 1], label=f'Cluster {i}')
plt.scatter(*kmeans_small.cluster_centers_.T, marker='x', c='red', label='Centroids')
plt.xlabel('X')
plt.ylabel('Y')
plt.legend()
plt.show()
OUTPUT:
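As a supplementary sketch, the elbow method can suggest a suitable cluster count by plotting K-Means inertia against k; the range of k values here is an illustrative choice.

import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

X_small = np.array([[1, 2], [1.5, 1.8], [5, 8], [8, 8], [1, 0.6], [9, 11]])

# Within-cluster sum of squares (inertia) for k = 1..5
inertias = [KMeans(n_clusters=k, n_init=10, random_state=42).fit(X_small).inertia_ for k in range(1, 6)]
plt.plot(range(1, 6), inertias, marker='o')
plt.xlabel('k')
plt.ylabel('Inertia')
plt.title('Elbow Method')
plt.show()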
3. K-Nearest Neighbor
CODE
import math
import random
import matplotlib.pyplot as plt

# Euclidean distance between two points
def distance(p, q):
    dis_cal = math.sqrt((p[0] - q[0]) ** 2 + (p[1] - q[1]) ** 2)
    return dis_cal

# Generate k random 2D coordinates (the random integer range is an assumption)
k = 5
a = []
print(a)
for i in range(k):
    j = []
    for _ in range(2):
        x = random.randint(0, 10)
        j.append(x)
    a.append(j)
print(a)

# Plot the generated coordinates
plt.scatter([p[0] for p in a], [p[1] for p in a])
plt.xlabel('x')
plt.ylabel('y')
plt.title('Coordinates')
plt.show()
OUTPUT:
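Building on the distance() helper above, a minimal from-scratch K-Nearest Neighbor classification sketch; the labeled points and the query point are illustrative assumptions:

from collections import Counter

# Hypothetical labeled points and a query point (illustrative values)
train_points = [[1, 2], [2, 3], [3, 1], [6, 5], [7, 7], [8, 6]]
train_labels = ['A', 'A', 'A', 'B', 'B', 'B']
query = [5, 5]

def knn_predict(query, points, labels, k=3):
    # Take the k points closest to the query, then majority-vote their labels
    nearest = sorted(zip(points, labels), key=lambda pl: distance(query, pl[0]))[:k]
    return Counter(label for _, label in nearest).most_common(1)[0][0]

print(knn_predict(query, train_points, train_labels))  # the three nearest points are all 'B'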
4. Apriori Algorithm
CODE
from itertools import combinations
from collections import defaultdict

def apriori(dataset, min_support):
    # Count support for individual items
    item_counts = defaultdict(int)
    for transaction in dataset:
        for item in transaction:
            item_counts[item] += 1
    # Keep frequent 1-itemsets, then grow itemsets level by level
    frequent_items = [frozenset([i]) for i, c in item_counts.items() if c >= min_support]
    frequent_itemsets = []
    k = 2
    while frequent_items:
        frequent_itemsets.extend(frequent_items)
        # Candidate k-itemsets built from items in the frequent (k-1)-itemsets
        items = sorted(set().union(*frequent_items))
        candidate_supports = defaultdict(int)
        for candidate_itemset in map(frozenset, combinations(items, k)):
            for transaction in dataset:
                if candidate_itemset.issubset(transaction):
                    candidate_supports[candidate_itemset] += 1
        frequent_items = [c for c, s in candidate_supports.items() if s >= min_support]
        k += 1
    return frequent_itemsets

# Example dataset (only the first transaction survives; the rest are assumed for a runnable demo)
dataset = [
    {'bread', 'milk'},
    {'bread', 'butter', 'milk'},
    {'milk', 'butter'},
    {'bread', 'butter', 'milk'},
]
min_support = 3
print("Frequent itemsets:")
for itemset in apriori(dataset, min_support):
    print(set(itemset))
OUTPUT:
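Frequent itemsets are usually turned into association rules; a minimal sketch, assuming the apriori() function and dataset defined above (min_confidence is an illustrative threshold):

from itertools import combinations

def support(itemset, transactions):
    # Number of transactions containing the itemset
    return sum(1 for t in transactions if itemset.issubset(t))

def association_rules(frequent_itemsets, transactions, min_confidence=0.7):
    rules = []
    for itemset in frequent_itemsets:
        if len(itemset) < 2:
            continue
        # Split each frequent itemset into antecedent -> consequent and keep confident rules
        for r in range(1, len(itemset)):
            for antecedent in map(frozenset, combinations(itemset, r)):
                conf = support(itemset, transactions) / support(antecedent, transactions)
                if conf >= min_confidence:
                    rules.append((set(antecedent), set(itemset - antecedent), conf))
    return rules

for a, c, conf in association_rules(apriori(dataset, min_support), dataset):
    print(a, "->", c, f"(confidence {conf:.2f})")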
5. DBSCAN
CODE
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN
from sklearn.datasets import make_moons

# Generate a two-moons dataset
X, _ = make_moons(n_samples=300, noise=0.05, random_state=42)

# Initialize DBSCAN and fit it to the data
dbscan = DBSCAN(eps=0.2, min_samples=5)
dbscan.fit(X)
labels = dbscan.labels_
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)  # label -1 marks noise
n_noise_ = list(labels).count(-1)
print("Estimated clusters:", n_clusters_, "| Noise points:", n_noise_)
plt.figure(figsize=(8, 6))
plt.scatter(X[:, 0], X[:, 1], c=labels)
plt.title('DBSCAN Clustering')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.show()
OUTPUT:
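A common heuristic for choosing eps is the k-distance plot: sort every point's distance to its k-th nearest neighbor and look for the knee. A minimal sketch, assuming the X generated above (k=5 mirrors min_samples):

import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import NearestNeighbors

# Sorted distance from each point to its 5th nearest neighbor
distances, _ = NearestNeighbors(n_neighbors=5).fit(X).kneighbors(X)
plt.plot(np.sort(distances[:, -1]))
plt.xlabel('Points sorted by 5th-NN distance')
plt.ylabel('Distance')
plt.title('k-distance plot for choosing eps')
plt.show()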
6. Decision Tree
CODE
from sklearn.tree import DecisionTreeClassifier, plot_tree
import matplotlib.pyplot as plt
import numpy as np

# Feature matrix: four categorical features encoded as integers 0-2
X = np.array([
[0, 0, 0, 0],
[0, 0, 0, 1],
[1, 0, 0, 0],
[2, 1, 0, 0],
[2, 2, 1, 0],
[2, 2, 1, 1],
[1, 2, 1, 1],
[0, 1, 0, 0],
[0, 2, 1, 0],
[2, 1, 1, 0],
[0, 1, 1, 1],
[1, 1, 0, 1],
[1, 0, 1, 0],
[2, 1, 0, 1]
])
y = np.array([0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0])
clf = DecisionTreeClassifier(random_state=42)

# Train the classifier
clf.fit(X, y)

# Visualize the trained tree
plt.figure(figsize=(12, 8))
plot_tree(clf, filled=True)
plt.show()

# Classify one random sample
new_data = np.random.randint(3, size=(1, 4))  # Random integers between 0 and 2 for each feature
prediction = clf.predict(new_data)
if prediction[0] == 1:
    print("Predicted class: 1")
else:
    print("Predicted class: 0")
OUTPUT:
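The learned splits can also be printed as text rules with sklearn's export_text; a minimal sketch, assuming the clf fitted above (the feature names are placeholder labels):

from sklearn.tree import export_text

# Text rendering of the fitted tree; feature names are placeholders, not from the original data
print(export_text(clf, feature_names=['f0', 'f1', 'f2', 'f3']))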
7. Random Forest
CODE
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Load the iris dataset and split into train/test sets
iris = load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train a random forest and predict on the test set
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:")
print(classification_report(y_test, y_pred))
OUTPUT:
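A minimal follow-up sketch of the forest's impurity-based feature importances, assuming the clf and iris objects from the code above:

# Which features drive the forest's decisions
for name, importance in zip(iris.feature_names, clf.feature_importances_):
    print(f"{name}: {importance:.3f}")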
8. Linear Regression
CODE
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Generate a synthetic housing dataset (the price column is assumed: linear in size plus noise)
np.random.seed(42)
data = pd.DataFrame({
    'size_sqft': np.random.randint(800, 2500, 100),  # Random size of the house in square feet (800-2500)
})
data['price'] = data['size_sqft'] * 150 + np.random.normal(0, 20000, 100)

X = data[['size_sqft']]
y = data['price']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Calculate Mean Squared Error (MSE)
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

# Plot actual vs. predicted prices
plt.figure(figsize=(10, 6))
plt.scatter(y_test, y_pred)
plt.xlabel('Actual Price')
plt.ylabel('Predicted Price')
plt.grid(True)
plt.show()
OUTPUT:
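The fitted coefficients are often as informative as the error itself; a minimal sketch, assuming the model trained above:

# Slope (price per extra square foot) and intercept of the fitted line
print("Coefficient:", model.coef_[0])
print("Intercept:", model.intercept_)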
9. Support Vector Machine
CODE
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

iris = load_iris()
X = iris.data[:, :2]  # Use only the first two features for visualization
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
clf = SVC(kernel='linear')
clf.fit(X_train, y_train)

# Make predictions and calculate accuracy
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Plot the decision boundary on a mesh over the feature space
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02), np.arange(y_min, y_max, 0.02))
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
plt.figure(figsize=(8, 6))
plt.contourf(xx, yy, Z, alpha=0.3)
plt.scatter(X[:, 0], X[:, 1], c=y, edgecolors='k')
plt.xlabel(iris.feature_names[0])
plt.ylabel(iris.feature_names[1])
plt.grid(True)
plt.show()
OUTPUT:
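As a brief extension, other kernels can be compared on the same split; a minimal sketch, assuming X_train, X_test, y_train, and y_test from the code above:

from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Accuracy of three common kernels on the same two-feature split
for kernel in ['linear', 'poly', 'rbf']:
    model = SVC(kernel=kernel).fit(X_train, y_train)
    print(kernel, "accuracy:", round(accuracy_score(y_test, model.predict(X_test)), 3))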