EX NO : 1 Implementation of Breadth-First Search (BFS) and Depth-First Search (DFS)
DATE:
Aim:
To implement Breadth-First Search (BFS) and Depth-First Search (DFS) algorithms in
Python.
Code:
from collections import deque
import networkx as nx
import matplotlib.pyplot as plt

# Sample adjacency list for the graph (the entries for A-F are assumed example
# edges, reconstructed so the traversals and drawing below can run; adjust as needed)
graph = {
    'A': ['B', 'C'],
    'B': ['D', 'E'],
    'C': ['F'],
    'D': [],
    'E': ['G', 'H'],
    'F': [],
    'G': [],
    'H': []
}
def bfs(start):
    visited = set()
    queue = deque([start])
    traversal_order = []
    while queue:
        node = queue.popleft()
        if node not in visited:
            traversal_order.append(node)
            visited.add(node)
            queue.extend(graph[node])
    print("BFS Traversal:", " → ".join(traversal_order))

def dfs(start, visited=None, traversal_order=None):
    if visited is None:
        visited = set()
    if traversal_order is None:
        traversal_order = []
    if start not in visited:
        traversal_order.append(start)
        visited.add(start)
        for neighbor in graph[start]:
            dfs(neighbor, visited, traversal_order)
    return traversal_order
def draw_tree(graph, root):
    G = nx.DiGraph()
    for node in graph:
        for neighbor in graph[node]:
            G.add_edge(node, neighbor)
    pos = hierarchy_pos(G, root)
    plt.figure(figsize=(8, 5))
    nx.draw(G, pos, with_labels=True, node_color='skyblue', edge_color='black',
            node_size=2000, font_size=12, font_weight="bold", arrows=True)
    plt.title("Tree Representation of Graph")
    plt.show()
def hierarchy_pos(G, root, width=2., vert_gap=1., xcenter=0.5, pos=None, parent=None, level=0):
    if pos is None:
        pos = {root: (xcenter, -level * vert_gap)}
    neighbors = list(G.neighbors(root))
    if parent is not None and parent in neighbors:
        neighbors.remove(parent)
    if neighbors:
        dx = width / len(neighbors)
        next_x = xcenter - (width - dx) / 2
        for neighbor in neighbors:
            pos[neighbor] = (next_x, -(level + 1) * vert_gap)
            next_x += dx
        for neighbor in neighbors:
            pos = hierarchy_pos(G, neighbor, width=dx, vert_gap=vert_gap,
                                xcenter=pos[neighbor][0], pos=pos, parent=root, level=level + 1)
    return pos
def main():
    print("BFS Traversal from node A:")
    bfs('A')
    print("\nDFS Traversal from node A:")
    dfs_order = dfs('A')
    print("DFS Traversal:", " → ".join(dfs_order))
    draw_tree(graph, 'A')

if __name__ == '__main__':
    main()
Output:
Result:
The BFS and DFS algorithms were successfully implemented in Python. BFS explores the graph level by level, while DFS goes as deep as possible along each branch before backtracking.
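The recursive DFS above can also be written iteratively with an explicit stack; a minimal sketch, assuming the same graph dictionary defined earlier:

def dfs_iterative(start):
    # Explicit-stack DFS; a node is recorded when it is popped from the stack.
    visited = set()
    stack = [start]
    order = []
    while stack:
        node = stack.pop()
        if node not in visited:
            visited.add(node)
            order.append(node)
            # Push neighbours in reverse so the first-listed neighbour is expanded first.
            stack.extend(reversed(graph[node]))
    return order

print("Iterative DFS:", " → ".join(dfs_iterative('A')))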
EX NO : 2 Implementation of Informed Search Algorithms (A*, Memory-Bounded A*)
DATE:
Aim:
To implement A* and Memory-Bounded A* search algorithms for optimal pathfinding and
decision-making in an informed search environment.
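A* orders its frontier by the evaluation function f(n) = g(n) + h(n), where g(n) is the path cost accumulated from the start node and h(n) is the heuristic estimate of the remaining cost to the goal. With the graph defined below, for example, expanding from A gives f(B) = 4 + 6 = 10 and f(C) = 2 + 2 = 4, so C is taken from the frontier first.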
Code:
import heapq
class Graph:
    def __init__(self):
        self.edges = {}
        self.heuristics = {}

    def add_edge(self, node, neighbor, cost):
        self.edges.setdefault(node, []).append((neighbor, cost))

    def set_heuristic(self, node, value):
        self.heuristics[node] = value

    def astar(self, start, goal):
        # Frontier entries: (f = g + h, g, node, path to the node's parent).
        open_list = [(self.heuristics.get(start, float('inf')), 0, start, [])]
        visited = set()
        while open_list:
            f, g, current, path = heapq.heappop(open_list)
            if current in visited:
                continue
            visited.add(current)
            path = path + [current]
            if current == goal:
                return path, g
            for neighbor, cost in self.edges.get(current, []):
                new_g = g + cost
                f_score = new_g + self.heuristics.get(neighbor, float('inf'))
                heapq.heappush(open_list, (f_score, new_g, neighbor, path))
        return None, float('inf')
    def memory_bounded_astar(self, start, goal, memory_limit=10):
        # A* variant that keeps at most memory_limit expanded nodes in memory.
        open_list = [(self.heuristics.get(start, float('inf')), 0, start, [])]
        visited = set()
        memory = {}
        while open_list:
            if len(memory) >= memory_limit:
                # Remove the node with the highest cost to stay within memory
                max_cost_node = max(memory, key=memory.get)
                del memory[max_cost_node]
            f, g, current, path = heapq.heappop(open_list)
            if current in visited:
                continue
            visited.add(current)
            memory[current] = g
            path = path + [current]
            if current == goal:
                return path, g
            for neighbor, cost in self.edges.get(current, []):
                new_g = g + cost
                f_score = new_g + self.heuristics.get(neighbor, float('inf'))
                heapq.heappush(open_list, (f_score, new_g, neighbor, path))
        return None, float('inf')

def print_result(name, path, cost):
    # Helper to display the path and cost found by each algorithm.
    if path is None:
        print(f"{name}: no path found")
    else:
        print(f"{name}: path = {' -> '.join(path)}, cost = {cost}")
# Graph Definition
graph = Graph()
graph.add_edge('A', 'B', 4)
graph.add_edge('A', 'C', 2)
graph.add_edge('B', 'D', 5)
graph.add_edge('B', 'E', 10)
graph.add_edge('C', 'D', 8)
graph.add_edge('D', 'E', 2)
graph.add_edge('E', 'F', 3)
graph.set_heuristic('A', 7)
graph.set_heuristic('B', 6)
graph.set_heuristic('C', 2)
graph.set_heuristic('D', 3)
graph.set_heuristic('E', 1)
graph.set_heuristic('F', 0)
astar_path, astar_cost = graph.astar('A', 'F')
memory_bounded_path, memory_bounded_cost = graph.memory_bounded_astar('A', 'F',
memory_limit=3)
print_result("A*", astar_path, astar_cost)
print_result("Memory-Bounded A*", memory_bounded_path, memory_bounded_cost)
Output:
Result:
Both A* and Memory-Bounded A* were implemented successfully. A* finds the shortest path optimally, while Memory-Bounded A* searches efficiently under a fixed memory limit.
EX NO : 3 Implementation of Naïve Bayes Classification Algorithm
DATE:
Aim:
To implement the Naïve Bayes Classification Algorithm for predicting class labels using a
given dataset.
Algorithm:
1. Load the dataset and preprocess it.
2. Split the dataset into training and testing sets.
3. Apply the Naïve Bayes formula:
P(Class | Features) = [P(Features | Class) × P(Class)] / P(Features)
4. Train the Naïve Bayes classifier using training data.
5. Predict the class labels for test data.
6. Evaluate the model using accuracy and other metrics.
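As a quick illustration of step 3 with the dataset used below: PlayTennis = 1 appears in 9 of the 14 rows, so the prior in the formula is P(PlayTennis = 1) = 9/14 ≈ 0.64; this prior is multiplied by the per-feature likelihoods and normalized by P(Features) to give the posterior for each class.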
Code :
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report
data = {
'Outlook': [0, 0, 1, 2, 2, 2, 1, 0, 0, 2, 0, 1, 1, 2],
'Temperature': [0, 0, 0, 1, 2, 2, 2, 1, 2, 1, 1, 1, 0, 1],
'Humidity': [0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1],
'Wind': [0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1],
'PlayTennis': [0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0] # 0 = No, 1 = Yes
}
df = pd.DataFrame(data)
X = df.drop('PlayTennis', axis=1)
y = df['PlayTennis']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = GaussianNB()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print(" Predicted Values:", y_pred)
print(" Actual Values: ", y_test.values)
print("Accuracy Score: ", round(accuracy, 2))
print("\n Classification Report:\n", report)
Output (Example)
Result:
The Naïve Bayes classifier was successfully implemented, trained, and tested. The model
achieved 100% accuracy on this small dataset, indicating correct predictions.
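Since all four encoded features are categorical rather than continuous, a variant of the same pipeline using CategoricalNB (a sketch, not part of the recorded run) may model this data more naturally:

from sklearn.naive_bayes import CategoricalNB

# Same train/test split as above; each feature is treated as a discrete category.
# min_categories=3 guards against category values that appear only in the test split.
cat_model = CategoricalNB(min_categories=3)
cat_model.fit(X_train, y_train)
print("CategoricalNB accuracy:", accuracy_score(y_test, cat_model.predict(X_test)))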
EX NO : 4 Implementation of Bayesian Networks
DATE:
Aim:
To implement Bayesian Networks for probabilistic reasoning and decision-making using
Python.
Code:
from pgmpy.models import DiscreteBayesianNetwork  # DiscreteBayesianNetwork is used instead of the older BayesianNetwork class
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import VariableElimination
model = DiscreteBayesianNetwork([
    ('Cloudy', 'Rain'),
    ('Cloudy', 'Sprinkler'),
    ('Sprinkler', 'WetGrass'),
    ('Rain', 'WetGrass')
])
cpd_cloudy = TabularCPD(variable='Cloudy', variable_card=2, values=[[0.5], [0.5]])
cpd_rain = TabularCPD(
variable='Rain', variable_card=2,
values=[[0.8, 0.2], # Rain = 0 (False)
[0.2, 0.8]], # Rain = 1 (True)
evidence=['Cloudy'], evidence_card=[2]
)
cpd_sprinkler = TabularCPD(
variable='Sprinkler', variable_card=2,
values=[[0.6, 0.1], # Sprinkler = 0
[0.4, 0.9]], # Sprinkler = 1
evidence=['Cloudy'], evidence_card=[2]
)
cpd_wetgrass = TabularCPD(
variable='WetGrass', variable_card=2,
values=[[1.0, 0.1, 0.1, 0.01], # WetGrass = 0 (dry)
[0.0, 0.9, 0.9, 0.99]], # WetGrass = 1 (wet)
evidence=['Sprinkler', 'Rain'], evidence_card=[2, 2]
)
model.add_cpds(cpd_cloudy, cpd_rain, cpd_sprinkler, cpd_wetgrass)
assert model.check_model()
inference = VariableElimination(model)
prob = inference.query(variables=['WetGrass'], evidence={'Cloudy': 1})
print(prob)
Result:
The Bayesian Network was successfully implemented. The model performs probabilistic reasoning over the given conditions and supports inference queries.
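The same VariableElimination object also answers diagnostic queries; a small sketch (reusing the model defined above) that asks how likely rain is once wet grass is observed:

# Diagnostic query: probability of Rain given that the grass is wet.
rain_given_wet = inference.query(variables=['Rain'], evidence={'WetGrass': 1})
print(rain_given_wet)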
EX NO : 5 Implementation of Regression Models
DATE:
Aim:
To implement Regression Models to analyze relationships between variables and predict
continuous outcomes using Python.
Algorithm :
1. Load the dataset and preprocess it.
2. Split the dataset into training and testing sets.
3. Select a regression model (e.g., Linear Regression, Polynomial Regression).
4. Train the model using the training dataset.
5. Predict the target variable on the test dataset.
6. Evaluate the model using metrics like Mean Squared Error (MSE) and R² score.
7. Display results using visualizations.
Code:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.datasets import fetch_california_housing
data = fetch_california_housing()
df = pd.DataFrame(data.data, columns=data.feature_names)
df['Target'] = data.target
# Selecting 'AveRooms' (Average Number of Rooms) as the independent variable
X = df[['AveRooms']]
y = df['Target'] # House price as dependent variable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
plt.scatter(X_test, y_test, color='blue', label='Actual Values')
plt.scatter(X_test, y_pred, color='red', label='Predicted Values', alpha=0.5)
plt.plot(X_test, y_pred, color='green', linewidth=2, label='Regression Line')
plt.xlabel("Average Number of Rooms")
plt.ylabel("House Price")
plt.title("Linear Regression on California Housing Data (AveRooms)")
plt.legend()
plt.show()
print("Mean Squared Error:", mse)
print("R² Score:", r2)
print("Intercept:", model.intercept_)
print("Coefficient:", model.coef_)
Output :
Result:
The Linear Regression Model was successfully implemented. The model predicts
continuous values based on given input features and evaluates performance using MSE and
R² score.
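Step 3 of the algorithm also mentions Polynomial Regression; a minimal sketch (reusing the same X_train/X_test split, with the degree chosen arbitrarily) of how the model could be extended:

from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline

# Degree-2 polynomial regression on the same single feature.
poly_model = make_pipeline(PolynomialFeatures(degree=2), LinearRegression())
poly_model.fit(X_train, y_train)
poly_pred = poly_model.predict(X_test)
print("Polynomial (degree 2) MSE:", mean_squared_error(y_test, poly_pred))
print("Polynomial (degree 2) R²:", r2_score(y_test, poly_pred))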
EX NO : 6 Build Decision Trees and Random Forests
DATE:
Aim:
To build and evaluate Decision Tree and Random Forest models for classification.
Algorithm:
1. Import libraries and load dataset
2. Preprocess data (handle nulls, encode if needed)
3. Split into train/test
4. Fit Decision Tree model
5. Fit Random Forest model
6. Predict and evaluate accuracy
Code:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
data = load_iris()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
dt_model = DecisionTreeClassifier(max_depth=4)
dt_model.fit(X_train, y_train)
dt_pred = dt_model.predict(X_test)
dt_acc = accuracy_score(y_test, dt_pred)
rf_model = RandomForestClassifier(max_depth=4, n_estimators=100)
rf_model.fit(X_train, y_train)
rf_pred = rf_model.predict(X_test)
rf_acc = accuracy_score(y_test, rf_pred)
print("Decision Tree Accuracy:", dt_acc)
print("Random Forest Accuracy:", rf_acc)
dt_cm = confusion_matrix(y_test, dt_pred)
plt.figure(figsize=(6, 4))
sns.heatmap(dt_cm, annot=True, fmt="d", cmap="Blues", xticklabels=data.target_names,
yticklabels=data.target_names)
plt.title("Decision Tree Confusion Matrix")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.show()
rf_cm = confusion_matrix(y_test, rf_pred)
plt.figure(figsize=(6, 4))
sns.heatmap(rf_cm, annot=True, fmt="d", cmap="Blues", xticklabels=data.target_names,
yticklabels=data.target_names)
plt.title("Random Forest Confusion Matrix")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.show()
Output:
Result:
Decision Tree and Random Forest classifiers were built and evaluated successfully; the Random Forest generally matches or exceeds the accuracy of a single Decision Tree on this dataset.
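As a follow-up check (a sketch using the rf_model fitted above), the Random Forest also reports how much each feature contributed to its decisions:

# Feature importances learned by the Random Forest.
for name, importance in zip(data.feature_names, rf_model.feature_importances_):
    print(f"{name}: {importance:.3f}")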
EX NO : 7 Build SVM models
DATE:
Aim:
To build and evaluate a Support Vector Machine (SVM) model for classification.
Algorithm :
1. Import libraries and dataset
2. Preprocess if needed
3. Split into train/test
4. Train SVM classifier
5. Predict and evaluate
Code :
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
data = load_iris()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
svm_model = SVC(kernel='rbf', C=1.0, gamma='scale')
svm_model.fit(X_train, y_train)
svm_pred = svm_model.predict(X_test)
svm_acc = accuracy_score(y_test, svm_pred)
print("SVM Accuracy:", svm_acc)
cm = confusion_matrix(y_test, svm_pred)
plt.figure(figsize=(6, 4))
sns.heatmap(cm, annot=True, cmap='Greens', fmt='d',
xticklabels=data.target_names, yticklabels=data.target_names)
plt.title("SVM Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()
Output:
Result:
SVM performs well with high accuracy on the Iris dataset.
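Different kernels can be compared on the same split; a brief sketch (other hyperparameters left at their defaults):

# Compare a few SVM kernels on the same train/test split.
for kernel in ['linear', 'poly', 'rbf']:
    clf = SVC(kernel=kernel)
    clf.fit(X_train, y_train)
    print(kernel, "accuracy:", accuracy_score(y_test, clf.predict(X_test)))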
EX NO : 8 Implement Ensembling Techniques
DATE:
Aim:
To compare the performance of ensemble learning techniques—Voting, Bagging, and
Boosting—on the Iris dataset using Logistic Regression, Decision Tree, and SVM classifiers.
Algorithm:
1. Import libraries and dataset
2. Split the dataset
3. Apply VotingClassifier (Hard/Soft)
4. Apply BaggingClassifier
5. Apply AdaBoostClassifier
6. Evaluate all models
Code:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier, BaggingClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
data = load_iris()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
lr = LogisticRegression(solver='liblinear', random_state=42)
dt = DecisionTreeClassifier(random_state=42)
svm = SVC(probability=True, kernel='rbf', random_state=42)
voting = VotingClassifier(estimators=[('LogReg', lr), ('DecisionTree', dt), ('SVM', svm)],
voting='soft')
voting.fit(X_train, y_train)
voting_pred = voting.predict(X_test)
voting_acc = accuracy_score(y_test, voting_pred)
bagging = BaggingClassifier(estimator=DecisionTreeClassifier(), n_estimators=10,
random_state=42)
bagging.fit(X_train, y_train)
bagging_pred = bagging.predict(X_test)
bagging_acc = accuracy_score(y_test, bagging_pred)
boosting = AdaBoostClassifier(n_estimators=50, random_state=42)
boosting.fit(X_train, y_train)
boosting_pred = boosting.predict(X_test)
boosting_acc = accuracy_score(y_test, boosting_pred)
print(f"Voting Classifier Accuracy: {voting_acc:.4f}")
print(f"Bagging Classifier Accuracy: {bagging_acc:.4f}")
print(f"Boosting Classifier Accuracy: {boosting_acc:.4f}")
Output:
Result:
Voting, Bagging, and Boosting classifiers were successfully implemented and tested, with
accuracy scores indicating effective classification on the Iris dataset.
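Voting can also be run in 'hard' mode, where each base classifier casts a single vote for a class label instead of averaging predicted probabilities; a small sketch reusing the estimators defined above:

# Hard (majority-label) voting with the same base estimators.
hard_voting = VotingClassifier(estimators=[('LogReg', lr), ('DecisionTree', dt), ('SVM', svm)],
                               voting='hard')
hard_voting.fit(X_train, y_train)
print("Hard Voting Accuracy:", accuracy_score(y_test, hard_voting.predict(X_test)))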
EX NO : 9 Implement clustering algorithms.
DATE:
Aim:
To apply KMeans and Agglomerative clustering on the Iris dataset and visualize the cluster
patterns using PCA.
Algorithm:
1. Import libraries and dataset
2. Apply KMeans clustering
3. Apply Agglomerative Clustering
4. Visualize clusters using PCA (optional)
5. Print cluster labels
Code:
from sklearn.datasets import load_iris
from sklearn.cluster import KMeans, AgglomerativeClustering
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
iris = load_iris()
X = iris.data
kmeans_model = KMeans(n_clusters=3, random_state=42)
kmeans_clusters = kmeans_model.fit_predict(X)
agglo_model = AgglomerativeClustering(n_clusters=3)
agglo_clusters = agglo_model.fit_predict(X)
pca = PCA(n_components=2)
X_reduced = pca.fit_transform(X)
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c=kmeans_clusters, cmap='viridis')
plt.title('KMeans Clustering')
plt.xlabel('PCA Component 1')
plt.ylabel('PCA Component 2')
plt.subplot(1, 2, 2)
plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c=agglo_clusters, cmap='plasma')
plt.title('Agglomerative Clustering')
plt.xlabel('PCA Component 1')
plt.ylabel('PCA Component 2')
plt.tight_layout()
plt.show()
Output:
Result:
Both clustering algorithms effectively grouped the data into three clusters, which were clearly
visualized using PCA-reduced features.
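Cluster quality can also be checked numerically; a short sketch (using the cluster labels produced above) with the silhouette score, where values closer to 1 indicate tighter, better-separated clusters:

from sklearn.metrics import silhouette_score

# Higher silhouette scores indicate better-defined clusters.
print("KMeans silhouette:", silhouette_score(X, kmeans_clusters))
print("Agglomerative silhouette:", silhouette_score(X, agglo_clusters))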
EX NO : 10 Implement EM for Bayesian Networks
DATE:
Aim:
To implement the Expectation-Maximization (EM) algorithm for learning the parameters of a Bayesian Network from data that contains missing values.
Algorithm:
1. Load the dataset, which contains missing entries.
2. Define the Bayesian Network structure.
3. Learn the conditional probability distributions with the EM estimator, which alternates between estimating the missing values (E-step) and re-estimating the parameters (M-step).
4. Print the learned CPDs.
5. Perform an inference query on the trained network.
Code:
import pandas as pd
from pgmpy.models import DiscreteBayesianNetwork
from pgmpy.estimators import ExpectationMaximization
from pgmpy.inference import VariableElimination
data = pd.DataFrame([
['Rainy', 'No'],
['Sunny', 'Yes'],
['Rainy', None],
['Sunny', 'Yes'],
['Rainy', 'No'],
['Sunny', None]
], columns=['Weather', 'Play'])
# Define the network structure: Weather -> Play.
model = DiscreteBayesianNetwork([('Weather', 'Play')])

# Learn the CPDs with EM (this reconstruction assumes pgmpy's
# ExpectationMaximization estimator handles the missing 'Play' entries).
model.fit(data, estimator=ExpectationMaximization)
inference = VariableElimination(model)
print("Learned CPDs:")
for cpd in model.get_cpds():
    print(cpd)
print("\nInference Result:")
# Example query (assumed): P(Play | Weather = 'Sunny').
result = inference.query(variables=['Play'], evidence={'Weather': 'Sunny'})
print(result)
Output:
Result:
The EM algorithm successfully estimated the missing data and provided conditional
probability distributions for the Bayesian Network.
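To make the E-step/M-step cycle concrete, here is a minimal hand-rolled sketch (plain Python and pandas, independent of pgmpy) that runs EM on the same six-row table to estimate P(Play = 'Yes' | Weather):

import pandas as pd

rows = [['Rainy', 'No'], ['Sunny', 'Yes'], ['Rainy', None],
        ['Sunny', 'Yes'], ['Rainy', 'No'], ['Sunny', None]]
df = pd.DataFrame(rows, columns=['Weather', 'Play'])

# Start from a uniform guess for P(Play = 'Yes' | Weather).
p_yes = {'Rainy': 0.5, 'Sunny': 0.5}
for _ in range(20):
    counts = {w: {'yes': 0.0, 'total': 0.0} for w in p_yes}
    for _, (w, play) in df.iterrows():
        counts[w]['total'] += 1
        if play == 'Yes':
            counts[w]['yes'] += 1
        elif pd.isna(play):
            # E-step: a missing value contributes its expected (fractional) count.
            counts[w]['yes'] += p_yes[w]
    # M-step: re-estimate the parameters from the completed counts.
    p_yes = {w: counts[w]['yes'] / counts[w]['total'] for w in counts}

print("Estimated P(Play = 'Yes' | Weather):", p_yes)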