ML File

FIND-S ALGORITHM

import csv

def more_general(h1, h2):
    # h1 is at least as general as h2 if every attribute matches or is a wildcard '?'
    return all(h1[i] == '?' or h2[i] == '?' or h1[i] == h2[i] for i in range(len(h1)))

def find_s(data):
    # Start with the first training instance (the first CSV column is skipped by the [1:] slice)
    most_specific = data[0][1:]
    for instance in data[1:]:
        # Generalise: keep matching attribute values, replace mismatches with '?'
        current = ['?' if instance[1:][i] != most_specific[i] else most_specific[i]
                   for i in range(len(most_specific))]
        if more_general(current, most_specific):
            most_specific = current
    return most_specific

with open('training_data.csv', 'r') as file:
    reader = csv.reader(file)
    next(reader)  # Skip header
    data = list(reader)

print("Most Specific Hypothesis:", find_s(data))


CANDIDATE ELIMINATION ALGORITHM

import csv

def more_general(h1, h2):
    # h1 is at least as general as h2 if every attribute matches or is a wildcard '?'
    return all(h1[i] == '?' or h2[i] == '?' or h1[i] == h2[i] for i in range(len(h1)))

def find_s(data):
    # Specific boundary: start with the first instance and generalise on mismatches
    most_specific = data[0][1:]
    for instance in data[1:]:
        current = ['?' if instance[1:][i] != most_specific[i] else most_specific[i]
                   for i in range(len(most_specific))]
        if more_general(current, most_specific):
            most_specific = current
    return most_specific

def find_g(data):
    # General boundary: start with the all-wildcard hypothesis and specialise where needed
    most_general = ['?'] * len(data[0][1:])
    for instance in data:
        current = [instance[1:][i] if instance[1:][i] != most_general[i] else '?'
                   for i in range(len(most_general))]
        if not more_general(most_general, current):
            most_general = current
    return most_general

with open('training_data.csv', 'r') as file:
    reader = csv.reader(file)
    next(reader)  # Skip header
    data = list(reader)

s, g = find_s(data), find_g(data)
print("Set of all hypotheses consistent with the training examples:")
for i, (s_val, g_val) in enumerate(zip(s, g), start=1):
    print(f"Attribute {i}: {s_val if g_val == '?' else g_val}")


DECISION TREE – ID3

import math

def entropy(data, target_attr):
    # Shannon entropy of the target column
    val_freq = {}
    for record in data:
        val_freq[record[target_attr]] = val_freq.get(record[target_attr], 0) + 1
    return sum(-freq / len(data) * math.log2(freq / len(data)) for freq in val_freq.values())

def info_gain(data, attr, target_attr):
    base_entropy = entropy(data, target_attr)
    attr_vals = set(record[attr] for record in data)
    # Expected entropy after splitting on attr, weighted by subset size
    exp_entropy = sum((len([rec for rec in data if rec[attr] == val]) / len(data)) *
                      entropy([rec for rec in data if rec[attr] == val], target_attr)
                      for val in attr_vals)
    return base_entropy - exp_entropy

def id3(data, attrs, target_attr):
    base_entropy = entropy(data, target_attr)
    if base_entropy == 0:
        # All records share the same label: return it as a leaf
        return next(iter(set(record[target_attr] for record in data)))
    elif len(attrs) == 0:
        # No attributes left: return the majority label
        return max(set(record[target_attr] for record in data),
                   key=[record[target_attr] for record in data].count)
    else:
        # Split on the attribute with the highest information gain
        attr_gains = [info_gain(data, attr, target_attr) for attr in attrs]
        selected_attr = attrs[attr_gains.index(max(attr_gains))]
        node = {selected_attr: {}}
        attr_values = set(record[selected_attr] for record in data)
        for value in attr_values:
            new_data = [record for record in data if record[selected_attr] == value]
            new_attrs = attrs.copy()
            new_attrs.remove(selected_attr)
            child_node = id3(new_data, new_attrs, target_attr)
            node[selected_attr][value] = child_node
        return node

def classify(tree, sample):
    for attr, values in tree.items():
        value = sample[attr]  # attr is a column index into the sample
        if value in values:
            child = values[value]
            if isinstance(child, dict):
                return classify(child, sample)
            else:
                return child

data = [['Sunny', 'Hot', 'High', 'False'], ['Sunny', 'Hot', 'High', 'True'], ['Overcast', 'Hot', 'High',
'False'], ['Rain', 'Mild', 'High', 'False'], ['Rain', 'Cool', 'Normal', 'False'], ['Rain', 'Cool', 'Normal',
'True'], ['Overcast', 'Cool', 'Normal', 'True'], ['Sunny', 'Mild', 'High', 'False'], ['Sunny', 'Cool',
'Normal', 'False'], ['Rain', 'Mild', 'Normal', 'False'], ['Sunny', 'Mild', 'Normal', 'True'],
['Overcast', 'Mild', 'High', 'True'], ['Overcast', 'Hot', 'Normal', 'False'], ['Rain', 'Mild', 'High',
'True']]

attributes = ['Outlook', 'Temperature', 'Humidity', 'Play Tennis']

tree = id3(data, [i for i in range(len(data[0])-1)], len(data[0])-1)

print("Decision Tree:", tree)

sample = ['Rain', 'Mild', 'High']

prediction = classify(tree, sample)

print("Prediction for sample", sample, ":", prediction)


BACKPROPAGATION ALGORITHM

import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Load the iris dataset and one-hot encode the target
iris = datasets.load_iris()
X, y = iris.data, iris.target
y = OneHotEncoder().fit_transform(y.reshape(-1, 1)).toarray()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training set only, then apply it to both splits
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Network architecture: 4 inputs, one hidden layer of 8 units, 3 outputs
input_size, hidden_size, output_size = 4, 8, 3
W1, b1 = np.random.randn(input_size, hidden_size), np.zeros((1, hidden_size))
W2, b2 = np.random.randn(hidden_size, output_size), np.zeros((1, output_size))

sigmoid = lambda x: 1 / (1 + np.exp(-x))
sigmoid_derivative = lambda x: x * (1 - x)  # expects sigmoid activations, not pre-activations

def backpropagation(X, y, W1, b1, W2, b2, learning_rate=0.1, epochs=10000):
    for _ in range(epochs):
        # Forward pass
        layer1 = sigmoid(np.dot(X, W1) + b1)
        layer2 = sigmoid(np.dot(layer1, W2) + b2)
        # Backward pass
        error = y - layer2
        delta2 = error * sigmoid_derivative(layer2)
        W2_grad, b2_grad = np.dot(layer1.T, delta2), np.sum(delta2, axis=0, keepdims=True)
        delta1 = np.dot(delta2, W2.T) * sigmoid_derivative(layer1)
        W1_grad, b1_grad = np.dot(X.T, delta1), np.sum(delta1, axis=0, keepdims=True)
        # Parameter updates
        W2 += learning_rate * W2_grad
        b2 += learning_rate * b2_grad
        W1 += learning_rate * W1_grad
        b1 += learning_rate * b1_grad
    return W1, b1, W2, b2

W1, b1, W2, b2 = backpropagation(X_train, y_train, W1, b1, W2, b2)

def predict(X, W1, b1, W2, b2):
    layer1 = sigmoid(np.dot(X, W1) + b1)
    layer2 = sigmoid(np.dot(layer1, W2) + b2)
    return np.argmax(layer2, axis=1)

y_pred = predict(X_test, W1, b1, W2, b2)
accuracy = np.mean(y_pred == np.argmax(y_test, axis=1))
print(f"Test accuracy: {accuracy * 100:.2f}%")


NAÏVE BAYESIAN CLASSIFIER

import csv
import math

def load_data(filename):
    return [row for row in csv.reader(open(filename))]

def split_data(data):
    # Features are all columns except the last; the last column is the class label
    features = [row[:-1] for row in data]
    labels = [row[-1] for row in data]
    return features, labels

def is_numeric(value):
    try:
        float(value)
        return True
    except ValueError:
        return False

def get_stats(values):
    # Mean and standard deviation of a numeric feature column
    values = [float(x) for x in values]
    mean = sum(values) / len(values)
    stdev = (sum((x - mean) ** 2 for x in values) / len(values)) ** 0.5
    return mean, (stdev or 1e-6)  # avoid a zero standard deviation

def pdf(x, mean, stdev):
    # Gaussian probability density
    return math.exp(-((x - mean) ** 2) / (2 * stdev ** 2)) / (stdev * math.sqrt(2 * math.pi))

def prob_cat(value, values):
    # Relative frequency of a categorical value (small floor to avoid log(0))
    return max(values.count(value) / len(values), 1e-6)

def train(features, labels):
    label_counts = {label: labels.count(label) for label in set(labels)}
    feature_stats = []
    for i in range(len(features[0])):
        stats = {}
        for label in set(labels):
            column = [row[i] for row, lab in zip(features, labels) if lab == label]
            if all(is_numeric(v) for v in column):
                stats[label] = get_stats(column)   # numeric feature: store (mean, stdev)
            else:
                stats[label] = column              # categorical feature: store raw values
        feature_stats.append(stats)
    return label_counts, feature_stats

def classify(feature_vector, label_counts, feature_stats):
    total = sum(label_counts.values())
    label_probs = {label: math.log(count / total) for label, count in label_counts.items()}
    for i, feature_value in enumerate(feature_vector):
        for label, stats in feature_stats[i].items():
            if isinstance(stats, tuple):
                mean, stdev = stats
                label_probs[label] += math.log(pdf(float(feature_value), mean, stdev))
            else:
                label_probs[label] += math.log(prob_cat(feature_value, stats))
    return max(label_probs.items(), key=lambda x: x[1])[0]

def compute_accuracy(test_features, test_labels, label_counts, feature_stats):
    correct = sum(classify(feature_vector, label_counts, feature_stats) == label
                  for feature_vector, label in zip(test_features, test_labels))
    return correct / len(test_labels)

training_data = load_data('training_data.csv')
features, labels = split_data(training_data)
label_counts, feature_stats = train(features, labels)

test_data = load_data('test_data.csv')
test_features, test_labels = split_data(test_data)
accuracy = compute_accuracy(test_features, test_labels, label_counts, feature_stats)
print(f"Accuracy: {accuracy * 100:.2f}%")


BAYESIAN NETWORK

import pandas as pd
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination

# Load the heart-disease dataset (file name assumed; adjust to the actual CSV)
heart_data = pd.read_csv('heart.csv')

# Construct the Bayesian network: every attribute is a direct parent of the target 'num'
model = BayesianModel([('age', 'num'), ('sex', 'num'), ('cp', 'num'), ('trestbps', 'num'),
                       ('chol', 'num'), ('fbs', 'num'), ('restecg', 'num'), ('thalach', 'num'),
                       ('exang', 'num'), ('oldpeak', 'num'), ('slope', 'num'), ('ca', 'num'),
                       ('thal', 'num')])

# Estimate the parameters of the Bayesian network by maximum likelihood
model.fit(heart_data, estimator=MaximumLikelihoodEstimator)

# Perform inference with variable elimination
inference = VariableElimination(model)

# Example: predict the probability of heart disease for a new patient
# (evidence values must match states that actually occur in heart_data)
patient_evidence = {'age': 50, 'sex': 1, 'cp': 3, 'trestbps': 130, 'chol': 250, 'fbs': 0,
                    'restecg': 0, 'thalach': 180, 'exang': 0, 'oldpeak': 0.8, 'slope': 2,
                    'ca': 0, 'thal': 3}

query = inference.query(variables=['num'], evidence=patient_evidence)

# Print the probability of heart disease (the last state of 'num')
print(f"Probability of heart disease: {query.values[-1]:.2f}")


EM ALGORITHM

import pandas as pd
from sklearn.mixture import GaussianMixture
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

# Load the dataset from a CSV file
data = pd.read_csv('dataset.csv')
X = data.iloc[:, :-1].values  # Features
y = data.iloc[:, -1].values   # Labels (for evaluation purposes)

# EM Clustering
print("EM Clustering:")
em = GaussianMixture(n_components=3, covariance_type='full', max_iter=100, random_state=42)
em_labels = em.fit_predict(X)
em_score = silhouette_score(X, em_labels)
print(f"Silhouette Score: {em_score:.2f}")

# k-Means Clustering
print("\nk-Means Clustering:")
kmeans = KMeans(n_clusters=3, random_state=42)
kmeans_labels = kmeans.fit_predict(X)
kmeans_score = silhouette_score(X, kmeans_labels)
print(f"Silhouette Score: {kmeans_score:.2f}")

# Compare the results
print("\nComparison:")
if em_score > kmeans_score:
    print("EM algorithm performs better than k-Means for this dataset.")
else:
    print("k-Means algorithm performs better than EM for this dataset.")


K-NEAREST NEIGHBOUR ALGORITHM

from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the iris dataset
iris = load_iris()
X, y = iris.data, iris.target

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a kNN classifier object
knn = KNeighborsClassifier(n_neighbors=3)

# Train the classifier
knn.fit(X_train, y_train)

# Make predictions on the test set
y_pred = knn.predict(X_test)

# Calculate the accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Print correct and wrong predictions
print("\nCorrect Predictions:")
correct_indices = [i for i, (pred, true) in enumerate(zip(y_pred, y_test)) if pred == true]
for i in correct_indices:
    print(f"Instance {i+1}: Predicted: {iris.target_names[y_pred[i]]} "
          f"(Correct: {iris.target_names[y_test[i]]})")

print("\nWrong Predictions:")
wrong_indices = [i for i, (pred, true) in enumerate(zip(y_pred, y_test)) if pred != true]
for i in wrong_indices:
    print(f"Instance {i+1}: Predicted: {iris.target_names[y_pred[i]]} "
          f"(Correct: {iris.target_names[y_test[i]]})")

NON-PARAMETRIC LOCALLY WEIGHTED REGRESSION ALGORITHM

import numpy as np
import matplotlib.pyplot as plt

# Sample dataset: a noisy sine curve
X = np.linspace(-3, 3, 50)
y = np.sin(X) + np.random.normal(0, 0.2, len(X))

# Gaussian kernel weight for a query point x and a training point x_i
def weight(x, x_i, tau):
    return np.exp(-(x - x_i)**2 / (2 * tau**2))

# LWR function: fit a weighted linear model around x and return its prediction at x
def lwr(x, X, y, tau):
    weights = np.array([weight(x, x_i, tau) for x_i in X])
    W = np.diag(weights)
    X_mat = np.vstack([np.ones(len(X)), X]).T
    theta = np.linalg.pinv(X_mat.T @ W @ X_mat) @ (X_mat.T @ W @ y)
    return theta[0] + theta[1] * x  # intercept + slope * x

# LWR predictions
tau = 1.0
y_pred = [lwr(x_val, X, y, tau) for x_val in X]

# Plot the data and LWR fit
plt.figure(figsize=(10, 6))
plt.scatter(X, y, label='Data Points', color='b', marker='o')
plt.plot(X, y_pred, label='LWR Fit', color='r')
plt.xlabel('X')
plt.ylabel('y')
plt.title('Locally Weighted Regression')
plt.legend()
plt.show()
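
The fit depends strongly on the bandwidth tau; a quick sweep (illustrative, reusing the lwr function and data above) shows that smaller values track the noise while larger values over-smooth:

plt.scatter(X, y, color='b', marker='o', label='Data Points')
for tau_val in (0.1, 0.5, 2.0):
    plt.plot(X, [lwr(x_val, X, y, tau_val) for x_val in X], label=f'tau = {tau_val}')
plt.legend()
plt.show()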
