Code for Clustering Algorithms in Machine Learning with Python Tutorial


kmeans_clustering.py

import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import KMeans
from matplotlib import pyplot

# 2 features, 2 informative, 0 redundant, 1 cluster per class
X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10) 

# 2 clusters
m = KMeans(n_clusters=2) 
# fit the model
m.fit(X)
# predict the cluster for each data point
p = m.predict(X) 
# unique clusters
cl = np.unique(p)
# plot the data points, colored by cluster assignment
pyplot.title('K-means (No. of Clusters = 2)')
for c in cl:
    r = np.where(p == c)
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()
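
A common way to choose the number of clusters for K-means is the elbow method: plot KMeans' inertia_ (the within-cluster sum of squared distances) for a range of cluster counts and look for the bend. A minimal sketch, reusing the X generated above; the range of 1 to 10 clusters is an arbitrary choice:

# elbow method sketch: inertia for a range of cluster counts
inertias = []
ks = range(1, 11)
for k in ks:
    inertias.append(KMeans(n_clusters=k).fit(X).inertia_)
pyplot.plot(ks, inertias, marker='o')
pyplot.title('Elbow Method')
pyplot.xlabel('Number of clusters')
pyplot.ylabel('Inertia')
pyplot.show()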

minibatch_kmeans_clustering.py

import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import MiniBatchKMeans
from matplotlib import pyplot

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
# 3 clusters
m = MiniBatchKMeans(n_clusters=3) 
# fit the model
m.fit(X)
# predict the cluster for each data point
p = m.predict(X) 
# unique clusters
cl = np.unique(p)
# plot the data points, colored by cluster assignment
pyplot.title('Mini Batch K-means')
for c in cl:
    r = np.where(p == c)
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()
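
MiniBatchKMeans can also be trained incrementally with partial_fit when the data arrives in chunks rather than all at once. A minimal sketch, reusing X from above; the chunk size of 100 is arbitrary:

# incremental training sketch: feed the data in chunks of 100
mbk = MiniBatchKMeans(n_clusters=3)
for i in range(0, len(X), 100):
    mbk.partial_fit(X[i:i + 100])
print(mbk.cluster_centers_)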

time_diff_minibatch_and_kmeans.py

from sklearn.datasets import make_classification
from sklearn.cluster import MiniBatchKMeans
from sklearn.cluster import KMeans
import timeit

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
# start timer for Mini Batch K-Means
t1_mkm = timeit.default_timer() 
m = MiniBatchKMeans(n_clusters=2)
m.fit(X)
p = m.predict(X)
# stop timer for Mini Batch K-Means
t2_mkm = timeit.default_timer()
# start timer for K-Means
t1_km = timeit.default_timer()
m = KMeans(n_clusters=2)
m.fit(X)
p = m.predict(X)
# stop timer for K-Means
t2_km = timeit.default_timer()
# print how much longer K-Means took than Mini Batch K-Means
print("Time difference (K-Means minus Mini Batch K-Means) =",
      (t2_km - t1_km) - (t2_mkm - t1_mkm))
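
A single timed run like this is noisy; repeating each fit and averaging gives a steadier comparison. A sketch using timeit.timeit with a callable, reusing X; 5 repetitions is an arbitrary choice:

# average several runs of each model for a steadier comparison
mkm_time = timeit.timeit(lambda: MiniBatchKMeans(n_clusters=2).fit(X), number=5) / 5
km_time = timeit.timeit(lambda: KMeans(n_clusters=2).fit(X), number=5) / 5
print("Mini Batch K-Means avg:", mkm_time, "s, K-Means avg:", km_time, "s")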

affinity_propagation.py

import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import AffinityPropagation
from matplotlib import pyplot

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)

# initialize the model
m = AffinityPropagation(damping=0.9)
# fit the model
m.fit(X)
# predict the cluster for each data point
p = m.predict(X)
# unique clusters
cl = np.unique(p)
# plot the data points, colored by cluster assignment
pyplot.title('Affinity Propagation Clustering')
for c in cl:
    r = np.where(p == c)
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()
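
Affinity Propagation decides the number of clusters on its own; the chosen exemplars are exposed through the fitted model's cluster_centers_indices_ attribute:

# how many clusters did the model settle on?
print("clusters found:", len(m.cluster_centers_indices_))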

dbscan_clustering.py

import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import DBSCAN
from matplotlib import pyplot

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
# init the model
m = DBSCAN(eps=0.05, min_samples=10)
# predict the cluster for each data point after fitting the model
p = m.fit_predict(X) 
# unique clusters
cl = np.unique(p)
# plot the data points, colored by cluster assignment
pyplot.title('DBSCAN Clustering')
for c in cl:
    r = np.where(p == c)
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()
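
DBSCAN assigns the label -1 to points it considers noise rather than forcing them into a cluster, and with an eps as small as 0.05 on this data many points may land there, so it is worth checking:

# points labelled -1 are noise, not members of any cluster
print("noise points:", np.sum(p == -1))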

optics.py

import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import OPTICS
from matplotlib import pyplot

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)

# init the model
m = OPTICS(eps=0.5, min_samples=10)
# predict the cluster for each data point after fitting the model
p = m.fit_predict(X)
# unique clusters
cl = np.unique(p)
# plot the data points, colored by cluster assignment
pyplot.title('OPTICS Clustering')
for c in cl:
    r = np.where(p == c)
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()
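
Note that scikit-learn's OPTICS only uses eps when cluster_method='dbscan'; under the default 'xi' method the parameter is ignored. The reachability plot is often the more informative output. A minimal sketch using the fitted model's reachability_ and ordering_ attributes:

# reachability plot: distances of points in cluster order
pyplot.plot(m.reachability_[m.ordering_])
pyplot.title('OPTICS Reachability Plot')
pyplot.ylabel('Reachability distance')
pyplot.show()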

birch.py

import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import Birch
from matplotlib import pyplot

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
# init the model with 2 clusters
m = Birch(threshold=0.05, n_clusters=2)
# predict the cluster for each data point after fitting the model
p = m.fit_predict(X) 
# unique clusters
cl = np.unique(p)
# plot the data points, colored by cluster assignment
pyplot.title('Birch Clustering')
for c in cl:
    r = np.where(p == c)
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()
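
Because Birch maintains a compact CF-tree, it also supports partial_fit for data that does not fit in memory at once. A sketch, reusing X; the chunk size of 200 is arbitrary:

# incremental Birch sketch: insert the data in chunks of 200
b = Birch(threshold=0.05, n_clusters=2)
for i in range(0, len(X), 200):
    b.partial_fit(X[i:i + 200])
print(np.unique(b.predict(X)))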

agglomerative_clustering.py

import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import AgglomerativeClustering
from matplotlib import pyplot

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
# init the model with 3 clusters
m = AgglomerativeClustering(n_clusters=3)
# predict the cluster for each data point after fitting the model
p = m.fit_predict(X) 
# unique clusters
cl = np.unique(p)
# plot the data points, colored by cluster assignment
pyplot.title('Agglomerative Clustering')
for c in cl:
    r = np.where(p == c)
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()
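
Agglomerative clustering builds a merge hierarchy, which SciPy's linkage and dendrogram utilities can visualize (SciPy is already a scikit-learn dependency). A sketch on the first 50 points only, since a dendrogram of all 1000 is unreadable:

from scipy.cluster.hierarchy import dendrogram, linkage
# build and draw the merge hierarchy for a small subsample
Z = linkage(X[:50], method='ward')
dendrogram(Z)
pyplot.title('Ward Linkage Dendrogram (first 50 points)')
pyplot.show()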

meanshift_clustering.py

import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import MeanShift
from matplotlib import pyplot

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
# init the model
m = MeanShift()
# predict the cluster for each data point after fitting the model
p = m.fit_predict(X)
# unique clusters
cl = np.unique(p)
# plot the data points, colored by cluster assignment
pyplot.title('Mean Shift Clustering')
for c in cl:
    r = np.where(p == c)
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()
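
Mean Shift's outcome depends heavily on the kernel bandwidth, which the default constructor above estimates internally. scikit-learn's estimate_bandwidth makes that step explicit and tunable; the quantile value here is an arbitrary choice:

from sklearn.cluster import estimate_bandwidth
# estimate a bandwidth from the data, then pass it to the model
bw = estimate_bandwidth(X, quantile=0.2)
ms = MeanShift(bandwidth=bw)
print("estimated bandwidth:", bw)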

spectral_clustering.py

import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import SpectralClustering
from matplotlib import pyplot

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
# init the model with 3 clusters
m = SpectralClustering(n_clusters=3)
# predict the cluster for each data point after fitting the model
p = m.fit_predict(X)
# unique clusters
cl = np.unique(p)
# plot the data points, colored by cluster assignment
pyplot.title('Spectral Clustering')
for c in cl:
    r = np.where(p == c)
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()
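
SpectralClustering uses an RBF kernel affinity by default; a nearest-neighbors graph is a common alternative for non-convex cluster shapes. A sketch; n_neighbors=10 is an arbitrary choice:

# spectral clustering on a k-nearest-neighbors graph instead of the RBF kernel
sc = SpectralClustering(n_clusters=3, affinity='nearest_neighbors', n_neighbors=10)
labels = sc.fit_predict(X)
print(np.unique(labels))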

gmm.py

import numpy as np
from sklearn.datasets import make_classification
from sklearn.mixture import GaussianMixture
from matplotlib import pyplot

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
# init the model with 2 components
m = GaussianMixture(n_components=2)
# predict the cluster for each data point after fitting the model
p = m.fit_predict(X)
# unique clusters
cl = np.unique(p)
# plot the data points, colored by cluster assignment
pyplot.title('Gaussian Mixture Clustering')
for c in cl:
    r = np.where(p == c)
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()
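
Unlike the hard labels from fit_predict, a Gaussian mixture also provides soft assignments: predict_proba returns each point's membership probability for every component:

# probability of each point belonging to each of the 2 components
probs = m.predict_proba(X)
print(probs[:5])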

metrics.py

from sklearn import metrics

y_true = [5, 3, 5, 4, 4, 5]
y_pred = [3, 5, 5, 4, 3, 4]
# homogeneity: each cluster contains only members of a single class.
print(metrics.homogeneity_score(y_true, y_pred))
# completeness: all members of a given class are assigned to the same cluster.
print(metrics.completeness_score(y_true, y_pred))
# v-measure: harmonic mean of homogeneity and completeness
print(metrics.v_measure_score(y_true, y_pred))
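
All three scores above require ground-truth labels. When none are available, an internal measure such as the silhouette score can be computed from the data and the predicted labels alone. A sketch, assuming X and p from any of the clustering scripts above:

# silhouette: cohesion vs. separation, ranges from -1 (bad) to 1 (good)
print(metrics.silhouette_score(X, p))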
