Inbuilt Kmeans
Inbuilt Kmeans
ipynb - Colab
import gc
import math
import os
import pprint
import random

import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
# Load the dataset from "Iris.csv" (path is relative to the working directory).
input_data = pd.read_csv(filepath_or_buffer="Iris.csv")
# Preview the first rows of the frame (displayed when run as a notebook cell).
input_data.head()
# Print the per-column non-null counts and dtypes.
input_data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Id 150 non-null int64
1 SepalLengthCm 150 non-null float64
2 SepalWidthCm 150 non-null float64
3 PetalLengthCm 150 non-null float64
4 PetalWidthCm 150 non-null float64
5 Species 150 non-null object
dtypes: float64(4), int64(1), object(1)
memory usage: 7.2+ KB
# One-hot encode the non-numeric column(s) and discard the `Id` row identifier.
# NOTE(review): the indicator columns produced from `Species` remain in the
# frame, so they are scaled and passed on to clustering together with the four
# measurements -- confirm that including the class label as a feature is
# intended.
input_data = pd.get_dummies(input_data).drop(columns=["Id"])
input_data
# Fit a StandardScaler on every remaining column and keep the transformed
# array for the clustering steps that follow.
scaled_data = StandardScaler().fit_transform(input_data)
# Peek at the first ten transformed rows.
scaled_data[:10]
https://colab.research.google.com/drive/1FoEQ0l5WVUciLo7jL2A2eWqAuGB1pwE_#scrollTo=SPDHHI7Miz6h&printMode=true 1/3
9/23/24, 4:39 PM 21BCE2920.ipynb - Colab
# Keyword arguments shared by every KMeans model fitted in the sweep below.
kmeans_kwargs = {
    "init": "random",
    "n_init": 10,
    "random_state": 1,
}

# Fit one model per candidate cluster count (1 through 10) on `scaled_data`
# and record each fitted model's `inertia_`, which the plot labels as SSE.
# After the loop, `kmeans` is left bound to the last model fitted (k = 10).
cluster_counts = range(1, 11)
sse = []
for k in cluster_counts:
    kmeans = KMeans(n_clusters=k, **kmeans_kwargs)
    kmeans.fit(scaled_data)
    sse.append(kmeans.inertia_)

# Plot SSE against the number of clusters, one x tick per candidate count.
plt.plot(cluster_counts, sse)
plt.xticks(cluster_counts)
plt.xlabel("Number of Clusters")
plt.ylabel("SSE")
plt.show()
array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 3, 1, 1, 3, 1, 3, 1,
1, 3, 1, 3, 3, 1, 1, 1, 1, 3, 1, 3, 1, 3, 1, 1, 3, 3, 3, 1, 1, 1,
3, 3, 3, 1, 1, 1, 3, 1, 1, 1, 3, 1, 1, 1, 3, 1, 1, 3], dtype=int32)
https://colab.research.google.com/drive/1FoEQ0l5WVUciLo7jL2A2eWqAuGB1pwE_#scrollTo=SPDHHI7Miz6h&printMode=true 2/3
9/23/24, 4:39 PM 21BCE2920.ipynb - Colab
https://colab.research.google.com/drive/1FoEQ0l5WVUciLo7jL2A2eWqAuGB1pwE_#scrollTo=SPDHHI7Miz6h&printMode=true 3/3