
APEEJAY STYA UNIVERSITY

Big Data Analysis Practical File

Submitted To: Dr. Sudhakar Ranjan
Submitted By: Himanshi (ASU2021010200225)
Bachelor of Computer Science Engineering, 6th Semester

INDEX

S.No.  Algorithm

1. Naïve Bayes
2. K-Means
3. K-Nearest Neighbor
4. Apriori Algorithm
5. DBSCAN
6. Decision Tree
7. Random Forest
8. Linear Regression
9. Support Vector Machine

1. Naïve Bayes
CODE
from sklearn.datasets import load_iris

from sklearn.model_selection import train_test_split

from sklearn.naive_bayes import GaussianNB

from sklearn.metrics import accuracy_score

# Load Iris dataset

iris = load_iris()

X = iris.data

y = iris.target

# Split data into training and testing sets

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize Naive Bayes classifier

model = GaussianNB()

# Train the model

model.fit(X_train, y_train)

# Predict on the test data

y_pred = model.predict(X_test)

# Calculate accuracy

accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", accuracy)

OUTPUT:

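As an optional check beyond the accuracy score (not part of the original practical), the same predictions can be summarised per class. A minimal sketch, assuming model, y_test, y_pred and iris from the code above are still in scope:

from sklearn.metrics import classification_report, confusion_matrix

# Per-class precision, recall and F1 for the Gaussian Naive Bayes predictions
print(classification_report(y_test, y_pred, target_names=iris.target_names))

# Confusion matrix: rows are true classes, columns are predicted classes
print(confusion_matrix(y_test, y_pred))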
2. K-Means
CODE
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

# Generating a small random dataset
X_small = np.array([[1, 2], [1.5, 1.8], [5, 8], [8, 8], [1, 0.6], [9, 11]])

# Instantiate and fit KMeans
kmeans_small = KMeans(n_clusters=2)
kmeans_small.fit(X_small)

# Print cluster centroids
centroids = kmeans_small.cluster_centers_
print("Cluster centroids:")
for i, centroid in enumerate(centroids):
    print(f"Cluster {i+1}: {centroid}")

# Print samples belonging to each cluster
print("\nSamples in each cluster:")
labels = kmeans_small.predict(X_small)
for i in range(kmeans_small.n_clusters):
    cluster_samples = X_small[labels == i]
    print(f"Cluster {i+1}: {cluster_samples}")

# Plotting the clusters
plt.scatter(X_small[:, 0], X_small[:, 1], c=labels, cmap='viridis')
plt.scatter(centroids[:, 0], centroids[:, 1], marker='x', s=200, color='red')
plt.title('KMeans Clustering (Small Dataset)')
plt.xlabel('X')
plt.ylabel('Y')
plt.show()

OUTPUT:

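The code above fixes n_clusters=2. A common sanity check (not in the original file) is to compare the within-cluster sum of squares (inertia_) for a few values of k; a minimal sketch, assuming KMeans and X_small from the code above:

# Elbow check: inertia for k = 1..4 on the same small dataset
for k in range(1, 5):
    km = KMeans(n_clusters=k, n_init=10).fit(X_small)
    print(f"k={k}: inertia={km.inertia_:.2f}")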
3. K-Nearest Neighbor
CODE
import matplotlib.pyplot as plt

def knn_cal(x, y, x1, y1):
    # Euclidean distance between (x, y) and (x1, y1)
    dis_cal = ((x1 - x) ** 2 + (y1 - y) ** 2) ** 0.5
    return dis_cal

a = []
k = int(input('Enter the no. of coordinates: '))
print(a)

for i in range(k):
    j = []
    for _ in range(2):
        # Prompts once for the x value and once for the y value of each point
        x = int(input('Enter the coordinate: '))
        j.append(x)
    a.append(j)

print(a)

r = knn_cal(a[0][0], a[0][1], a[1][0], a[1][1])
print('Distance between the two points:', r)

# Plotting the points
x_coords = [coord[0] for coord in a]
y_coords = [coord[1] for coord in a]
plt.scatter(x_coords, y_coords, color='blue')
for i in range(k):
    plt.annotate(f'({a[i][0]}, {a[i][1]})', (a[i][0], a[i][1]))

plt.xlabel('x')
plt.ylabel('y')
plt.title('Coordinates')
plt.show()

OUTPUT:

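The program above only computes the Euclidean distance that underlies the K-Nearest Neighbor method. For an actual classification example (not part of the original file), scikit-learn's KNeighborsClassifier can be used on the same Iris data as the other programs; a minimal sketch:

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Same Iris setup used elsewhere in this file
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.3, random_state=42)

# k = 3 nearest neighbours with the default Euclidean distance
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)
print("Accuracy:", accuracy_score(y_test, knn.predict(X_test)))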
4. Apriori Algorithm
CODE
from itertools import combinations
from collections import defaultdict

def apriori(dataset, min_support):
    # Count occurrences of each item
    item_counts = defaultdict(int)
    for transaction in dataset:
        for item in transaction:
            item_counts[item] += 1

    # Filter single items based on min_support
    frequent_items = {item for item, count in item_counts.items() if count >= min_support}

    frequent_itemsets = []
    k = 2
    while frequent_items:
        # Generate candidate itemsets of size k from the surviving items
        candidate_itemsets = {frozenset(items) for items in combinations(frequent_items, k)}

        # Count support for each candidate itemset
        candidate_supports = defaultdict(int)
        for transaction in dataset:
            for candidate_itemset in candidate_itemsets:
                if candidate_itemset.issubset(transaction):
                    candidate_supports[candidate_itemset] += 1

        # Filter candidates based on min_support
        frequent_k_itemsets = {itemset for itemset, support in candidate_supports.items() if support >= min_support}

        # Add frequent itemsets of this size to the result
        frequent_itemsets.extend(frequent_k_itemsets)

        # Only items that still occur in some frequent itemset seed the next pass
        frequent_items = set().union(*frequent_k_itemsets) if frequent_k_itemsets else set()
        k += 1

    return frequent_itemsets

# Example dataset (each transaction is a set of items)
dataset = [
    {'bread', 'milk'},
    {'bread', 'diaper', 'beer', 'egg'},
    {'milk', 'diaper', 'beer', 'cola'},
    {'bread', 'milk', 'diaper', 'beer'},
    {'bread', 'milk', 'diaper', 'cola'}
]

# Minimum support threshold (absolute count)
min_support = 3

# Finding frequent itemsets using Apriori
frequent_itemsets = apriori(dataset, min_support)

print("Frequent itemsets:")
for i, itemset in enumerate(frequent_itemsets):
    print(f"Itemset {i+1}: {itemset}")

OUTPUT:

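A natural follow-up (not in the original file) is to turn the frequent itemsets into association rules by computing confidence = support(X ∪ Y) / support(X). A minimal sketch, assuming dataset and frequent_itemsets from the code above:

def support(itemset, dataset):
    # Number of transactions that contain the itemset (absolute support)
    return sum(1 for transaction in dataset if itemset.issubset(transaction))

min_confidence = 0.7
for itemset in frequent_itemsets:
    for item in itemset:
        antecedent = itemset - {item}
        if antecedent:
            conf = support(itemset, dataset) / support(antecedent, dataset)
            if conf >= min_confidence:
                print(f"{set(antecedent)} -> {item} (confidence {conf:.2f})")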
5. DBSCAN
CODE
from sklearn.datasets import make_moons

from sklearn.cluster import DBSCAN

import matplotlib.pyplot as plt

# Generate example data

X, _ = make_moons(n_samples=200, noise=0.1, random_state=42)

# Initialize DBSCAN

dbscan = DBSCAN(eps=0.2, min_samples=5)

# Fit the model

dbscan.fit(X)

# Get cluster labels

labels = dbscan.labels_

# Number of clusters in labels, ignoring noise if present.

n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)

n_noise_ = list(labels).count(-1)

print('Estimated number of clusters: %d' % n_clusters_)

print('Estimated number of noise points: %d' % n_noise_)

# Plotting the clusters

plt.figure(figsize=(8, 6))

plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis')

plt.title('DBSCAN Clustering')

plt.xlabel('Feature 1')

plt.ylabel('Feature 2')

plt.show()

OUTPUT:

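The number of clusters and noise points found by DBSCAN depends heavily on eps and min_samples. A quick sensitivity check (not part of the original file), assuming DBSCAN and X from the code above:

# Compare cluster/noise counts for a few eps values
for eps in (0.1, 0.2, 0.3):
    labels_eps = DBSCAN(eps=eps, min_samples=5).fit_predict(X)
    n_clusters = len(set(labels_eps)) - (1 if -1 in labels_eps else 0)
    print(f"eps={eps}: {n_clusters} clusters, {list(labels_eps).count(-1)} noise points")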
6. Decision Tree
CODE
from sklearn.tree import DecisionTreeClassifier, plot_tree

import matplotlib.pyplot as plt

import numpy as np

# Define the dataset
# Each data point represents [Outlook, Temperature, Humidity, Wind]
# Outlook: 0 - Sunny, 1 - Overcast, 2 - Rainy
# Temperature: 0 - Cool, 1 - Mild, 2 - Hot
# Humidity: 0 - Normal, 1 - High
# Wind: 0 - Weak, 1 - Strong

X = np.array([

[0, 0, 0, 0],

[0, 0, 0, 1],

[1, 0, 0, 0],

[2, 1, 0, 0],

[2, 2, 1, 0],

[2, 2, 1, 1],

[1, 2, 1, 1],

[0, 1, 0, 0],

[0, 2, 1, 0],

[2, 1, 1, 0],

[0, 1, 1, 1],

[1, 1, 0, 1],

[1, 0, 1, 0],

[2, 1, 0, 1]

])

# Labels: 0 - No, 1 - Yes (Play tennis)

y = np.array([0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0])

# Create a Decision Tree classifier

clf = DecisionTreeClassifier(random_state=42)

# Train the classifier
clf.fit(X, y)

# Plot the Decision Tree

plt.figure(figsize=(12, 8))

plot_tree(clf, filled=True, feature_names=['Outlook', 'Temperature', 'Humidity', 'Wind'], class_names=['No', 'Yes'])

plt.show()

# Generate random weather conditions for prediction

new_data = np.random.randint(3, size=(1, 4)) # Random integers between 0 and 2 for each feature

# Predict whether a player will play tennis or not

prediction = clf.predict(new_data)

# Output the prediction

if prediction[0] == 1:

print("Player will play tennis.")

else:

print("Player will not play tennis.")

OUTPUT:

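Besides the plotted tree, the learned rules can also be printed as plain text, which is easier to paste into a report. An optional sketch (not in the original file), assuming clf from the code above:

from sklearn.tree import export_text

# Text version of the learned decision rules
print(export_text(clf, feature_names=['Outlook', 'Temperature', 'Humidity', 'Wind']))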
7. Random Forest
CODE
from sklearn.datasets import load_iris

from sklearn.model_selection import train_test_split

from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import accuracy_score, classification_report

# Load Iris dataset

iris = load_iris()

X = iris.data

y = iris.target

# Split data into training and testing sets

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize Random Forest classifier

clf = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model

clf.fit(X_train, y_train)

# Predict on the test data

y_pred = clf.predict(X_test)

# Calculate accuracy

accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", accuracy)

# Print classification report

print("Classification Report:")

print(classification_report(y_test, y_pred))

OUTPUT:

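A common follow-up (not in the original file) is to check which features the forest relies on most, assuming clf and iris from the code above:

# Feature importances learned by the Random Forest
for name, importance in zip(iris.feature_names, clf.feature_importances_):
    print(f"{name}: {importance:.3f}")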
8. Linear Regression
CODE
import numpy as np

import pandas as pd

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Create a sample dataset

data = pd.DataFrame({

'bedrooms': np.random.randint(1, 6, 100), # Random number of bedrooms (1-5)

'bathrooms': np.random.randint(1, 4, 100), # Random number of bathrooms (1-3)

'size_sqft': np.random.randint(800, 2500, 100), # Random size of the house in square feet (800-2500)

'price': np.random.randint(200000, 1000000, 100) # Random price of the house ($200k-$1M)

})

# Prepare the data

X = data[['bedrooms', 'bathrooms', 'size_sqft']] # Features

y = data['price'] # Target variable

# Split data into training and testing sets

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize Linear Regression model

model = LinearRegression()

# Train the model

model.fit(X_train, y_train)

# Make predictions on the test data

y_pred = model.predict(X_test)

# Calculate Mean Squared Error (MSE)

mse = mean_squared_error(y_test, y_pred)

print("Mean Squared Error (MSE):", mse)

# Plotting the results

plt.figure(figsize=(10, 6))

plt.scatter(y_test, y_pred)

plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'k--', lw=4)

plt.title('House Price Prediction')

plt.xlabel('Actual Price')

plt.ylabel('Predicted Price')

plt.grid(True)

plt.show()

OUTPUT:

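Because the prices in this dataset are random, the MSE by itself is hard to interpret. Printing the fitted coefficients and the R² score makes this explicit (an optional check, not in the original file), assuming model, X, y_test and y_pred from the code above:

from sklearn.metrics import r2_score

# Fitted parameters of the linear model
print("Coefficients:", dict(zip(X.columns, model.coef_)))
print("Intercept:", model.intercept_)

# R^2 on the held-out data (likely close to zero for random prices)
print("R^2 score:", r2_score(y_test, y_pred))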
9. Support Vector Machine
CODE
from sklearn.datasets import load_iris

from sklearn.model_selection import train_test_split

from sklearn.svm import SVC

from sklearn.metrics import accuracy_score, classification_report

import numpy as np

import matplotlib.pyplot as plt

# Load the Iris dataset

iris = load_iris()

X = iris.data[:, :2] # Use only the first two features for visualization

y = iris.target

# Split the data into training and testing sets

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize the Support Vector Classifier (SVC)

clf = SVC(kernel='linear')

# Train the model

clf.fit(X_train, y_train)

# Make predictions

y_pred = clf.predict(X_test)

# Calculate accuracy

accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", accuracy)

# Print classification report

print("Classification Report:")

print(classification_report(y_test, y_pred))

# Plot the decision boundary

plt.figure(figsize=(8, 6))

plt.scatter(X[:, 0], X[:, 1], c=y, cmap='viridis', edgecolors='k', s=80)

plt.xlabel('Sepal Length (cm)')

plt.ylabel('Sepal Width (cm)')

plt.title('Iris Dataset - Sepal Features')

plt.grid(True)

# Create a mesh to plot in

x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1

y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1

xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.01), np.arange(y_min, y_max, 0.01))

Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])

# Put the result into a color plot

Z = Z.reshape(xx.shape)

plt.contourf(xx, yy, Z, alpha=0.3, cmap='viridis')

plt.show()

OUTPUT:

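The linear kernel used above is only one choice. A quick comparison of kernels with cross-validation (not part of the original file), assuming SVC, X and y from the code above:

from sklearn.model_selection import cross_val_score

# 5-fold cross-validated accuracy for a few kernels on the two-feature data
for kernel in ('linear', 'rbf', 'poly'):
    scores = cross_val_score(SVC(kernel=kernel), X, y, cv=5)
    print(f"{kernel}: mean accuracy {scores.mean():.3f}")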
