Train DNN Using Normal Cluster Behaviour Data

import numpy as np
import pandas as pd
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt

1️⃣ Load and Preprocess Data (replace with your dataset)

# Assume the CSV has a timestamp, an energy reading, and weather columns
df = pd.read_csv('energy_data.csv')
features = ['reading', 'airTemperature', 'dewTemperature', 'windSpeed',
            'seaLvlPressure']
# Drop rows with missing feature values so df and X stay aligned for the
# cluster assignment later on
df = df.dropna(subset=features)
X = df[features]
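
The script later plots against df.index; if the CSV really does carry a timestamp column (the column name 'timestamp' is an assumption, not confirmed by the original), a minimal sketch for turning it into a time-series index would be:

# Optional sketch: parse the timestamp column (column name is an assumption)
df['timestamp'] = pd.to_datetime(df['timestamp'])
df = df.set_index('timestamp')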

# Scale Data
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

2️⃣ Adaptive Clustering (Gaussian Mixture Model)

n_clusters = 12  # Adaptive; you can tune it (see the BIC sketch below)
GMM = GaussianMixture(n_components=n_clusters, covariance_type='full',
                      random_state=42)
df['cluster'] = GMM.fit_predict(X_scaled)
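
The comment above says the cluster count is adaptive but fixes it at 12. One common way to choose it, not part of the original script, is to fit GMMs over a range of component counts and keep the one with the lowest Bayesian Information Criterion (BIC); the 2-20 candidate range below is an arbitrary assumption, and if used this selection would run before the fit above.

# Optional sketch: choose n_clusters by BIC (candidate range is an assumption)
candidate_counts = range(2, 21)
bics = []
for k in candidate_counts:
    gm = GaussianMixture(n_components=k, covariance_type='full', random_state=42)
    gm.fit(X_scaled)
    bics.append(gm.bic(X_scaled))
n_clusters = candidate_counts[int(np.argmin(bics))]  # lowest BIC wins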

# Identify Normal Clusters (largest ones)
cluster_counts = df['cluster'].value_counts()
# Keep the largest clusters as normal; only the smallest cluster is excluded
normal_clusters = cluster_counts.nlargest(n_clusters - 1).index
df_normal = df[df['cluster'].isin(normal_clusters)]
X_normal = scaler.transform(df_normal[features])
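
Dropping only the single smallest cluster is one heuristic. An alternative sketch, not in the original, treats every cluster holding less than some minimum share of the rows as abnormal; the 5% threshold here is an assumed value.

# Optional sketch: treat clusters below a minimum share of the data as abnormal
min_fraction = 0.05  # assumed threshold, not from the original script
cluster_fractions = df['cluster'].value_counts(normalize=True)
normal_clusters = cluster_fractions[cluster_fractions >= min_fraction].index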

3️⃣ Train a DNN Autoencoder on Normal Data

input_dim = X_normal.shape[1]
encoding_dim = 4 # Bottleneck layer

input_layer = Input(shape=(input_dim,))
encoded = Dense(8, activation='relu')(input_layer)
encoded = Dense(encoding_dim, activation='relu')(encoded)
decoded = Dense(8, activation='relu')(encoded)
decoded = Dense(input_dim, activation='linear')(decoded)

autoencoder = Model(input_layer, decoded)
autoencoder.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

# Train the model
autoencoder.fit(X_normal, X_normal, epochs=50, batch_size=32, shuffle=True,
                validation_split=0.1)
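
The fixed 50-epoch budget comes from the original script. If overfitting on the normal data is a concern, a minimal variant could stop on the validation loss using Keras early stopping; the patience value and the larger epoch ceiling below are assumptions.

# Optional sketch: stop training when validation loss stops improving
from tensorflow.keras.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
autoencoder.fit(X_normal, X_normal, epochs=100, batch_size=32, shuffle=True,
                validation_split=0.1, callbacks=[early_stop])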

4️⃣ Anomaly Detection Using Reconstruction Error

X_all = scaler.transform(df[features]) # All data, including potential anomalies
X_reconstructed = autoencoder.predict(X_all)
reconstruction_error = np.mean(np.abs(X_all - X_reconstructed), axis=1)
df['reconstruction_error'] = reconstruction_error

# Set Anomaly Threshold
threshold = np.percentile(reconstruction_error, 95)  # Top 5% highest errors

# Identify Anomalies
df['anomaly'] = df['reconstruction_error'] > threshold
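
The 95th-percentile cut-off effectively assumes about 5% of rows are anomalous. An alternative sketch, not in the original, derives the threshold from the reconstruction errors of the normal training data only; the mean-plus-three-standard-deviations rule here is an assumed choice.

# Optional sketch: threshold from normal-data errors (3-sigma rule is an assumption)
normal_errors = np.mean(np.abs(X_normal - autoencoder.predict(X_normal)), axis=1)
threshold = normal_errors.mean() + 3 * normal_errors.std()
df['anomaly'] = df['reconstruction_error'] > threshold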

# Visualize Anomalies
plt.figure(figsize=(12, 6))
plt.plot(df.index, df['reading'], label='Energy Consumption', color='blue')
plt.scatter(df[df['anomaly']].index, df[df['anomaly']]['reading'], color='red',
label='Anomalies')
plt.legend()
plt.show()

# Save anomalies
df.to_csv('detected_anomalies.csv', index=False)

print(f"Detected {df['anomaly'].sum()} anomalies!")
