0% found this document useful (0 votes)

1 views6 pages

credit_card_clustering_autoencoder

This document outlines a step-by-step process for building and training an autoencoder using PyTorch on a credit card dataset. It includes steps for library installation, data loading and preprocessing, model definition, training, and evaluation of clustering results. Finally, it visualizes the clustering results using t-SNE.

Uploaded by

saemon2714

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as TXT, PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

1 views6 pages

credit_card_clustering_autoencoder

Uploaded by

saemon2714

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as TXT, PDF, TXT or read online on Scribd

You are on page 1/ 6

{

"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "2Cbo3KpwX0v0"
},
"outputs": [],
"source": [
"# Step 1: Install required libraries\n",
"!pip install torch torchvision datasets scikit-learn matplotlib seaborn tqdm\n",
"!pip install datasets --upgrade"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "qY5lP9oiX0v1"
},
"outputs": [],
"source": [
"# Step 2: Import libraries\n",
"import torch\n",
"import torch.nn as nn\n",
"import torch.optim as optim\n",
"from torch.utils.data import Dataset, DataLoader\n",
"from datasets import load_dataset\n",
"import pandas as pd\n",
"import numpy as np\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.cluster import KMeans\n",
"from sklearn.manifold import TSNE\n",
"from sklearn.metrics import silhouette_score, davies_bouldin_score, calinski_harabasz_score\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from tqdm import tqdm\n",
"import warnings\n",
"warnings.filterwarnings('ignore')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "1olmebAKX0v1"
},
"outputs": [],
"source": [
"# Step 3: Load and preprocess the Hugging Face imodels/credit-card dataset\n",
"!rm -rf ~/.cache/huggingface/datasets # Clear cache to avoid loading issues\n",
"\n",
"# Check for GPU\n",
"device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
"if device.type == 'cpu':\n",
"    print('WARNING: No GPU detected. Training will be slower. To enable GPU, go to Runtime > Change runtime type > Select GPU.')\n",
"else:\n",
"    print(f'Using device: {device}')\n",
"\n",
"# Load dataset\n",
"try:\n",
"    print('Loading imodels/credit-card dataset...')\n",
"    dataset = load_dataset('imodels/credit-card', download_mode='force_redownload')['train']\n",
"    print('Dataset loaded successfully')\n",
"except Exception as e:\n",
"    print(f'Error loading dataset: {e}')\n",
"    raise\n",
"\n",
"# Convert to pandas DataFrame\n",
"df = pd.DataFrame(dataset)\n",
"\n",
"# Inspect dataset\n",
"print('Dataset sample:')\n",
"print(df.head())\n",
"print('\\nUnique values per feature:')\n",
"for col in df.columns:\n",
"    print(f'{col}: {df[col].unique()}')\n",
"\n",
"# Define feature and target columns\n",
"features = [col for col in df.columns if col != 'default.payment.next.month']\n",
"target = 'default.payment.next.month'\n",
"\n",
"# Mapping for categorical features (for interpretation)\n",
"mapping_info = {\n",
"    'SEX': {1: 'male', 2: 'female'},\n",
"    'EDUCATION': {0: 'other', 1: 'graduate school', 2: 'university', 3: 'high school', 4: 'other', 5: 'other', 6: 'other'},\n",
"    'MARRIAGE': {0: 'other', 1: 'married', 2: 'single', 3: 'other'},\n",
"    'default.payment.next.month': {0: 'no default', 1: 'default'}\n",
"}\n",
"print('\\nMapping of integer values to categories:')\n",
"for col, mapping in mapping_info.items():\n",
"    print(f'{col}: {mapping}')\n",
"\n",
"# Scale numerical features\n",
"numerical_features = ['LIMIT_BAL', 'AGE', 'BILL_AMT1', 'BILL_AMT2', 'BILL_AMT3', 'BILL_AMT4', 'BILL_AMT5', 'BILL_AMT6',\n",
"                      'PAY_AMT1', 'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6']\n",
"scaler = StandardScaler()\n",
"df[numerical_features] = scaler.fit_transform(df[numerical_features])\n",
"\n",
"# Categorical features are already integer-encoded\n",
"X = df[features].astype(np.float32).values\n",
"y = df[target].astype(np.float32).values\n",
"\n",
"# Create PyTorch dataset\n",
"class CreditCardDataset(Dataset):\n",
"    def __init__(self, features):\n",
"        self.features = torch.tensor(features, dtype=torch.float32)\n",
"    def __len__(self):\n",
"        return len(self.features)\n",
"    def __getitem__(self, idx):\n",
"        return self.features[idx]\n",
"\n",
"credit_dataset = CreditCardDataset(X)\n",
"dataloader = DataLoader(credit_dataset, batch_size=64, shuffle=True)\n",
"print('Step 3 complete')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "lT3Y7Uo_X0v2"
},
"outputs": [],
"source": [
"# Step 4: Define the Autoencoder with modified architecture\n",
"class Autoencoder(nn.Module):\n",
"    def __init__(self, input_dim=24, latent_dim=4):\n",
"        super(Autoencoder, self).__init__()\n",
"        # Encoder: deeper with dropout\n",
"        self.encoder = nn.Sequential(\n",
"            nn.Linear(input_dim, 16),\n",
"            nn.ReLU(),\n",
"            nn.Dropout(0.2),\n",
"            nn.Linear(16, 8),\n",
"            nn.ReLU(),\n",
"            nn.Dropout(0.2),\n",
"            nn.Linear(8, latent_dim),\n",
"            nn.ReLU()\n",
"        )\n",
"        # Decoder: symmetric with dropout\n",
"        self.decoder = nn.Sequential(\n",
"            nn.Linear(latent_dim, 8),\n",
"            nn.ReLU(),\n",
"            nn.Dropout(0.2),\n",
"            nn.Linear(8, 16),\n",
"            nn.ReLU(),\n",
"            nn.Dropout(0.2),\n",
"            nn.Linear(16, input_dim)\n",
"        )\n",
"\n",
"    def forward(self, x):\n",
"        encoded = self.encoder(x)\n",
"        decoded = self.decoder(encoded)\n",
"        return decoded, encoded\n",
"\n",
"# Initialize model\n",
"model = Autoencoder(input_dim=24, latent_dim=4).to(device)\n",
"criterion = nn.MSELoss()\n",
"optimizer = optim.Adam(model.parameters(), lr=0.001)\n",
"print('Step 4 complete')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "U8vKKBBOX0v2"
},
"outputs": [],
"source": [
"# Step 5: Train the Autoencoder (30 epochs)\n",
"num_epochs = 30\n",
"losses = []\n",
"\n",
"model.train()\n",
"for epoch in range(num_epochs):\n",
"    epoch_loss = 0\n",
"    for batch in tqdm(dataloader, desc=f'Epoch {epoch+1}/{num_epochs}'):\n",
"        batch = batch.to(device)\n",
"        optimizer.zero_grad()\n",
"        output, _ = model(batch)\n",
"        loss = criterion(output, batch)\n",
"        loss.backward()\n",
"        optimizer.step()\n",
"        epoch_loss += loss.item()\n",
"    avg_loss = epoch_loss / len(dataloader)\n",
"    losses.append(avg_loss)\n",
"    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.6f}')\n",
"\n",
"# Plot training loss\n",
"plt.figure(figsize=(10, 5))\n",
"plt.plot(losses, color='blue', label='Training Loss')\n",
"plt.title('Autoencoder Training Loss')\n",
"plt.xlabel('Epoch')\n",
"plt.ylabel('MSE Loss')\n",
"plt.legend()\n",
"plt.grid(True)\n",
"plt.show()\n",
"print('Step 5 complete')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "9gD2qJ-kX0v3"
},
"outputs": [],
"source": [
"# Step 6: Extract latent representations\n",
"model.eval()\n",
"latent_features = []\n",
"with torch.no_grad():\n",
"    for batch in dataloader:\n",
"        batch = batch.to(device)\n",
"        _, encoded = model(batch)\n",
"        latent_features.append(encoded.cpu().numpy())\n",
"latent_features = np.concatenate(latent_features, axis=0)\n",
"print('Step 6 complete')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "jJ2j3KxWX0v3"
},
"outputs": [],
"source": [
"# Step 7: Apply K-Means clustering\n",
"kmeans = KMeans(n_clusters=2, random_state=42)\n",
"cluster_labels = kmeans.fit_predict(latent_features)\n",
"print('Step 7 complete')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "a3lKKBBOX0v4"
},
"outputs": [],
"source": [
"# Step 8: Evaluate clustering\n",
"silhouette = silhouette_score(latent_features, cluster_labels)\n",
"davies_bouldin = davies_bouldin_score(latent_features, cluster_labels)\n",
"calinski_harabasz = calinski_harabasz_score(latent_features, cluster_labels)\n",
"\n",
"print('Clustering Evaluation Metrics:')\n",
"print(f'Silhouette Score: {silhouette:.4f}')\n",
"print(f'Davies-Bouldin Index: {davies_bouldin:.4f}')\n",
"print(f'Calinski-Harabasz Index: {calinski_harabasz:.4f}')\n",
"print('Step 8 complete')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "t3X5pGCRX0v4"
},
"outputs": [],
"source": [
"# Step 9: Visualize with t-SNE\n",
"tsne = TSNE(n_components=2, random_state=42)\n",
"tsne_features = tsne.fit_transform(latent_features)\n",
"\n",
"# Plot predicted clusters\n",
"plt.figure(figsize=(12, 5))\n",
"plt.subplot(1, 2, 1)\n",
"sns.scatterplot(x=tsne_features[:, 0], y=tsne_features[:, 1], hue=cluster_labels, palette=['blue', 'red'], legend='full')\n",
"plt.title('t-SNE: Predicted Clusters')\n",
"plt.xlabel('t-SNE Component 1')\n",
"plt.ylabel('t-SNE Component 2')\n",
"\n",
"# Plot true labels\n",
"plt.subplot(1, 2, 2)\n",
"sns.scatterplot(x=tsne_features[:, 0], y=tsne_features[:, 1], hue=y, palette=['green', 'purple'], legend='full')\n",
"plt.title('t-SNE: True Labels')\n",
"plt.xlabel('t-SNE Component 1')\n",
"plt.ylabel('t-SNE Component 2')\n",
"\n",
"plt.tight_layout()\n",
"plt.show()\n",
"print('Step 9 complete')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.11"
},
"colab": {
"provenance": []
}
},
"nbformat": 4,
"nbformat_minor": 0
}

Canusukurls
0% (1)
Canusukurls
1,959 pages
7) Psychological Foundations of Learning & Development-1
No ratings yet
7) Psychological Foundations of Learning & Development-1
75 pages
Pushing The Limits of Formation Evaluation While Drilling
100% (2)
Pushing The Limits of Formation Evaluation While Drilling
31 pages
Deep Learning Lab Manual - IGDTUW - Vinisky Kumar
100% (1)
Deep Learning Lab Manual - IGDTUW - Vinisky Kumar
33 pages
Pages From 16th+Mar+-+IUPAC+Nomenclature+ - +Top+50+NEET+Questions+ - +last+20+years-3
No ratings yet
Pages From 16th+Mar+-+IUPAC+Nomenclature+ - +Top+50+NEET+Questions+ - +last+20+years-3
15 pages
Font Transfer 2 Autoencoders
No ratings yet
Font Transfer 2 Autoencoders
78 pages
Vertopal.com HW4ML Project Code
No ratings yet
Vertopal.com HW4ML Project Code
24 pages
10_neural_nets_with_keras.ipynb (1)
No ratings yet
10_neural_nets_with_keras.ipynb (1)
159 pages
Kolmogorov-Arnold-Networks in Python
No ratings yet
Kolmogorov-Arnold-Networks in Python
8 pages
Skin Cancer Detection Using Deep Learning Models - Ipynb
No ratings yet
Skin Cancer Detection Using Deep Learning Models - Ipynb
189 pages
Face Recognition - Ipynb
No ratings yet
Face Recognition - Ipynb
128 pages
03_nonlin_poisson_pinns.ipynb
No ratings yet
03_nonlin_poisson_pinns.ipynb
59 pages
Yolo Step-by-Step - Ipynb
No ratings yet
Yolo Step-by-Step - Ipynb
447 pages
Deep Learning
No ratings yet
Deep Learning
46 pages
bldd_VIT_ResNet50v2_CustomCNN
No ratings yet
bldd_VIT_ResNet50v2_CustomCNN
38 pages
Lab 9
No ratings yet
Lab 9
29 pages
AM19_ADL_u-net-model
No ratings yet
AM19_ADL_u-net-model
37 pages
Optiver Interview
No ratings yet
Optiver Interview
15 pages
Effects of Batches - Jupyter Notebook
No ratings yet
Effects of Batches - Jupyter Notebook
73 pages
PyTorch Made Easy A Quick Overview
No ratings yet
PyTorch Made Easy A Quick Overview
55 pages
Internship Report Layout
No ratings yet
Internship Report Layout
2 pages
NN From Scratch
No ratings yet
NN From Scratch
5 pages
Deep Learning Programs Updated
No ratings yet
Deep Learning Programs Updated
24 pages
Assignment3 AL
No ratings yet
Assignment3 AL
23 pages
Autoencoder From Scratch
No ratings yet
Autoencoder From Scratch
21 pages
Mlp Pytorch Sigmoid Mse
No ratings yet
Mlp Pytorch Sigmoid Mse
20 pages
Mlp Pytorch Softmax Crossentr
No ratings yet
Mlp Pytorch Softmax Crossentr
20 pages
Kirkstall Presentation FINAL 71216 (Small)
No ratings yet
Kirkstall Presentation FINAL 71216 (Small)
28 pages
DL_merged
No ratings yet
DL_merged
19 pages
Lesson 3 - Practical Research
No ratings yet
Lesson 3 - Practical Research
17 pages
Python Deep Learning Lab Programs (2)
No ratings yet
Python Deep Learning Lab Programs (2)
35 pages
m2 Writing Techniques Ppt
No ratings yet
m2 Writing Techniques Ppt
16 pages
Personality psychology assignment 2
No ratings yet
Personality psychology assignment 2
3 pages
This Python Script Implements a Single
No ratings yet
This Python Script Implements a Single
6 pages
Crash Course On Tensorflow!: Vincent Lepetit!
No ratings yet
Crash Course On Tensorflow!: Vincent Lepetit!
63 pages
K-0020 (General Paper I) (W)
No ratings yet
K-0020 (General Paper I) (W)
16 pages
Experiment No 13 Final
No ratings yet
Experiment No 13 Final
9 pages
vertopal.com_HandWritten
No ratings yet
vertopal.com_HandWritten
13 pages
Skill7
No ratings yet
Skill7
11 pages
Hyper Parameteres: Dataset
No ratings yet
Hyper Parameteres: Dataset
13 pages
NN From Scratch PDF 1735495327
No ratings yet
NN From Scratch PDF 1735495327
19 pages
Autoencoder - MPL - Basic - Ipynb - Colaboratory PDF
No ratings yet
Autoencoder - MPL - Basic - Ipynb - Colaboratory PDF
21 pages
KickMaker Manual
No ratings yet
KickMaker Manual
7 pages
Assignment 3 DS5620
No ratings yet
Assignment 3 DS5620
11 pages
2020ijeecs v20 I2 pp736-743
No ratings yet
2020ijeecs v20 I2 pp736-743
8 pages
Document 2
No ratings yet
Document 2
8 pages
mlp-fromscratch__sigmoid-mse
No ratings yet
mlp-fromscratch__sigmoid-mse
13 pages
CIFAR_10_ Dataset_Using_CNN_Aniiiii_HTML
No ratings yet
CIFAR_10_ Dataset_Using_CNN_Aniiiii_HTML
8 pages
Week_7_-mnist-mlp
No ratings yet
Week_7_-mnist-mlp
7 pages
control kelvin nsitu
No ratings yet
control kelvin nsitu
7 pages
EncoderDecoderSeq2Seq DeepLSTM
No ratings yet
EncoderDecoderSeq2Seq DeepLSTM
7 pages
CV Lab Final AwaisKhan EE A
No ratings yet
CV Lab Final AwaisKhan EE A
7 pages
10.4.1 Lesson 2: NYS Common Core ELA & Literacy Curriculum
No ratings yet
10.4.1 Lesson 2: NYS Common Core ELA & Literacy Curriculum
10 pages
vertopal.com_HW4ML project starter code template
No ratings yet
vertopal.com_HW4ML project starter code template
6 pages
Predicting_Stock_Prices_with_Deep_Neural_Networks__1723509410
No ratings yet
Predicting_Stock_Prices_with_Deep_Neural_Networks__1723509410
5 pages
Code File
No ratings yet
Code File
6 pages
PyTorch Cheat Sheet & Quick Reference
No ratings yet
PyTorch Cheat Sheet & Quick Reference
6 pages
(23mca32) Practical 1 & Practical 2
No ratings yet
(23mca32) Practical 1 & Practical 2
9 pages
Fibercablelength Understanding
No ratings yet
Fibercablelength Understanding
5 pages
Raw Nitex
No ratings yet
Raw Nitex
5 pages
keras
No ratings yet
keras
4 pages
6081 Time
No ratings yet
6081 Time
5 pages
Học Tốt Tiếng Anh Lớp 10 Global Success 13. Unit 4 - 45 Minute Test
No ratings yet
Học Tốt Tiếng Anh Lớp 10 Global Success 13. Unit 4 - 45 Minute Test
4 pages
Code
No ratings yet
Code
4 pages
Final Code
No ratings yet
Final Code
16 pages
Ilovepdf Merged
No ratings yet
Ilovepdf Merged
10 pages
Optimization Techniques-1
No ratings yet
Optimization Techniques-1
3 pages
Softmax Regression Mnist
No ratings yet
Softmax Regression Mnist
3 pages
Deep Learning Practical
No ratings yet
Deep Learning Practical
12 pages
val
No ratings yet
val
9 pages
RNN - Urban - Jupyter Notebook
No ratings yet
RNN - Urban - Jupyter Notebook
3 pages
British Culture
No ratings yet
British Culture
3 pages
vit32_gptMD
No ratings yet
vit32_gptMD
6 pages
Diass Quarter 2 Week 1-2
No ratings yet
Diass Quarter 2 Week 1-2
7 pages
QFD quality function deployment & use case
No ratings yet
QFD quality function deployment & use case
1 page
Mnist
No ratings yet
Mnist
3 pages
ID6001_Homework_2b57bb1d39ec7c53700fa31dc04520dc
No ratings yet
ID6001_Homework_2b57bb1d39ec7c53700fa31dc04520dc
2 pages
Deep Learning With PyTorch 1
No ratings yet
Deep Learning With PyTorch 1
1 page
Icnoc-2022 Poster Presentation Template
No ratings yet
Icnoc-2022 Poster Presentation Template
1 page
Marc Edzel G. Danao PCAS-10-401A AST15 - Astronomy Research I
No ratings yet
Marc Edzel G. Danao PCAS-10-401A AST15 - Astronomy Research I
1 page
EPS BS UserManual
No ratings yet
EPS BS UserManual
8 pages
Discussion Physical Pharmacy Lab Report
No ratings yet
Discussion Physical Pharmacy Lab Report
1 page
ccs355 Lab Manual
No ratings yet
ccs355 Lab Manual
24 pages
Reasoned Document IRS T-45
No ratings yet
Reasoned Document IRS T-45
20 pages
Pathways 4 Listening & Speaking Unit 4 Test
100% (2)
Pathways 4 Listening & Speaking Unit 4 Test
8 pages
Shear Connection Beam To Beam by Fin Plate by Is Code
100% (1)
Shear Connection Beam To Beam by Fin Plate by Is Code
37 pages
Offer Letter 25006543
No ratings yet
Offer Letter 25006543
2 pages
Prince2 Flash Cards
No ratings yet
Prince2 Flash Cards
2 pages
How to a Developers Guide to 4k: Developer edition, #3
From Everand
How to a Developers Guide to 4k: Developer edition, #3
Xinc Cyberwizard
No ratings yet
Computer Engineering Laboratory Solution Primer
From Everand
Computer Engineering Laboratory Solution Primer
Karan Bhandari
No ratings yet

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.

credit_card_clustering_autoencoder

Uploaded by

credit_card_clustering_autoencoder

Uploaded by

{

You might also like

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.