# Multilayer Perceptron from Scratch (Sigmoid Activation, MSE Loss)
%load_ext watermark
%watermark -a 'Sebastian Raschka' -v -p torch
# watermark output: torch 1.9.0a0+d819a21
Imports
import matplotlib.pyplot as plt
import pandas as pd
import torch
%matplotlib inline
import time
import numpy as np
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch
RANDOM_SEED = 1    # seed for reproducible weight init and loader shuffling
BATCH_SIZE = 100   # minibatch size used by both train and test loaders
NUM_EPOCHS = 50    # presumably consumed by the (missing) training driver -- confirm
##########################
### MNIST DATASET
##########################
# NOTE(review): `train_dataset` was referenced by `train_loader` below but
# never defined in this export -- reconstructed symmetrically to
# `test_dataset` (training split, downloaded on first use).
train_dataset = datasets.MNIST(root='data',
                               train=True,
                               transform=transforms.ToTensor(),
                               download=True)

test_dataset = datasets.MNIST(root='data',
                              train=False,
                              transform=transforms.ToTensor())

# Shuffle only the training data; evaluation order is kept deterministic.
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=BATCH_SIZE,
                          shuffle=True)

test_loader = DataLoader(dataset=test_dataset,
                         batch_size=BATCH_SIZE,
                         shuffle=False)
Model Implementation
##########################
### MODEL
##########################
class MultilayerPerceptron():
    """One-hidden-layer MLP with sigmoid activations and manually derived
    gradients for the mean-squared-error loss
    loss = sum((a_2 - onehot(y))**2) / n_examples.

    NOTE(review): the `def` lines of __init__/forward/backward were lost in
    this export; the methods below are reconstructed from the surviving
    fragments and checked against torch.autograd.
    """

    def __init__(self, num_features, num_hidden, num_classes):
        self.num_classes = num_classes

        # hidden 1: weights drawn from N(0, 0.1), biases start at zero
        self.weight_1 = torch.zeros(num_hidden, num_features,
                                    dtype=torch.float).normal_(0.0, 0.1)
        self.bias_1 = torch.zeros(num_hidden, dtype=torch.float)

        # output layer
        self.weight_o = torch.zeros(self.num_classes, num_hidden,
                                    dtype=torch.float).normal_(0.0, 0.1)
        self.bias_o = torch.zeros(self.num_classes, dtype=torch.float)

    def forward(self, x):
        """Forward pass.

        x: [n_examples, n_features] float tensor.
        Returns (a_1, a_2): hidden activations [n_examples, n_hidden] and
        output activations [n_examples, n_classes] (both sigmoid-squashed).
        """
        # hidden 1
        # input dim: [n_examples, n_features] dot [n_hidden, n_features].T
        # output dim: [n_examples, n_hidden]
        z_1 = torch.mm(x, self.weight_1.t()) + self.bias_1
        a_1 = torch.sigmoid(z_1)

        # output
        # input dim: [n_classes, n_hidden] dot [n_hidden, n_examples] .T
        # output dim: [n_examples, n_classes]
        z_2 = torch.mm(a_1, self.weight_o.t()) + self.bias_o
        a_2 = torch.sigmoid(z_2)
        return a_1, a_2

    def backward(self, x, a_1, a_2, y):
        """Manual backprop; returns (dW_out, db_out, dW_1, db_1)."""
        #########################
        ### Output layer weights
        #########################

        # onehot encoding
        y_onehot = torch.FloatTensor(y.size(0), self.num_classes)
        y_onehot.zero_()
        y_onehot.scatter_(1, y.view(-1, 1).long(), 1)

        # Part 1: dLoss/dOutWeights
        ## = dLoss/dOutAct * dOutAct/dOutNet * dOutNet/dOutWeight
        ## where DeltaOut = dLoss/dOutAct * dOutAct/dOutNet
        ##     for convenient re-use

        # [n_examples, n_classes] -- derivative of sum-of-squares / n
        dloss__da2 = 2. * (a_2 - y_onehot) / y.size(0)
        # [n_examples, n_classes] -- sigmoid derivative
        da2__dz2 = a_2 * (1. - a_2)
        # "delta" placeholder, re-used in Part 2
        delta_out = dloss__da2 * da2__dz2

        # [n_examples, n_hidden]
        dz2__dw_out = a_1
        # [n_classes, n_examples] dot [n_examples, n_hidden]
        # -> [n_classes, n_hidden]
        dloss__dw_out = torch.mm(delta_out.t(), dz2__dw_out)
        dloss__db_out = torch.sum(delta_out, dim=0)

        #################################
        # Part 2: dLoss/dHiddenWeights
        ## = DeltaOut * dOutNet/dHiddenAct * dHiddenAct/dHiddenNet * dHiddenNet/dWeight

        # [n_classes, n_hidden]
        dz2__a1 = self.weight_o
        # [n_examples, n_classes] dot [n_classes, n_hidden]
        # -> [n_examples, n_hidden]
        dloss__a1 = torch.mm(delta_out, dz2__a1)
        # [n_examples, n_hidden] -- sigmoid derivative
        da1__dz1 = a_1 * (1. - a_1)
        # [n_examples, n_features]
        dz1__dw1 = x
        # [n_hidden, n_examples] dot [n_examples, n_features]
        # -> [n_hidden, n_features]
        dloss__dw1 = torch.mm((dloss__a1 * da1__dz1).t(), dz1__dw1)
        dloss__db1 = torch.sum(dloss__a1 * da1__dz1, dim=0)

        return dloss__dw_out, dloss__db_out, dloss__dw1, dloss__db1
Training
####################################################
##### Training and evaluation wrappers
###################################################
# NOTE(review): this export lost the body of the training loop (forward /
# backward calls, weight updates, cost bookkeeping) -- the lines below are
# surviving fragments and are not runnable as-is; recover from the original
# notebook before use.
minibatch_cost = []  # per-minibatch MSE, appended inside the (missing) loop body
epoch_cost = []      # per-epoch MSE
for e in range(num_epochs):
####################################################
##### Training
###################################################
# NOTE(review): `num_epochs` is undefined at this point; presumably
# NUM_EPOCHS was passed into a train() wrapper originally -- confirm.
torch.manual_seed(RANDOM_SEED)  # reproducible random weight initialization
# 784 input pixels (28x28), 50 hidden units, 10 digit classes
model = MultilayerPerceptron(num_features=28*28,
num_hidden=50,
num_classes=10)
Evaluation
# Learning curves: per-minibatch and per-epoch mean-squared-error cost.
for costs, unit in ((minibatch_cost, 'Minibatch'), (epoch_cost, 'Epoch')):
    plt.plot(range(len(costs)), costs)
    plt.ylabel('Mean Squared Error')
    plt.xlabel(unit)
    plt.show()
Visual Inspection
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
# Render the first four images of the current batch as 28x28 grayscale digits.
# NOTE(review): `features` presumably comes from iterating a loader in a
# missing cell -- confirm.
fig, ax = plt.subplots(1, 4)
for idx, axis in enumerate(ax):
    axis.imshow(features[idx].view(28, 28), cmap=matplotlib.cm.binary)
plt.show()