From 2171e27f7a710ef422ec0c136bbac85ebab66a2b Mon Sep 17 00:00:00 2001
From: Leo552
Date: Mon, 27 Jul 2020 14:02:41 +0100
Subject: [PATCH] Added RMS prop and nesterov momentum

---
 numba_neural_network.py | 75 ++++++++++++++++++++++++++++++++++++-----
 1 file changed, 67 insertions(+), 8 deletions(-)

diff --git a/numba_neural_network.py b/numba_neural_network.py
index 0e7861d..e26dba8 100644
--- a/numba_neural_network.py
+++ b/numba_neural_network.py
@@ -8,6 +8,7 @@ import warnings
 
 warnings.simplefilter('ignore', category=NumbaTypeSafetyWarning)
 
+DELTA = 1e-7
 
 spec = [
     ("layer_sizes", types.ListType(types.int64)),
@@ -16,19 +17,30 @@
     ("biases", types.ListType(types.float64[:, ::1])),
     ("layer_outputs", types.ListType(types.float64[:, ::1])),
     ("learning_rate", types.float64),
+    ("rho", types.float64),
+    ("r_accum", types.ListType(types.float64[:, ::1])),
+    ("velocity", types.ListType(types.float64[:, ::1])),
+    ("alpha", types.float64),
+    ("theta_hat", types.ListType(types.float64[:, ::1])),
 ]
 
 @jitclass(spec)
 class NeuralNetwork:
-    def __init__(self, layer_sizes, layer_activations, weights, biases, layer_outputs, learning_rate):
+    def __init__(self, layer_sizes, layer_activations, weights, biases, layer_outputs,
+                 learning_rate, rho, r_accum, velocity, alpha, theta_hat):
         self.layer_sizes = layer_sizes
         self.layer_activations = layer_activations
         self.weights = weights
         self.biases = biases
         self.layer_outputs = layer_outputs
         self.learning_rate = learning_rate
-
-
-def make_neural_network(layer_sizes, layer_activations, learning_rate=0.05, low=-2, high=2):
+        self.rho = rho
+        self.r_accum = r_accum
+        self.velocity = velocity
+        self.alpha = alpha
+        self.theta_hat = theta_hat
+
+# Changed the default learning rate from 0.05 to 0.001
+def make_neural_network(layer_sizes, layer_activations, learning_rate=0.001, low=-2, high=2, rho=0.9, alpha=0.9):
     for size in layer_sizes:
         assert size > 0
@@ -63,7 +75,26 @@ def make_neural_network(layer_sizes, layer_activations, learning_rate=0.05, low=
     # print(typeof(typed_layer_outputs))
     typed_learning_rate = learning_rate
 
-    return NeuralNetwork(typed_layer_sizes, typed_layer_activations, typed_weights, typed_biases, typed_layer_outputs, typed_learning_rate)
+    typed_rho = rho
+
+    # Initialize the RMSProp accumulation variable r with zero matrices (one per weight matrix)
+    typed_r_accum = typed.List()
+    for i in range(1, len(layer_sizes)):
+        typed_r_accum.append(np.zeros((layer_sizes[i-1], layer_sizes[i])))
+
+    # Initialize the velocity variable with zero matrices of the same shapes
+    typed_velocity = typed.List()
+    for i in range(1, len(layer_sizes)):
+        typed_velocity.append(np.zeros((layer_sizes[i-1], layer_sizes[i])))
+
+    typed_alpha = alpha
+
+    typed_theta_hat = typed_weights
+
+    return NeuralNetwork(typed_layer_sizes, typed_layer_activations, typed_weights,
+                         typed_biases, typed_layer_outputs, typed_learning_rate,
+                         typed_rho, typed_r_accum, typed_velocity, typed_alpha,
+                         typed_theta_hat)
 
 
 @njit
@@ -87,18 +118,45 @@ def train_single(input_data, desired_output_data, nn):
     assert len(input_data) == nn.layer_sizes[0]
     assert len(desired_output_data) == nn.layer_sizes[-1]
+    length_weights = len(nn.weights)
+
+    # Nesterov momentum - apply the look-ahead step to the parameters first
+    for i in prange(length_weights):
+        nn.weights[i] += nn.alpha * nn.velocity[i]
+
+    # Feed forward pass
     feed_forward_layers(input_data, nn)
 
     error = (desired_output_data - nn.layer_outputs[-1]) * nn.layer_activations[-1](nn.layer_outputs[-1], True)
-    nn.weights[-1] += nn.learning_rate * nn.layer_outputs[-2] * error.T
+
+    # Calculate the gradient
+    g = nn.layer_outputs[-2] * error.T
+
+    # Update the accumulation variable
+    nn.r_accum[-1] = nn.rho * nn.r_accum[-1] + (1 - nn.rho) * g ** 2
+
+    # Compute the velocity update
+    nn.velocity[-1] = nn.alpha * nn.velocity[-1] + nn.learning_rate/np.sqrt(DELTA + nn.r_accum[-1]) * g
+
+    nn.weights[-1] = nn.theta_hat[-1] + nn.velocity[-1]
     nn.biases[-1] += nn.learning_rate * error
-    length_weights = len(nn.weights)
     for i in prange(1, length_weights):
         i = length_weights - i - 1
+
         error = np.dot(nn.weights[i+1], error) * nn.layer_activations[i](nn.layer_outputs[i+1], True)
-        nn.weights[i] += nn.learning_rate * nn.layer_outputs[i] * error.T
+        g = nn.layer_outputs[i] * error.T
+
+        # Update the accumulation variable
+        nn.r_accum[i] = nn.rho * nn.r_accum[i] + (1 - nn.rho) * g ** 2
+
+        # Compute the velocity update
+        nn.velocity[i] = nn.alpha * nn.velocity[i] + nn.learning_rate/np.sqrt(DELTA + nn.r_accum[i]) * g
+
+        nn.weights[i] = nn.theta_hat[i] + nn.velocity[i]
         nn.biases[i] += nn.learning_rate * error
+
+    nn.theta_hat = nn.weights
     return nn
@@ -129,6 +187,7 @@ def train_auto(train_input_data, train_desired_output_data, validate_input_data,
     current_mse = 0.0
     epochs = 0
     while(current_mse < previous_mse):
+        # print(nn.weights, '\n')
         epochs += 1
         previous_mse = calculate_MSE(validate_input_data, validate_output_data, nn)
         for i in range(len(train_input_data)):
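
Editor's note: the optimizer this patch wires in is RMSProp scaling combined with Nesterov momentum. The parameters are first nudged by the momentum term (the look-ahead step), the gradient is computed at that interim point, a running average of squared gradients rescales the step per parameter, and the resulting velocity is applied to the weights. The standalone NumPy sketch below restates that update rule for reference only; it is not part of the patch. The function and argument names (rmsprop_nesterov_step, grad_loss, theta, v, r, delta) are illustrative, the default hyperparameters simply mirror the ones added above (learning_rate=0.001, rho=0.9, alpha=0.9, DELTA=1e-7), and the sketch uses the conventional minus sign for a loss gradient, whereas the patch adds its term because its error (desired - output) already points in the descent direction.

import numpy as np

def rmsprop_nesterov_step(theta, v, r, grad_loss,
                          learning_rate=0.001, rho=0.9, alpha=0.9, delta=1e-7):
    # Illustrative sketch only; names and defaults mirror the patch but are not its API.
    # theta: parameters, v: velocity, r: accumulated squared gradients (all same shape).
    # grad_loss: callable returning dL/dtheta at a given point.

    # Nesterov look-ahead: evaluate the gradient at the interim point theta + alpha * v
    g = grad_loss(theta + alpha * v)

    # RMSProp accumulation of squared gradients
    r = rho * r + (1 - rho) * g ** 2

    # Velocity update, rescaled per parameter by the root of the accumulated squares
    v = alpha * v - learning_rate / np.sqrt(delta + r) * g

    # Apply the update and return the new state
    return theta + v, v, r

# Toy usage: drive f(x) = x**2 toward its minimum from a starting point of 3
theta, v, r = np.array([3.0]), np.zeros(1), np.zeros(1)
for _ in range(5000):
    theta, v, r = rmsprop_nesterov_step(theta, v, r, grad_loss=lambda x: 2 * x)

The patch's theta_hat list appears to play the role of the un-nudged parameters in the final "weights = theta_hat + velocity" step; the sketch avoids that bookkeeping by computing the look-ahead point explicitly instead of mutating the weights before the forward pass.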
