From be28b8d1d05f523785d98e8ac23b9a03743a679f Mon Sep 17 00:00:00 2001
From: Noumanmufc1
Date: Thu, 20 Sep 2018 03:15:35 +0500
Subject: [PATCH 1/2] added relu activation

---
 learning.py       |  31 ++--
 neural_nets.ipynb | 351 ++++++++++++++++++++++++++++++++++++++++++++--
 utils.py          |  10 +-
 3 files changed, 367 insertions(+), 25 deletions(-)

diff --git a/learning.py b/learning.py
index 20e47d05b..f88b0498d 100644
--- a/learning.py
+++ b/learning.py
@@ -4,7 +4,7 @@
     removeall, unique, product, mode, argmax, argmax_random_tie, isclose, gaussian,
     dotproduct, vector_add, scalar_vector_product, weighted_sample_with_replacement,
     weighted_sampler, num_or_str, normalize, clip, sigmoid, print_table,
-    open_data, sigmoid_derivative, probability, norm, matrix_multiplication
+    open_data, sigmoid_derivative, probability, norm, matrix_multiplication, relu, relu_derivative
 )
 
 import copy
@@ -652,7 +652,7 @@ def predict(example):
 
 
 def NeuralNetLearner(dataset, hidden_layer_sizes=None,
-                     learning_rate=0.01, epochs=100):
+                     learning_rate=0.01, epochs=100, activation=sigmoid):
     """Layered feed-forward network.
     hidden_layer_sizes: List of number of hidden units per hidden layer
     learning_rate: Learning rate of gradient descent
@@ -664,9 +664,9 @@ def NeuralNetLearner(dataset, hidden_layer_sizes=None,
     o_units = len(dataset.values[dataset.target])
 
     # construct a network
-    raw_net = network(i_units, hidden_layer_sizes, o_units)
+    raw_net = network(i_units, hidden_layer_sizes, o_units, activation)
     learned_net = BackPropagationLearner(dataset, raw_net,
-                                         learning_rate, epochs)
+                                         learning_rate, epochs, activation)
 
     def predict(example):
         # Input nodes
@@ -695,7 +695,7 @@ def random_weights(min_value, max_value, num_weights):
     return [random.uniform(min_value, max_value) for _ in range(num_weights)]
 
 
-def BackPropagationLearner(dataset, net, learning_rate, epochs):
+def BackPropagationLearner(dataset, net, learning_rate, epochs, activation=sigmoid):
     """[Figure 18.23] The back-propagation algorithm for multilayer networks"""
     # Initialise weights
     for layer in net:
@@ -743,8 +743,11 @@ def BackPropagationLearner(dataset, net, learning_rate, epochs):
             # Error for the MSE cost function
             err = [t_val[i] - o_nodes[i].value for i in range(o_units)]
 
-            # The activation function used is the sigmoid function
-            delta[-1] = [sigmoid_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]
+            # The activation function used is either sigmoid or relu
+            if activation == sigmoid:
+                delta[-1] = [sigmoid_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]
+            else:
+                delta[-1] = [relu_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]
 
             # Backward pass
             h_layers = n_layers - 2
@@ -756,7 +759,11 @@ def BackPropagationLearner(dataset, net, learning_rate, epochs):
                 # weights from each ith layer node to each i + 1th layer node
                 w = [[node.weights[k] for node in nx_layer] for k in range(h_units)]
 
-                delta[i] = [sigmoid_derivative(layer[j].value) * dotproduct(w[j], delta[i+1])
-                            for j in range(h_units)]
+                if activation == sigmoid:
+                    delta[i] = [sigmoid_derivative(layer[j].value) * dotproduct(w[j], delta[i+1])
+                                for j in range(h_units)]
+                else:
+                    delta[i] = [relu_derivative(layer[j].value) * dotproduct(w[j], delta[i+1])
+                                for j in range(h_units)]
 
             # Update weights
@@ -800,14 +807,14 @@ class NNUnit:
     weights: Weights to incoming connections
     """
 
-    def __init__(self, weights=None, inputs=None):
+    def __init__(self, activation, weights=None, inputs=None):
         self.weights = weights or []
         self.inputs = inputs or []
         self.value = None
-        self.activation = sigmoid
+        self.activation = activation
 
 
-def network(input_units, hidden_layer_sizes, output_units):
+def network(input_units, hidden_layer_sizes, output_units, activation):
     """Create Directed Acyclic Network of given number layers.
     hidden_layers_sizes : List number of neuron units in
         each hidden layer excluding input and output
         layers
@@ -818,7 +825,7 @@
     else:
         layers_sizes = [input_units] + [output_units]
 
-    net = [[NNUnit() for n in range(size)]
+    net = [[NNUnit(activation) for n in range(size)]
            for size in layers_sizes]
     n_layers = len(net)
 
diff --git a/neural_nets.ipynb b/neural_nets.ipynb
index ecdeedcde..fe632c27f 100644
--- a/neural_nets.ipynb
+++ b/neural_nets.ipynb
@@ -14,9 +14,7 @@
   {
    "cell_type": "code",
    "execution_count": 1,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "from learning import *\n",
@@ -65,9 +63,148 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div class=\"highlight\"><pre>\n",
+       "def NeuralNetLearner(dataset, hidden_layer_sizes=None,\n",
+       "                     learning_rate=0.01, epochs=100, activation = sigmoid):\n",
+       "    """Layered feed-forward network.\n",
+       "    hidden_layer_sizes: List of number of hidden units per hidden layer\n",
+       "    learning_rate: Learning rate of gradient descent\n",
+       "    epochs: Number of passes over the dataset\n",
+       "    """\n",
+       "\n",
+       "    hidden_layer_sizes = hidden_layer_sizes or [3]  # default value\n",
+       "    i_units = len(dataset.inputs)\n",
+       "    o_units = len(dataset.values[dataset.target])\n",
+       "\n",
+       "    # construct a network\n",
+       "    raw_net = network(i_units, hidden_layer_sizes, o_units, activation)\n",
+       "    learned_net = BackPropagationLearner(dataset, raw_net,\n",
+       "                                         learning_rate, epochs, activation)\n",
+       "\n",
+       "    def predict(example):\n",
+       "        # Input nodes\n",
+       "        i_nodes = learned_net[0]\n",
+       "\n",
+       "        # Activate input layer\n",
+       "        for v, n in zip(example, i_nodes):\n",
+       "            n.value = v\n",
+       "\n",
+       "        # Forward pass\n",
+       "        for layer in learned_net[1:]:\n",
+       "            for node in layer:\n",
+       "                inc = [n.value for n in node.inputs]\n",
+       "                in_val = dotproduct(inc, node.weights)\n",
+       "                node.value = node.activation(in_val)\n",
+       "\n",
+       "        # Hypothesis\n",
+       "        o_nodes = learned_net[-1]\n",
+       "        prediction = find_max_node(o_nodes)\n",
+       "        return prediction\n",
+       "\n",
+       "    return predict\n",
+       "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "psource(NeuralNetLearner)" ] @@ -169,21 +306,204 @@ "source": [ "### Implementation\n", "\n", - "First, we feed-forward the examples in our neural network. After that, we calculate the gradient for each layers' weights by using the chain rule. Once that is complete, we update all the weights using gradient descent. After running these for a given number of epochs, the function returns the trained Neural Network." + "First, we feed-forward the examples in our neural network. After that, we calculate the gradient for each layers' weights by using the chain rule. Once that is complete, we update all the weights using gradient descent. After running these for a given number of epochs, the function returns the trained Neural Network." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
def BackPropagationLearner(dataset, net, learning_rate, epochs, activation=sigmoid):\n",
+       "    """[Figure 18.23] The back-propagation algorithm for multilayer networks"""\n",
+       "    # Initialise weights\n",
+       "    for layer in net:\n",
+       "        for node in layer:\n",
+       "            node.weights = random_weights(min_value=-0.5, max_value=0.5,\n",
+       "                                          num_weights=len(node.weights))\n",
+       "\n",
+       "    examples = dataset.examples\n",
+       "    '''\n",
+       "    As of now dataset.target gives an int instead of list,\n",
+       "    Changing dataset class will have effect on all the learners.\n",
+       "    Will be taken care of later.\n",
+       "    '''\n",
+       "    o_nodes = net[-1]\n",
+       "    i_nodes = net[0]\n",
+       "    o_units = len(o_nodes)\n",
+       "    idx_t = dataset.target\n",
+       "    idx_i = dataset.inputs\n",
+       "    n_layers = len(net)\n",
+       "\n",
+       "    inputs, targets = init_examples(examples, idx_i, idx_t, o_units)\n",
+       "\n",
+       "    for epoch in range(epochs):\n",
+       "        # Iterate over each example\n",
+       "        for e in range(len(examples)):\n",
+       "            i_val = inputs[e]\n",
+       "            t_val = targets[e]\n",
+       "\n",
+       "            # Activate input layer\n",
+       "            for v, n in zip(i_val, i_nodes):\n",
+       "                n.value = v\n",
+       "\n",
+       "            # Forward pass\n",
+       "            for layer in net[1:]:\n",
+       "                for node in layer:\n",
+       "                    inc = [n.value for n in node.inputs]\n",
+       "                    in_val = dotproduct(inc, node.weights)\n",
+       "                    node.value = node.activation(in_val)\n",
+       "\n",
+       "            # Initialize delta\n",
+       "            delta = [[] for _ in range(n_layers)]\n",
+       "\n",
+       "            # Compute outer layer delta\n",
+       "\n",
+       "            # Error for the MSE cost function\n",
+       "            err = [t_val[i] - o_nodes[i].value for i in range(o_units)]\n",
+       "\n",
+       "            # The activation function used is relu or sigmoid function\n",
+       "            if node.activation == sigmoid:\n",
+       "                delta[-1] = [sigmoid_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]\n",
+       "            else:\n",
+       "                delta[-1] = [relu_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]\n",
+       "\n",
+       "            # Backward pass\n",
+       "            h_layers = n_layers - 2\n",
+       "            for i in range(h_layers, 0, -1):\n",
+       "                layer = net[i]\n",
+       "                h_units = len(layer)\n",
+       "                nx_layer = net[i+1]\n",
+       "\n",
+       "                # weights from each ith layer node to each i + 1th layer node\n",
+       "                w = [[node.weights[k] for node in nx_layer] for k in range(h_units)]\n",
+       "\n",
+       "                if activation == sigmoid:\n",
+       "                    delta[i] = [sigmoid_derivative(layer[j].value) * dotproduct(w[j], delta[i+1])\n",
+       "                            for j in range(h_units)]\n",
+       "                else:\n",
+       "                    delta[i] = [relu_derivative(layer[j].value) * dotproduct(w[j], delta[i+1])\n",
+       "                            for j in range(h_units)]\n",
+       "\n",
+       "            #  Update weights\n",
+       "            for i in range(1, n_layers):\n",
+       "                layer = net[i]\n",
+       "                inc = [node.value for node in net[i-1]]\n",
+       "                units = len(layer)\n",
+       "                for j in range(units):\n",
+       "                    layer[j].weights = vector_add(layer[j].weights,\n",
+       "                                                  scalar_vector_product(\n",
+       "                                                  learning_rate * delta[i][j], inc))\n",
+       "\n",
+       "    return net\n",
+       "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "psource(BackPropagationLearner)" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -210,6 +530,13 @@ "\n", "To increase accuracy, you can (most of the time) add more layers and nodes. Unfortunately, increasing the number of layers or nodes also increases the computation cost and might result in overfitting." ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -221,14 +548,14 @@ "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.14" + "pygments_lexer": "ipython3", + "version": "3.5.2" } }, "nbformat": 4, diff --git a/utils.py b/utils.py index 1ac0b13f7..5d91c88ef 100644 --- a/utils.py +++ b/utils.py @@ -273,7 +273,15 @@ def sigmoid(x): """Return activation value of x with sigmoid function""" return 1 / (1 + math.exp(-x)) - +def relu(x): + return max(0, x) + +def relu_derivative(value): + if value > 0: + return 1 + else: + return 0 + def step(x): """Return activation value of x with sign function""" return 1 if x >= 0 else 0 From 0ac4c212c292bafaf1f78619b7131899a94a2d4d Mon Sep 17 00:00:00 2001 From: Noumanmufc1 Date: Thu, 20 Sep 2018 03:25:13 +0500 Subject: [PATCH 2/2] added default parameters --- learning.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/learning.py b/learning.py index f88b0498d..399654073 100644 --- a/learning.py +++ b/learning.py @@ -807,14 +807,14 @@ class NNUnit: weights: Weights to incoming connections """ - def __init__(self, activation, weights=None, inputs=None): + def __init__(self, activation=sigmoid, weights=None, inputs=None): self.weights = weights or [] self.inputs = inputs or [] self.value = None self.activation = activation -def network(input_units, hidden_layer_sizes, output_units, activation): +def network(input_units, hidden_layer_sizes, output_units, activation=sigmoid): """Create Directed Acyclic Network of given number layers. hidden_layers_sizes : List number of neuron units in each hidden layer excluding input and output layers pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy
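And a sketch of how the new activation parameter would be exercised end to
end, assuming aima-python's DataSet class and its bundled iris dataset (the
hidden-layer size and example values are illustrative, not taken from the
patches):

    from learning import DataSet, NeuralNetLearner
    from utils import relu

    iris = DataSet(name='iris')

    # Train with relu units instead of the default sigmoid.
    nn_relu = NeuralNetLearner(iris, hidden_layer_sizes=[5],
                               learning_rate=0.01, epochs=100,
                               activation=relu)

    # The learner returns a predict function; the prediction is the index
    # of the most active output node.
    print(nn_relu([5.1, 3.5, 1.4, 0.2]))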