diff --git a/learning.py b/learning.py
index 20e47d05b..399654073 100644
--- a/learning.py
+++ b/learning.py
@@ -4,7 +4,7 @@
     removeall, unique, product, mode, argmax, argmax_random_tie, isclose, gaussian,
     dotproduct, vector_add, scalar_vector_product, weighted_sample_with_replacement,
     weighted_sampler, num_or_str, normalize, clip, sigmoid, print_table,
-    open_data, sigmoid_derivative, probability, norm, matrix_multiplication
+    open_data, sigmoid_derivative, probability, norm, matrix_multiplication, relu, relu_derivative
 )
 
 import copy
@@ -652,7 +652,7 @@ def predict(example):
 
 
 def NeuralNetLearner(dataset, hidden_layer_sizes=None,
-                     learning_rate=0.01, epochs=100):
+                     learning_rate=0.01, epochs=100, activation=sigmoid):
     """Layered feed-forward network.
     hidden_layer_sizes: List of number of hidden units per hidden layer
     learning_rate: Learning rate of gradient descent
@@ -664,9 +664,9 @@ def NeuralNetLearner(dataset, hidden_layer_sizes=None,
     o_units = len(dataset.values[dataset.target])
 
     # construct a network
-    raw_net = network(i_units, hidden_layer_sizes, o_units)
+    raw_net = network(i_units, hidden_layer_sizes, o_units, activation)
     learned_net = BackPropagationLearner(dataset, raw_net,
-                                         learning_rate, epochs)
+                                         learning_rate, epochs, activation)
 
     def predict(example):
         # Input nodes
@@ -695,7 +695,7 @@ def random_weights(min_value, max_value, num_weights):
     return [random.uniform(min_value, max_value) for _ in range(num_weights)]
 
 
-def BackPropagationLearner(dataset, net, learning_rate, epochs):
+def BackPropagationLearner(dataset, net, learning_rate, epochs, activation=sigmoid):
     """[Figure 18.23] The back-propagation algorithm for multilayer networks"""
     # Initialise weights
     for layer in net:
@@ -743,8 +743,11 @@ def BackPropagationLearner(dataset, net, learning_rate, epochs):
             # Error for the MSE cost function
             err = [t_val[i] - o_nodes[i].value for i in range(o_units)]
 
-            # The activation function used is the sigmoid function
-            delta[-1] = [sigmoid_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]
+            # The activation function used is either sigmoid or relu
+            if activation == sigmoid:
+                delta[-1] = [sigmoid_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]
+            else:
+                delta[-1] = [relu_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]
 
             # Backward pass
             h_layers = n_layers - 2
@@ -756,7 +759,11 @@ def BackPropagationLearner(dataset, net, learning_rate, epochs):
                 # weights from each ith layer node to each i + 1th layer node
                 w = [[node.weights[k] for node in nx_layer] for k in range(h_units)]
 
-                delta[i] = [sigmoid_derivative(layer[j].value) * dotproduct(w[j], delta[i+1])
-                            for j in range(h_units)]
+                if activation == sigmoid:
+                    delta[i] = [sigmoid_derivative(layer[j].value) * dotproduct(w[j], delta[i+1])
+                                for j in range(h_units)]
+                else:
+                    delta[i] = [relu_derivative(layer[j].value) * dotproduct(w[j], delta[i+1])
+                                for j in range(h_units)]
 
             # Update weights
@@ -800,14 +807,14 @@ class NNUnit:
         weights: Weights to incoming connections
     """
 
-    def __init__(self, weights=None, inputs=None):
+    def __init__(self, activation=sigmoid, weights=None, inputs=None):
         self.weights = weights or []
         self.inputs = inputs or []
         self.value = None
-        self.activation = sigmoid
+        self.activation = activation
 
 
-def network(input_units, hidden_layer_sizes, output_units):
+def network(input_units, hidden_layer_sizes, output_units, activation=sigmoid):
     """Create Directed Acyclic Network of given number layers.
     hidden_layers_sizes : List number of neuron units in each hidden layer
     excluding input and output layers
@@ -818,7 +825,7 @@ def network(input_units, hidden_layer_sizes, output_units):
     else:
         layers_sizes = [input_units] + [output_units]
 
-    net = [[NNUnit() for n in range(size)]
+    net = [[NNUnit(activation) for n in range(size)]
            for size in layers_sizes]
 
     n_layers = len(net)
diff --git a/neural_nets.ipynb b/neural_nets.ipynb
index ecdeedcde..fe632c27f 100644
--- a/neural_nets.ipynb
+++ b/neural_nets.ipynb
@@ -14,9 +14,7 @@
   {
    "cell_type": "code",
    "execution_count": 1,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "from learning import *\n",
@@ -65,9 +63,148 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       [pygments <style> block and HTML wrapper omitted; highlighted source shown as plain text]
+       "def NeuralNetLearner(dataset, hidden_layer_sizes=None,\n",
+       "                     learning_rate=0.01, epochs=100, activation = sigmoid):\n",
+       "    """Layered feed-forward network.\n",
+       "    hidden_layer_sizes: List of number of hidden units per hidden layer\n",
+       "    learning_rate: Learning rate of gradient descent\n",
+       "    epochs: Number of passes over the dataset\n",
+       "    """\n",
+       "\n",
+       "    hidden_layer_sizes = hidden_layer_sizes or [3]  # default value\n",
+       "    i_units = len(dataset.inputs)\n",
+       "    o_units = len(dataset.values[dataset.target])\n",
+       "\n",
+       "    # construct a network\n",
+       "    raw_net = network(i_units, hidden_layer_sizes, o_units, activation)\n",
+       "    learned_net = BackPropagationLearner(dataset, raw_net,\n",
+       "                                         learning_rate, epochs, activation)\n",
+       "\n",
+       "    def predict(example):\n",
+       "        # Input nodes\n",
+       "        i_nodes = learned_net[0]\n",
+       "\n",
+       "        # Activate input layer\n",
+       "        for v, n in zip(example, i_nodes):\n",
+       "            n.value = v\n",
+       "\n",
+       "        # Forward pass\n",
+       "        for layer in learned_net[1:]:\n",
+       "            for node in layer:\n",
+       "                inc = [n.value for n in node.inputs]\n",
+       "                in_val = dotproduct(inc, node.weights)\n",
+       "                node.value = node.activation(in_val)\n",
+       "\n",
+       "        # Hypothesis\n",
+       "        o_nodes = learned_net[-1]\n",
+       "        prediction = find_max_node(o_nodes)\n",
+       "        return prediction\n",
+       "\n",
+       "    return predict\n",
+       "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "psource(NeuralNetLearner)" ] @@ -169,21 +306,204 @@ "source": [ "### Implementation\n", "\n", - "First, we feed-forward the examples in our neural network. After that, we calculate the gradient for each layers' weights by using the chain rule. Once that is complete, we update all the weights using gradient descent. After running these for a given number of epochs, the function returns the trained Neural Network." + "First, we feed-forward the examples in our neural network. After that, we calculate the gradient for each layers' weights by using the chain rule. Once that is complete, we update all the weights using gradient descent. After running these for a given number of epochs, the function returns the trained Neural Network." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
def BackPropagationLearner(dataset, net, learning_rate, epochs, activation=sigmoid):\n",
+       "    """[Figure 18.23] The back-propagation algorithm for multilayer networks"""\n",
+       "    # Initialise weights\n",
+       "    for layer in net:\n",
+       "        for node in layer:\n",
+       "            node.weights = random_weights(min_value=-0.5, max_value=0.5,\n",
+       "                                          num_weights=len(node.weights))\n",
+       "\n",
+       "    examples = dataset.examples\n",
+       "    '''\n",
+       "    As of now dataset.target gives an int instead of list,\n",
+       "    Changing dataset class will have effect on all the learners.\n",
+       "    Will be taken care of later.\n",
+       "    '''\n",
+       "    o_nodes = net[-1]\n",
+       "    i_nodes = net[0]\n",
+       "    o_units = len(o_nodes)\n",
+       "    idx_t = dataset.target\n",
+       "    idx_i = dataset.inputs\n",
+       "    n_layers = len(net)\n",
+       "\n",
+       "    inputs, targets = init_examples(examples, idx_i, idx_t, o_units)\n",
+       "\n",
+       "    for epoch in range(epochs):\n",
+       "        # Iterate over each example\n",
+       "        for e in range(len(examples)):\n",
+       "            i_val = inputs[e]\n",
+       "            t_val = targets[e]\n",
+       "\n",
+       "            # Activate input layer\n",
+       "            for v, n in zip(i_val, i_nodes):\n",
+       "                n.value = v\n",
+       "\n",
+       "            # Forward pass\n",
+       "            for layer in net[1:]:\n",
+       "                for node in layer:\n",
+       "                    inc = [n.value for n in node.inputs]\n",
+       "                    in_val = dotproduct(inc, node.weights)\n",
+       "                    node.value = node.activation(in_val)\n",
+       "\n",
+       "            # Initialize delta\n",
+       "            delta = [[] for _ in range(n_layers)]\n",
+       "\n",
+       "            # Compute outer layer delta\n",
+       "\n",
+       "            # Error for the MSE cost function\n",
+       "            err = [t_val[i] - o_nodes[i].value for i in range(o_units)]\n",
+       "\n",
+       "            # The activation function used is relu or sigmoid function\n",
+       "            if node.activation == sigmoid:\n",
+       "                delta[-1] = [sigmoid_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]\n",
+       "            else:\n",
+       "                delta[-1] = [relu_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]\n",
+       "\n",
+       "            # Backward pass\n",
+       "            h_layers = n_layers - 2\n",
+       "            for i in range(h_layers, 0, -1):\n",
+       "                layer = net[i]\n",
+       "                h_units = len(layer)\n",
+       "                nx_layer = net[i+1]\n",
+       "\n",
+       "                # weights from each ith layer node to each i + 1th layer node\n",
+       "                w = [[node.weights[k] for node in nx_layer] for k in range(h_units)]\n",
+       "\n",
+       "                if activation == sigmoid:\n",
+       "                    delta[i] = [sigmoid_derivative(layer[j].value) * dotproduct(w[j], delta[i+1])\n",
+       "                            for j in range(h_units)]\n",
+       "                else:\n",
+       "                    delta[i] = [relu_derivative(layer[j].value) * dotproduct(w[j], delta[i+1])\n",
+       "                            for j in range(h_units)]\n",
+       "\n",
+       "            #  Update weights\n",
+       "            for i in range(1, n_layers):\n",
+       "                layer = net[i]\n",
+       "                inc = [node.value for node in net[i-1]]\n",
+       "                units = len(layer)\n",
+       "                for j in range(units):\n",
+       "                    layer[j].weights = vector_add(layer[j].weights,\n",
+       "                                                  scalar_vector_product(\n",
+       "                                                  learning_rate * delta[i][j], inc))\n",
+       "\n",
+       "    return net\n",
+       "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "psource(BackPropagationLearner)" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -210,6 +530,13 @@ "\n", "To increase accuracy, you can (most of the time) add more layers and nodes. Unfortunately, increasing the number of layers or nodes also increases the computation cost and might result in overfitting." ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -221,14 +548,14 @@ "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.14" + "pygments_lexer": "ipython3", + "version": "3.5.2" } }, "nbformat": 4, diff --git a/utils.py b/utils.py index 1ac0b13f7..5d91c88ef 100644 --- a/utils.py +++ b/utils.py @@ -273,7 +273,15 @@ def sigmoid(x): """Return activation value of x with sigmoid function""" return 1 / (1 + math.exp(-x)) - +def relu(x): + return max(0, x) + +def relu_derivative(value): + if value > 0: + return 1 + else: + return 0 + def step(x): """Return activation value of x with sign function""" return 1 if x >= 0 else 0 pFad - Phonifier reborn

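Finally, a hedged end-to-end sketch of the new parameter, assuming the `iris` DataSet bundled with aima-python (the hyperparameters and the sample input are illustrative):

```python
from learning import DataSet, NeuralNetLearner
from utils import relu

iris = DataSet(name="iris")
iris.classes_to_numbers()  # map class names to integer targets

# Same call as before the patch, plus the new activation argument.
nn = NeuralNetLearner(iris, hidden_layer_sizes=[5],
                      learning_rate=0.01, epochs=100, activation=relu)

# predict() returns the index of the most activated output unit.
print(nn([5.1, 3.5, 1.4, 0.2]))  # a class index in {0, 1, 2}
```

Note that `relu_derivative` is 0 for non-positive unit values, so with small random weights in [-0.5, 0.5] some relu units can start out, and stay, inactive (the usual "dying ReLU" caveat); sigmoid remains the default.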