diff --git a/learning.py b/learning.py
index 20e47d05b..399654073 100644
--- a/learning.py
+++ b/learning.py
@@ -4,7 +4,7 @@
     removeall, unique, product, mode, argmax, argmax_random_tie, isclose, gaussian,
     dotproduct, vector_add, scalar_vector_product, weighted_sample_with_replacement,
     weighted_sampler, num_or_str, normalize, clip, sigmoid, print_table,
-    open_data, sigmoid_derivative, probability, norm, matrix_multiplication
+    open_data, sigmoid_derivative, probability, norm, matrix_multiplication, relu, relu_derivative
 )

 import copy
@@ -652,7 +652,7 @@ def predict(example):


 def NeuralNetLearner(dataset, hidden_layer_sizes=None,
-                     learning_rate=0.01, epochs=100):
+                     learning_rate=0.01, epochs=100, activation=sigmoid):
     """Layered feed-forward network.
     hidden_layer_sizes: List of number of hidden units per hidden layer
     learning_rate: Learning rate of gradient descent
@@ -664,9 +664,9 @@ def NeuralNetLearner(dataset, hidden_layer_sizes=None,
     o_units = len(dataset.values[dataset.target])

     # construct a network
-    raw_net = network(i_units, hidden_layer_sizes, o_units)
+    raw_net = network(i_units, hidden_layer_sizes, o_units, activation)
     learned_net = BackPropagationLearner(dataset, raw_net,
-                                         learning_rate, epochs)
+                                         learning_rate, epochs, activation)

     def predict(example):
         # Input nodes
@@ -695,7 +695,7 @@ def random_weights(min_value, max_value, num_weights):
     return [random.uniform(min_value, max_value) for _ in range(num_weights)]


-def BackPropagationLearner(dataset, net, learning_rate, epochs):
+def BackPropagationLearner(dataset, net, learning_rate, epochs, activation=sigmoid):
     """[Figure 18.23] The back-propagation algorithm for multilayer networks"""
     # Initialise weights
     for layer in net:
@@ -743,8 +743,11 @@ def BackPropagationLearner(dataset, net, learning_rate, epochs):

             # Error for the MSE cost function
             err = [t_val[i] - o_nodes[i].value for i in range(o_units)]

-            # The activation function used is the sigmoid function
-            delta[-1] = [sigmoid_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]
+            # The activation function used is either relu or sigmoid
+            if activation == sigmoid:
+                delta[-1] = [sigmoid_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]
+            else:
+                delta[-1] = [relu_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]

             # Backward pass
             h_layers = n_layers - 2
@@ -756,7 +759,11 @@ def BackPropagationLearner(dataset, net, learning_rate, epochs):

                 # weights from each ith layer node to each i + 1th layer node
                 w = [[node.weights[k] for node in nx_layer] for k in range(h_units)]

-                delta[i] = [sigmoid_derivative(layer[j].value) * dotproduct(w[j], delta[i+1])
+                if activation == sigmoid:
+                    delta[i] = [sigmoid_derivative(layer[j].value) * dotproduct(w[j], delta[i+1])
+                                for j in range(h_units)]
+                else:
+                    delta[i] = [relu_derivative(layer[j].value) * dotproduct(w[j], delta[i+1])
                              for j in range(h_units)]

             # Update weights
@@ -800,14 +807,14 @@ class NNUnit:
     weights: Weights to incoming connections
     """

-    def __init__(self, weights=None, inputs=None):
+    def __init__(self, activation=sigmoid, weights=None, inputs=None):
         self.weights = weights or []
         self.inputs = inputs or []
         self.value = None
-        self.activation = sigmoid
+        self.activation = activation


-def network(input_units, hidden_layer_sizes, output_units):
+def network(input_units, hidden_layer_sizes, output_units, activation=sigmoid):
     """Create Directed Acyclic Network of given number layers.
     hidden_layers_sizes : List number of neuron units in each hidden layer
     excluding input and output layers
@@ -818,7 +825,7 @@ def network(input_units, hidden_layer_sizes, output_units):
     else:
         layers_sizes = [input_units] + [output_units]

-    net = [[NNUnit() for n in range(size)]
+    net = [[NNUnit(activation) for n in range(size)]
            for size in layers_sizes]

     n_layers = len(net)
diff --git a/neural_nets.ipynb b/neural_nets.ipynb
index ecdeedcde..fe632c27f 100644
--- a/neural_nets.ipynb
+++ b/neural_nets.ipynb
@@ -14,9 +14,7 @@
   {
    "cell_type": "code",
    "execution_count": 1,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "from learning import *\n",
@@ -65,9 +63,148 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "\n",
+       "\n",
\n", + "def NeuralNetLearner(dataset, hidden_layer_sizes=None,\n",
+ " learning_rate=0.01, epochs=100, activation = sigmoid):\n",
+ " """Layered feed-forward network.\n",
+ " hidden_layer_sizes: List of number of hidden units per hidden layer\n",
+ " learning_rate: Learning rate of gradient descent\n",
+ " epochs: Number of passes over the dataset\n",
+ " """\n",
+ "\n",
+ " hidden_layer_sizes = hidden_layer_sizes or [3] # default value\n",
+ " i_units = len(dataset.inputs)\n",
+ " o_units = len(dataset.values[dataset.target])\n",
+ "\n",
+ " # construct a network\n",
+ " raw_net = network(i_units, hidden_layer_sizes, o_units, activation)\n",
+ " learned_net = BackPropagationLearner(dataset, raw_net,\n",
+ " learning_rate, epochs, activation)\n",
+ "\n",
+ " def predict(example):\n",
+ " # Input nodes\n",
+ " i_nodes = learned_net[0]\n",
+ "\n",
+ " # Activate input layer\n",
+ " for v, n in zip(example, i_nodes):\n",
+ " n.value = v\n",
+ "\n",
+ " # Forward pass\n",
+ " for layer in learned_net[1:]:\n",
+ " for node in layer:\n",
+ " inc = [n.value for n in node.inputs]\n",
+ " in_val = dotproduct(inc, node.weights)\n",
+ " node.value = node.activation(in_val)\n",
+ "\n",
+ " # Hypothesis\n",
+ " o_nodes = learned_net[-1]\n",
+ " prediction = find_max_node(o_nodes)\n",
+ " return prediction\n",
+ "\n",
+ " return predict\n",
+ "
def BackPropagationLearner(dataset, net, learning_rate, epochs, activation=sigmoid):\n",
+ " """[Figure 18.23] The back-propagation algorithm for multilayer networks"""\n",
+ " # Initialise weights\n",
+ " for layer in net:\n",
+ " for node in layer:\n",
+ " node.weights = random_weights(min_value=-0.5, max_value=0.5,\n",
+ " num_weights=len(node.weights))\n",
+ "\n",
+ " examples = dataset.examples\n",
+ " '''\n",
+ " As of now dataset.target gives an int instead of list,\n",
+ " Changing dataset class will have effect on all the learners.\n",
+ " Will be taken care of later.\n",
+ " '''\n",
+ " o_nodes = net[-1]\n",
+ " i_nodes = net[0]\n",
+ " o_units = len(o_nodes)\n",
+ " idx_t = dataset.target\n",
+ " idx_i = dataset.inputs\n",
+ " n_layers = len(net)\n",
+ "\n",
+ " inputs, targets = init_examples(examples, idx_i, idx_t, o_units)\n",
+ "\n",
+ " for epoch in range(epochs):\n",
+ " # Iterate over each example\n",
+ " for e in range(len(examples)):\n",
+ " i_val = inputs[e]\n",
+ " t_val = targets[e]\n",
+ "\n",
+ " # Activate input layer\n",
+ " for v, n in zip(i_val, i_nodes):\n",
+ " n.value = v\n",
+ "\n",
+ " # Forward pass\n",
+ " for layer in net[1:]:\n",
+ " for node in layer:\n",
+ " inc = [n.value for n in node.inputs]\n",
+ " in_val = dotproduct(inc, node.weights)\n",
+ " node.value = node.activation(in_val)\n",
+ "\n",
+ " # Initialize delta\n",
+ " delta = [[] for _ in range(n_layers)]\n",
+ "\n",
+ " # Compute outer layer delta\n",
+ "\n",
+ " # Error for the MSE cost function\n",
+ " err = [t_val[i] - o_nodes[i].value for i in range(o_units)]\n",
+ "\n",
+ " # The activation function used is relu or sigmoid function\n",
+ " if node.activation == sigmoid:\n",
+ " delta[-1] = [sigmoid_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]\n",
+ " else:\n",
+ " delta[-1] = [relu_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]\n",
+ "\n",
+ " # Backward pass\n",
+ " h_layers = n_layers - 2\n",
+ " for i in range(h_layers, 0, -1):\n",
+ " layer = net[i]\n",
+ " h_units = len(layer)\n",
+ " nx_layer = net[i+1]\n",
+ "\n",
+ " # weights from each ith layer node to each i + 1th layer node\n",
+ " w = [[node.weights[k] for node in nx_layer] for k in range(h_units)]\n",
+ "\n",
+ " if activation == sigmoid:\n",
+ " delta[i] = [sigmoid_derivative(layer[j].value) * dotproduct(w[j], delta[i+1])\n",
+ " for j in range(h_units)]\n",
+ " else:\n",
+ " delta[i] = [relu_derivative(layer[j].value) * dotproduct(w[j], delta[i+1])\n",
+ " for j in range(h_units)]\n",
+ "\n",
+ " # Update weights\n",
+ " for i in range(1, n_layers):\n",
+ " layer = net[i]\n",
+ " inc = [node.value for node in net[i-1]]\n",
+ " units = len(layer)\n",
+ " for j in range(units):\n",
+ " layer[j].weights = vector_add(layer[j].weights,\n",
+ " scalar_vector_product(\n",
+ " learning_rate * delta[i][j], inc))\n",
+ "\n",
+ " return net\n",
+ "
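With the activation parameter threaded through network, NNUnit, and BackPropagationLearner, switching the whole net to relu is a one-argument change. A quick usage sketch (hyperparameter values are illustrative; it assumes the iris dataset bundled with the repo and integer class targets via DataSet.classes_to_numbers):

from learning import DataSet, NeuralNetLearner
from utils import relu

iris = DataSet(name="iris")
iris.classes_to_numbers()  # outputs are class indices, so targets must be ints

# Identical call to before, except every unit now activates with relu.
nn_relu = NeuralNetLearner(iris, hidden_layer_sizes=[4],
                           learning_rate=0.15, epochs=100, activation=relu)
print(nn_relu([5.1, 3.5, 1.4, 0.2]))  # predicted class index, e.g. 0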