@@ -116,7 +116,7 @@ def setproblem(self, target, inputs=None, exclude=()):
         self.check_me()

     def check_me(self):
-        "Check that my fields make sense."
+        """Check that my fields make sense."""
         assert len(self.attrnames) == len(self.attrs)
         assert self.target in self.attrs
         assert self.target not in self.inputs
@@ -126,20 +126,20 @@ def check_me(self):
             list(map(self.check_example, self.examples))

     def add_example(self, example):
-        "Add an example to the list of examples, checking it first."
+        """Add an example to the list of examples, checking it first."""
         self.check_example(example)
         self.examples.append(example)

     def check_example(self, example):
-        "Raise ValueError if example has any invalid values."
+        """Raise ValueError if example has any invalid values."""
         if self.values:
             for a in self.attrs:
                 if example[a] not in self.values[a]:
                     raise ValueError('Bad value {} for attribute {} in {}'
                                      .format(example[a], self.attrnames[a], example))

     def attrnum(self, attr):
-        "Returns the number used for attr, which can be a name, or -n .. n-1."
+        """Returns the number used for attr, which can be a name, or -n .. n-1."""
         if isinstance(attr, str):
             return self.attrnames.index(attr)
         elif attr < 0:
@@ -148,7 +148,7 @@ def attrnum(self, attr):
             return attr

     def sanitize(self, example):
-        "Return a copy of example, with non-input attributes replaced by None."
+        """Return a copy of example, with non-input attributes replaced by None."""
         return [attr_i if i in self.inputs else None
                 for i, attr_i in enumerate(example)]

@@ -161,12 +161,11 @@ def __repr__(self):

 def parse_csv(input, delim=','):
     r"""Input is a string consisting of lines, each line has comma-delimited
-    fields. Convert this into a list of lists. Blank lines are skipped.
+    fields. Convert this into a list of lists. Blank lines are skipped.
     Fields that look like numbers are converted to numbers.
     The delim defaults to ',' but '\t' and None are also reasonable values.
     >>> parse_csv('1, 2, 3 \n 0, 2, na')
-    [[1, 2, 3], [0, 2, 'na']]
-    """
+    [[1, 2, 3], [0, 2, 'na']]"""
     lines = [line for line in input.splitlines() if line.strip()]
     return [list(map(num_or_str, line.split(delim))) for line in lines]

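For reference, the same parser handles tab-separated text through the delim argument. A quick sketch, assuming num_or_str from the repo's utils converts numeric-looking fields as the docstring above says:

    >>> parse_csv('1\t2\tthree\n4\t5\tsix', delim='\t')
    [[1, 2, 'three'], [4, 5, 'six']]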
@@ -195,7 +194,7 @@ def __init__(self, observations=[], default=0):
             self.add(o)

     def add(self, o):
-        "Add an observation o to the distribution."
+        """Add an observation o to the distribution."""
         self.smooth_for(o)
         self.dictionary[o] += 1
         self.n_obs += 1
@@ -210,18 +209,18 @@ def smooth_for(self, o):
         self.sampler = None

     def __getitem__(self, item):
-        "Return an estimate of the probability of item."
+        """Return an estimate of the probability of item."""
         self.smooth_for(item)
         return self.dictionary[item] / self.n_obs

     # (top() and sample() are not used in this module, but elsewhere.)

     def top(self, n):
-        "Return (count, obs) tuples for the n most frequent observations."
+        """Return (count, obs) tuples for the n most frequent observations."""
         return heapq.nlargest(n, [(v, k) for (k, v) in self.dictionary.items()])

     def sample(self):
-        "Return a random sample from the distribution."
+        """Return a random sample from the distribution."""
         if self.sampler is None:
             self.sampler = weighted_sampler(list(self.dictionary.keys()),
                                             list(self.dictionary.values()))
@@ -236,7 +235,7 @@ def PluralityLearner(dataset):
     most_popular = mode([e[dataset.target] for e in dataset.examples])

     def predict(example):
-        "Always return same result: the most popular from the training set."
+        """Always return same result: the most popular from the training set."""
         return most_popular
     return predict

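A usage sketch for the baseline learner above (the dataset name is hypothetical; the call shape follows the predict closure shown in the hunk):

    baseline = PluralityLearner(restaurant)   # restaurant: any DataSet instance
    baseline(some_example)                    # input is ignored; returns the modal target value

Any real learner should beat this baseline on held-out data.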
@@ -274,9 +273,9 @@ def class_probability(targetval):


 def NearestNeighborLearner(dataset, k=1):
-    "k-NearestNeighbor: the k nearest neighbors vote."
+    """k-NearestNeighbor: the k nearest neighbors vote."""
     def predict(example):
-        "Find the k closest, and have them vote for the best."
+        """Find the k closest items, and have them vote for the best."""
         best = heapq.nsmallest(k, ((dataset.distance(e, example), e)
                                    for e in dataset.examples))
         return mode(e[dataset.target] for (d, e) in best)
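A short usage sketch (dataset.distance, used above, is whatever metric the DataSet instance carries; the iris name is hypothetical):

    knn = NearestNeighborLearner(iris, k=3)
    knn(new_example)   # plurality vote of the 3 nearest training examples

Since mode breaks ties arbitrarily, an odd k is the usual choice for two-class data.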
@@ -291,18 +290,18 @@ class DecisionFork:
     of branches, one for each of the attribute's values."""

     def __init__(self, attr, attrname=None, branches=None):
-        "Initialize by saying what attribute this node tests."
+        """Initialize by saying what attribute this node tests."""
         self.attr = attr
         self.attrname = attrname or attr
         self.branches = branches or {}

     def __call__(self, example):
-        "Given an example, classify it using the attribute and the branches."
+        """Given an example, classify it using the attribute and the branches."""
         attrvalue = example[self.attr]
         return self.branches[attrvalue](example)

     def add(self, val, subtree):
-        "Add a branch. If self.attr = val, go to the given subtree."
+        """Add a branch. If self.attr = val, go to the given subtree."""
         self.branches[val] = subtree

     def display(self, indent=0):
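A minimal sketch of how forks and leaves compose, assuming DecisionLeaf.__call__ returns its stored result (consistent with the DecisionLeaf class below):

    node = DecisionFork(0, 'Raining')
    node.add('Yes', DecisionLeaf('No'))
    node.add('No', DecisionLeaf('Yes'))
    node(['Yes'])   # example[0] == 'Yes' -> DecisionLeaf('No') -> 'No'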
@@ -319,7 +318,7 @@ def __repr__(self):


 class DecisionLeaf:

-    "A leaf of a decision tree holds just a result."
+    """A leaf of a decision tree holds just a result."""

     def __init__(self, result):
         self.result = result
@@ -337,7 +336,7 @@ def __repr__(self):


 def DecisionTreeLearner(dataset):
-    "[Figure 18.5]"
+    """[Figure 18.5]"""

     target, values = dataset.target, dataset.values

@@ -365,21 +364,21 @@ def plurality_value(examples):
         return DecisionLeaf(popular)

     def count(attr, val, examples):
-        "Count the number of examples that have attr = val."
+        """Count the number of examples that have attr = val."""
         return len(e[attr] == val for e in examples)  # count(e[attr] == val for e in examples)

     def all_same_class(examples):
-        "Are all these examples in the same target class?"
+        """Are all these examples in the same target class?"""
         class0 = examples[0][target]
         return all(e[target] == class0 for e in examples)

     def choose_attribute(attrs, examples):
-        "Choose the attribute with the highest information gain."
+        """Choose the attribute with the highest information gain."""
         return argmax_random_tie(attrs,
                                  key=lambda a: information_gain(a, examples))

     def information_gain(attr, examples):
-        "Return the expected reduction in entropy from splitting by attr."
+        """Return the expected reduction in entropy from splitting by attr."""
         def I(examples):
             return information_content([count(target, v, examples)
                                         for v in values[target]])
@@ -389,15 +388,15 @@ def I(examples):
         return I(examples) - remainder

     def split_by(attr, examples):
-        "Return a list of (val, examples) pairs for each val of attr."
+        """Return a list of (val, examples) pairs for each val of attr."""
         return [(v, [e for e in examples if e[attr] == v])
                 for v in values[attr]]

     return decision_tree_learning(dataset.examples, dataset.inputs)


 def information_content(values):
-    "Number of bits to represent the probability distribution in values."
+    """Number of bits to represent the probability distribution in values."""
     probabilities = normalize(removeall(0, values))
     return sum(-p * math.log2(p) for p in probabilities)

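As a sanity check on the entropy computation, assuming normalize and removeall from the repo's utils rescale counts to probabilities and drop zeros as used above:

    >>> information_content([6, 6])   # an even two-way split: one full bit
    1.0
    >>> information_content([4, 0])   # a pure node: no remaining uncertainty
    0.0

information_gain is then I(parent) minus the example-weighted average of I over the subsets produced by the split, which the remainder term accumulates.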
@@ -423,11 +422,11 @@ def find_examples(examples):
         raise NotImplementedError

     def passes(example, test):
-        "Does the example pass the test?"
+        """Does the example pass the test?"""
         raise NotImplementedError

     def predict(example):
-        "Predict the outcome for the first passing test."
+        """Predict the outcome for the first passing test."""
         for test, outcome in predict.decision_list:
             if passes(example, test):
                 return outcome
@@ -443,7 +442,7 @@ def NeuralNetLearner(dataset, hidden_layer_sizes=[3],
     """
     Layered feed-forward network.
     hidden_layer_sizes: List of number of hidden units per hidden layer
-    learning_rate: Learning rate of gradient decent
+    learning_rate: Learning rate of gradient descent
     epoches: Number of passes over the dataset
     """

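A hedged usage sketch (keyword names as documented above; the dataset name is hypothetical, and the epoches spelling presumably mirrors the keyword in the elided signature):

    nn = NeuralNetLearner(iris, hidden_layer_sizes=[4], learning_rate=0.05)
    nn(iris.examples[0])   # feed one example forward, read off the predicted class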
@@ -483,7 +482,7 @@ class NNUnit:
     """
     Single Unit of Multiple Layer Neural Network
     inputs: Incoming connections
-    weights: weights to incoming connections
+    weights: Weights to incoming connections
     """

     def __init__(self, weights=None, inputs=None):
@@ -496,7 +495,7 @@ def __init__(self, weights=None, inputs=None):
 def network(input_units, hidden_layer_sizes, output_units):
     """
     Create Directed Acyclic Network of given number layers.
-    hidden_layers_sizes : list number of neuron units in each hidden layer
+    hidden_layers_sizes : List number of neuron units in each hidden layer
     excluding input and output layers
     """
     # Check for PerceptronLearner
@@ -623,8 +622,8 @@ def predict(example):
 # ______________________________________________________________________________


-def Linearlearner(dataset, learning_rate=0.01, epochs=100):
-    """Define with learner = Linearlearner(data); infer with learner(x)."""
+def LinearLearner(dataset, learning_rate=0.01, epochs=100):
+    """Define with learner = LinearLearner(data); infer with learner(x)."""
     idx_i = dataset.inputs
     idx_t = dataset.target  # As of now, dataset.target gives only one index.
     examples = dataset.examples
@@ -698,7 +697,7 @@ def train(dataset):


 def WeightedMajority(predictors, weights):
-    "Return a predictor that takes a weighted vote."
+    """Return a predictor that takes a weighted vote."""
     def predict(example):
         return weighted_mode((predictor(example) for predictor in predictors),
                              weights)
@@ -708,7 +707,8 @@ def predict(example):
 def weighted_mode(values, weights):
     """Return the value with the greatest total weight.
     >>> weighted_mode('abbaa', [1,2,3,1,2])
-    'b'"""
+    'b'
+    """
     totals = defaultdict(int)
     for v, w in zip(values, weights):
         totals[v] += w
@@ -727,7 +727,7 @@ def train(dataset, weights):


 def replicated_dataset(dataset, weights, n=None):
-    "Copy dataset, replicating each example in proportion to its weight."
+    """Copy dataset, replicating each example in proportion to its weight."""
     n = n or len(dataset.examples)
     result = copy.copy(dataset)
     result.examples = weighted_replicate(dataset.examples, weights, n)
@@ -739,7 +739,8 @@ def weighted_replicate(seq, weights, n):
     seq proportional to the corresponding weight (filling in fractions
     randomly).
     >>> weighted_replicate('ABC', [1,2,1], 4)
-    ['A', 'B', 'B', 'C']"""
+    ['A', 'B', 'B', 'C']
+    """
     assert len(seq) == len(weights)
     weights = normalize(weights)
     wholes = [int(w*n) for w in weights]
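Tracing the doctest arithmetic: normalize([1, 2, 1]) gives [0.25, 0.5, 0.25], so with n=4 the whole parts are [1, 2, 1], which already sum to n and nothing is filled in randomly. Another fully deterministic case:

    >>> weighted_replicate('ABC', [1, 2, 1], 8)
    ['A', 'A', 'B', 'B', 'B', 'B', 'C', 'C']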
@@ -755,7 +756,7 @@ def flatten(seqs): return sum(seqs, [])


 def test(predict, dataset, examples=None, verbose=0):
-    "Return the proportion of the examples that are NOT correctly predicted."
+    """Return the proportion of the examples that are NOT correctly predicted."""
     if examples is None:
         examples = dataset.examples
     if len(examples) == 0:
@@ -787,7 +788,7 @@ def train_and_test(dataset, start, end):

 def cross_validation(learner, size, dataset, k=10, trials=1):
     """Do k-fold cross_validate and return their mean.
     That is, keep out 1/k of the examples for testing on each of k runs.
-    Shuffle the examples first; If trials>1, average over several shuffles.
+    Shuffle the examples first; if trials>1, average over several shuffles.
     Returns Training error, Validataion error"""
     if k is None:
         k = len(dataset.examples)
@@ -820,11 +821,11 @@ def cross_validation(learner, size, dataset, k=10, trials=1):


 def cross_validation_wrapper(learner, dataset, k=10, trials=1):
     """
-    Fig 18.8
+    [Fig 18.8]
     Return the optimal value of size having minimum error
     on validataion set.
-    err_train: a training error array, indexed by size
-    err_val: a validataion error array, indexed by size
+    err_train: A training error array, indexed by size
+    err_val: A validation error array, indexed by size
     """
     err_val = []
     err_train = []
@@ -843,7 +844,7 @@ def cross_validation_wrapper(learner, dataset, k=10, trials=1):


 def leave_one_out(learner, dataset):
-    "Leave one out cross-validation over the dataset."
+    """Leave one out cross-validation over the dataset."""
     return cross_validation(learner, size, dataset, k=len(dataset.examples))


@@ -878,7 +879,7 @@ def score(learner, size):


 def RestaurantDataSet(examples=None):
-    "Build a DataSet of Restaurant waiting examples. [Figure 18.3]"
+    """Build a DataSet of Restaurant waiting examples. [Figure 18.3]"""
     return DataSet(name='restaurant', target='Wait', examples=examples,
                    attrnames='Alternate Bar Fri/Sat Hungry Patrons Price ' +
                              'Raining Reservation Type WaitEstimate Wait')
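Putting the pieces in this module together, a minimal end-to-end sketch (assuming the repo ships the restaurant data file that DataSet loads when examples is None):

    restaurant = RestaurantDataSet()
    tree = DecisionTreeLearner(restaurant)
    print(test(tree, restaurant))   # proportion of training examples misclassified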
@@ -917,7 +918,7 @@ def T(attrname, branches):


 def SyntheticRestaurant(n=20):
-    "Generate a DataSet with n examples."
+    """Generate a DataSet with n examples."""
     def gen():
         example = list(map(random.choice, restaurant.values))
         example[restaurant.target] = waiting_decision_tree(example)