Skip to content

Commit e76b886

Browse files
antmarakis authored and norvig committed
Update Comments in learning.py + PluralityLearner Update (#315)
* Update Comments on learning.py + PluralityLearner
  - Fixed some capitalization, spelling and quotation mistakes in comments
  - In the PluralityLearner function, the nested function "predict" always returns the same output for a dataset, without taking into account the input ("example"). I defaulted the input as an empty list, so that we don't have to create (or find) a dummy example when we want to simply find the most popular class.
* Update learning.py
* Update learning.py
* Update learning.py
* Made Requested Changes
1 parent 9689bbe commit e76b886

File tree

1 file changed

+46 -45 lines changed

learning.py

Lines changed: 46 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -116,7 +116,7 @@ def setproblem(self, target, inputs=None, exclude=()):
116116
self.check_me()
117117

118118
def check_me(self):
119-
"Check that my fields make sense."
119+
"""Check that my fields make sense."""
120120
assert len(self.attrnames) == len(self.attrs)
121121
assert self.target in self.attrs
122122
assert self.target not in self.inputs
@@ -126,20 +126,20 @@ def check_me(self):
126126
list(map(self.check_example, self.examples))
127127

128128
def add_example(self, example):
129-
"Add an example to the list of examples, checking it first."
129+
"""Add an example to the list of examples, checking it first."""
130130
self.check_example(example)
131131
self.examples.append(example)
132132

133133
def check_example(self, example):
134-
"Raise ValueError if example has any invalid values."
134+
"""Raise ValueError if example has any invalid values."""
135135
if self.values:
136136
for a in self.attrs:
137137
if example[a] not in self.values[a]:
138138
raise ValueError('Bad value {} for attribute {} in {}'
139139
.format(example[a], self.attrnames[a], example))
140140

141141
def attrnum(self, attr):
142-
"Returns the number used for attr, which can be a name, or -n .. n-1."
142+
"""Returns the number used for attr, which can be a name, or -n .. n-1."""
143143
if isinstance(attr, str):
144144
return self.attrnames.index(attr)
145145
elif attr < 0:
@@ -148,7 +148,7 @@ def attrnum(self, attr):
148148
return attr
149149

150150
def sanitize(self, example):
151-
"Return a copy of example, with non-input attributes replaced by None."
151+
"""Return a copy of example, with non-input attributes replaced by None."""
152152
return [attr_i if i in self.inputs else None
153153
for i, attr_i in enumerate(example)]
154154

@@ -161,12 +161,11 @@ def __repr__(self):
161161

162162
def parse_csv(input, delim=','):
163163
r"""Input is a string consisting of lines, each line has comma-delimited
164-
fields. Convert this into a list of lists. Blank lines are skipped.
164+
fields. Convert this into a list of lists. Blank lines are skipped.
165165
Fields that look like numbers are converted to numbers.
166166
The delim defaults to ',' but '\t' and None are also reasonable values.
167167
>>> parse_csv('1, 2, 3 \n 0, 2, na')
168-
[[1, 2, 3], [0, 2, 'na']]
169-
"""
168+
[[1, 2, 3], [0, 2, 'na']]"""
170169
lines = [line for line in input.splitlines() if line.strip()]
171170
return [list(map(num_or_str, line.split(delim))) for line in lines]
172171

@@ -195,7 +194,7 @@ def __init__(self, observations=[], default=0):
195194
self.add(o)
196195

197196
def add(self, o):
198-
"Add an observation o to the distribution."
197+
"""Add an observation o to the distribution."""
199198
self.smooth_for(o)
200199
self.dictionary[o] += 1
201200
self.n_obs += 1
@@ -210,18 +209,18 @@ def smooth_for(self, o):
210209
self.sampler = None
211210

212211
def __getitem__(self, item):
213-
"Return an estimate of the probability of item."
212+
"""Return an estimate of the probability of item."""
214213
self.smooth_for(item)
215214
return self.dictionary[item] / self.n_obs
216215

217216
# (top() and sample() are not used in this module, but elsewhere.)
218217

219218
def top(self, n):
220-
"Return (count, obs) tuples for the n most frequent observations."
219+
"""Return (count, obs) tuples for the n most frequent observations."""
221220
return heapq.nlargest(n, [(v, k) for (k, v) in self.dictionary.items()])
222221

223222
def sample(self):
224-
"Return a random sample from the distribution."
223+
"""Return a random sample from the distribution."""
225224
if self.sampler is None:
226225
self.sampler = weighted_sampler(list(self.dictionary.keys()),
227226
list(self.dictionary.values()))
@@ -236,7 +235,7 @@ def PluralityLearner(dataset):
236235
most_popular = mode([e[dataset.target] for e in dataset.examples])
237236

238237
def predict(example):
239-
"Always return same result: the most popular from the training set."
238+
"""Always return same result: the most popular from the training set."""
240239
return most_popular
241240
return predict
242241

@@ -274,9 +273,9 @@ def class_probability(targetval):
274273

275274

276275
def NearestNeighborLearner(dataset, k=1):
277-
"k-NearestNeighbor: the k nearest neighbors vote."
276+
"""k-NearestNeighbor: the k nearest neighbors vote."""
278277
def predict(example):
279-
"Find the k closest, and have them vote for the best."
278+
"""Find the k closest items, and have them vote for the best."""
280279
best = heapq.nsmallest(k, ((dataset.distance(e, example), e)
281280
for e in dataset.examples))
282281
return mode(e[dataset.target] for (d, e) in best)
@@ -291,18 +290,18 @@ class DecisionFork:
291290
of branches, one for each of the attribute's values."""
292291

293292
def __init__(self, attr, attrname=None, branches=None):
294-
"Initialize by saying what attribute this node tests."
293+
"""Initialize by saying what attribute this node tests."""
295294
self.attr = attr
296295
self.attrname = attrname or attr
297296
self.branches = branches or {}
298297

299298
def __call__(self, example):
300-
"Given an example, classify it using the attribute and the branches."
299+
"""Given an example, classify it using the attribute and the branches."""
301300
attrvalue = example[self.attr]
302301
return self.branches[attrvalue](example)
303302

304303
def add(self, val, subtree):
305-
"Add a branch. If self.attr = val, go to the given subtree."
304+
"""Add a branch. If self.attr = val, go to the given subtree."""
306305
self.branches[val] = subtree
307306

308307
def display(self, indent=0):
@@ -319,7 +318,7 @@ def __repr__(self):
319318

320319
class DecisionLeaf:
321320

322-
"A leaf of a decision tree holds just a result."
321+
"""A leaf of a decision tree holds just a result."""
323322

324323
def __init__(self, result):
325324
self.result = result
@@ -337,7 +336,7 @@ def __repr__(self):
337336

338337

339338
def DecisionTreeLearner(dataset):
340-
"[Figure 18.5]"
339+
"""[Figure 18.5]"""
341340

342341
target, values = dataset.target, dataset.values
343342

@@ -365,21 +364,21 @@ def plurality_value(examples):
365364
return DecisionLeaf(popular)
366365

367366
def count(attr, val, examples):
368-
"Count the number of examples that have attr = val."
367+
"""Count the number of examples that have attr = val."""
369368
return len(e[attr] == val for e in examples) #count(e[attr] == val for e in examples)
370369

371370
def all_same_class(examples):
372-
"Are all these examples in the same target class?"
371+
"""Are all these examples in the same target class?"""
373372
class0 = examples[0][target]
374373
return all(e[target] == class0 for e in examples)
375374

376375
def choose_attribute(attrs, examples):
377-
"Choose the attribute with the highest information gain."
376+
"""Choose the attribute with the highest information gain."""
378377
return argmax_random_tie(attrs,
379378
key=lambda a: information_gain(a, examples))
380379

381380
def information_gain(attr, examples):
382-
"Return the expected reduction in entropy from splitting by attr."
381+
"""Return the expected reduction in entropy from splitting by attr."""
383382
def I(examples):
384383
return information_content([count(target, v, examples)
385384
for v in values[target]])
@@ -389,15 +388,15 @@ def I(examples):
389388
return I(examples) - remainder
390389

391390
def split_by(attr, examples):
392-
"Return a list of (val, examples) pairs for each val of attr."
391+
"""Return a list of (val, examples) pairs for each val of attr."""
393392
return [(v, [e for e in examples if e[attr] == v])
394393
for v in values[attr]]
395394

396395
return decision_tree_learning(dataset.examples, dataset.inputs)
397396

398397

399398
def information_content(values):
400-
"Number of bits to represent the probability distribution in values."
399+
"""Number of bits to represent the probability distribution in values."""
401400
probabilities = normalize(removeall(0, values))
402401
return sum(-p * math.log2(p) for p in probabilities)
403402

@@ -423,11 +422,11 @@ def find_examples(examples):
423422
raise NotImplementedError
424423

425424
def passes(example, test):
426-
"Does the example pass the test?"
425+
"""Does the example pass the test?"""
427426
raise NotImplementedError
428427

429428
def predict(example):
430-
"Predict the outcome for the first passing test."
429+
"""Predict the outcome for the first passing test."""
431430
for test, outcome in predict.decision_list:
432431
if passes(example, test):
433432
return outcome
@@ -443,7 +442,7 @@ def NeuralNetLearner(dataset, hidden_layer_sizes=[3],
443442
"""
444443
Layered feed-forward network.
445444
hidden_layer_sizes: List of number of hidden units per hidden layer
446-
learning_rate: Learning rate of gradient decent
445+
learning_rate: Learning rate of gradient descent
447446
epoches: Number of passes over the dataset
448447
"""
449448

@@ -483,7 +482,7 @@ class NNUnit:
483482
"""
484483
Single Unit of Multiple Layer Neural Network
485484
inputs: Incoming connections
486-
weights: weights to incoming connections
485+
weights: Weights to incoming connections
487486
"""
488487

489488
def __init__(self, weights=None, inputs=None):
@@ -496,7 +495,7 @@ def __init__(self, weights=None, inputs=None):
496495
def network(input_units, hidden_layer_sizes, output_units):
497496
"""
498497
Create Directed Acyclic Network of given number layers.
499-
hidden_layers_sizes : list number of neuron units in each hidden layer
498+
hidden_layers_sizes : List number of neuron units in each hidden layer
500499
excluding input and output layers
501500
"""
502501
# Check for PerceptronLearner
@@ -623,8 +622,8 @@ def predict(example):
623622
# ______________________________________________________________________________
624623

625624

626-
def Linearlearner(dataset, learning_rate=0.01, epochs=100):
627-
"""Define with learner = Linearlearner(data); infer with learner(x)."""
625+
def LinearLearner(dataset, learning_rate=0.01, epochs=100):
626+
"""Define with learner = LinearLearner(data); infer with learner(x)."""
628627
idx_i = dataset.inputs
629628
idx_t = dataset.target # As of now, dataset.target gives only one index.
630629
examples = dataset.examples
@@ -698,7 +697,7 @@ def train(dataset):
698697

699698

700699
def WeightedMajority(predictors, weights):
701-
"Return a predictor that takes a weighted vote."
700+
"""Return a predictor that takes a weighted vote."""
702701
def predict(example):
703702
return weighted_mode((predictor(example) for predictor in predictors),
704703
weights)
@@ -708,7 +707,8 @@ def predict(example):
708707
def weighted_mode(values, weights):
709708
"""Return the value with the greatest total weight.
710709
>>> weighted_mode('abbaa', [1,2,3,1,2])
711-
'b'"""
710+
'b'
711+
"""
712712
totals = defaultdict(int)
713713
for v, w in zip(values, weights):
714714
totals[v] += w
@@ -727,7 +727,7 @@ def train(dataset, weights):
727727

728728

729729
def replicated_dataset(dataset, weights, n=None):
730-
"Copy dataset, replicating each example in proportion to its weight."
730+
"""Copy dataset, replicating each example in proportion to its weight."""
731731
n = n or len(dataset.examples)
732732
result = copy.copy(dataset)
733733
result.examples = weighted_replicate(dataset.examples, weights, n)
@@ -739,7 +739,8 @@ def weighted_replicate(seq, weights, n):
739739
seq proportional to the corresponding weight (filling in fractions
740740
randomly).
741741
>>> weighted_replicate('ABC', [1,2,1], 4)
742-
['A', 'B', 'B', 'C']"""
742+
['A', 'B', 'B', 'C']
743+
"""
743744
assert len(seq) == len(weights)
744745
weights = normalize(weights)
745746
wholes = [int(w * n) for w in weights]
@@ -755,7 +756,7 @@ def flatten(seqs): return sum(seqs, [])
755756

756757

757758
def test(predict, dataset, examples=None, verbose=0):
758-
"Return the proportion of the examples that are NOT correctly predicted."
759+
"""Return the proportion of the examples that are NOT correctly predicted."""
759760
if examples is None:
760761
examples = dataset.examples
761762
if len(examples) == 0:
@@ -787,7 +788,7 @@ def train_and_test(dataset, start, end):
787788
def cross_validation(learner, size, dataset, k=10, trials=1):
788789
"""Do k-fold cross_validate and return their mean.
789790
That is, keep out 1/k of the examples for testing on each of k runs.
790-
Shuffle the examples first; If trials>1, average over several shuffles.
791+
Shuffle the examples first; if trials>1, average over several shuffles.
791792
Returns Training error, Validataion error"""
792793
if k is None:
793794
k = len(dataset.examples)
@@ -820,11 +821,11 @@ def cross_validation(learner, size, dataset, k=10, trials=1):
820821

821822
def cross_validation_wrapper(learner, dataset, k=10, trials=1):
822823
"""
823-
Fig 18.8
824+
[Fig 18.8]
824825
Return the optimal value of size having minimum error
825826
on validataion set.
826-
err_train: a training error array, indexed by size
827-
err_val: a validataion error array, indexed by size
827+
err_train: A training error array, indexed by size
828+
err_val: A validataion error array, indexed by size
828829
"""
829830
err_val = []
830831
err_train = []
@@ -843,7 +844,7 @@ def cross_validation_wrapper(learner, dataset, k=10, trials=1):
843844

844845

845846
def leave_one_out(learner, dataset):
846-
"Leave one out cross-validation over the dataset."
847+
"""Leave one out cross-validation over the dataset."""
847848
return cross_validation(learner, size, dataset, k=len(dataset.examples))
848849

849850

@@ -878,7 +879,7 @@ def score(learner, size):
878879

879880

880881
def RestaurantDataSet(examples=None):
881-
"Build a DataSet of Restaurant waiting examples. [Figure 18.3]"
882+
"""Build a DataSet of Restaurant waiting examples. [Figure 18.3]"""
882883
return DataSet(name='restaurant', target='Wait', examples=examples,
883884
attrnames='Alternate Bar Fri/Sat Hungry Patrons Price ' +
884885
'Raining Reservation Type WaitEstimate Wait')
@@ -917,7 +918,7 @@ def T(attrname, branches):
917918

918919

919920
def SyntheticRestaurant(n=20):
920-
"Generate a DataSet with n examples."
921+
"""Generate a DataSet with n examples."""
921922
def gen():
922923
example = list(map(random.choice, restaurant.values))
923924
example[restaurant.target] = waiting_decision_tree(example)

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy