@@ -116,7 +116,7 @@ def setproblem(self, target, inputs=None, exclude=()):
         self.check_me()

     def check_me(self):
-        "Check that my fields make sense."
+        """Check that my fields make sense."""
         assert len(self.attrnames) == len(self.attrs)
         assert self.target in self.attrs
         assert self.target not in self.inputs
@@ -126,20 +126,20 @@ def check_me(self):
             list(map(self.check_example, self.examples))

     def add_example(self, example):
-        "Add an example to the list of examples, checking it first."
+        """Add an example to the list of examples, checking it first."""
         self.check_example(example)
         self.examples.append(example)

     def check_example(self, example):
-        "Raise ValueError if example has any invalid values."
+        """Raise ValueError if example has any invalid values."""
         if self.values:
             for a in self.attrs:
                 if example[a] not in self.values[a]:
                     raise ValueError('Bad value {} for attribute {} in {}'
                                      .format(example[a], self.attrnames[a], example))

     def attrnum(self, attr):
-        "Returns the number used for attr, which can be a name, or -n .. n-1."
+        """Returns the number used for attr, which can be a name, or -n .. n-1."""
         if isinstance(attr, str):
             return self.attrnames.index(attr)
         elif attr < 0:
@@ -148,7 +148,7 @@ def attrnum(self, attr):
             return attr

     def sanitize(self, example):
-        "Return a copy of example, with non-input attributes replaced by None."
+        """Return a copy of example, with non-input attributes replaced by None."""
         return [attr_i if i in self.inputs else None
                 for i, attr_i in enumerate(example)]

@@ -161,12 +161,11 @@ def __repr__(self):

 def parse_csv(input, delim=','):
     r"""Input is a string consisting of lines, each line has comma-delimited
-    fields. Convert this into a list of lists. Blank lines are skipped.
+    fields. Convert this into a list of lists. Blank lines are skipped.
     Fields that look like numbers are converted to numbers.
     The delim defaults to ',' but '\t' and None are also reasonable values.
     >>> parse_csv('1, 2, 3 \n 0, 2, na')
-    [[1, 2, 3], [0, 2, 'na']]
-    """
+    [[1, 2, 3], [0, 2, 'na']]"""
     lines = [line for line in input.splitlines() if line.strip()]
     return [list(map(num_or_str, line.split(delim))) for line in lines]

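For reference, the same parser handles tab-separated text through the delim argument. A quick sketch, assuming num_or_str from the repo's utils converts numeric-looking fields as the docstring above says:

    >>> parse_csv('1\t2\tthree\n4\t5\tsix', delim='\t')
    [[1, 2, 'three'], [4, 5, 'six']]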
@@ -195,7 +194,7 @@ def __init__(self, observations=[], default=0):
             self.add(o)

     def add(self, o):
-        "Add an observation o to the distribution."
+        """Add an observation o to the distribution."""
         self.smooth_for(o)
         self.dictionary[o] += 1
         self.n_obs += 1
@@ -210,18 +209,18 @@ def smooth_for(self, o):
         self.sampler = None

     def __getitem__(self, item):
-        "Return an estimate of the probability of item."
+        """Return an estimate of the probability of item."""
         self.smooth_for(item)
         return self.dictionary[item] / self.n_obs

     # (top() and sample() are not used in this module, but elsewhere.)

     def top(self, n):
-        "Return (count, obs) tuples for the n most frequent observations."
+        """Return (count, obs) tuples for the n most frequent observations."""
         return heapq.nlargest(n, [(v, k) for (k, v) in self.dictionary.items()])

     def sample(self):
-        "Return a random sample from the distribution."
+        """Return a random sample from the distribution."""
         if self.sampler is None:
             self.sampler = weighted_sampler(list(self.dictionary.keys()),
                                             list(self.dictionary.values()))
@@ -236,7 +235,7 @@ def PluralityLearner(dataset):
     most_popular = mode([e[dataset.target] for e in dataset.examples])

     def predict(example):
-        "Always return same result: the most popular from the training set."
+        """Always return same result: the most popular from the training set."""
         return most_popular
     return predict

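A usage sketch for the baseline learner above (the dataset name is hypothetical; the call shape follows the predict closure shown in the hunk):

    baseline = PluralityLearner(restaurant)   # restaurant: any DataSet instance
    baseline(some_example)                    # input is ignored; returns the modal target value

Any real learner should beat this baseline on held-out data.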
@@ -274,9 +273,9 @@ def class_probability(targetval):


 def NearestNeighborLearner(dataset, k=1):
-    "k-NearestNeighbor: the k nearest neighbors vote."
+    """k-NearestNeighbor: the k nearest neighbors vote."""
     def predict(example):
-        "Find the k closest, and have them vote for the best."
+        """Find the k closest items, and have them vote for the best."""
         best = heapq.nsmallest(k, ((dataset.distance(e, example), e)
                                    for e in dataset.examples))
         return mode(e[dataset.target] for (d, e) in best)
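A short usage sketch (dataset.distance, used above, is whatever metric the DataSet instance carries; the iris name is hypothetical):

    knn = NearestNeighborLearner(iris, k=3)
    knn(new_example)   # plurality vote of the 3 nearest training examples

Since mode breaks ties arbitrarily, an odd k is the usual choice for two-class data.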
@@ -291,18 +290,18 @@ class DecisionFork:
     of branches, one for each of the attribute's values."""

     def __init__(self, attr, attrname=None, branches=None):
-        "Initialize by saying what attribute this node tests."
+        """Initialize by saying what attribute this node tests."""
         self.attr = attr
         self.attrname = attrname or attr
         self.branches = branches or {}

     def __call__(self, example):
-        "Given an example, classify it using the attribute and the branches."
+        """Given an example, classify it using the attribute and the branches."""
         attrvalue = example[self.attr]
         return self.branches[attrvalue](example)

     def add(self, val, subtree):
-        "Add a branch. If self.attr = val, go to the given subtree."
+        """Add a branch. If self.attr = val, go to the given subtree."""
         self.branches[val] = subtree

     def display(self, indent=0):
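A minimal sketch of how forks and leaves compose, assuming DecisionLeaf.__call__ returns its stored result (consistent with the DecisionLeaf class below):

    node = DecisionFork(0, 'Raining')
    node.add('Yes', DecisionLeaf('No'))
    node.add('No', DecisionLeaf('Yes'))
    node(['Yes'])   # example[0] == 'Yes' -> DecisionLeaf('No') -> 'No'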
@@ -319,7 +318,7 @@ def __repr__(self):


 class DecisionLeaf:

-    "A leaf of a decision tree holds just a result."
+    """A leaf of a decision tree holds just a result."""

     def __init__(self, result):
         self.result = result
@@ -337,7 +336,7 @@ def __repr__(self):


 def DecisionTreeLearner(dataset):
-    "[Figure 18.5]"
+    """[Figure 18.5]"""

     target, values = dataset.target, dataset.values

@@ -365,21 +364,21 @@ def plurality_value(examples):
         return DecisionLeaf(popular)

     def count(attr, val, examples):
-        "Count the number of examples that have attr = val."
+        """Count the number of examples that have attr = val."""
         return len(e[attr] == val for e in examples)  # count(e[attr] == val for e in examples)

     def all_same_class(examples):
-        "Are all these examples in the same target class?"
+        """Are all these examples in the same target class?"""
         class0 = examples[0][target]
         return all(e[target] == class0 for e in examples)

     def choose_attribute(attrs, examples):
-        "Choose the attribute with the highest information gain."
+        """Choose the attribute with the highest information gain."""
         return argmax_random_tie(attrs,
                                  key=lambda a: information_gain(a, examples))

     def information_gain(attr, examples):
-        "Return the expected reduction in entropy from splitting by attr."
+        """Return the expected reduction in entropy from splitting by attr."""
         def I(examples):
             return information_content([count(target, v, examples)
                                         for v in values[target]])
@@ -389,15 +388,15 @@ def I(examples):
         return I(examples) - remainder

     def split_by(attr, examples):
-        "Return a list of (val, examples) pairs for each val of attr."
+        """Return a list of (val, examples) pairs for each val of attr."""
         return [(v, [e for e in examples if e[attr] == v])
                 for v in values[attr]]

     return decision_tree_learning(dataset.examples, dataset.inputs)


 def information_content(values):
-    "Number of bits to represent the probability distribution in values."
+    """Number of bits to represent the probability distribution in values."""
     probabilities = normalize(removeall(0, values))
     return sum(-p * math.log2(p) for p in probabilities)

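As a sanity check on the entropy computation, assuming normalize and removeall from the repo's utils rescale counts to probabilities and drop zeros as used above:

    >>> information_content([6, 6])   # an even two-way split: one full bit
    1.0
    >>> information_content([4, 0])   # a pure node: no remaining uncertainty
    0.0

information_gain is then I(parent) minus the example-weighted average of I over the subsets produced by the split, which the remainder term accumulates.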
@@ -423,11 +422,11 @@ def find_examples(examples):
         raise NotImplementedError

     def passes(example, test):
-        "Does the example pass the test?"
+        """Does the example pass the test?"""
         raise NotImplementedError

     def predict(example):
-        "Predict the outcome for the first passing test."
+        """Predict the outcome for the first passing test."""
         for test, outcome in predict.decision_list:
             if passes(example, test):
                 return outcome
@@ -443,7 +442,7 @@ def NeuralNetLearner(dataset, hidden_layer_sizes=[3],
     """
     Layered feed-forward network.
     hidden_layer_sizes: List of number of hidden units per hidden layer
-    learning_rate: Learning rate of gradient decent
+    learning_rate: Learning rate of gradient descent
     epoches: Number of passes over the dataset
     """

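A hedged usage sketch (keyword names as documented above; the dataset name is hypothetical, and the epoches spelling presumably mirrors the keyword in the elided signature):

    nn = NeuralNetLearner(iris, hidden_layer_sizes=[4], learning_rate=0.05)
    nn(iris.examples[0])   # feed one example forward, read off the predicted class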
@@ -483,7 +482,7 @@ class NNUnit:
     """
     Single Unit of Multiple Layer Neural Network
     inputs: Incoming connections
-    weights: weights to incoming connections
+    weights: Weights to incoming connections
     """

     def __init__(self, weights=None, inputs=None):
@@ -496,7 +495,7 @@ def __init__(self, weights=None, inputs=None):
 def network(input_units, hidden_layer_sizes, output_units):
     """
     Create Directed Acyclic Network of given number layers.
-    hidden_layers_sizes : list number of neuron units in each hidden layer
+    hidden_layers_sizes : List number of neuron units in each hidden layer
     excluding input and output layers
     """
     # Check for PerceptronLearner
@@ -623,8 +622,8 @@ def predict(example):
 # ______________________________________________________________________________


-def Linearlearner(dataset, learning_rate=0.01, epochs=100):
-    """Define with learner = Linearlearner(data); infer with learner(x)."""
+def LinearLearner(dataset, learning_rate=0.01, epochs=100):
+    """Define with learner = LinearLearner(data); infer with learner(x)."""
     idx_i = dataset.inputs
     idx_t = dataset.target  # As of now, dataset.target gives only one index.
     examples = dataset.examples
@@ -698,7 +697,7 @@ def train(dataset):


 def WeightedMajority(predictors, weights):
-    "Return a predictor that takes a weighted vote."
+    """Return a predictor that takes a weighted vote."""
     def predict(example):
         return weighted_mode((predictor(example) for predictor in predictors),
                              weights)
@@ -708,7 +707,8 @@ def predict(example):
 def weighted_mode(values, weights):
     """Return the value with the greatest total weight.
     >>> weighted_mode('abbaa', [1,2,3,1,2])
-    'b'"""
+    'b'
+    """
     totals = defaultdict(int)
     for v, w in zip(values, weights):
         totals[v] += w
@@ -727,7 +727,7 @@ def train(dataset, weights):


 def replicated_dataset(dataset, weights, n=None):
-    "Copy dataset, replicating each example in proportion to its weight."
+    """Copy dataset, replicating each example in proportion to its weight."""
     n = n or len(dataset.examples)
     result = copy.copy(dataset)
     result.examples = weighted_replicate(dataset.examples, weights, n)
@@ -739,7 +739,8 @@ def weighted_replicate(seq, weights, n):
     seq proportional to the corresponding weight (filling in fractions
     randomly).
     >>> weighted_replicate('ABC', [1,2,1], 4)
-    ['A', 'B', 'B', 'C']"""
+    ['A', 'B', 'B', 'C']
+    """
     assert len(seq) == len(weights)
     weights = normalize(weights)
     wholes = [int(w*n) for w in weights]
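Tracing the doctest arithmetic: normalize([1, 2, 1]) gives [0.25, 0.5, 0.25], so with n=4 the whole parts are [1, 2, 1], which already sum to n and nothing is filled in randomly. Another fully deterministic case:

    >>> weighted_replicate('ABC', [1, 2, 1], 8)
    ['A', 'A', 'B', 'B', 'B', 'B', 'C', 'C']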
@@ -755,7 +756,7 @@ def flatten(seqs): return sum(seqs, [])


 def test(predict, dataset, examples=None, verbose=0):
-    "Return the proportion of the examples that are NOT correctly predicted."
+    """Return the proportion of the examples that are NOT correctly predicted."""
     if examples is None:
         examples = dataset.examples
     if len(examples) == 0:
@@ -787,7 +788,7 @@ def train_and_test(dataset, start, end):

 def cross_validation(learner, size, dataset, k=10, trials=1):
     """Do k-fold cross_validate and return their mean.
     That is, keep out 1/k of the examples for testing on each of k runs.
-    Shuffle the examples first; If trials>1, average over several shuffles.
+    Shuffle the examples first; if trials>1, average over several shuffles.
     Returns Training error, Validataion error"""
     if k is None:
         k = len(dataset.examples)
@@ -820,11 +821,11 @@ def cross_validation(learner, size, dataset, k=10, trials=1):


 def cross_validation_wrapper(learner, dataset, k=10, trials=1):
     """
-    Fig 18.8
+    [Fig 18.8]
     Return the optimal value of size having minimum error
     on validataion set.
-    err_train: a training error array, indexed by size
-    err_val: a validataion error array, indexed by size
+    err_train: A training error array, indexed by size
+    err_val: A validation error array, indexed by size
     """
     err_val = []
     err_train = []
@@ -843,7 +844,7 @@ def cross_validation_wrapper(learner, dataset, k=10, trials=1):


 def leave_one_out(learner, dataset):
-    "Leave one out cross-validation over the dataset."
+    """Leave one out cross-validation over the dataset."""
     return cross_validation(learner, size, dataset, k=len(dataset.examples))


@@ -878,7 +879,7 @@ def score(learner, size):


 def RestaurantDataSet(examples=None):
-    "Build a DataSet of Restaurant waiting examples. [Figure 18.3]"
+    """Build a DataSet of Restaurant waiting examples. [Figure 18.3]"""
     return DataSet(name='restaurant', target='Wait', examples=examples,
                    attrnames='Alternate Bar Fri/Sat Hungry Patrons Price ' +
                              'Raining Reservation Type WaitEstimate Wait')
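Putting the pieces in this module together, a minimal end-to-end sketch (assuming the repo ships the restaurant data file that DataSet loads when examples is None):

    restaurant = RestaurantDataSet()
    tree = DecisionTreeLearner(restaurant)
    print(test(tree, restaurant))   # proportion of training examples misclassified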
@@ -917,7 +918,7 @@ def T(attrname, branches):


 def SyntheticRestaurant(n=20):
-    "Generate a DataSet with n examples."
+    """Generate a DataSet with n examples."""
     def gen():
         example = list(map(random.choice, restaurant.values))
         example[restaurant.target] = waiting_decision_tree(example)