Update Comments in learning.py + PluralityLearner Update #315

Merged: 5 commits, Mar 7, 2017
91 changes: 46 additions & 45 deletions learning.py
@@ -120,7 +120,7 @@ def setproblem(self, target, inputs=None, exclude=()):
self.check_me()

def check_me(self):
"Check that my fields make sense."
"""Check that my fields make sense."""
assert len(self.attrnames) == len(self.attrs)
assert self.target in self.attrs
assert self.target not in self.inputs
@@ -130,20 +130,20 @@ def check_me(self):
list(map(self.check_example, self.examples))

def add_example(self, example):
"Add an example to the list of examples, checking it first."
"""Add an example to the list of examples, checking it first."""
self.check_example(example)
self.examples.append(example)

def check_example(self, example):
"Raise ValueError if example has any invalid values."
"""Raise ValueError if example has any invalid values."""
if self.values:
for a in self.attrs:
if example[a] not in self.values[a]:
raise ValueError('Bad value {} for attribute {} in {}'
.format(example[a], self.attrnames[a], example))

def attrnum(self, attr):
"Returns the number used for attr, which can be a name, or -n .. n-1."
"""Returns the number used for attr, which can be a name, or -n .. n-1."""
if isinstance(attr, str):
return self.attrnames.index(attr)
elif attr < 0:
@@ -152,7 +152,7 @@ def attrnum(self, attr):
return attr

def sanitize(self, example):
"Return a copy of example, with non-input attributes replaced by None."
"""Return a copy of example, with non-input attributes replaced by None."""
return [attr_i if i in self.inputs else None
for i, attr_i in enumerate(example)]

@@ -165,12 +165,11 @@ def __repr__(self):

def parse_csv(input, delim=','):
r"""Input is a string consisting of lines, each line has comma-delimited
fields. Convert this into a list of lists. Blank lines are skipped.
fields. Convert this into a list of lists. Blank lines are skipped.
Fields that look like numbers are converted to numbers.
The delim defaults to ',' but '\t' and None are also reasonable values.
>>> parse_csv('1, 2, 3 \n 0, 2, na')
[[1, 2, 3], [0, 2, 'na']]
"""
[[1, 2, 3], [0, 2, 'na']]"""
lines = [line for line in input.splitlines() if line.strip()]
return [list(map(num_or_str, line.split(delim))) for line in lines]
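For illustration, the same function handles other delimiters; a quick doctest-style sketch (the input values are made up, and num_or_str is the utils helper this module already relies on):

    >>> parse_csv('5.1\t3.5\n4.9\tna', delim='\t')
    [[5.1, 3.5], [4.9, 'na']]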

@@ -199,7 +198,7 @@ def __init__(self, observations=[], default=0):
self.add(o)

def add(self, o):
"Add an observation o to the distribution."
"""Add an observation o to the distribution."""
self.smooth_for(o)
self.dictionary[o] += 1
self.n_obs += 1
@@ -214,18 +213,18 @@ def smooth_for(self, o):
self.sampler = None

def __getitem__(self, item):
"Return an estimate of the probability of item."
"""Return an estimate of the probability of item."""
self.smooth_for(item)
return self.dictionary[item] / self.n_obs

# (top() and sample() are not used in this module, but elsewhere.)

def top(self, n):
"Return (count, obs) tuples for the n most frequent observations."
"""Return (count, obs) tuples for the n most frequent observations."""
return heapq.nlargest(n, [(v, k) for (k, v) in self.dictionary.items()])

def sample(self):
"Return a random sample from the distribution."
"""Return a random sample from the distribution."""
if self.sampler is None:
self.sampler = weighted_sampler(list(self.dictionary.keys()),
list(self.dictionary.values()))
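A minimal usage sketch for the class as shown above (with default=0 there is no smoothing, so probabilities are plain frequencies):

    d = CountingProbDist(['spam', 'spam', 'eggs', 'spam'])
    d['spam']    # -> 0.75 (3 of the 4 observations)
    d.top(1)     # -> [(3, 'spam')]
    d.sample()   # -> 'spam' or 'eggs', drawn in proportion to the counts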
@@ -240,7 +239,7 @@ def PluralityLearner(dataset):
most_popular = mode([e[dataset.target] for e in dataset.examples])

def predict(example):
"Always return same result: the most popular from the training set."
"""Always return same result: the most popular from the training set."""
return most_popular
return predict
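A quick usage sketch (restaurant stands for any DataSet, such as the one built by RestaurantDataSet near the end of the file; the predictor deliberately ignores its input):

    baseline = PluralityLearner(restaurant)
    baseline(restaurant.examples[0])  # -> the most common target value in the training set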

@@ -278,9 +277,9 @@ def class_probability(targetval):


def NearestNeighborLearner(dataset, k=1):
"k-NearestNeighbor: the k nearest neighbors vote."
"""k-NearestNeighbor: the k nearest neighbors vote."""
def predict(example):
"Find the k closest, and have them vote for the best."
"""Find the k closest items, and have them vote for the best."""
best = heapq.nsmallest(k, ((dataset.distance(e, example), e)
for e in dataset.examples))
return mode(e[dataset.target] for (d, e) in best)
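A usage sketch (iris is a hypothetical DataSet; dataset.distance is whatever metric the DataSet was constructed with):

    knn = NearestNeighborLearner(iris, k=5)
    knn(iris.examples[0])  # majority class among the 5 nearest training examples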
@@ -295,18 +294,18 @@ class DecisionFork:
of branches, one for each of the attribute's values."""

def __init__(self, attr, attrname=None, branches=None):
"Initialize by saying what attribute this node tests."
"""Initialize by saying what attribute this node tests."""
self.attr = attr
self.attrname = attrname or attr
self.branches = branches or {}

def __call__(self, example):
"Given an example, classify it using the attribute and the branches."
"""Given an example, classify it using the attribute and the branches."""
attrvalue = example[self.attr]
return self.branches[attrvalue](example)

def add(self, val, subtree):
"Add a branch. If self.attr = val, go to the given subtree."
"""Add a branch. If self.attr = val, go to the given subtree."""
self.branches[val] = subtree

def display(self, indent=0):
@@ -323,7 +322,7 @@ def __repr__(self):

class DecisionLeaf:

"A leaf of a decision tree holds just a result."
"""A leaf of a decision tree holds just a result."""

def __init__(self, result):
self.result = result
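Together, DecisionFork and DecisionLeaf form a callable tree; a hand-built sketch (the attribute index and values are invented, and DecisionLeaf.__call__ simply returns self.result in the full file):

    rain = DecisionFork(0, 'Raining', branches={'Yes': DecisionLeaf('Wait'),
                                                'No': DecisionLeaf('Leave')})
    rain(['Yes'])  # -> 'Wait': looks up example[0], then delegates to that branch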
@@ -341,7 +340,7 @@ def __repr__(self):


def DecisionTreeLearner(dataset):
"[Figure 18.5]"
"""[Figure 18.5]"""

target, values = dataset.target, dataset.values

@@ -369,21 +368,21 @@ def plurality_value(examples):
return DecisionLeaf(popular)

def count(attr, val, examples):
"Count the number of examples that have attr = val."
"""Count the number of examples that have attr = val."""
return sum(e[attr] == val for e in examples)  # a generator has no len(); summing booleans counts the matches

def all_same_class(examples):
"Are all these examples in the same target class?"
"""Are all these examples in the same target class?"""
class0 = examples[0][target]
return all(e[target] == class0 for e in examples)

def choose_attribute(attrs, examples):
"Choose the attribute with the highest information gain."
"""Choose the attribute with the highest information gain."""
return argmax_random_tie(attrs,
key=lambda a: information_gain(a, examples))

def information_gain(attr, examples):
"Return the expected reduction in entropy from splitting by attr."
"""Return the expected reduction in entropy from splitting by attr."""
def I(examples):
return information_content([count(target, v, examples)
for v in values[target]])
@@ -393,15 +392,15 @@ def I(examples):
return I(examples) - remainder

def split_by(attr, examples):
"Return a list of (val, examples) pairs for each val of attr."
"""Return a list of (val, examples) pairs for each val of attr."""
return [(v, [e for e in examples if e[attr] == v])
for v in values[attr]]

return decision_tree_learning(dataset.examples, dataset.inputs)


def information_content(values):
"Number of bits to represent the probability distribution in values."
"""Number of bits to represent the probability distribution in values."""
probabilities = normalize(removeall(0, values))
return sum(-p * math.log2(p) for p in probabilities)
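Two worked checks of the entropy computation (zero counts are dropped by removeall before the logs are taken):

    information_content([6, 6])  # -> 1.0: a 50/50 split costs one full bit
    information_content([4, 0])  # -> 0.0: a pure node, since -1 * log2(1) == 0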

@@ -427,11 +426,11 @@ def find_examples(examples):
raise NotImplementedError

def passes(example, test):
"Does the example pass the test?"
"""Does the example pass the test?"""
raise NotImplementedError

def predict(example):
"Predict the outcome for the first passing test."
"""Predict the outcome for the first passing test."""
for test, outcome in predict.decision_list:
if passes(example, test):
return outcome
@@ -447,7 +446,7 @@ def NeuralNetLearner(dataset, hidden_layer_sizes=[3],
"""
Layered feed-forward network.
hidden_layer_sizes: List of the number of hidden units per hidden layer
learning_rate: Learning rate of gradient decent
learning_rate: Learning rate of gradient descent
epoches: Number of passes over the dataset
"""

@@ -487,7 +486,7 @@ class NNUnit:
"""
Single Unit of Multiple Layer Neural Network
inputs: Incoming connections
weights: weights to incoming connections
weights: Weights to incoming connections
"""

def __init__(self, weights=None, inputs=None):
@@ -500,7 +499,7 @@ def __init__(self, weights=None, inputs=None):
def network(input_units, hidden_layer_sizes, output_units):
"""
Create a Directed Acyclic Network of the given number of layers.
hidden_layers_sizes : list number of neuron units in each hidden layer
hidden_layer_sizes: List of the numbers of neuron units in each hidden layer,
excluding input and output layers
"""
# Check for PerceptronLearner
@@ -627,8 +626,8 @@ def predict(example):
# ______________________________________________________________________________


def Linearlearner(dataset, learning_rate=0.01, epochs=100):
"""Define with learner = Linearlearner(data); infer with learner(x)."""
def LinearLearner(dataset, learning_rate=0.01, epochs=100):
"""Define with learner = LinearLearner(data); infer with learner(x)."""
idx_i = dataset.inputs
idx_t = dataset.target # As of now, dataset.target gives only one index.
examples = dataset.examples
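A usage sketch (iris here is a stand-in for any DataSet with a numeric target; the returned learner is itself callable):

    lm = LinearLearner(iris, learning_rate=0.01, epochs=100)
    lm(iris.examples[0])  # -> predicted numeric target for one example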
@@ -702,7 +701,7 @@ def train(dataset):


def WeightedMajority(predictors, weights):
"Return a predictor that takes a weighted vote."
"""Return a predictor that takes a weighted vote."""
def predict(example):
return weighted_mode((predictor(example) for predictor in predictors),
weights)
@@ -712,7 +711,8 @@ def predict(example):
def weighted_mode(values, weights):
"""Return the value with the greatest total weight.
>>> weighted_mode('abbaa', [1,2,3,1,2])
'b'"""
'b'
"""
totals = defaultdict(int)
for v, w in zip(values, weights):
totals[v] += w
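(In the doctest above, 'a' collects weight 1 + 1 + 2 = 4 while 'b' collects 2 + 3 = 5, so 'b' wins.)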
@@ -731,7 +731,7 @@ def train(dataset, weights):


def replicated_dataset(dataset, weights, n=None):
"Copy dataset, replicating each example in proportion to its weight."
"""Copy dataset, replicating each example in proportion to its weight."""
n = n or len(dataset.examples)
result = copy.copy(dataset)
result.examples = weighted_replicate(dataset.examples, weights, n)
@@ -743,7 +743,8 @@ def weighted_replicate(seq, weights, n):
seq proportional to the corresponding weight (filling in fractions
randomly).
>>> weighted_replicate('ABC', [1,2,1], 4)
['A', 'B', 'B', 'C']"""
['A', 'B', 'B', 'C']
"""
assert len(seq) == len(weights)
weights = normalize(weights)
wholes = [int(w * n) for w in weights]
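(In the doctest above, [1, 2, 1] normalizes to [0.25, 0.5, 0.25], so with n=4 the whole parts alone already yield one 'A', two 'B's and one 'C'.)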
@@ -759,7 +760,7 @@ def flatten(seqs): return sum(seqs, [])


def test(predict, dataset, examples=None, verbose=0):
"Return the proportion of the examples that are NOT correctly predicted."
"""Return the proportion of the examples that are NOT correctly predicted."""
if examples is None:
examples = dataset.examples
if len(examples) == 0:
@@ -791,7 +792,7 @@ def train_and_test(dataset, start, end):
def cross_validation(learner, size, dataset, k=10, trials=1):
"""Do k-fold cross_validate and return their mean.
That is, keep out 1/k of the examples for testing on each of k runs.
Shuffle the examples first; If trials>1, average over several shuffles.
Shuffle the examples first; if trials>1, average over several shuffles.
Returns training error, validation error."""
if k is None:
k = len(dataset.examples)
@@ -824,11 +825,11 @@ def cross_validation(learner, size, dataset, k=10, trials=1):

def cross_validation_wrapper(learner, dataset, k=10, trials=1):
"""
Fig 18.8
[Fig 18.8]
Return the optimal value of size having minimum error
on the validation set.
err_train: a training error array, indexed by size
err_val: a validation error array, indexed by size
err_train: A training error array, indexed by size
err_val: A validation error array, indexed by size
"""
err_val = []
err_train = []
@@ -847,7 +848,7 @@ def cross_validation_wrapper(learner, dataset, k=10, trials=1):


def leave_one_out(learner, dataset, size=None):  # size is forwarded below; it was previously an undefined name
"Leave one out cross-validation over the dataset."
"""Leave one out cross-validation over the dataset."""
return cross_validation(learner, size, dataset, k=len(dataset.examples))
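A usage sketch tying the two together (per the docstring, cross_validation returns the training/validation error pair; restaurant and size=1 are illustrative):

    err_train, err_val = cross_validation(DecisionTreeLearner, 1, restaurant, k=10)
    loo_err = leave_one_out(DecisionTreeLearner, restaurant)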


@@ -882,7 +883,7 @@ def score(learner, size):


def RestaurantDataSet(examples=None):
"Build a DataSet of Restaurant waiting examples. [Figure 18.3]"
"""Build a DataSet of Restaurant waiting examples. [Figure 18.3]"""
return DataSet(name='restaurant', target='Wait', examples=examples,
attrnames='Alternate Bar Fri/Sat Hungry Patrons Price ' +
'Raining Reservation Type WaitEstimate Wait')
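Downstream, this DataSet plugs into any learner above (assuming, as in aima-python, that the restaurant examples are loaded from the repo's data files when examples is None):

    restaurant = RestaurantDataSet()
    tree = DecisionTreeLearner(restaurant)
    tree(restaurant.examples[0])  # the learned tree is callable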
@@ -921,7 +922,7 @@ def T(attrname, branches):


def SyntheticRestaurant(n=20):
"Generate a DataSet with n examples."
"""Generate a DataSet with n examples."""
def gen():
example = list(map(random.choice, restaurant.values))
example[restaurant.target] = waiting_decision_tree(example)