Skip to content

Commit 2ff021f

Browse files
pprett authored and amueller committed
fix issue 4447 : min_weight_leaf not properly passed to PresortBestSplitter
1 parent 33d132a commit 2ff021f

File tree

2 files changed

+27
-4
lines changed

2 files changed

+27
-4
lines changed

sklearn/ensemble/gradient_boosting.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1002,12 +1002,19 @@ def _fit_stages(self, X, y, y_pred, sample_weight, random_state,
10021002
n_inbag = max(1, int(self.subsample * n_samples))
10031003
loss_ = self.loss_
10041004

1005+
# Set min_weight_leaf from min_weight_fraction_leaf
1006+
if self.min_weight_fraction_leaf != 0. and sample_weight is not None:
1007+
min_weight_leaf = (self.min_weight_fraction_leaf *
1008+
np.sum(sample_weight))
1009+
else:
1010+
min_weight_leaf = 0.
1011+
10051012
# init criterion and splitter
10061013
criterion = FriedmanMSE(1)
10071014
splitter = PresortBestSplitter(criterion,
10081015
self.max_features_,
10091016
self.min_samples_leaf,
1010-
self.min_weight_fraction_leaf,
1017+
min_weight_leaf,
10111018
random_state)
10121019

10131020
if self.verbose:

sklearn/ensemble/tests/test_gradient_boosting.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -980,10 +980,26 @@ def test_non_uniform_weights_toy_edge_case_reg():
980980
y = [0, 0, 1, 0]
981981
# ignore the first 2 training samples by setting their weight to 0
982982
sample_weight = [0, 0, 1, 1]
983-
for loss in ('ls', 'huber', 'lad', 'quantile'):
984-
gb = GradientBoostingRegressor(n_estimators=5)
983+
for loss in ('huber', 'ls', 'lad', 'quantile'):
984+
gb = GradientBoostingRegressor(learning_rate=1.0, n_estimators=2, loss=loss)
985985
gb.fit(X, y, sample_weight=sample_weight)
986-
assert_true(gb.predict([[1, 0]])[0] > 0.5)
986+
assert_greater(gb.predict([[1, 0]])[0], 0.5)
987+
988+
989+
def test_non_uniform_weights_toy_min_weight_leaf():
990+
"""Regression test for https://github.com/scikit-learn/scikit-learn/issues/4447 """
991+
X = [[1, 0],
992+
[1, 0],
993+
[1, 0],
994+
[0, 1],
995+
]
996+
y = [0, 0, 1, 0]
997+
# ignore the first 2 training samples by setting their weight to 0
998+
sample_weight = [0, 0, 1, 1]
999+
gb = GradientBoostingRegressor(n_estimators=5, min_weight_fraction_leaf=0.1)
1000+
gb.fit(X, y, sample_weight=sample_weight)
1001+
assert_true(gb.predict([[1, 0]])[0] > 0.5)
1002+
assert_almost_equal(gb.estimators_[0,0].splitter.min_weight_leaf, 0.2)
9871003

9881004

9891005
def test_non_uniform_weights_toy_edge_case_clf():

0 commit comments

Comments (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy