Skip to content

Commit f425134

Browse files
committed
ENH/DOC fix poly features complexity
Fixes scikit-learn#3191, scikit-learn#3194.
1 parent 96436df commit f425134

File tree

2 files changed

+34
-13
lines changed

2 files changed

+34
-13
lines changed

sklearn/preprocessing/data.py

Lines changed: 10 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -4,9 +4,8 @@
44
# Andreas Mueller <amueller@ais.uni-bonn.de>
55
# License: BSD 3 clause
66

7+
from itertools import chain
78
import numbers
8-
import warnings
9-
import itertools
109

1110
import numpy as np
1211
from scipy import sparse
@@ -20,13 +19,15 @@
2019
from ..utils import safe_asarray
2120
from ..utils import warn_if_not_float
2221
from ..utils.extmath import row_norms
22+
from ..utils.fixes import combinations_with_replacement as comb_w_r
2323
from ..utils.sparsefuncs_fast import inplace_csr_row_normalize_l1
2424
from ..utils.sparsefuncs_fast import inplace_csr_row_normalize_l2
2525
from ..utils.sparsefuncs import inplace_column_scale
2626
from ..utils.sparsefuncs import mean_variance_axis0
2727

2828
zip = six.moves.zip
2929
map = six.moves.map
30+
range = six.moves.range
3031

3132
__all__ = [
3233
'Binarizer',
@@ -427,8 +428,8 @@ class PolynomialFeatures(BaseEstimator, TransformerMixin):
427428
Notes
428429
-----
429430
Be aware that the number of features in the output array scales
430-
exponentially in the number of features of the input array, so this
431-
is not suitable for higher-dimensional data.
431+
polynomially in the number of features of the input array, and
432+
exponentially in the degree. High degrees can cause overfitting.
432433
433434
See :ref:`examples/plot_polynomial_regression.py
434435
<example_plot_polynomial_regression.py>`
@@ -440,15 +441,11 @@ def __init__(self, degree=2, include_bias=True):
440441
@staticmethod
441442
def _power_matrix(n_features, degree, include_bias):
442443
"""Compute the matrix of polynomial powers"""
443-
# Find permutations/combinations which add to degree or less
444-
deg_min = 0 if include_bias else 1
445-
powers = itertools.product(*(range(degree + 1)
446-
for i in range(n_features)))
447-
powers = np.array([c for c in powers if deg_min <= sum(c) <= degree])
448-
449-
# sort so that the order of the powers makes sense
450-
i = np.lexsort(np.vstack([powers.T, powers.sum(1)]))
451-
return powers[i]
444+
start = int(not include_bias)
445+
combn = chain.from_iterable(comb_w_r(range(n_features), i)
446+
for i in range(start, degree + 1))
447+
powers = np.vstack(np.bincount(c, minlength=n_features) for c in combn)
448+
return powers
452449

453450
def fit(self, X, y=None):
454451
"""

sklearn/utils/fixes.py

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -172,3 +172,27 @@ def sparse_min_max(X, axis):
172172
# numpy.argpartition was introduced in v 1.8.0
173173
def argpartition(a, kth, axis=-1, kind='introselect', order=None):
174174
return np.argsort(a, axis=axis, order=order)
175+
176+
177+
try:
178+
from itertools import combinations_with_replacement
179+
except ImportError:
180+
# Backport of itertools.combinations_with_replacement for Python 2.6,
181+
# from Python 3.4 documentation (http://tinyurl.com/comb-w-r), copyright
182+
# Python Software Foundation (https://docs.python.org/3/license.html)
183+
def combinations_with_replacement(iterable, r):
184+
# combinations_with_replacement('ABC', 2) --> AA AB AC BB BC CC
185+
pool = tuple(iterable)
186+
n = len(pool)
187+
if not n and r:
188+
return
189+
indices = [0] * r
190+
yield tuple(pool[i] for i in indices)
191+
while True:
192+
for i in reversed(range(r)):
193+
if indices[i] != n - 1:
194+
break
195+
else:
196+
return
197+
indices[i:] = [indices[i] + 1] * (r - i)
198+
yield tuple(pool[i] for i in indices)

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy