2
2
# Mathieu Blondel <mathieu@mblondel.org>
3
3
# Olivier Grisel <olivier.grisel@ensta.org>
4
4
# Andreas Mueller <amueller@ais.uni-bonn.de>
5
+ # Eric Martin <eric@ericmart.in>
5
6
# License: BSD 3 clause
6
7
7
8
from itertools import chain , combinations
@@ -471,9 +472,16 @@ class PolynomialFeatures(BaseEstimator, TransformerMixin):
471
472
472
473
Attributes
473
474
----------
475
+ powers_ : array, shape (n_output_features, n_input_features)
476
+ powers_[i, j] is the exponent of the jth input in the ith output.
474
477
475
- powers_ :
476
- powers_[i, j] is the exponent of the jth input in the ith output.
478
+ n_input_features_ : int
479
+ The total number of input features.
480
+
481
+ n_output_features_ : int
482
+ The total number of polynomial output features. The number of output
483
+ features is computed by iterating over all suitably sized combinations
484
+ of input features.
477
485
478
486
Notes
479
487
-----
@@ -490,23 +498,32 @@ def __init__(self, degree=2, interaction_only=False, include_bias=True):
490
498
self .include_bias = include_bias
491
499
492
500
@staticmethod
493
- def _power_matrix (n_features , degree , interaction_only , include_bias ):
494
- """Compute the matrix of polynomial powers"""
501
+ def _combinations (n_features , degree , interaction_only , include_bias ):
495
502
comb = (combinations if interaction_only else combinations_w_r )
496
503
start = int (not include_bias )
497
- combn = chain .from_iterable (comb (range (n_features ), i )
498
- for i in range (start , degree + 1 ))
499
- powers = np .vstack (bincount (c , minlength = n_features ) for c in combn )
500
- return powers
504
+ return chain .from_iterable (comb (range (n_features ), i )
505
+ for i in range (start , degree + 1 ))
506
+
507
+ @property
508
+ def powers_ (self ):
509
+ check_is_fitted (self , 'n_input_features_' )
510
+
511
+ combinations = self ._combinations (self .n_input_features_ , self .degree ,
512
+ self .interaction_only ,
513
+ self .include_bias )
514
+ return np .vstack (np .bincount (c , minlength = self .n_input_features_ )
515
+ for c in combinations )
501
516
502
517
def fit (self , X , y = None ):
503
518
"""
504
- Compute the polynomial feature combinations
519
+ Compute number of output features.
505
520
"""
506
521
n_samples , n_features = check_array (X ).shape
507
- self . powers_ = self ._power_matrix (n_features , self .degree ,
522
+ combinations = self ._combinations (n_features , self .degree ,
508
523
self .interaction_only ,
509
524
self .include_bias )
525
+ self .n_input_features_ = n_features
526
+ self .n_output_features_ = sum (1 for _ in combinations )
510
527
return self
511
528
512
529
def transform (self , X , y = None ):
@@ -523,15 +540,24 @@ def transform(self, X, y=None):
523
540
The matrix of features, where NP is the number of polynomial
524
541
features generated from the combination of inputs.
525
542
"""
526
- check_is_fitted (self , 'powers_' )
543
+ check_is_fitted (self , [ 'n_input_features_' , 'n_output_features_' ] )
527
544
528
545
X = check_array (X )
529
546
n_samples , n_features = X .shape
530
547
531
- if n_features != self .powers_ . shape [ 1 ] :
548
+ if n_features != self .n_input_features_ :
532
549
raise ValueError ("X shape does not match training shape" )
533
550
534
- return (X [:, None , :] ** self .powers_ ).prod (- 1 )
551
+ # allocate output data
552
+ XP = np .empty ((n_samples , self .n_output_features_ ), dtype = X .dtype )
553
+
554
+ combinations = self ._combinations (n_features , self .degree ,
555
+ self .interaction_only ,
556
+ self .include_bias )
557
+ for i , c in enumerate (combinations ):
558
+ XP [:, i ] = X [:, c ].prod (1 )
559
+
560
+ return XP
535
561
536
562
537
563
def normalize (X , norm = 'l2' , axis = 1 , copy = True ):
@@ -1112,7 +1138,8 @@ def _transform(self, X):
1112
1138
# We use only those categorical features of X that are known using fit.
1113
1139
# i.e. less than n_values_ using mask.
1114
1140
# This means, if self.handle_unknown is "ignore", the row_indices and
1115
- # col_indices corresponding to the unknown categorical feature are ignored.
1141
+ # col_indices corresponding to the unknown categorical feature are
1142
+ # ignored.
1116
1143
mask = (X < self .n_values_ ).ravel ()
1117
1144
if np .any (~ mask ):
1118
1145
if self .handle_unknown not in ['error' , 'ignore' ]:
0 commit comments