Skip to content

Commit 3f5fbbd

Browse files
committed
FIX don't raise memory error in ledoit wolf
1 parent 2b3fabc commit 3f5fbbd

File tree

2 files changed

+25
-22
lines changed

2 files changed

+25
-22
lines changed

sklearn/covariance/shrunk_covariance_.py

Lines changed: 10 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -240,17 +240,15 @@ def ledoit_wolf(X, assume_centered=False, block_size=1000):
240240
X : array-like, shape (n_samples, n_features)
241241
Data from which to compute the covariance estimate
242242
243-
assume_centered : Boolean
243+
assume_centered : boolean, default=False
244244
If True, data are not centered before computation.
245245
Useful to work with data whose mean is approximately equal to
246246
zero but is not exactly zero.
247247
If False, data are centered before computation.
248248
249-
block_size : int,
249+
block_size : int, default=1000
250250
Size of the blocks into which the covariance matrix will be split.
251-
If n_features > `block_size`, an error will be raised since the
252-
shrunk covariance matrix will be considered as too large regarding
253-
the available memory.
251+
This is purely a memory optimization and does not affect results.
254252
255253
Returns
256254
-------
@@ -286,10 +284,6 @@ def ledoit_wolf(X, assume_centered=False, block_size=1000):
286284
else:
287285
n_samples, n_features = X.shape
288286

289-
if n_features > block_size:
290-
raise MemoryError("LW: n_features is too large, " +
291-
"try increasing block_size")
292-
293287
# get Ledoit-Wolf shrinkage
294288
shrinkage = ledoit_wolf_shrinkage(
295289
X, assume_centered=assume_centered, block_size=block_size)
@@ -312,21 +306,19 @@ class LedoitWolf(EmpiricalCovariance):
312306
313307
Parameters
314308
----------
315-
store_precision : bool
309+
store_precision : bool, default=True
316310
Specify if the estimated precision is stored.
317311
318-
assume_centered : bool
312+
assume_centered : bool, default=False
319313
If True, data are not centered before computation.
320314
Useful when working with data whose mean is almost, but not exactly
321315
zero.
322316
If False (default), data are centered before computation.
323317
324-
block_size : int,
318+
block_size : int, default=1000
325319
Size of the blocks into which the covariance matrix will be split
326-
during its Ledoit-Wolf estimation.
327-
If n_features > `block_size`, an error will be raised since the
328-
shrunk covariance matrix will be considered as too large regarding
329-
the available memory.
320+
during its Ledoit-Wolf estimation. This is purely a memory
321+
optimization and does not affect results.
330322
331323
Attributes
332324
----------
@@ -480,10 +472,10 @@ class OAS(EmpiricalCovariance):
480472
481473
Parameters
482474
----------
483-
store_precision : bool
475+
store_precision : bool, default=True
484476
Specify if the estimated precision is stored.
485477
486-
assume_centered: bool
478+
assume_centered : bool, default=False
487479
If True, data are not centered before computation.
488480
Useful when working with data whose mean is almost, but not exactly
489481
zero.

sklearn/covariance/tests/test_covariance.py

Lines changed: 15 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -145,10 +145,6 @@ def test_ledoit_wolf():
145145
assert_almost_equal(lw.score(X_centered), score_, 4)
146146
assert(lw.precision_ is None)
147147

148-
# (too) large data set
149-
X_large = np.ones((20, 200))
150-
assert_raises(MemoryError, ledoit_wolf, X_large, block_size=100)
151-
152148
# Same tests without assuming centered data
153149
# test shrinkage coeff on a simple data set
154150
lw = LedoitWolf()
@@ -190,6 +186,21 @@ def test_ledoit_wolf():
190186
assert(lw.precision_ is None)
191187

192188

189+
def test_ledoit_wolf_large():
190+
# test that ledoit_wolf doesn't error on data that is wider than block_size
191+
rng = np.random.RandomState(0)
192+
# use a number of features that is larger than the block-size
193+
X = rng.normal(size=(10, 20))
194+
lw = LedoitWolf(block_size=10).fit(X)
195+
# check that covariance is about diagonal (random normal noise)
196+
assert_almost_equal(lw.covariance_, np.eye(20), 0)
197+
cov = lw.covariance_
198+
199+
# check that the result is consistent with not splitting data into blocks.
200+
lw = LedoitWolf(block_size=25).fit(X)
201+
assert_almost_equal(lw.covariance_, cov)
202+
203+
193204
def test_oas():
194205
# Tests OAS module on a simple dataset.
195206
# test shrinkage coeff on a simple data set

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy