FIX don't raise MemoryError in ledoit_wolf when n_features > block_size

amueller · amueller · commit 3f5fbbdb2939 · 2015-04-06T11:34:51.000-04:00
diff --git a/sklearn/covariance/shrunk_covariance_.py b/sklearn/covariance/shrunk_covariance_.py
@@ -240,17 +240,15 @@ def ledoit_wolf(X, assume_centered=False, block_size=1000):
     X : array-like, shape (n_samples, n_features)
         Data from which to compute the covariance estimate
 
-    assume_centered : Boolean
+    assume_centered : bool, default=False
         If True, data are not centered before computation.
         Useful to work with data whose mean is significantly equal to
         zero but is not exactly zero.
         If False, data are centered before computation.
 
-    block_size : int,
+    block_size : int, default=1000
         Size of the blocks into which the covariance matrix will be split.
-        If n_features > `block_size`, an error will be raised since the
-        shrunk covariance matrix will be considered as too large regarding
-        the available memory.
+        This is purely a memory optimization and does not affect results.
 
     Returns
     -------
@@ -286,10 +284,6 @@ def ledoit_wolf(X, assume_centered=False, block_size=1000):
     else:
         n_samples, n_features = X.shape
 
-    if n_features > block_size:
-        raise MemoryError("LW: n_features is too large, " +
-                          "try increasing block_size")
-
     # get Ledoit-Wolf shrinkage
     shrinkage = ledoit_wolf_shrinkage(
         X, assume_centered=assume_centered, block_size=block_size)
@@ -312,21 +306,19 @@ class LedoitWolf(EmpiricalCovariance):
 
     Parameters
     ----------
-    store_precision : bool
+    store_precision : bool, default=True
         Specify if the estimated precision is stored.
 
-    assume_centered : bool
+    assume_centered : bool, default=False
         If True, data are not centered before computation.
         Useful when working with data whose mean is almost, but not exactly
         zero.
         If False (default), data are centered before computation.
 
-    block_size : int,
+    block_size : int, default=1000
         Size of the blocks into which the covariance matrix will be split
-        during its Ledoit-Wolf estimation.
-        If n_features > `block_size`, an error will be raised since the
-        shrunk covariance matrix will be considered as too large regarding
-        the available memory.
+        during its Ledoit-Wolf estimation. This is purely a memory
+        optimization and does not affect results.
 
     Attributes
     ----------
@@ -480,10 +472,10 @@ class OAS(EmpiricalCovariance):
 
     Parameters
     ----------
-    store_precision : bool
+    store_precision : bool, default=True
         Specify if the estimated precision is stored.
 
-    assume_centered: bool
+    assume_centered : bool, default=False
         If True, data are not centered before computation.
         Useful when working with data whose mean is almost, but not exactly
         zero.
diff --git a/sklearn/covariance/tests/test_covariance.py b/sklearn/covariance/tests/test_covariance.py
@@ -145,10 +145,6 @@ def test_ledoit_wolf():
     assert_almost_equal(lw.score(X_centered), score_, 4)
     assert(lw.precision_ is None)
 
-    # (too) large data set
-    X_large = np.ones((20, 200))
-    assert_raises(MemoryError, ledoit_wolf, X_large, block_size=100)
-
     # Same tests without assuming centered data
     # test shrinkage coeff on a simple data set
     lw = LedoitWolf()
@@ -190,6 +186,21 @@ def test_ledoit_wolf():
     assert(lw.precision_ is None)
 
 
+def test_ledoit_wolf_large():
+    # test that ledoit_wolf doesn't error on data that is wider than block_size
+    rng = np.random.RandomState(0)
+    # use more features (20) than block_size (10) so the matrix is split
+    X = rng.normal(size=(10, 20))
+    lw = LedoitWolf(block_size=10).fit(X)
+    # check that covariance is roughly the identity (i.i.d. normal noise)
+    assert_almost_equal(lw.covariance_, np.eye(20), 0)
+    cov = lw.covariance_
+
+    # check that the result is consistent with not splitting data into blocks.
+    lw = LedoitWolf(block_size=25).fit(X)
+    assert_almost_equal(lw.covariance_, cov)
+
+
 def test_oas():
     # Tests OAS module on a simple dataset.
     # test shrinkage coeff on a simple data set