Cosmit readability improvements and better whatsnew.

amueller · amueller · commit 2ddb503b5912 · 2015-06-01T12:56:21.000-04:00
diff --git a/doc/whats_new.rst b/doc/whats_new.rst
@@ -56,8 +56,10 @@ Enhancements
      :class:`linear_model.LogisticRegression`, by avoiding loss computation.
      By `Mathieu Blondel`_ and `Tom Dupre la Tour`_.
 
-   - Improved heuristic for ``class_weight="auto"`` for classifiers supporting
-     ``class_weight`` by Hanna Wallach and `Andreas Müller`_
+   - The ``class_weight="auto"`` heuristic in classifiers supporting
+     ``class_weight`` was deprecated and replaced by the ``class_weight="balanced"``
+     option, which has a simpler formula and interpretation.
+     By Hanna Wallach and `Andreas Müller`_.
 
 Bug fixes
 .........
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
@@ -1087,7 +1087,7 @@ def check_class_weight_balanced_linear_classifier(name, Classifier):
     """Test class weights with non-contiguous class labels."""
     X = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0],
                   [1.0, 1.0], [1.0, 0.0]])
-    y = [1, 1, 1, -1, -1]
+    y = np.array([1, 1, 1, -1, -1])
 
     with warnings.catch_warnings(record=True):
         classifier = Classifier()
@@ -1102,10 +1102,11 @@ def check_class_weight_balanced_linear_classifier(name, Classifier):
     coef_balanced = classifier.fit(X, y).coef_.copy()
 
     # Count each label occurrence to reweight manually
-    class_weight = {
-        1: 5. / (2 * 3),
-        -1: 5. / (2 * 2)
-    }
+    n_samples = len(y)
+    n_classes = len(np.unique(y))
+
+    class_weight = {1: n_samples / (np.sum(y == 1) * n_classes),
+                    -1: n_samples / (np.sum(y == -1) * n_classes)}
     classifier.set_params(class_weight=class_weight)
     coef_manual = classifier.fit(X, y).coef_.copy()
 
diff --git a/sklearn/utils/tests/test_class_weight.py b/sklearn/utils/tests/test_class_weight.py
@@ -39,15 +39,15 @@ def test_compute_class_weight_not_present():
 
 
 def test_compute_class_weight_invariance():
-    # test that results with class_weight="balanced" is invariant against
-    # class imbalance if the number of samples is identical
-    # the test uses a balanced two class dataset with 100 datapoints.
-    # it then creates three versions, one where class 1 is duplicated
+    # Test that results with class_weight="balanced" are invariant wrt
+    # class imbalance if the number of samples is identical.
+    # The test uses a balanced two class dataset with 100 datapoints.
+    # It creates three versions, one where class 1 is duplicated
     # resulting in 150 points of class 1 and 50 of class 0,
     # one where there are 50 points in class 1 and 150 in class 0,
     # and one where there are 100 points of each class (this one is balanced
     # again).
-    # with balancing class weights, all three should give the same model.
+    # With balancing class weights, all three should give the same model.
     X, y = make_blobs(centers=2, random_state=0)
     # create dataset where class 1 is duplicated twice
     X_1 = np.vstack([X] + [X[y == 1]] * 2)