Commit fc0150d

jnothman authored and ogrisel committed
DOC/FIX Address @arjoly's comments
1 parent dcb2417 commit fc0150d

File tree

6 files changed: +41 -71 lines changed


doc/developers/utilities.rst

Lines changed: 1 addition & 1 deletion
@@ -244,7 +244,7 @@ Multiclass and multilabel utility function
   a classification output is in label indicator matrix format.
 
 - :func:`multiclass.unique_labels`: Helper function to extract an ordered
-  array of unique labels from a list of labels.
+  array of unique labels from different formats of target.
 
 
 Helper Functions
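To see what the reworded sentence means by "different formats of target", `unique_labels` merges one or more target arrays into a single ordered array of labels. A minimal sketch of its behavior (for illustration; not part of this commit):

    >>> from sklearn.utils.multiclass import unique_labels
    >>> unique_labels([3, 5, 5, 5, 7, 7])
    array([3, 5, 7])
    >>> unique_labels([1, 2, 3, 4], [2, 2, 3, 4])
    array([1, 2, 3, 4])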

doc/modules/model_evaluation.rst

Lines changed: 2 additions & 2 deletions
@@ -259,7 +259,7 @@ where :math:`1(x)` is the `indicator function
   >>> accuracy_score(y_true, y_pred, normalize=False)
   2
 
-In the multilabel case with binary label indicators:
+In the multilabel case with binary label indicators: ::
 
   >>> accuracy_score(np.array([[0.0, 1.0], [1.0, 1.0]]), np.ones((2, 2)))
   0.5

@@ -424,7 +424,7 @@ score is equal to the classification accuracy.
   >>> jaccard_similarity_score(y_true, y_pred, normalize=False)
   2
 
-In the multilabel case with binary label indicators:
+In the multilabel case with binary label indicators: ::
 
   >>> jaccard_similarity_score(np.array([[0.0, 1.0], [1.0, 1.0]]), np.ones((2, 2)))
   0.75
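The added `::` makes Sphinx render the examples that follow as literal blocks. As a sanity check of the documented values (a sketch, not part of the diff): against all-ones predictions for `[[0, 1], [1, 1]]`, only the second sample matches exactly, while the per-sample Jaccard coefficients are 1/2 and 1:

    >>> import numpy as np
    >>> from sklearn.metrics import accuracy_score, jaccard_similarity_score
    >>> y_true = np.array([[0, 1], [1, 1]])
    >>> accuracy_score(y_true, np.ones((2, 2)))            # subset accuracy: 1 of 2 rows match
    0.5
    >>> jaccard_similarity_score(y_true, np.ones((2, 2)))  # mean of 1/2 and 1/1
    0.75

(In much later scikit-learn releases, `jaccard_similarity_score` was deprecated in favor of `jaccard_score`.)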

examples/plot_multilabel.py

Lines changed: 1 addition & 1 deletion
@@ -72,7 +72,7 @@ def plot_subfigure(X, Y, subplot, title, transform):
     pl.title(title)
 
     zero_class = np.where(Y[:, 0])
-    one_class = np.where(Y[:, 0])
+    one_class = np.where(Y[:, 1])
     pl.scatter(X[:, 0], X[:, 1], s=40, c='gray')
     pl.scatter(X[zero_class, 0], X[zero_class, 1], s=160, edgecolors='b',
                facecolors='none', linewidths=2, label='Class 1')
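This fix corrects a copy-paste bug: both classes were being selected from column 0 of the indicator matrix. A small sketch of what the corrected indexing returns (illustrative data):

    >>> import numpy as np
    >>> Y = np.array([[1, 0], [0, 1], [1, 1]])  # hypothetical label indicator matrix
    >>> np.where(Y[:, 0])  # samples carrying the first label
    (array([0, 2]),)
    >>> np.where(Y[:, 1])  # samples carrying the second label
    (array([1, 2]),)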

sklearn/metrics/metrics.py

Lines changed: 32 additions & 57 deletions
@@ -314,7 +314,7 @@ def average_precision_score(y_true, y_score, average="macro",
     Parameters
     ----------
     y_true : array, shape = [n_samples] or [n_samples, n_classes]
-        True binary labels in binary indicator format.
+        True binary labels in binary label indicators.
 
     y_score : array, shape = [n_samples] or [n_samples, n_classes]
         Target scores, can either be probability estimates of the positive

@@ -426,7 +426,7 @@ def _average_binary_score(binary_metric, y_true, y_score, average,
     Parameters
     ----------
     y_true : array, shape = [n_samples] or [n_samples, n_classes]
-        True binary labels in binary indicator format.
+        True binary labels in binary label indicators.
 
     y_score : array, shape = [n_samples] or [n_samples, n_classes]
         Target scores, can either be probability estimates of the positive

@@ -527,7 +527,7 @@ def roc_auc_score(y_true, y_score, average="macro", sample_weight=None):
     Parameters
     ----------
     y_true : array, shape = [n_samples] or [n_samples, n_classes]
-        True binary labels in binary indicator format.
+        True binary labels in binary label indicators.
 
     y_score : array, shape = [n_samples] or [n_samples, n_classes]
         Target scores, can either be probability estimates of the positive
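The three hunks above only reword the description of `y_true`. For orientation, the canonical use of `roc_auc_score` with binary labels (an illustrative sketch, not part of this commit):

    >>> import numpy as np
    >>> from sklearn.metrics import roc_auc_score
    >>> y_true = np.array([0, 0, 1, 1])
    >>> y_scores = np.array([0.1, 0.4, 0.35, 0.8])
    >>> roc_auc_score(y_true, y_scores)  # 3 of the 4 positive/negative pairs are ranked correctly
    0.75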
@@ -986,10 +986,10 @@ def zero_one_loss(y_true, y_pred, normalize=True, sample_weight=None):
 
     Parameters
     ----------
-    y_true : array-like or list of labels or label indicator matrix
+    y_true : array-like or label indicator matrix
         Ground truth (correct) labels.
 
-    y_pred : array-like or list of labels or label indicator matrix
+    y_pred : array-like or label indicator matrix
         Predicted labels, as returned by a classifier.
 
     normalize : bool, optional (default=True)

@@ -1025,17 +1025,10 @@ def zero_one_loss(y_true, y_pred, normalize=True, sample_weight=None):
     >>> zero_one_loss(y_true, y_pred, normalize=False)
     1
 
-    In the multilabel case with binary indicator format:
+    In the multilabel case with binary label indicators:
 
-    >>> zero_one_loss(np.array([[0.0, 1.0], [1.0, 1.0]]), np.ones((2, 2)))
+    >>> zero_one_loss(np.array([[0, 1], [1, 1]]), np.ones((2, 2)))
     0.5
-
-    and with a list of labels format:
-
-    >>> zero_one_loss([(1, ), (3, )], [(1, 2), tuple()])
-    1.0
-
-
     """
     score = accuracy_score(y_true, y_pred,
                            normalize=normalize,
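As the surviving context shows, `zero_one_loss` delegates to `accuracy_score`, so with `normalize=True` it is exactly the complement of the subset accuracy. A quick check of that relationship (a sketch for illustration only):

    >>> import numpy as np
    >>> from sklearn.metrics import zero_one_loss, accuracy_score
    >>> y_true = np.array([[0, 1], [1, 1]])
    >>> zero_one_loss(y_true, np.ones((2, 2)))
    0.5
    >>> 1 - accuracy_score(y_true, np.ones((2, 2)))
    0.5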
@@ -1064,7 +1057,7 @@ def log_loss(y_true, y_pred, eps=1e-15, normalize=True):
 
     Parameters
     ----------
-    y_true : array-like or list of labels or label indicator matrix
+    y_true : array-like or label indicator matrix
         Ground truth (correct) labels for n_samples samples.
 
     y_pred : array-like of float, shape = (n_samples, n_classes)
@@ -1139,10 +1132,10 @@ def jaccard_similarity_score(y_true, y_pred, normalize=True):
 
     Parameters
     ----------
-    y_true : array-like or list of labels or label indicator matrix
+    y_true : array-like or label indicator matrix
         Ground truth (correct) labels.
 
-    y_pred : array-like or list of labels or label indicator matrix
+    y_pred : array-like or label indicator matrix
         Predicted labels, as returned by a classifier.
 
     normalize : bool, optional (default=True)

@@ -1187,17 +1180,11 @@ def jaccard_similarity_score(y_true, y_pred, normalize=True):
     >>> jaccard_similarity_score(y_true, y_pred, normalize=False)
     2
 
-    In the multilabel case with binary indicator format:
+    In the multilabel case with binary label indicators:
 
-    >>> jaccard_similarity_score(np.array([[0.0, 1.0], [1.0, 1.0]]),\
+    >>> jaccard_similarity_score(np.array([[0, 1], [1, 1]]),\
         np.ones((2, 2)))
     0.75
-
-    and with a list of labels format:
-
-    >>> jaccard_similarity_score([(1, ), (3, )], [(1, 2), tuple()])
-    0.25
-
     """
 
     # Compute accuracy for each possible representation
@@ -1252,10 +1239,10 @@ def accuracy_score(y_true, y_pred, normalize=True, sample_weight=None):
 
     Parameters
     ----------
-    y_true : array-like or list of labels or label indicator matrix
+    y_true : array-like or label indicator matrix
         Ground truth (correct) labels.
 
-    y_pred : array-like or list of labels or label indicator matrix
+    y_pred : array-like or label indicator matrix
         Predicted labels, as returned by a classifier.
 
     normalize : bool, optional (default=True)

@@ -1295,16 +1282,10 @@ def accuracy_score(y_true, y_pred, normalize=True, sample_weight=None):
     >>> accuracy_score(y_true, y_pred, normalize=False)
     2
 
-    In the multilabel case with binary indicator format:
+    In the multilabel case with binary label indicators:
 
-    >>> accuracy_score(np.array([[0.0, 1.0], [1.0, 1.0]]), np.ones((2, 2)))
+    >>> accuracy_score(np.array([[0, 1], [1, 1]]), np.ones((2, 2)))
     0.5
-
-    and with a list of labels format:
-
-    >>> accuracy_score([(1, ), (3, )], [(1, 2), tuple()])
-    0.0
-
     """
 
     # Compute accuracy for each possible representation
@@ -1343,10 +1324,10 @@ def f1_score(y_true, y_pred, labels=None, pos_label=1, average='weighted',
 
     Parameters
     ----------
-    y_true : array-like or list of labels or label indicator matrix
+    y_true : array-like or label indicator matrix
         Ground truth (correct) target values.
 
-    y_pred : array-like or list of labels or label indicator matrix
+    y_pred : array-like or label indicator matrix
         Estimated targets as returned by a classifier.
 
     labels : array

@@ -1426,10 +1407,10 @@ def fbeta_score(y_true, y_pred, beta, labels=None, pos_label=1,
 
     Parameters
     ----------
-    y_true : array-like or list of labels or label indicator matrix
+    y_true : array-like or label indicator matrix
         Ground truth (correct) target values.
 
-    y_pred : array-like or list of labels or label indicator matrix
+    y_pred : array-like or label indicator matrix
         Estimated targets as returned by a classifier.
 
     beta: float

@@ -1585,10 +1566,10 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None,
 
     Parameters
     ----------
-    y_true : array-like or list of labels or label indicator matrix
+    y_true : array-like or label indicator matrix
         Ground truth (correct) target values.
 
-    y_pred : array-like or list of labels or label indicator matrix
+    y_pred : array-like or label indicator matrix
         Estimated targets as returned by a classifier.
 
     beta : float, 1.0 by default
@@ -1830,10 +1811,10 @@ def precision_score(y_true, y_pred, labels=None, pos_label=1,
 
     Parameters
     ----------
-    y_true : array-like or list of labels or label indicator matrix
+    y_true : array-like or label indicator matrix
         Ground truth (correct) target values.
 
-    y_pred : array-like or list of labels or label indicator matrix
+    y_pred : array-like or label indicator matrix
         Estimated targets as returned by a classifier.
 
     labels : array

@@ -1912,10 +1893,10 @@ def recall_score(y_true, y_pred, labels=None, pos_label=1, average='weighted',
 
     Parameters
     ----------
-    y_true : array-like or list of labels or label indicator matrix
+    y_true : array-like or label indicator matrix
         Ground truth (correct) target values.
 
-    y_pred : array-like or list of labels or label indicator matrix
+    y_pred : array-like or label indicator matrix
         Estimated targets as returned by a classifier.
 
     labels : array

@@ -1987,10 +1968,10 @@ def classification_report(y_true, y_pred, labels=None, target_names=None,
 
     Parameters
    ----------
-    y_true : array-like or list of labels or label indicator matrix
+    y_true : array-like or label indicator matrix
         Ground truth (correct) target values.
 
-    y_pred : array-like or list of labels or label indicator matrix
+    y_pred : array-like or label indicator matrix
         Estimated targets as returned by a classifier.
 
     labels : array, shape = [n_labels]
@@ -2081,10 +2062,10 @@ def hamming_loss(y_true, y_pred, classes=None):
 
     Parameters
     ----------
-    y_true : array-like or list of labels or label indicator matrix
+    y_true : array-like or label indicator matrix
         Ground truth (correct) labels.
 
-    y_pred : array-like or list of labels or label indicator matrix
+    y_pred : array-like or label indicator matrix
         Predicted labels, as returned by a classifier.
 
     classes : array, shape = [n_labels], optional

@@ -2132,16 +2113,10 @@ def hamming_loss(y_true, y_pred, classes=None):
     >>> hamming_loss(y_true, y_pred)
     0.25
 
-    In the multilabel case with binary indicator format:
+    In the multilabel case with binary label indicators:
 
-    >>> hamming_loss(np.array([[0.0, 1.0], [1.0, 1.0]]), np.zeros((2, 2)))
+    >>> hamming_loss(np.array([[0, 1], [1, 1]]), np.zeros((2, 2)))
     0.75
-
-    and with a list of labels format:
-
-    >>> hamming_loss([(1, 2), (3, )], [(1, 2), tuple()])  # doctest: +ELLIPSIS
-    0.166...
-
     """
     y_type, y_true, y_pred = _check_clf_targets(y_true, y_pred)
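Unlike the subset-based zero-one loss, the Hamming loss counts individually wrong label assignments, which is why the documented value is 0.75: three of the four entries of `[[0, 1], [1, 1]]` differ from the all-zeros prediction. A side-by-side sketch (illustrative only):

    >>> import numpy as np
    >>> from sklearn.metrics import hamming_loss, zero_one_loss
    >>> y_true = np.array([[0, 1], [1, 1]])
    >>> hamming_loss(y_true, np.zeros((2, 2)))   # 3 wrong label entries out of 4
    0.75
    >>> zero_one_loss(y_true, np.zeros((2, 2)))  # neither row is an exact match
    1.0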

sklearn/multiclass.py

Lines changed: 1 addition & 3 deletions
@@ -143,9 +143,7 @@ class OneVsRestClassifier(BaseEstimator, ClassifierMixin, MetaEstimatorMixin):
 
     This strategy can also be used for multilabel learning, where a classifier
     is used to predict multiple labels for instance, by fitting on a 2-d matrix
-    in which cell [i, j] is 1 sample i has label j and 0 otherwise.
-    For multilabel learning, the number of classes must be at
-    least three, since otherwise OvR reduces to binary classification.
+    in which cell [i, j] is 1 if sample i has label j and 0 otherwise.
 
     In the multilabel learning literature, OvR is also known as the binary
     relevance method.
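To make the corrected sentence concrete, here is a hedged sketch of multilabel OvR on an indicator matrix; the toy data and the choice of LinearSVC as base estimator are invented for illustration:

    >>> import numpy as np
    >>> from sklearn.multiclass import OneVsRestClassifier
    >>> from sklearn.svm import LinearSVC
    >>> X = np.array([[0.0], [1.0], [2.0], [3.0]])        # toy single-feature inputs
    >>> Y = np.array([[1, 0], [1, 1], [0, 1], [0, 1]])    # cell [i, j] is 1 iff sample i has label j
    >>> clf = OneVsRestClassifier(LinearSVC()).fit(X, Y)  # one binary LinearSVC per column of Y
    >>> clf.predict(X).shape                              # predictions come back in indicator format
    (4, 2)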

sklearn/preprocessing/label.py

Lines changed: 4 additions & 7 deletions
@@ -249,8 +249,7 @@ def fit(self, y):
         ----------
         y : numpy array of shape (n_samples,) or (n_samples, n_classes)
             Target values. The 2-d matrix should only contain 0 and 1,
-            represents multilabel classification, and is returned unchanged
-            by LabelBinarizer.
+            represents multilabel classification.
 
         Returns
         -------

@@ -275,8 +274,7 @@ def transform(self, y):
         ----------
         y : numpy array of shape (n_samples,) or (n_samples, n_classes)
             Target values. The 2-d matrix should only contain 0 and 1,
-            represents multilabel classification, and is returned unchanged
-            by LabelBinarizer.
+            represents multilabel classification.
 
         Returns
         -------

@@ -318,8 +316,7 @@ def inverse_transform(self, Y, threshold=None):
         -------
         y : numpy array of shape (n_samples,) or (n_samples, n_classes)
             Target values. The 2-d matrix should only contain 0 and 1,
-            represents multilabel classification, and is returned unchanged
-            by LabelBinarizer.
+            represents multilabel classification.
 
         Notes
         -----
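The three docstrings above stop claiming that a 2-d indicator matrix is "returned unchanged" by LabelBinarizer. For orientation, the basic 1-d-to-indicator behavior that the class documents elsewhere (a sketch, not part of this commit):

    >>> from sklearn.preprocessing import LabelBinarizer
    >>> lb = LabelBinarizer()
    >>> lb.fit_transform([1, 2, 6, 4, 2])  # one column per class, ordered as in lb.classes_
    array([[1, 0, 0, 0],
           [0, 1, 0, 0],
           [0, 0, 0, 1],
           [0, 0, 1, 0],
           [0, 1, 0, 0]])
    >>> lb.classes_
    array([1, 2, 4, 6])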
@@ -569,8 +566,8 @@ def _transform(self, y, class_mapping):
         for labels in y:
             indices.extend(set(class_mapping[label] for label in labels))
             indptr.append(len(indices))
-        # virtual array of len(indices) 1s:
         data = np.ones(len(indices), dtype=int)
+
         return sp.csr_matrix((data, indices, indptr),
                              shape=(len(indptr) - 1, len(class_mapping)))
