sdpython
diff --git a/‎_doc/examples/plot_piecewise_linear_regression_criterion.py
Lines changed: 21 additions & 17 deletions b/‎_doc/examples/plot_piecewise_linear_regression_criterion.py
Lines changed: 21 additions & 17 deletions
diff --git a/‎_unittests/ut_mlmodel/test_piecewise_decision_tree_experiment.py
Lines changed: 45 additions & 20 deletions b/‎_unittests/ut_mlmodel/test_piecewise_decision_tree_experiment.py
Lines changed: 45 additions & 20 deletions
diff --git a/‎_unittests/ut_mlmodel/test_piecewise_decision_tree_experiment_fast.py
Lines changed: 23 additions & 19 deletions b/‎_unittests/ut_mlmodel/test_piecewise_decision_tree_experiment_fast.py
Lines changed: 23 additions & 19 deletions
diff --git a/‎_unittests/ut_mlmodel/test_piecewise_decision_tree_experiment_linear.py
Lines changed: 32 additions & 28 deletions b/‎_unittests/ut_mlmodel/test_piecewise_decision_tree_experiment_linear.py
Lines changed: 32 additions & 28 deletions
diff --git a/‎mlinsights/ext_test_case.py
Lines changed: 20 additions & 1 deletion b/‎mlinsights/ext_test_case.py
Lines changed: 20 additions & 1 deletion
@@ -152,25 +152,27 @@
 #
 # ::
 #
-#    cdef void _mean(self, SIZE_t start, SIZE_t end, DOUBLE_t *mean,
-#                    DOUBLE_t *weight) nogil:
+#    ctypedef double float64_t
+#
+#    cdef void _mean(self, SIZE_t start, SIZE_t end, float64_t *mean,
+#                    float64_t *weight) nogil:
 #        if start == end:
 #            mean[0] = 0.
 #            return
-#        cdef DOUBLE_t m = 0.
-#        cdef DOUBLE_t w = 0.
+#        cdef float64_t m = 0.
+#        cdef float64_t w = 0.
 #        cdef int k
 #        for k in range(start, end):
 #            m += self.sample_wy[k]
 #            w += self.sample_w[k]
 #        weight[0] = w
 #        mean[0] = 0. if w == 0. else m / w
 #
-#    cdef double _mse(self, SIZE_t start, SIZE_t end, DOUBLE_t mean,
-#                     DOUBLE_t weight) nogil:
+#    cdef float64_t _mse(self, SIZE_t start, SIZE_t end, float64_t mean,
+#                     float64_t weight) nogil:
 #        if start == end:
 #            return 0.
-#        cdef DOUBLE_t squ = 0.
+#        cdef float64_t squ = 0.
 #        cdef int k
 #        for k in range(start, end):
 #            squ += (self.y[self.sample_i[k], 0] - mean) ** 2 * self.sample_w[k]
@@ -189,24 +191,26 @@
 #
 # ::
 #
-#    cdef void _mean(self, SIZE_t start, SIZE_t end, DOUBLE_t *mean,
-#                    DOUBLE_t *weight) nogil:
+#    ctypedef double float64_t
+#
+#    cdef void _mean(self, SIZE_t start, SIZE_t end, float64_t *mean,
+#                    float64_t *weight) nogil:
 #        if start == end:
 #            mean[0] = 0.
 #            return
-#        cdef DOUBLE_t m = self.sample_wy_left[end-1] -
-#                          (self.sample_wy_left[start-1] if start > 0 else 0)
-#        cdef DOUBLE_t w = self.sample_w_left[end-1] -
-#                          (self.sample_w_left[start-1] if start > 0 else 0)
+#        cdef float64_t m = self.sample_wy_left[end-1] -
+#                           (self.sample_wy_left[start-1] if start > 0 else 0)
+#        cdef float64_t w = self.sample_w_left[end-1] -
+#                           (self.sample_w_left[start-1] if start > 0 else 0)
 #        weight[0] = w
 #        mean[0] = 0. if w == 0. else m / w
 #
-#    cdef double _mse(self, SIZE_t start, SIZE_t end, DOUBLE_t mean,
-#                     DOUBLE_t weight) nogil:
+#    cdef float64_t _mse(self, SIZE_t start, SIZE_t end, float64_t mean,
+#                        float64_t weight) nogil:
 #        if start == end:
 #            return 0.
-#        cdef DOUBLE_t squ = self.sample_wy2_left[end-1] -
-#                            (self.sample_wy2_left[start-1] if start > 0 else 0)
+#        cdef float64_t squ = self.sample_wy2_left[end-1] -
+#                             (self.sample_wy2_left[start-1] if start > 0 else 0)
 #        # This formula only holds if mean is computed on the same interval.
 #        # Otherwise, it is squ / weight - true_mean ** 2 + (mean - true_mean) ** 2.
 #        return 0. if weight == 0. else squ / weight - mean ** 2
 
@@ -21,10 +21,8 @@ def test_criterions(self):
         with warnings.catch_warnings(record=True) as w:
             warnings.simplefilter("always")
             from mlinsights.mlmodel._piecewise_tree_regression_common import (
-                _test_criterion_check,
                 assert_criterion_equal,
-            )
-            from mlinsights.mlmodel._piecewise_tree_regression_common import (
+                _test_criterion_check,
                 _test_criterion_init,
                 _test_criterion_node_impurity,
                 _test_criterion_node_impurity_children,
@@ -38,10 +36,6 @@ def test_criterions(self):
                 SimpleRegressorCriterion,
             )
 
-            if len(w) > 0:
-                msg = "\n".join(map(str, w))
-                raise AssertionError(f"Warning while importing the library:\n{msg}")
-
         X = numpy.array([[1.0, 2.0]]).T
         y = numpy.array([1.0, 2.0])
         c1 = MSE(1, X.shape[0])
@@ -113,6 +107,8 @@ def test_criterions(self):
         assert_criterion_equal(c1, c2)
         self.assertTrue(numpy.isnan(p1), numpy.isnan(p2))
 
+        expected_p2 = [-0.56, -0.04, -0.56]
+
         for i in range(1, 4):
             _test_criterion_check(c2)
             _test_criterion_update(c1, i)
@@ -122,23 +118,27 @@ def test_criterions(self):
             self.assertIsInstance(c2.printd(), str)
             left1, right1 = _test_criterion_node_impurity_children(c1)
             left2, right2 = _test_criterion_node_impurity_children(c2)
-            self.assertAlmostEqual(left1, left2)
+            self.assertAlmostEqual(left1, left2, atol=1e-10)
             self.assertAlmostEqual(right1, right2, atol=1e-10)
             v1 = _test_criterion_node_value(c1)
             v2 = _test_criterion_node_value(c2)
             self.assertEqual(v1, v2)
             p1 = _test_criterion_impurity_improvement(c1, 0.0, left1, right1)
             p2 = _test_criterion_impurity_improvement(c2, 0.0, left2, right2)
-            self.assertIn(
-                "value: 1.500000 total=0.260000 left=0.000000 right=0.186667",
-                _test_criterion_printf(c1),
-            )
-            self.assertIn(
-                "value: 1.500000 total=0.260000 left=0.000000 right=0.186667",
-                _test_criterion_printf(c2),
-            )
+            if i == 1:
+                self.assertIn(
+                    "value: 1.500000 total=0.260000 left=0.000000 right=0.186667",
+                    _test_criterion_printf(c1),
+                )
+                self.assertIn(
+                    "value: 1.500000 total=0.260000 left=0.000000 right=0.186667",
+                    _test_criterion_printf(c2),
+                )
             self.assertEqual(_test_criterion_printf(c1), _test_criterion_printf(c2))
-            self.assertAlmostEqual(p1, p2, atol=1e-10)
+            self.assertInAlmostEqual(
+                p1, (0, p2), atol=1e-10
+            )  # 0 if the function is not called
+            self.assertAlmostEqual(expected_p2[i - 1], p2, atol=1e-10)
 
         X = numpy.array([[1.0, 2.0, 10.0, 11.0]]).T
         y = numpy.array([0.9, 1.1, 1.9, 2.1])
@@ -159,37 +159,62 @@ def test_criterions(self):
         p2 = _test_criterion_proxy_impurity_improvement(c2)
         self.assertTrue(numpy.isnan(p1), numpy.isnan(p2))
 
+        expected_p2 = [-0.32, -0.02]
+
         for i in range(2, 4):
             _test_criterion_update(c1, i)
             _test_criterion_update(c2, i)
             left1, right1 = _test_criterion_node_impurity_children(c1)
             left2, right2 = _test_criterion_node_impurity_children(c2)
-            self.assertAlmostEqual(left1, left2)
-            self.assertAlmostEqual(right1, right2)
+            self.assertAlmostEqual(left1, left2, atol=1e-10)
+            self.assertAlmostEqual(right1, right2, atol=1e-10)
             v1 = _test_criterion_node_value(c1)
             v2 = _test_criterion_node_value(c2)
             self.assertEqual(v1, v2)
             p1 = _test_criterion_impurity_improvement(c1, 0.0, left1, right1)
             p2 = _test_criterion_impurity_improvement(c2, 0.0, left2, right2)
-            self.assertAlmostEqual(p1, p2)
+            self.assertInAlmostEqual(
+                p1, (0, p2), atol=1e-10
+            )  # 0 if the function is not called
+            self.assertAlmostEqual(expected_p2[i - 2], p2, atol=1e-10)
 
     def test_decision_tree_criterion(self):
         from mlinsights.mlmodel.piecewise_tree_regression_criterion import (
             SimpleRegressorCriterion,
         )
 
+        debug = __name__ == "__main__"
+
         X = numpy.array([[1.0, 2.0, 10.0, 11.0]]).T
         y = numpy.array([0.9, 1.1, 1.9, 2.1])
+        if debug:
+            print("create the tree")
         clr1 = DecisionTreeRegressor(max_depth=1)
+        if debug:
+            print("train the tree")
         clr1.fit(X, y)
+        if debug:
+            print("predict with the tree")
         p1 = clr1.predict(X)
+        if debug:
+            print(f"done {p1}")
 
+        if debug:
+            print("create the criterion")
         crit = SimpleRegressorCriterion(
             1 if len(y.shape) <= 1 else y.shape[1], X.shape[0]
         )
+        if debug:
+            print("create the new tree")
         clr2 = DecisionTreeRegressor(criterion=crit, max_depth=1)
+        if debug:
+            print("train the new tree")
         clr2.fit(X, y)
+        if debug:
+            print("predict with the new tree")
         p2 = clr2.predict(X)
+        if debug:
+            print(f"done {p2}")
         self.assertEqual(p1, p2)
         self.assertEqual(clr1.tree_.node_count, clr2.tree_.node_count)
 
 
@@ -1,33 +1,37 @@
 # -*- coding: utf-8 -*-
 import unittest
+import warnings
 import numpy
 from sklearn.tree._criterion import MSE
 from sklearn.tree import DecisionTreeRegressor
 from sklearn import datasets
 from mlinsights.ext_test_case import ExtTestCase
 from mlinsights.mlmodel.piecewise_tree_regression import PiecewiseTreeRegressor
-from mlinsights.mlmodel._piecewise_tree_regression_common import (
-    _test_criterion_init,
-    _test_criterion_node_impurity,
-    _test_criterion_node_impurity_children,
-    _test_criterion_update,
-    _test_criterion_node_value,
-    _test_criterion_proxy_impurity_improvement,
-    _test_criterion_impurity_improvement,
-)
-from mlinsights.mlmodel._piecewise_tree_regression_common import (
-    assert_criterion_equal,
-)
-from mlinsights.mlmodel.piecewise_tree_regression_criterion_fast import (
-    SimpleRegressorCriterionFast,
-)
+
+with warnings.catch_warnings(record=True) as w:
+    warnings.simplefilter("always")
+    from mlinsights.mlmodel._piecewise_tree_regression_common import (
+        _test_criterion_init,
+        _test_criterion_node_impurity,
+        _test_criterion_node_impurity_children,
+        _test_criterion_update,
+        _test_criterion_node_value,
+        _test_criterion_proxy_impurity_improvement,
+        _test_criterion_impurity_improvement,
+    )
+    from mlinsights.mlmodel._piecewise_tree_regression_common import (
+        assert_criterion_equal,
+    )
+    from mlinsights.mlmodel.piecewise_tree_regression_criterion_fast import (
+        SimpleRegressorCriterionFast,
+    )
 
 
 class TestPiecewiseDecisionTreeExperimentFast(ExtTestCase):
-    @unittest.skip(
-        reason="self.y = y raises: Fatal Python error: "
-        "__pyx_fatalerror: Acquisition count is"
-    )
+    # @unittest.skip(
+    #    reason="self.y = y raises: Fatal Python error: "
+    #    "__pyx_fatalerror: Acquisition count is"
+    # )
     def test_criterions(self):
         X = numpy.array([[1.0, 2.0]]).T
         y = numpy.array([1.0, 2.0])
 
@@ -1,31 +1,35 @@
 # -*- coding: utf-8 -*-
 import unittest
+import warnings
 import numpy
 from sklearn.tree._criterion import MSE
 from sklearn.tree import DecisionTreeRegressor
 from sklearn import datasets
 from sklearn.model_selection import train_test_split
 from mlinsights.ext_test_case import ExtTestCase
 from mlinsights.mlmodel.piecewise_tree_regression import PiecewiseTreeRegressor
-from mlinsights.mlmodel._piecewise_tree_regression_common import (
-    _test_criterion_init,
-    _test_criterion_node_impurity,
-    _test_criterion_node_impurity_children,
-    _test_criterion_update,
-    _test_criterion_node_value,
-    _test_criterion_proxy_impurity_improvement,
-    _test_criterion_impurity_improvement,
-)
-from mlinsights.mlmodel.piecewise_tree_regression_criterion_linear import (
-    LinearRegressorCriterion,
-)
+
+with warnings.catch_warnings(record=True) as w:
+    warnings.simplefilter("always")
+    from mlinsights.mlmodel._piecewise_tree_regression_common import (
+        _test_criterion_init,
+        _test_criterion_node_impurity,
+        _test_criterion_node_impurity_children,
+        _test_criterion_update,
+        _test_criterion_node_value,
+        _test_criterion_proxy_impurity_improvement,
+        _test_criterion_impurity_improvement,
+    )
+    from mlinsights.mlmodel.piecewise_tree_regression_criterion_linear import (
+        LinearRegressorCriterion,
+    )
 
 
 class TestPiecewiseDecisionTreeExperimentLinear(ExtTestCase):
-    @unittest.skip(
-        reason="self.y = y raises: Fatal Python error: "
-        "__pyx_fatalerror: Acquisition count is"
-    )
+    # @unittest.skip(
+    #    reason="self.y = y raises: Fatal Python error: "
+    #    "__pyx_fatalerror: Acquisition count is"
+    # )
     def test_criterions(self):
         X = numpy.array([[10.0, 12.0, 13.0]]).T
         y = numpy.array([20.0, 22.0, 23.0])
@@ -127,10 +131,10 @@ def test_criterions(self):
             self.assertGreater(dest[0], 0)
             self.assertGreater(dest[1], 0)
 
-    @unittest.skip(
-        reason="self.y = y raises: Fatal Python error: "
-        "__pyx_fatalerror: Acquisition count is"
-    )
+    # @unittest.skip(
+    #    reason="self.y = y raises: Fatal Python error: "
+    #    "__pyx_fatalerror: Acquisition count is"
+    # )
     def test_criterions_check_value(self):
         X = numpy.array([[10.0, 12.0, 13.0]]).T
         y = numpy.array([[20.0, 22.0, 23.0]]).T
@@ -164,10 +168,10 @@ def test_decision_tree_criterion_iris(self):
         p2 = clr2.predict(X)
         self.assertEqual(p1.shape, p2.shape)
 
-    @unittest.skip(
-        reason="self.y = y raises: Fatal Python error: "
-        "__pyx_fatalerror: Acquisition count is"
-    )
+    # @unittest.skip(
+    #    reason="self.y = y raises: Fatal Python error: "
+    #    "__pyx_fatalerror: Acquisition count is"
+    # )
     def test_decision_tree_criterion_iris_dtc(self):
         iris = datasets.load_iris()
         X, y = iris.data, iris.target
@@ -191,10 +195,10 @@ def test_decision_tree_criterion_iris_dtc(self):
         self.assertIsInstance(mp, dict)
         self.assertGreater(len(mp), 2)
 
-    @unittest.skip(
-        reason="self.y = y raises: Fatal Python error: "
-        "__pyx_fatalerror: Acquisition count is"
-    )
+    # @unittest.skip(
+    #    reason="self.y = y raises: Fatal Python error: "
+    #    "__pyx_fatalerror: Acquisition count is"
+    # )
     def test_decision_tree_criterion_iris_dtc_traintest(self):
         iris = datasets.load_iris()
         X, y = iris.data, iris.target
 
@@ -9,7 +9,7 @@
 from contextlib import redirect_stderr, redirect_stdout
 from io import StringIO
 from timeit import Timer
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
 import numpy
 from numpy.testing import assert_allclose
 import pandas
@@ -231,6 +231,25 @@ def assertEqualDataFrame(self, d1, d2, **kwargs):
 
         assert_frame_equal(d1, d2, **kwargs)
 
+    def assertInAlmostEqual(
+        self,
+        value: float,
+        expected_values: Sequence[float],
+        atol: float = 0,
+        rtol: float = 0,
+    ):
+        last_e = None
+        for s in expected_values:
+            try:
+                self.assertAlmostEqual(value, s, atol=atol, rtol=rtol)
+                return
+            except AssertionError as e:
+                last_e = e
+        if last_e is not None:
+            raise AssertionError(
+                f"Value {value} not in set {expected_values}."
+            ) from last_e
+
     def assertAlmostEqual(
         self,
         expected: numpy.ndarray,