From 535c1e49171eea712d04f28769c7ebf697e675f9 Mon Sep 17 00:00:00 2001 From: "Kruglov, Oleg" Date: Mon, 23 Sep 2024 10:57:42 -0700 Subject: [PATCH 01/13] Add incremental algorithms support --- configs/incremental.json | 99 ++++++++++++++++++++++++ sklbench/benchmarks/sklearn_estimator.py | 36 +++++++-- sklbench/report/implementation.py | 10 ++- test-configuration-linux.yml | 5 ++ test-configuration-win.yml | 4 + 5 files changed, 144 insertions(+), 10 deletions(-) create mode 100644 configs/incremental.json diff --git a/configs/incremental.json b/configs/incremental.json new file mode 100644 index 00000000..5f7a5477 --- /dev/null +++ b/configs/incremental.json @@ -0,0 +1,99 @@ +{ + "PARAMETERS_SETS": { + "common": {"bench": {"n_runs": 10, "time_limit": 60}}, + "covariance data": { + "data": [ + { + "source": "make_blobs", + "generation_kwargs": { + "centers": 1, + "n_samples": 1000, + "n_features": [16, 64] + }, + "split_kwargs": {"ignore": true} + } + ] + }, + "basic_statistics data": { + "data": { + "source": "make_blobs", + "generation_kwargs": { + "centers": 1, + "n_samples": 10000, + "n_features": [16, 64] + }, + "split_kwargs": {"ignore": true} + } + }, + "linear_regression data": { + "data": { + "source": "make_regression", + "split_kwargs": {"train_size": 0.2, "test_size": 0.8}, + "generation_kwargs": { + "n_samples": 5000, + "n_features": [40, 100], + "n_informative": 5, + "noise": 2.0 + } + } + }, + "pca data": { + "data": { + "source": "make_blobs", + "generation_kwargs": { + "centers": 1, + "n_samples": 1000, + "n_features": [16, 64] + }, + "split_kwargs": {"ignore": true} + } + }, + "covariance": { + "algorithm": [ + { + "estimator": "IncrementalEmpiricalCovariance", + "library": "sklearnex.covariance", + "estimator_methods": {"training": "partial_fit"}, + "num_batches": {"training": 2} + } + ] + }, + "basic_statistics": { + "algorithm": [ + { + "estimator": "IncrementalBasicStatistics", + "library": "sklearnex.basic_statistics", + "num_batches": 
{"training": 2} + } + ] + }, + "linear_regression": { + "algorithm": [ + { + "estimator": "IncrementalLinearRegression", + "library": "sklearnex.linear_model", + "num_batches": {"training": 2} + } + ] + }, + "pca": { + "algorithm": [ + { + "estimator": "IncrementalPCA", + "library": "sklearnex.preview.decomposition", + "num_batches": {"training": 2} + } + ] + } + }, + "TEMPLATES": { + "covariance": {"SETS": ["common", "covariance", "covariance data"]}, + "basic_statistics": { + "SETS": ["common", "basic_statistics", "basic_statistics data"] + }, + "linear_regression": { + "SETS": ["common", "linear_regression", "linear_regression data"] + }, + "pca": {"SETS": ["common", "pca", "pca data"]} + } +} diff --git a/sklbench/benchmarks/sklearn_estimator.py b/sklbench/benchmarks/sklearn_estimator.py index f9c0a75e..4cdde86d 100644 --- a/sklbench/benchmarks/sklearn_estimator.py +++ b/sklbench/benchmarks/sklearn_estimator.py @@ -74,7 +74,7 @@ def get_estimator(library_name: str, estimator_name: str): def get_estimator_methods(bench_case: BenchCase) -> Dict[str, List[str]]: # default estimator methods estimator_methods = { - "training": ["fit"], + "training": ["partial_fit", "fit"], "inference": ["predict", "predict_proba", "transform"], } for stage in estimator_methods.keys(): @@ -334,7 +334,9 @@ def verify_patching(stream: io.StringIO, function_name) -> bool: return acceleration_lines > 0 and fallback_lines == 0 -def create_online_function(method_instance, data_args, batch_size): +def create_online_function( + estimator_instance, method_instance, data_args, num_batches, batch_size +): n_batches = data_args[0].shape[0] // batch_size if "y" in list(inspect.signature(method_instance).parameters): @@ -345,6 +347,7 @@ def ndarray_function(x, y): x[i * batch_size : (i + 1) * batch_size], y[i * batch_size : (i + 1) * batch_size], ) + estimator_instance._onedal_finalize_fit() def dataframe_function(x, y): for i in range(n_batches): @@ -352,16 +355,19 @@ def dataframe_function(x, 
y): x.iloc[i * batch_size : (i + 1) * batch_size], y.iloc[i * batch_size : (i + 1) * batch_size], ) + estimator_instance._onedal_finalize_fit() else: def ndarray_function(x): for i in range(n_batches): method_instance(x[i * batch_size : (i + 1) * batch_size]) + estimator_instance._onedal_finalize_fit() def dataframe_function(x): for i in range(n_batches): method_instance(x.iloc[i * batch_size : (i + 1) * batch_size]) + estimator_instance._onedal_finalize_fit() if "ndarray" in str(type(data_args[0])): return ndarray_function @@ -414,12 +420,28 @@ def measure_sklearn_estimator( data_args = (x_train,) else: data_args = (x_test,) - batch_size = get_bench_case_value( - bench_case, f"algorithm:batch_size:{stage}" - ) - if batch_size is not None: + + if method == "partial_fit": + num_batches = get_bench_case_value(bench_case, "data:num_batches") + batch_size = get_bench_case_value(bench_case, "data:batch_size") + + if batch_size is None: + if num_batches is None: + num_batches = 5 + batch_size = ( + data_args[0].shape[0] + num_batches - 1 + ) // num_batches + if num_batches is None: + num_batches = ( + data_args[0].shape[0] + batch_size - 1 + ) // batch_size + method_instance = create_online_function( - method_instance, data_args, batch_size + estimator_instance, + method_instance, + data_args, + num_batches, + batch_size, ) # daal4py model builders enabling branch if enable_modelbuilders and stage == "inference": diff --git a/sklbench/report/implementation.py b/sklbench/report/implementation.py index b577ab55..df15b5eb 100644 --- a/sklbench/report/implementation.py +++ b/sklbench/report/implementation.py @@ -16,7 +16,7 @@ import argparse import json -from typing import Dict, List +from typing import Dict, Hashable, List import openpyxl as xl import pandas as pd @@ -239,6 +239,7 @@ def get_result_tables_as_df( bench_cases = pd.DataFrame( [flatten_dict(bench_case) for bench_case in results["bench_cases"]] ) + bench_cases = bench_cases.map(lambda x: str(x) if not 
isinstance(x, Hashable) else x) if compatibility_mode: bench_cases = transform_results_to_compatible(bench_cases) @@ -248,7 +249,7 @@ def get_result_tables_as_df( bench_cases.drop(columns=[column], inplace=True) diffby_columns.remove(column) - return split_df_by_columns(bench_cases, splitby_columns) + return split_df_by_columns(bench_cases, splitby_columns, False) def get_summary_from_df(df: pd.DataFrame, df_name: str) -> pd.DataFrame: @@ -258,7 +259,10 @@ def get_summary_from_df(df: pd.DataFrame, df_name: str) -> pd.DataFrame: # only relative improvements are included in summary currently if len(column) > 1 and column[1] == f"{metric_name} relative improvement": metric_columns.append(column) - summary = df[metric_columns].aggregate(geomean_wrapper, axis=0).to_frame().T + if metric_columns: + summary = df[metric_columns].aggregate(geomean_wrapper, axis=0).to_frame().T + else: + summary = pd.DataFrame() summary.index = pd.Index([df_name]) return summary diff --git a/test-configuration-linux.yml b/test-configuration-linux.yml index a37769ce..722d1008 100644 --- a/test-configuration-linux.yml +++ b/test-configuration-linux.yml @@ -45,6 +45,11 @@ steps: conda activate bench-env python -m sklbench --report -l DEBUG --report -c configs/sklearn_example.json displayName: Sklearn example run + - script: | + source /usr/share/miniconda/etc/profile.d/conda.sh + conda activate bench-env + python -m sklbench --report -l DEBUG --report -c configs/incremental.json + displayName: Incremental algorithms example run - script: | source /usr/share/miniconda/etc/profile.d/conda.sh conda activate bench-env diff --git a/test-configuration-win.yml b/test-configuration-win.yml index a1eddaeb..82c3152a 100644 --- a/test-configuration-win.yml +++ b/test-configuration-win.yml @@ -43,6 +43,10 @@ steps: call activate bench-env python -m sklbench --report -l DEBUG --report -c configs/sklearn_example.json displayName: Sklearn example run + - script: | + call activate bench-env + python -m 
sklbench --report -l DEBUG --report -c configs/incremental.json + displayName: Incremental algorithms example run - script: | call activate bench-env python -m sklbench --report -l DEBUG --report -c configs/xgboost_example.json From d6952ac74715dcb0910626f9e5dce1c2eb1a3827 Mon Sep 17 00:00:00 2001 From: "Kruglov, Oleg" Date: Mon, 23 Sep 2024 11:49:37 -0700 Subject: [PATCH 02/13] Fix win yml --- test-configuration-win.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test-configuration-win.yml b/test-configuration-win.yml index 82c3152a..f3ac1595 100644 --- a/test-configuration-win.yml +++ b/test-configuration-win.yml @@ -43,7 +43,7 @@ steps: call activate bench-env python -m sklbench --report -l DEBUG --report -c configs/sklearn_example.json displayName: Sklearn example run - - script: | + - script: | call activate bench-env python -m sklbench --report -l DEBUG --report -c configs/incremental.json displayName: Incremental algorithms example run From 03a152a13c62eef3fa66b61109b76874d4e9b2b1 Mon Sep 17 00:00:00 2001 From: "Kruglov, Oleg" Date: Tue, 24 Sep 2024 02:46:36 -0700 Subject: [PATCH 03/13] Remove samples/ms info --- sklbench/benchmarks/sklearn_estimator.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sklbench/benchmarks/sklearn_estimator.py b/sklbench/benchmarks/sklearn_estimator.py index 4cdde86d..7e616273 100644 --- a/sklbench/benchmarks/sklearn_estimator.py +++ b/sklbench/benchmarks/sklearn_estimator.py @@ -458,10 +458,6 @@ def measure_sklearn_estimator( metrics[method]["time std[ms]"], _, ) = measure_case(bench_case, method_instance, *data_args) - if batch_size is not None: - metrics[method]["throughput[samples/ms]"] = ( - (data_args[0].shape[0] // batch_size) * batch_size - ) / metrics[method]["time[ms]"] if ensure_sklearnex_patching: full_method_name = f"{estimator_class.__name__}.{method}" sklearnex_logging_stream.seek(0) From 3ac5c236eb6255892e607a6122d4d2187e4c5451 Mon Sep 17 00:00:00 2001 From: "Kruglov, Oleg" Date: Tue, 
24 Sep 2024 06:45:42 -0700 Subject: [PATCH 04/13] Remove BS from config (need to add after pip version update) --- configs/incremental.json | 3 --- 1 file changed, 3 deletions(-) diff --git a/configs/incremental.json b/configs/incremental.json index 5f7a5477..c9ffb19c 100644 --- a/configs/incremental.json +++ b/configs/incremental.json @@ -88,9 +88,6 @@ }, "TEMPLATES": { "covariance": {"SETS": ["common", "covariance", "covariance data"]}, - "basic_statistics": { - "SETS": ["common", "basic_statistics", "basic_statistics data"] - }, "linear_regression": { "SETS": ["common", "linear_regression", "linear_regression data"] }, From 9461fad69a00ecbf69a3e5fcef662fb1bafd4253 Mon Sep 17 00:00:00 2001 From: "Kruglov, Oleg" Date: Wed, 25 Sep 2024 02:00:29 -0700 Subject: [PATCH 05/13] Add condition for finalize --- sklbench/benchmarks/sklearn_estimator.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/sklbench/benchmarks/sklearn_estimator.py b/sklbench/benchmarks/sklearn_estimator.py index 7e616273..52f5bf4e 100644 --- a/sklbench/benchmarks/sklearn_estimator.py +++ b/sklbench/benchmarks/sklearn_estimator.py @@ -347,7 +347,8 @@ def ndarray_function(x, y): x[i * batch_size : (i + 1) * batch_size], y[i * batch_size : (i + 1) * batch_size], ) - estimator_instance._onedal_finalize_fit() + if hasattr(estimator_instance, "_onedal_finalize_fit"): + estimator_instance._onedal_finalize_fit() def dataframe_function(x, y): for i in range(n_batches): @@ -355,19 +356,22 @@ def dataframe_function(x, y): x.iloc[i * batch_size : (i + 1) * batch_size], y.iloc[i * batch_size : (i + 1) * batch_size], ) - estimator_instance._onedal_finalize_fit() + if hasattr(estimator_instance, "_onedal_finalize_fit"): + estimator_instance._onedal_finalize_fit() else: def ndarray_function(x): for i in range(n_batches): method_instance(x[i * batch_size : (i + 1) * batch_size]) - estimator_instance._onedal_finalize_fit() + if hasattr(estimator_instance, "_onedal_finalize_fit"): + 
estimator_instance._onedal_finalize_fit() def dataframe_function(x): for i in range(n_batches): method_instance(x.iloc[i * batch_size : (i + 1) * batch_size]) - estimator_instance._onedal_finalize_fit() + if hasattr(estimator_instance, "_onedal_finalize_fit"): + estimator_instance._onedal_finalize_fit() if "ndarray" in str(type(data_args[0])): return ndarray_function From b82d772f26c1af7d261b78bf94ae97280c23c9e2 Mon Sep 17 00:00:00 2001 From: "Kruglov, Oleg" Date: Wed, 25 Sep 2024 09:51:39 -0700 Subject: [PATCH 06/13] Fix num_batches usage --- sklbench/benchmarks/sklearn_estimator.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/sklbench/benchmarks/sklearn_estimator.py b/sklbench/benchmarks/sklearn_estimator.py index 52f5bf4e..3f8b1641 100644 --- a/sklbench/benchmarks/sklearn_estimator.py +++ b/sklbench/benchmarks/sklearn_estimator.py @@ -337,12 +337,11 @@ def verify_patching(stream: io.StringIO, function_name) -> bool: def create_online_function( estimator_instance, method_instance, data_args, num_batches, batch_size ): - n_batches = data_args[0].shape[0] // batch_size if "y" in list(inspect.signature(method_instance).parameters): def ndarray_function(x, y): - for i in range(n_batches): + for i in range(num_batches): method_instance( x[i * batch_size : (i + 1) * batch_size], y[i * batch_size : (i + 1) * batch_size], @@ -351,7 +350,7 @@ def ndarray_function(x, y): estimator_instance._onedal_finalize_fit() def dataframe_function(x, y): - for i in range(n_batches): + for i in range(num_batches): method_instance( x.iloc[i * batch_size : (i + 1) * batch_size], y.iloc[i * batch_size : (i + 1) * batch_size], @@ -362,13 +361,13 @@ def dataframe_function(x, y): else: def ndarray_function(x): - for i in range(n_batches): + for i in range(num_batches): method_instance(x[i * batch_size : (i + 1) * batch_size]) if hasattr(estimator_instance, "_onedal_finalize_fit"): estimator_instance._onedal_finalize_fit() def dataframe_function(x): - for i in 
range(n_batches): + for i in range(num_batches): method_instance(x.iloc[i * batch_size : (i + 1) * batch_size]) if hasattr(estimator_instance, "_onedal_finalize_fit"): estimator_instance._onedal_finalize_fit() From b5ad233d539803da41ae41f98e7997f68394ec35 Mon Sep 17 00:00:00 2001 From: "Kruglov, Oleg" Date: Fri, 27 Sep 2024 02:29:40 -0700 Subject: [PATCH 07/13] Reduce config --- configs/incremental.json | 40 +++++++++------------------------------- 1 file changed, 9 insertions(+), 31 deletions(-) diff --git a/configs/incremental.json b/configs/incremental.json index c9ffb19c..f09927ee 100644 --- a/configs/incremental.json +++ b/configs/incremental.json @@ -1,7 +1,7 @@ { "PARAMETERS_SETS": { "common": {"bench": {"n_runs": 10, "time_limit": 60}}, - "covariance data": { + "unlabeled dataset": { "data": [ { "source": "make_blobs", @@ -14,18 +14,7 @@ } ] }, - "basic_statistics data": { - "data": { - "source": "make_blobs", - "generation_kwargs": { - "centers": 1, - "n_samples": 10000, - "n_features": [16, 64] - }, - "split_kwargs": {"ignore": true} - } - }, - "linear_regression data": { + "labeled dataset": { "data": { "source": "make_regression", "split_kwargs": {"train_size": 0.2, "test_size": 0.8}, @@ -37,22 +26,11 @@ } } }, - "pca data": { - "data": { - "source": "make_blobs", - "generation_kwargs": { - "centers": 1, - "n_samples": 1000, - "n_features": [16, 64] - }, - "split_kwargs": {"ignore": true} - } - }, "covariance": { "algorithm": [ { "estimator": "IncrementalEmpiricalCovariance", - "library": "sklearnex.covariance", + "library": "sklearnex", "estimator_methods": {"training": "partial_fit"}, "num_batches": {"training": 2} } @@ -62,7 +40,7 @@ "algorithm": [ { "estimator": "IncrementalBasicStatistics", - "library": "sklearnex.basic_statistics", + "library": "sklearnex", "num_batches": {"training": 2} } ] @@ -71,7 +49,7 @@ "algorithm": [ { "estimator": "IncrementalLinearRegression", - "library": "sklearnex.linear_model", + "library": "sklearnex", "num_batches": 
{"training": 2} } ] @@ -80,17 +58,17 @@ "algorithm": [ { "estimator": "IncrementalPCA", - "library": "sklearnex.preview.decomposition", + "library": "sklearnex.preview", "num_batches": {"training": 2} } ] } }, "TEMPLATES": { - "covariance": {"SETS": ["common", "covariance", "covariance data"]}, + "covariance": {"SETS": ["common", "covariance", "unlabeled dataset"]}, "linear_regression": { - "SETS": ["common", "linear_regression", "linear_regression data"] + "SETS": ["common", "linear_regression", "labeled dataset"] }, - "pca": {"SETS": ["common", "pca", "pca data"]} + "pca": {"SETS": ["common", "pca", "unlabeled dataset"]} } } From fc4ad2b12ffefebdc3fe3f7103d24fc997cdad0f Mon Sep 17 00:00:00 2001 From: "Kruglov, Oleg" Date: Fri, 27 Sep 2024 04:53:32 -0700 Subject: [PATCH 08/13] Add covariance module to incremental config --- configs/incremental.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/incremental.json b/configs/incremental.json index f09927ee..d36e2a16 100644 --- a/configs/incremental.json +++ b/configs/incremental.json @@ -30,7 +30,7 @@ "algorithm": [ { "estimator": "IncrementalEmpiricalCovariance", - "library": "sklearnex", + "library": "sklearnex.covariance", "estimator_methods": {"training": "partial_fit"}, "num_batches": {"training": 2} } From 040802dc7229b4713b5ccab4de4248505e762b65 Mon Sep 17 00:00:00 2001 From: "Kruglov, Oleg" Date: Fri, 4 Oct 2024 02:49:02 -0700 Subject: [PATCH 09/13] Rename example config --- .../{incremental.json => sklearnex_incremental_example.json} | 0 test-configuration-linux.yml | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename configs/{incremental.json => sklearnex_incremental_example.json} (100%) diff --git a/configs/incremental.json b/configs/sklearnex_incremental_example.json similarity index 100% rename from configs/incremental.json rename to configs/sklearnex_incremental_example.json diff --git a/test-configuration-linux.yml b/test-configuration-linux.yml index 
722d1008..d8c1a64e 100644 --- a/test-configuration-linux.yml +++ b/test-configuration-linux.yml @@ -48,7 +48,7 @@ steps: - script: | source /usr/share/miniconda/etc/profile.d/conda.sh conda activate bench-env - python -m sklbench --report -l DEBUG --report -c configs/incremental.json + python -m sklbench --report -l DEBUG --report -c configs/sklearnex_incremental_example.json displayName: Incremental algorithms example run - script: | source /usr/share/miniconda/etc/profile.d/conda.sh From 69cc4c1754024b2817fe87b3a0d89a926b45658b Mon Sep 17 00:00:00 2001 From: "Kruglov, Oleg" Date: Fri, 4 Oct 2024 03:54:18 -0700 Subject: [PATCH 10/13] Remove bs mentioning in config (need to be added later) --- configs/sklearnex_incremental_example.json | 9 --------- 1 file changed, 9 deletions(-) diff --git a/configs/sklearnex_incremental_example.json b/configs/sklearnex_incremental_example.json index d36e2a16..37b2c7fb 100644 --- a/configs/sklearnex_incremental_example.json +++ b/configs/sklearnex_incremental_example.json @@ -36,15 +36,6 @@ } ] }, - "basic_statistics": { - "algorithm": [ - { - "estimator": "IncrementalBasicStatistics", - "library": "sklearnex", - "num_batches": {"training": 2} - } - ] - }, "linear_regression": { "algorithm": [ { From f275062098635b049f2ff822c524c44f7b62422a Mon Sep 17 00:00:00 2001 From: "Kruglov, Oleg" Date: Fri, 4 Oct 2024 08:36:17 -0700 Subject: [PATCH 11/13] Fix num_batches and batch_size reading from config --- configs/sklearnex_incremental_example.json | 6 +----- sklbench/benchmarks/sklearn_estimator.py | 8 ++++++-- sklbench/report/implementation.py | 2 ++ 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/configs/sklearnex_incremental_example.json b/configs/sklearnex_incremental_example.json index 37b2c7fb..1fbbcafa 100644 --- a/configs/sklearnex_incremental_example.json +++ b/configs/sklearnex_incremental_example.json @@ -56,10 +56,6 @@ } }, "TEMPLATES": { - "covariance": {"SETS": ["common", "covariance", "unlabeled dataset"]}, 
- "linear_regression": { - "SETS": ["common", "linear_regression", "labeled dataset"] - }, - "pca": {"SETS": ["common", "pca", "unlabeled dataset"]} + "covariance": {"SETS": ["common", "covariance", "unlabeled dataset"]} } } diff --git a/sklbench/benchmarks/sklearn_estimator.py b/sklbench/benchmarks/sklearn_estimator.py index 3f8b1641..c4f94c47 100644 --- a/sklbench/benchmarks/sklearn_estimator.py +++ b/sklbench/benchmarks/sklearn_estimator.py @@ -425,8 +425,12 @@ def measure_sklearn_estimator( data_args = (x_test,) if method == "partial_fit": - num_batches = get_bench_case_value(bench_case, "data:num_batches") - batch_size = get_bench_case_value(bench_case, "data:batch_size") + num_batches = get_bench_case_value( + bench_case, f"algorithm:num_batches:{stage}" + ) + batch_size = get_bench_case_value( + bench_case, f"algorithm:batch_size:{stage}" + ) if batch_size is None: if num_batches is None: diff --git a/sklbench/report/implementation.py b/sklbench/report/implementation.py index df15b5eb..af0398dd 100644 --- a/sklbench/report/implementation.py +++ b/sklbench/report/implementation.py @@ -236,6 +236,7 @@ def get_result_tables_as_df( splitby_columns=["estimator", "method", "function"], compatibility_mode=False, ): + print(results["bench_cases"]) bench_cases = pd.DataFrame( [flatten_dict(bench_case) for bench_case in results["bench_cases"]] ) @@ -244,6 +245,7 @@ def get_result_tables_as_df( if compatibility_mode: bench_cases = transform_results_to_compatible(bench_cases) + print(bench_cases) for column in diffby_columns.copy(): if bench_cases[column].nunique() == 1: bench_cases.drop(columns=[column], inplace=True) From 5a9be80616e5dca5e50bd27145ce11c6316b4c2d Mon Sep 17 00:00:00 2001 From: "Kruglov, Oleg" Date: Fri, 4 Oct 2024 08:41:09 -0700 Subject: [PATCH 12/13] Revert accidentally pushed changes --- configs/sklearnex_incremental_example.json | 6 +++++- sklbench/report/implementation.py | 2 -- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git 
a/configs/sklearnex_incremental_example.json b/configs/sklearnex_incremental_example.json index 1fbbcafa..37b2c7fb 100644 --- a/configs/sklearnex_incremental_example.json +++ b/configs/sklearnex_incremental_example.json @@ -56,6 +56,10 @@ } }, "TEMPLATES": { - "covariance": {"SETS": ["common", "covariance", "unlabeled dataset"]} + "covariance": {"SETS": ["common", "covariance", "unlabeled dataset"]}, + "linear_regression": { + "SETS": ["common", "linear_regression", "labeled dataset"] + }, + "pca": {"SETS": ["common", "pca", "unlabeled dataset"]} } } diff --git a/sklbench/report/implementation.py b/sklbench/report/implementation.py index af0398dd..df15b5eb 100644 --- a/sklbench/report/implementation.py +++ b/sklbench/report/implementation.py @@ -236,7 +236,6 @@ def get_result_tables_as_df( splitby_columns=["estimator", "method", "function"], compatibility_mode=False, ): - print(results["bench_cases"]) bench_cases = pd.DataFrame( [flatten_dict(bench_case) for bench_case in results["bench_cases"]] ) @@ -245,7 +244,6 @@ def get_result_tables_as_df( if compatibility_mode: bench_cases = transform_results_to_compatible(bench_cases) - print(bench_cases) for column in diffby_columns.copy(): if bench_cases[column].nunique() == 1: bench_cases.drop(columns=[column], inplace=True) From 1d48f3a1b35668def560fb05c4b200783102cfda Mon Sep 17 00:00:00 2001 From: Ethan Glaser Date: Mon, 17 Mar 2025 22:30:21 -0700 Subject: [PATCH 13/13] remove batch_size logic from incremental benchmarking for num_batches --- configs/README.md | 1 + sklbench/benchmarks/sklearn_estimator.py | 57 ++++++++++-------------- sklbench/report/implementation.py | 1 + 3 files changed, 26 insertions(+), 33 deletions(-) diff --git a/configs/README.md b/configs/README.md index 8d3c5ac2..07c92dc1 100644 --- a/configs/README.md +++ b/configs/README.md @@ -117,6 +117,7 @@ Configs have the three highest parameter keys: |:---------------|:--------------|:--------|:------------| | `algorithm`:`estimator` | None | | Name 
of measured estimator. | | `algorithm`:`estimator_params` | Empty `dict` | | Parameters for estimator constructor. | +| `algorithm`:`num_batches`:`training` | 5 | | Number of `partial_fit` calls made per measured training run. Each call receives the full training dataset (the data is not split into `num_batches` chunks). For incremental estimators only. | | `algorithm`:`online_inference_mode` | False | | Enables online mode for inference methods of estimator (separate call for each sample). | | `algorithm`:`sklearn_context` | None | | Parameters for sklearn `config_context` used over estimator. | | `algorithm`:`sklearnex_context` | None | | Parameters for sklearnex `config_context` used over estimator. Updated by `sklearn_context` if set. | diff --git a/sklbench/benchmarks/sklearn_estimator.py b/sklbench/benchmarks/sklearn_estimator.py index c4f94c47..dd0ef1a5 100644 --- a/sklbench/benchmarks/sklearn_estimator.py +++ b/sklbench/benchmarks/sklearn_estimator.py @@ -334,27 +334,19 @@ def verify_patching(stream: io.StringIO, function_name) -> bool: return acceleration_lines > 0 and fallback_lines == 0 -def create_online_function( - estimator_instance, method_instance, data_args, num_batches, batch_size -): +def create_online_function(estimator_instance, method_instance, data_args, num_batches): if "y" in list(inspect.signature(method_instance).parameters): def ndarray_function(x, y): for i in range(num_batches): - method_instance( - x[i * batch_size : (i + 1) * batch_size], - y[i * batch_size : (i + 1) * batch_size], - ) + method_instance(x, y) if hasattr(estimator_instance, "_onedal_finalize_fit"): estimator_instance._onedal_finalize_fit() def dataframe_function(x, y): for i in range(num_batches): - method_instance( - x.iloc[i * batch_size : (i + 1) * batch_size], - y.iloc[i * batch_size : (i + 1) * batch_size], - ) + method_instance(x, y) if hasattr(estimator_instance, "_onedal_finalize_fit"): estimator_instance._onedal_finalize_fit() @@ -362,13 +354,13 @@ def
dataframe_function(x, y): def ndarray_function(x): for i in range(num_batches): - method_instance(x[i * batch_size : (i + 1) * batch_size]) + method_instance(x) if hasattr(estimator_instance, "_onedal_finalize_fit"): estimator_instance._onedal_finalize_fit() def dataframe_function(x): for i in range(num_batches): - method_instance(x.iloc[i * batch_size : (i + 1) * batch_size]) + method_instance(x) if hasattr(estimator_instance, "_onedal_finalize_fit"): estimator_instance._onedal_finalize_fit() @@ -423,32 +415,20 @@ def measure_sklearn_estimator( data_args = (x_train,) else: data_args = (x_test,) + batch_size = get_bench_case_value( + bench_case, f"algorithm:batch_size:{stage}" + ) if method == "partial_fit": num_batches = get_bench_case_value( - bench_case, f"algorithm:num_batches:{stage}" - ) - batch_size = get_bench_case_value( - bench_case, f"algorithm:batch_size:{stage}" + bench_case, f"algorithm:num_batches:{stage}", 5 ) - if batch_size is None: - if num_batches is None: - num_batches = 5 - batch_size = ( - data_args[0].shape[0] + num_batches - 1 - ) // num_batches - if num_batches is None: - num_batches = ( - data_args[0].shape[0] + batch_size - 1 - ) // batch_size - method_instance = create_online_function( estimator_instance, method_instance, data_args, - num_batches, - batch_size, + num_batches ) # daal4py model builders enabling branch if enable_modelbuilders and stage == "inference": @@ -465,6 +445,10 @@ def measure_sklearn_estimator( metrics[method]["time std[ms]"], _, ) = measure_case(bench_case, method_instance, *data_args) + if batch_size is not None: + metrics[method]["throughput[samples/ms]"] = ( + (data_args[0].shape[0] // batch_size) * batch_size + ) / metrics[method]["time[ms]"] if ensure_sklearnex_patching: full_method_name = f"{estimator_class.__name__}.{method}" sklearnex_logging_stream.seek(0) @@ -561,9 +545,16 @@ def main(bench_case: BenchCase, filters: List[BenchCase]): for stage in estimator_methods.keys(): data_descs[stage].update( { - 
"batch_size": get_bench_case_value( - bench_case, f"algorithm:batch_size:{stage}" - ) + key: val + for key, val in { + "batch_size": get_bench_case_value( + bench_case, f"algorithm:batch_size:{stage}" + ), + "num_batches": get_bench_case_value( + bench_case, f"algorithm:num_batches:{stage}" + ) + }.items() + if val is not None } ) if "n_classes" in data_description: diff --git a/sklbench/report/implementation.py b/sklbench/report/implementation.py index 8e76479f..689396f1 100644 --- a/sklbench/report/implementation.py +++ b/sklbench/report/implementation.py @@ -94,6 +94,7 @@ "order", "n_classes", "n_clusters", + "num_batches", "batch_size", ] pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy