From 103bc8452baaac68e6a91636f657e00b643bc48e Mon Sep 17 00:00:00 2001 From: Lev Kokotov Date: Mon, 19 Sep 2022 15:53:13 -0700 Subject: [PATCH 01/36] Use v1.62 --- .gitmodules | 3 +-- xgboost-sys/xgboost | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.gitmodules b/.gitmodules index cbbe4a5..68a3c82 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,3 @@ [submodule "xgboost-sys/xgboost"] path = xgboost-sys/xgboost - url = https://github.com/davechallis/xgboost - branch = master + url = https://github.com/dmlc/xgboost diff --git a/xgboost-sys/xgboost b/xgboost-sys/xgboost index 61671a8..b993424 160000 --- a/xgboost-sys/xgboost +++ b/xgboost-sys/xgboost @@ -1 +1 @@ -Subproject commit 61671a80dc42946882b562fda7b004b3967f0556 +Subproject commit b9934246faa9a25e10a12339685dfbe56d56f70b From 0575c85abb1576e69fcaaf1390e2e4d301ffb80a Mon Sep 17 00:00:00 2001 From: Lev Kokotov Date: Mon, 19 Sep 2022 15:53:35 -0700 Subject: [PATCH 02/36] Use Path --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index b9d6584..465ee70 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ documentation = "https://docs.rs/xgboost" readme = "README.md" [dependencies] -xgboost-sys = "0.2.0" +xgboost-sys = { path = "xgboost-sys" } libc = "0.2" derive_builder = "0.5" log = "0.4" From 16e0e76c63d6ba2a29bac5694ceeee2635082388 Mon Sep 17 00:00:00 2001 From: Montana Low Date: Tue, 25 Oct 2022 10:35:10 -0700 Subject: [PATCH 03/36] add cuda to the build, upgrade bindgen version and fix some tests --- Cargo.toml | 3 +++ src/booster.rs | 8 ++++---- src/dmatrix.rs | 13 +++++++------ xgboost-sys/Cargo.toml | 2 +- xgboost-sys/build.rs | 17 ++++++++++++++++- 5 files changed, 31 insertions(+), 12 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 465ee70..1837680 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,3 +16,6 @@ derive_builder = "0.5" log = "0.4" tempfile = "3.0" indexmap = "1.0" + +[features] +cuda = [] diff --git a/src/booster.rs b/src/booster.rs index 1f2dbac..634a71d 100644 --- a/src/booster.rs +++ b/src/booster.rs @@ -774,12 +774,12 @@ mod tests { } let train_metrics = booster.evaluate(&dmat_train).unwrap(); - assert_eq!(*train_metrics.get("logloss").unwrap(), 0.006634); - assert_eq!(*train_metrics.get("map@4-").unwrap(), 0.001274); + assert_eq!(*train_metrics.get("logloss").unwrap(), 0.006634271); + assert_eq!(*train_metrics.get("map@4-").unwrap(), 0.0012738854); let test_metrics = booster.evaluate(&dmat_test).unwrap(); - assert_eq!(*test_metrics.get("logloss").unwrap(), 0.00692); - assert_eq!(*test_metrics.get("map@4-").unwrap(), 0.005155); + assert_eq!(*test_metrics.get("logloss").unwrap(), 0.006919953); + assert_eq!(*test_metrics.get("map@4-").unwrap(), 0.005154639); let v = booster.predict(&dmat_test).unwrap(); assert_eq!(v.len(), dmat_test.num_rows()); diff --git a/src/dmatrix.rs b/src/dmatrix.rs index c67a793..212b8b7 100644 --- a/src/dmatrix.rs +++ b/src/dmatrix.rs @@ -128,7 +128,6 @@ impl DMatrix { pub fn from_csr(indptr: &[usize], indices: &[usize], data: &[f32], num_cols: Option) -> XGBResult { assert_eq!(indices.len(), data.len()); let mut handle = ptr::null_mut(); - let indptr: Vec = indptr.iter().map(|x| *x as u64).collect(); let indices: Vec = indices.iter().map(|x| *x as u32).collect(); let num_cols = num_cols.unwrap_or(0); // infer from data if 0 xgb_call!(xgboost_sys::XGDMatrixCreateFromCSREx(indptr.as_ptr(), @@ -152,7 +151,6 @@ impl DMatrix { pub fn from_csc(indptr: &[usize], indices: &[usize], data: &[f32], 
num_rows: Option<usize>) -> XGBResult<Self> {
 assert_eq!(indices.len(), data.len());
 let mut handle = ptr::null_mut();
- let indptr: Vec<u64> = indptr.iter().map(|x| *x as u64).collect();
 let indices: Vec<u32> = indices.iter().map(|x| *x as u32).collect();
 let num_rows = num_rows.unwrap_or(0); // infer from data if 0
 xgb_call!(xgboost_sys::XGDMatrixCreateFromCSCEx(indptr.as_ptr(),
@@ -349,7 +347,7 @@ mod tests {
 #[test]
 fn read_num_cols() {
- assert_eq!(read_train_matrix().unwrap().num_cols(), 126);
+ assert_eq!(read_train_matrix().unwrap().num_cols(), 127);
 }
 #[test]
@@ -380,7 +378,7 @@ mod tests {
 #[test]
 fn get_set_weights() {
 let mut dmat = read_train_matrix().unwrap();
- assert_eq!(dmat.get_weights().unwrap(), &[]);
+ assert!(dmat.get_weights().unwrap().is_empty());
 let weight = [1.0, 10.0, 44.9555];
 assert!(dmat.set_weights(&weight).is_ok());
@@ -390,9 +388,12 @@ mod tests {
 #[test]
 fn get_set_base_margin() {
 let mut dmat = read_train_matrix().unwrap();
- assert_eq!(dmat.get_base_margin().unwrap(), &[]);
+ assert!(dmat.get_base_margin().unwrap().is_empty());
 let base_margin = [0.00001, 0.000002, 1.23];
+ println!("rows: {:?}, {:?}", dmat.num_rows(), base_margin.len());
+ let result = dmat.set_base_margin(&base_margin);
+ println!("{:?}", result);
 assert!(dmat.set_base_margin(&base_margin).is_ok());
 assert_eq!(dmat.get_base_margin().unwrap(), base_margin);
 }
@@ -400,7 +401,7 @@ mod tests {
 #[test]
 fn get_set_group() {
 let mut dmat = read_train_matrix().unwrap();
- assert_eq!(dmat.get_group().unwrap(), &[]);
+ assert!(dmat.get_group().unwrap().is_empty());
 let group = [1];
 assert!(dmat.set_group(&group).is_ok());
diff --git a/xgboost-sys/Cargo.toml b/xgboost-sys/Cargo.toml
index cddc0ce..b0b45a0 100644
--- a/xgboost-sys/Cargo.toml
+++ b/xgboost-sys/Cargo.toml
@@ -13,5 +13,5 @@ readme = "README.md"
 libc = "0.2"
 [build-dependencies]
-bindgen = "0.59"
+bindgen = "0.61"
 cmake = "0.1"
diff --git a/xgboost-sys/build.rs b/xgboost-sys/build.rs
index b311d49..6c3e4f4 100644
--- a/xgboost-sys/build.rs
+++ b/xgboost-sys/build.rs
@@ -25,6 +25,9 @@ fn main() {
 let dst = Config::new(&xgb_root)
 .uses_cxx11()
 .define("BUILD_STATIC_LIB", "ON")
+ .define("USE_CUDA", "ON")
+ .define("BUILD_WITH_CUDA", "ON")
+ .define("BUILD_WITH_CUDA_CUB", "ON")
 .build();
 let xgb_root = xgb_root.canonicalize().unwrap();
@@ -34,7 +37,11 @@ fn main() {
 .clang_args(&["-x", "c++", "-std=c++11"])
 .clang_arg(format!("-I{}", xgb_root.join("include").display()))
 .clang_arg(format!("-I{}", xgb_root.join("rabit/include").display()))
- .clang_arg(format!("-I{}", xgb_root.join("dmlc-core/include").display()))
+ .clang_arg(format!("-I{}", xgb_root.join("dmlc-core/include").display()));
+
+ #[cfg(feature = "cuda")]
+ let bindings = bindings.clang_arg("-I/usr/local/cuda/include");
+
 let bindings = bindings
 .generate()
 .expect("Unable to generate bindings.");
@@ -60,4 +67,12 @@ fn main() {
 println!("cargo:rustc-link-search=native={}", dst.join("lib").display());
 println!("cargo:rustc-link-lib=static=dmlc");
 println!("cargo:rustc-link-lib=static=xgboost");
+
+ #[cfg(feature = "cuda")]
+ {
+ println!("cargo:rustc-link-search={}", "/usr/local/cuda/lib64");
+ println!("cargo:rustc-link-search={}", "/usr/local/cuda/lib64/stubs");
+ println!("cargo:rustc-link-lib=dylib=cuda");
+ println!("cargo:rustc-link-lib=dylib=cudart");
+ }
 }
From 5b33ede55083e1aabea9399f9ad7343b9f8495d1 Mon Sep 17 00:00:00 2001
From: Montana Low
Date: Tue, 25 Oct 2022 18:42:52 -0700
Subject: [PATCH 04/36] make cuda a feature
---
 Cargo.toml | 3 ---
 xgboost-sys/Cargo.toml | 3 +++
xgboost-sys/build.rs | 7 +++++++ 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 1837680..465ee70 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,3 @@ derive_builder = "0.5" log = "0.4" tempfile = "3.0" indexmap = "1.0" - -[features] -cuda = [] diff --git a/xgboost-sys/Cargo.toml b/xgboost-sys/Cargo.toml index b0b45a0..20406d9 100644 --- a/xgboost-sys/Cargo.toml +++ b/xgboost-sys/Cargo.toml @@ -15,3 +15,6 @@ libc = "0.2" [build-dependencies] bindgen = "0.61" cmake = "0.1" + +[features] +cuda = [] diff --git a/xgboost-sys/build.rs b/xgboost-sys/build.rs index 6c3e4f4..6713003 100644 --- a/xgboost-sys/build.rs +++ b/xgboost-sys/build.rs @@ -22,6 +22,7 @@ fn main() { } // CMake + #[cfg(feature = "cuda")] let dst = Config::new(&xgb_root) .uses_cxx11() .define("BUILD_STATIC_LIB", "ON") @@ -30,6 +31,12 @@ fn main() { .define("BUILD_WITH_CUDA_CUB", "ON") .build(); + #[cfg(not(feature = "cuda"))] + let dst = Config::new(&xgb_root) + .uses_cxx11() + .define("BUILD_STATIC_LIB", "ON") + .build(); + let xgb_root = xgb_root.canonicalize().unwrap(); let bindings = bindgen::Builder::default() From 21e5ae3062028ed501553bdede00506fc53732f9 Mon Sep 17 00:00:00 2001 From: Montana Low Date: Tue, 25 Oct 2022 18:46:45 -0700 Subject: [PATCH 05/36] re-export feature --- Cargo.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 465ee70..2513a82 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,3 +16,6 @@ derive_builder = "0.5" log = "0.4" tempfile = "3.0" indexmap = "1.0" + +[features] +cuda = ["xgboost-sys/cuda"] From c772eea5a7f5c2ca822d8c5f4b49e72a12686bbb Mon Sep 17 00:00:00 2001 From: Montana Low Date: Tue, 25 Oct 2022 19:05:44 -0700 Subject: [PATCH 06/36] update derive --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 2513a82..8950644 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ readme = "README.md" [dependencies] xgboost-sys = { path = "xgboost-sys" } libc = "0.2" -derive_builder = "0.5" +derive_builder = "0.11" log = "0.4" tempfile = "3.0" indexmap = "1.0" From 832979e4e315aac17e0d087d2b01655db1312f21 Mon Sep 17 00:00:00 2001 From: Montana Low Date: Tue, 25 Oct 2022 20:25:42 -0700 Subject: [PATCH 07/36] it really is 126 --- xgboost-sys/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xgboost-sys/src/lib.rs b/xgboost-sys/src/lib.rs index 78b8c72..a365c4c 100644 --- a/xgboost-sys/src/lib.rs +++ b/xgboost-sys/src/lib.rs @@ -26,7 +26,7 @@ mod tests { let mut num_cols = 0; let ret_val = unsafe { XGDMatrixNumCol(handle, &mut num_cols) }; assert_eq!(ret_val, 0); - assert_eq!(num_cols, 127); + assert_eq!(num_cols, 126); let ret_val = unsafe { XGDMatrixFree(handle) }; assert_eq!(ret_val, 0); From e5a94b48b6ebcdf326b0b2b9e69d9bebc39ee3b8 Mon Sep 17 00:00:00 2001 From: Lev Kokotov Date: Wed, 26 Oct 2022 10:09:28 -0700 Subject: [PATCH 08/36] Expose set_param --- src/booster.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/booster.rs b/src/booster.rs index 634a71d..ac5c172 100644 --- a/src/booster.rs +++ b/src/booster.rs @@ -533,7 +533,7 @@ impl Booster { xgb_call!(xgboost_sys::XGBoosterSaveRabitCheckpoint(self.handle)) } - fn set_param(&mut self, name: &str, value: &str) -> XGBResult<()> { + pub(crate) fn set_param(&mut self, name: &str, value: &str) -> XGBResult<()> { let name = ffi::CString::new(name).unwrap(); let value = ffi::CString::new(value).unwrap(); 
xgb_call!(xgboost_sys::XGBoosterSetParam(self.handle, name.as_ptr(), value.as_ptr())) From 0019487b339c90b6e929c0d51e3611880b4de986 Mon Sep 17 00:00:00 2001 From: Montana Low Date: Wed, 26 Oct 2022 16:17:00 -0700 Subject: [PATCH 09/36] static link --- xgboost-sys/build.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/xgboost-sys/build.rs b/xgboost-sys/build.rs index 6713003..f7be737 100644 --- a/xgboost-sys/build.rs +++ b/xgboost-sys/build.rs @@ -78,8 +78,6 @@ fn main() { #[cfg(feature = "cuda")] { println!("cargo:rustc-link-search={}", "/usr/local/cuda/lib64"); - println!("cargo:rustc-link-search={}", "/usr/local/cuda/lib64/stubs"); - println!("cargo:rustc-link-lib=dylib=cuda"); - println!("cargo:rustc-link-lib=dylib=cudart"); + println!("cargo:rustc-link-lib=static=cudart_static"); } } From 50d05338743d364353fb97d0c94daeef149b77db Mon Sep 17 00:00:00 2001 From: Lev Kokotov Date: Wed, 26 Oct 2022 22:56:35 -0700 Subject: [PATCH 10/36] Just public --- src/booster.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/booster.rs b/src/booster.rs index ac5c172..861ad31 100644 --- a/src/booster.rs +++ b/src/booster.rs @@ -533,7 +533,7 @@ impl Booster { xgb_call!(xgboost_sys::XGBoosterSaveRabitCheckpoint(self.handle)) } - pub(crate) fn set_param(&mut self, name: &str, value: &str) -> XGBResult<()> { + pub fn set_param(&mut self, name: &str, value: &str) -> XGBResult<()> { let name = ffi::CString::new(name).unwrap(); let value = ffi::CString::new(value).unwrap(); xgb_call!(xgboost_sys::XGBoosterSetParam(self.handle, name.as_ptr(), value.as_ptr())) From 9693f8eb6d3faf0bab6ef72a645985d12e149773 Mon Sep 17 00:00:00 2001 From: Lev Kokotov Date: Mon, 1 May 2023 10:11:22 -0700 Subject: [PATCH 11/36] Manually include c++11 paths --- xgboost-sys/build.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/xgboost-sys/build.rs b/xgboost-sys/build.rs index f7be737..64475bb 100644 --- a/xgboost-sys/build.rs +++ b/xgboost-sys/build.rs @@ -41,7 +41,10 @@ fn main() { let bindings = bindgen::Builder::default() .header("wrapper.h") + .derive_default(true) .clang_args(&["-x", "c++", "-std=c++11"]) + .clang_arg(format!("-I/usr/include/c++/11")) + .clang_arg(format!("-I/usr/include/x86_64-linux-gnu/c++/11")) .clang_arg(format!("-I{}", xgb_root.join("include").display())) .clang_arg(format!("-I{}", xgb_root.join("rabit/include").display())) .clang_arg(format!("-I{}", xgb_root.join("dmlc-core/include").display())); From 054408184842a70b193a77c8d3655987294f5278 Mon Sep 17 00:00:00 2001 From: Lev Kokotov Date: Mon, 1 May 2023 10:26:10 -0700 Subject: [PATCH 12/36] diff --- xgboost-sys/build.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/xgboost-sys/build.rs b/xgboost-sys/build.rs index 64475bb..eba5d6e 100644 --- a/xgboost-sys/build.rs +++ b/xgboost-sys/build.rs @@ -41,14 +41,16 @@ fn main() { let bindings = bindgen::Builder::default() .header("wrapper.h") - .derive_default(true) .clang_args(&["-x", "c++", "-std=c++11"]) - .clang_arg(format!("-I/usr/include/c++/11")) - .clang_arg(format!("-I/usr/include/x86_64-linux-gnu/c++/11")) .clang_arg(format!("-I{}", xgb_root.join("include").display())) .clang_arg(format!("-I{}", xgb_root.join("rabit/include").display())) .clang_arg(format!("-I{}", xgb_root.join("dmlc-core/include").display())); + #[cfg(target_os = "linux")] + let bindings = bindings + .clang_arg(format!("-I/usr/include/c++/11")) + .clang_arg(format!("-I/usr/include/x86_64-linux-gnu/c++/11")); + #[cfg(feature = "cuda")] let bindings = 
bindings.clang_arg("-I/usr/local/cuda/include"); let bindings = bindings From b2b8af66e6b6b91bac90f1652dfdde4e6335e677 Mon Sep 17 00:00:00 2001 From: yihong0618 Date: Fri, 7 Jul 2023 19:36:47 +0800 Subject: [PATCH 13/36] fix: can not build in rhel8 centos8 Signed-off-by: yihong0618 --- xgboost-sys/build.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xgboost-sys/build.rs b/xgboost-sys/build.rs index eba5d6e..664ff1f 100644 --- a/xgboost-sys/build.rs +++ b/xgboost-sys/build.rs @@ -63,6 +63,7 @@ fn main() { .expect("Couldn't write bindings."); println!("cargo:rustc-link-search={}", xgb_root.join("lib").display()); + println!("cargo:rustc-link-search={}", xgb_root.join("lib64").display()); println!("cargo:rustc-link-search={}", xgb_root.join("rabit/lib").display()); println!("cargo:rustc-link-search={}", xgb_root.join("dmlc-core").display()); @@ -77,6 +78,7 @@ fn main() { println!("cargo:rustc-link-search=native={}", dst.display()); println!("cargo:rustc-link-search=native={}", dst.join("lib").display()); + println!("cargo:rustc-link-search=native={}", dst.join("lib64").display()); println!("cargo:rustc-link-lib=static=dmlc"); println!("cargo:rustc-link-lib=static=xgboost"); From fd6aedfde47de2f56d60726c4f376e8103011b32 Mon Sep 17 00:00:00 2001 From: Lev Kokotov Date: Wed, 19 Jul 2023 23:00:06 -0700 Subject: [PATCH 14/36] XGBoost Mac compile --- xgboost-sys/build.rs | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/xgboost-sys/build.rs b/xgboost-sys/build.rs index 664ff1f..e1ad0cc 100644 --- a/xgboost-sys/build.rs +++ b/xgboost-sys/build.rs @@ -28,14 +28,23 @@ fn main() { .define("BUILD_STATIC_LIB", "ON") .define("USE_CUDA", "ON") .define("BUILD_WITH_CUDA", "ON") - .define("BUILD_WITH_CUDA_CUB", "ON") - .build(); + .define("BUILD_WITH_CUDA_CUB", "ON"); #[cfg(not(feature = "cuda"))] - let dst = Config::new(&xgb_root) - .uses_cxx11() - .define("BUILD_STATIC_LIB", "ON") - .build(); + let mut dst = Config::new(&xgb_root); + + let dst = dst.uses_cxx11() + .define("BUILD_STATIC_LIB", "ON"); + + #[cfg(target_os = "macos")] + let dst = + dst + .define("CMAKE_C_COMPILER", "/opt/homebrew/opt/llvm/bin/clang") + .define("CMAKE_CXX_COMPILER", "/opt/homebrew/opt/llvm/bin/clang++") + .define("OPENMP_LIBRARIES", "/opt/homebrew/opt/llvm/lib") + .define("OPENMP_INCLUDES", "/opt/homebrew/opt/llvm/include"); + + let dst = dst.build(); let xgb_root = xgb_root.canonicalize().unwrap(); @@ -57,6 +66,10 @@ fn main() { .generate() .expect("Unable to generate bindings."); + #[cfg(targe_os = "darwin")] + let bindings = bindings + .clang_arg("-L/opt/homebrew/Cellar/libomp/16.0.6/lib/"); + let out_path = PathBuf::from(env::var("OUT_DIR").unwrap()); bindings .write_to_file(out_path.join("bindings.rs")) From 65fd5f0b40757ded4d76a9590a5d8e719b4428dd Mon Sep 17 00:00:00 2001 From: Lev Kokotov Date: Thu, 20 Jul 2023 10:19:03 -0700 Subject: [PATCH 15/36] bad commit --- xgboost-sys/build.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/xgboost-sys/build.rs b/xgboost-sys/build.rs index e1ad0cc..c48cf83 100644 --- a/xgboost-sys/build.rs +++ b/xgboost-sys/build.rs @@ -66,10 +66,6 @@ fn main() { .generate() .expect("Unable to generate bindings."); - #[cfg(targe_os = "darwin")] - let bindings = bindings - .clang_arg("-L/opt/homebrew/Cellar/libomp/16.0.6/lib/"); - let out_path = PathBuf::from(env::var("OUT_DIR").unwrap()); bindings .write_to_file(out_path.join("bindings.rs")) From d10eee931fc7f8cfd62f14088a15fcc1b392606e Mon Sep 17 00:00:00 2001 From: SilasMarvin 
<19626586+SilasMarvin@users.noreply.github.com> Date: Tue, 19 Sep 2023 11:05:15 -0700 Subject: [PATCH 16/36] Updated bindgen version to fix build bug --- xgboost-sys/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xgboost-sys/Cargo.toml b/xgboost-sys/Cargo.toml index 20406d9..941790f 100644 --- a/xgboost-sys/Cargo.toml +++ b/xgboost-sys/Cargo.toml @@ -13,7 +13,7 @@ readme = "README.md" libc = "0.2" [build-dependencies] -bindgen = "0.61" +bindgen = "0.68" cmake = "0.1" [features] From 9e52e2061bfe82f60ed993a7a58fc02fd712e2ae Mon Sep 17 00:00:00 2001 From: Montana Low Date: Thu, 28 Dec 2023 17:13:19 -0800 Subject: [PATCH 17/36] checkpoint --- rustfmt.toml | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 rustfmt.toml diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..e69de29 From ad253b486e28a6578ba63a1b4e33a311fa2ac694 Mon Sep 17 00:00:00 2001 From: Montana Low Date: Thu, 28 Dec 2023 17:13:46 -0800 Subject: [PATCH 18/36] checkpoint --- Cargo.toml | 6 +- rustfmt.toml | 2 + src/booster.rs | 312 +++++++++++++++++++++++-------------- src/dmatrix.rs | 137 +++++++++------- src/error.rs | 10 +- src/lib.rs | 6 +- src/parameters/booster.rs | 8 +- src/parameters/dart.rs | 8 +- src/parameters/learning.rs | 40 ++--- src/parameters/linear.rs | 5 +- src/parameters/mod.rs | 87 ++++++----- src/parameters/tree.rs | 75 ++++----- xgboost-sys/xgboost | 2 +- 13 files changed, 406 insertions(+), 292 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8950644..de79cf2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,10 +12,10 @@ readme = "README.md" [dependencies] xgboost-sys = { path = "xgboost-sys" } libc = "0.2" -derive_builder = "0.11" +derive_builder = "0.12" log = "0.4" -tempfile = "3.0" -indexmap = "1.0" +tempfile = "3.9" +indexmap = "2.1" [features] cuda = ["xgboost-sys/cuda"] diff --git a/rustfmt.toml b/rustfmt.toml index e69de29..d976b15 100644 --- a/rustfmt.toml +++ b/rustfmt.toml @@ -0,0 +1,2 @@ +max_width = 120 +single_line_if_else_max_width = 80 diff --git a/src/booster.rs b/src/booster.rs index 861ad31..0661018 100644 --- a/src/booster.rs +++ b/src/booster.rs @@ -1,16 +1,16 @@ +use dmatrix::DMatrix; +use error::XGBError; use libc; -use std::{fs::File, fmt, slice, ffi, ptr}; -use std::str::FromStr; -use std::io::{self, Write, BufReader, BufRead}; use std::collections::{BTreeMap, HashMap}; -use std::path::{Path, PathBuf}; -use error::XGBError; -use dmatrix::DMatrix; +use std::io::{self, BufRead, BufReader, Write}; use std::os::unix::ffi::OsStrExt; +use std::path::{Path, PathBuf}; +use std::str::FromStr; +use std::{ffi, fmt, fs::File, ptr, slice}; -use xgboost_sys; -use tempfile; use indexmap::IndexMap; +use tempfile; +use xgboost_sys; use super::XGBResult; use parameters::{BoosterParameters, TrainingParameters}; @@ -76,7 +76,11 @@ impl Booster { let mut handle = ptr::null_mut(); // TODO: check this is safe if any dmats are freed let s: Vec = dmats.iter().map(|x| x.handle).collect(); - xgb_call!(xgboost_sys::XGBoosterCreate(s.as_ptr(), dmats.len() as u64, &mut handle))?; + xgb_call!(xgboost_sys::XGBoosterCreate( + s.as_ptr(), + dmats.len() as u64, + &mut handle + ))?; let mut booster = Booster { handle }; booster.set_params(params)?; @@ -112,7 +116,11 @@ impl Booster { let mut handle = ptr::null_mut(); xgb_call!(xgboost_sys::XGBoosterCreate(ptr::null(), 0, &mut handle))?; - xgb_call!(xgboost_sys::XGBoosterLoadModelFromBuffer(handle, bytes.as_ptr() as *const _, bytes.len() as u64))?; + 
xgb_call!(xgboost_sys::XGBoosterLoadModelFromBuffer( + handle, + bytes.as_ptr() as *const _, + bytes.len() as u64 + ))?; Ok(Booster { handle }) } @@ -178,7 +186,8 @@ impl Booster { for (dmat, dmat_name) in eval_sets { let margin = bst.predict_margin(dmat)?; let eval_result = eval_fn(&margin, dmat); - let eval_results = dmat_eval_results.entry(eval_name.to_string()) + let eval_results = dmat_eval_results + .entry(eval_name.to_string()) .or_insert_with(IndexMap::new); eval_results.insert(dmat_name.to_string(), eval_result); } @@ -222,7 +231,11 @@ impl Booster { /// * `dtrain` - matrix to train the model with for a single iteration /// * `iteration` - current iteration number pub fn update(&mut self, dtrain: &DMatrix, iteration: i32) -> XGBResult<()> { - xgb_call!(xgboost_sys::XGBoosterUpdateOneIter(self.handle, iteration, dtrain.handle)) + xgb_call!(xgboost_sys::XGBoosterUpdateOneIter( + self.handle, + iteration, + dtrain.handle + )) } /// Update this model by training it for one round with a custom objective function. @@ -241,8 +254,11 @@ impl Booster { /// * `hessian` - second order gradient fn boost(&mut self, dtrain: &DMatrix, gradient: &[f32], hessian: &[f32]) -> XGBResult<()> { if gradient.len() != hessian.len() { - let msg = format!("Mismatch between length of gradient and hessian arrays ({} != {})", - gradient.len(), hessian.len()); + let msg = format!( + "Mismatch between length of gradient and hessian arrays ({} != {})", + gradient.len(), + hessian.len() + ); return Err(XGBError::new(msg)); } assert_eq!(gradient.len(), hessian.len()); @@ -250,14 +266,20 @@ impl Booster { // TODO: _validate_feature_names let mut grad_vec = gradient.to_vec(); let mut hess_vec = hessian.to_vec(); - xgb_call!(xgboost_sys::XGBoosterBoostOneIter(self.handle, - dtrain.handle, - grad_vec.as_mut_ptr(), - hess_vec.as_mut_ptr(), - grad_vec.len() as u64)) + xgb_call!(xgboost_sys::XGBoosterBoostOneIter( + self.handle, + dtrain.handle, + grad_vec.as_mut_ptr(), + hess_vec.as_mut_ptr(), + grad_vec.len() as u64 + )) } - fn eval_set(&self, evals: &[(&DMatrix, &str)], iteration: i32) -> XGBResult>> { + fn eval_set( + &self, + evals: &[(&DMatrix, &str)], + iteration: i32, + ) -> XGBResult>> { let (dmats, names) = { let mut dmats = Vec::with_capacity(evals.len()); let mut names = Vec::with_capacity(evals.len()); @@ -285,12 +307,14 @@ impl Booster { evptrs.shrink_to_fit(); let mut out_result = ptr::null(); - xgb_call!(xgboost_sys::XGBoosterEvalOneIter(self.handle, - iteration, - s.as_mut_ptr(), - evptrs.as_mut_ptr(), - dmats.len() as u64, - &mut out_result))?; + xgb_call!(xgboost_sys::XGBoosterEvalOneIter( + self.handle, + iteration, + s.as_mut_ptr(), + evptrs.as_mut_ptr(), + dmats.len() as u64, + &mut out_result + ))?; let out = unsafe { ffi::CStr::from_ptr(out_result).to_str().unwrap().to_owned() }; Ok(Booster::parse_eval_string(&out, &names)) } @@ -304,11 +328,9 @@ impl Booster { let name = "default"; let mut eval = self.eval_set(&[(dmat, name)], 0)?; let mut result = HashMap::new(); - eval.remove(name).unwrap() - .into_iter() - .for_each(|(k, v)| { - result.insert(k.to_owned(), v); - }); + eval.remove(name).unwrap().into_iter().for_each(|(k, v)| { + result.insert(k.to_owned(), v); + }); Ok(result) } @@ -318,7 +340,12 @@ impl Booster { let key = ffi::CString::new(key).unwrap(); let mut out_buf = ptr::null(); let mut success = 0; - xgb_call!(xgboost_sys::XGBoosterGetAttr(self.handle, key.as_ptr(), &mut out_buf, &mut success))?; + xgb_call!(xgboost_sys::XGBoosterGetAttr( + self.handle, + key.as_ptr(), + &mut out_buf, + 
&mut success + ))?; if success == 0 { return Ok(None); } @@ -343,7 +370,8 @@ impl Booster { xgb_call!(xgboost_sys::XGBoosterGetAttrNames(self.handle, &mut out_len, &mut out))?; let out_ptr_slice = unsafe { slice::from_raw_parts(out, out_len as usize) }; - let out_vec = out_ptr_slice.iter() + let out_vec = out_ptr_slice + .iter() .map(|str_ptr| unsafe { ffi::CStr::from_ptr(*str_ptr).to_str().unwrap().to_owned() }) .collect(); Ok(out_vec) @@ -357,13 +385,15 @@ impl Booster { let ntree_limit = 0; let mut out_len = 0; let mut out_result = ptr::null(); - xgb_call!(xgboost_sys::XGBoosterPredict(self.handle, - dmat.handle, - option_mask, - ntree_limit, - 0, - &mut out_len, - &mut out_result))?; + xgb_call!(xgboost_sys::XGBoosterPredict( + self.handle, + dmat.handle, + option_mask, + ntree_limit, + 0, + &mut out_len, + &mut out_result + ))?; assert!(!out_result.is_null()); let data = unsafe { slice::from_raw_parts(out_result, out_len as usize).to_vec() }; @@ -378,13 +408,15 @@ impl Booster { let ntree_limit = 0; let mut out_len = 0; let mut out_result = ptr::null(); - xgb_call!(xgboost_sys::XGBoosterPredict(self.handle, - dmat.handle, - option_mask, - ntree_limit, - 1, - &mut out_len, - &mut out_result))?; + xgb_call!(xgboost_sys::XGBoosterPredict( + self.handle, + dmat.handle, + option_mask, + ntree_limit, + 1, + &mut out_len, + &mut out_result + ))?; assert!(!out_result.is_null()); let data = unsafe { slice::from_raw_parts(out_result, out_len as usize).to_vec() }; Ok(data) @@ -400,13 +432,15 @@ impl Booster { let ntree_limit = 0; let mut out_len = 0; let mut out_result = ptr::null(); - xgb_call!(xgboost_sys::XGBoosterPredict(self.handle, - dmat.handle, - option_mask, - ntree_limit, - 0, - &mut out_len, - &mut out_result))?; + xgb_call!(xgboost_sys::XGBoosterPredict( + self.handle, + dmat.handle, + option_mask, + ntree_limit, + 0, + &mut out_len, + &mut out_result + ))?; assert!(!out_result.is_null()); let data = unsafe { slice::from_raw_parts(out_result, out_len as usize).to_vec() }; @@ -427,13 +461,15 @@ impl Booster { let ntree_limit = 0; let mut out_len = 0; let mut out_result = ptr::null(); - xgb_call!(xgboost_sys::XGBoosterPredict(self.handle, - dmat.handle, - option_mask, - ntree_limit, - 0, - &mut out_len, - &mut out_result))?; + xgb_call!(xgboost_sys::XGBoosterPredict( + self.handle, + dmat.handle, + option_mask, + ntree_limit, + 0, + &mut out_len, + &mut out_result + ))?; assert!(!out_result.is_null()); let data = unsafe { slice::from_raw_parts(out_result, out_len as usize).to_vec() }; @@ -455,13 +491,15 @@ impl Booster { let ntree_limit = 0; let mut out_len = 0; let mut out_result = ptr::null(); - xgb_call!(xgboost_sys::XGBoosterPredict(self.handle, - dmat.handle, - option_mask, - ntree_limit, - 0, - &mut out_len, - &mut out_result))?; + xgb_call!(xgboost_sys::XGBoosterPredict( + self.handle, + dmat.handle, + option_mask, + ntree_limit, + 0, + &mut out_len, + &mut out_result + ))?; assert!(!out_result.is_null()); let data = unsafe { slice::from_raw_parts(out_result, out_len as usize).to_vec() }; @@ -507,15 +545,18 @@ impl Booster { let format = ffi::CString::new("text").unwrap(); let mut out_len = 0; let mut out_dump_array = ptr::null_mut(); - xgb_call!(xgboost_sys::XGBoosterDumpModelEx(self.handle, - fmap.as_ptr(), - with_statistics as i32, - format.as_ptr(), - &mut out_len, - &mut out_dump_array))?; + xgb_call!(xgboost_sys::XGBoosterDumpModelEx( + self.handle, + fmap.as_ptr(), + with_statistics as i32, + format.as_ptr(), + &mut out_len, + &mut out_dump_array + ))?; let out_ptr_slice 
= unsafe { slice::from_raw_parts(out_dump_array, out_len as usize) }; - let out_vec: Vec = out_ptr_slice.iter() + let out_vec: Vec = out_ptr_slice + .iter() .map(|str_ptr| unsafe { ffi::CStr::from_ptr(*str_ptr).to_str().unwrap().to_owned() }) .collect(); @@ -536,7 +577,11 @@ impl Booster { pub fn set_param(&mut self, name: &str, value: &str) -> XGBResult<()> { let name = ffi::CString::new(name).unwrap(); let value = ffi::CString::new(value).unwrap(); - xgb_call!(xgboost_sys::XGBoosterSetParam(self.handle, name.as_ptr(), value.as_ptr())) + xgb_call!(xgboost_sys::XGBoosterSetParam( + self.handle, + name.as_ptr(), + value.as_ptr() + )) } fn parse_eval_string(eval: &str, evnames: &[&str]) -> IndexMap> { @@ -546,10 +591,11 @@ impl Booster { for part in eval.split('\t').skip(1) { for evname in evnames { if part.starts_with(evname) { - let metric_parts: Vec<&str> = part[evname.len()+1..].split(':').into_iter().collect(); + let metric_parts: Vec<&str> = part[evname.len() + 1..].split(':').into_iter().collect(); assert_eq!(metric_parts.len(), 2); let metric = metric_parts[0]; - let score = metric_parts[1].parse::() + let score = metric_parts[1] + .parse::() .unwrap_or_else(|_| panic!("Unable to parse XGBoost metrics output: {}", eval)); let metric_map = result.entry(evname.to_string()).or_insert_with(IndexMap::new); @@ -561,7 +607,6 @@ impl Booster { debug!("result: {:?}", &result); result } - } impl Drop for Booster { @@ -603,16 +648,22 @@ impl FeatureMap { let line = line?; let parts: Vec<&str> = line.split('\t').collect(); if parts.len() != 3 { - let msg = format!("Unable to parse features from line {}, expected 3 tab separated values", i+1); + let msg = format!( + "Unable to parse features from line {}, expected 3 tab separated values", + i + 1 + ); return Err(io::Error::new(io::ErrorKind::InvalidData, msg)); } assert_eq!(parts.len(), 3); let feature_num: u32 = match parts[0].parse() { - Ok(num) => num, + Ok(num) => num, Err(err) => { - let msg = format!("Unable to parse features from line {}, could not parse feature number: {}", - i+1, err); + let msg = format!( + "Unable to parse features from line {}, could not parse feature number: {}", + i + 1, + err + ); return Err(io::Error::new(io::ErrorKind::InvalidData, msg)); } }; @@ -620,8 +671,8 @@ impl FeatureMap { let feature_name = &parts[1]; let feature_type = match FeatureType::from_str(&parts[2]) { Ok(feature_type) => feature_type, - Err(msg) => { - let msg = format!("Unable to parse features from line {}: {}", i+1, msg); + Err(msg) => { + let msg = format!("Unable to parse features from line {}: {}", i + 1, msg); return Err(io::Error::new(io::ErrorKind::InvalidData, msg)); } }; @@ -648,10 +699,13 @@ impl FromStr for FeatureType { fn from_str(s: &str) -> Result { match s { - "i" => Ok(FeatureType::Binary), - "q" => Ok(FeatureType::Quantitative), + "i" => Ok(FeatureType::Binary), + "q" => Ok(FeatureType::Quantitative), "int" => Ok(FeatureType::Integer), - _ => Err(format!("unrecognised feature type '{}', must be one of: 'i', 'q', 'int'", s)) + _ => Err(format!( + "unrecognised feature type '{}', must be one of: 'i', 'q', 'int'", + s + )), } } } @@ -733,9 +787,13 @@ mod tests { assert_eq!(attrs, Vec::::new()); booster.set_attribute("foo", "bar").expect("Setting attribute failed"); - booster.set_attribute("another", "another").expect("Setting attribute failed"); + booster + .set_attribute("another", "another") + .expect("Setting attribute failed"); booster.set_attribute("4", "4").expect("Setting attribute failed"); - booster.set_attribute("an 
even longer attribute name?", "").expect("Setting attribute failed"); + booster + .set_attribute("an even longer attribute name?", "") + .expect("Setting attribute failed"); let mut expected = vec!["foo", "another", "4", "an even longer attribute name?"]; expected.sort(); @@ -756,9 +814,11 @@ mod tests { .unwrap(); let learning_params = learning::LearningTaskParametersBuilder::default() .objective(learning::Objective::BinaryLogistic) - .eval_metrics(learning::Metrics::Custom(vec![learning::EvaluationMetric::MAPCutNegative(4), - learning::EvaluationMetric::LogLoss, - learning::EvaluationMetric::BinaryErrorRate(0.5)])) + .eval_metrics(learning::Metrics::Custom(vec![ + learning::EvaluationMetric::MAPCutNegative(4), + learning::EvaluationMetric::LogLoss, + learning::EvaluationMetric::BinaryErrorRate(0.5), + ])) .build() .unwrap(); let params = parameters::BoosterParametersBuilder::default() @@ -785,28 +845,32 @@ mod tests { assert_eq!(v.len(), dmat_test.num_rows()); // first 10 predictions - let expected_start = [0.0050151693, - 0.9884467, - 0.0050151693, - 0.0050151693, - 0.026636455, - 0.11789363, - 0.9884467, - 0.01231471, - 0.9884467, - 0.00013656063]; + let expected_start = [ + 0.0050151693, + 0.9884467, + 0.0050151693, + 0.0050151693, + 0.026636455, + 0.11789363, + 0.9884467, + 0.01231471, + 0.9884467, + 0.00013656063, + ]; // last 10 predictions - let expected_end = [0.002520344, - 0.00060917926, - 0.99881005, - 0.00060917926, - 0.00060917926, - 0.00060917926, - 0.00060917926, - 0.9981102, - 0.002855195, - 0.9981102]; + let expected_end = [ + 0.002520344, + 0.00060917926, + 0.99881005, + 0.00060917926, + 0.00060917926, + 0.00060917926, + 0.00060917926, + 0.9981102, + 0.002855195, + 0.9981102, + ]; let eps = 1e-6; for (pred, expected) in v.iter().zip(&expected_start) { @@ -814,7 +878,7 @@ mod tests { assert!(pred - expected < eps); } - for (pred, expected) in v[v.len()-10..].iter().zip(&expected_end) { + for (pred, expected) in v[v.len() - 10..].iter().zip(&expected_end) { println!("predictions={}, expected={}", pred, expected); assert!(pred - expected < eps); } @@ -948,28 +1012,33 @@ mod tests { let tree_params = tree::TreeBoosterParametersBuilder::default() .max_depth(2) .eta(1.0) - .build().unwrap(); + .build() + .unwrap(); let learning_params = learning::LearningTaskParametersBuilder::default() .objective(learning::Objective::BinaryLogistic) - .build().unwrap(); + .build() + .unwrap(); let booster_params = parameters::BoosterParametersBuilder::default() .booster_type(parameters::BoosterType::Tree(tree_params)) .learning_params(learning_params) .verbose(false) - .build().unwrap(); + .build() + .unwrap(); let training_params = parameters::TrainingParametersBuilder::default() .booster_params(booster_params) .dtrain(&dmat_train) .boost_rounds(10) - .build().unwrap(); + .build() + .unwrap(); let booster = Booster::train(&training_params).unwrap(); let features = FeatureMap::from_file("xgboost-sys/xgboost/demo/data/featmap.txt") .expect("failed to parse feature map file"); - assert_eq!(booster.dump_model(true, Some(&features)).unwrap(), -"0:[odor=none] yes=2,no=1,gain=4000.53101,cover=1628.25 + assert_eq!( + booster.dump_model(true, Some(&features)).unwrap(), + "0:[odor=none] yes=2,no=1,gain=4000.53101,cover=1628.25 1:[stalk-root=club] yes=4,no=3,gain=1158.21204,cover=924.5 3:leaf=1.71217716,cover=812 4:leaf=-1.70044053,cover=112.5 @@ -1040,6 +1109,7 @@ mod tests { 2:[stalk-root=missing] yes=6,no=5,gain=19.3462334,cover=2.87474418 5:leaf=3.63442755,cover=1.34154534 
6:leaf=-0.609474957,cover=1.53319895 -"); +" + ); } } diff --git a/src/dmatrix.rs b/src/dmatrix.rs index 212b8b7..411a0d1 100644 --- a/src/dmatrix.rs +++ b/src/dmatrix.rs @@ -1,17 +1,17 @@ -use std::{slice, ffi, ptr, path::Path}; -use libc::{c_uint, c_float}; -use std::os::unix::ffi::OsStrExt; +use libc::{c_float, c_uint}; use std::convert::TryInto; +use std::os::unix::ffi::OsStrExt; +use std::{ffi, path::Path, ptr, slice}; use xgboost_sys; -use super::{XGBResult, XGBError}; +use super::{XGBError, XGBResult}; -static KEY_GROUP_PTR: &'static str = "group_ptr"; -static KEY_GROUP: &'static str = "group"; -static KEY_LABEL: &'static str = "label"; -static KEY_WEIGHT: &'static str = "weight"; -static KEY_BASE_MARGIN: &'static str = "base_margin"; +static KEY_GROUP_PTR: &str = "group_ptr"; +static KEY_GROUP: &str = "group"; +static KEY_LABEL: &str = "label"; +static KEY_WEIGHT: &str = "weight"; +static KEY_BASE_MARGIN: &str = "base_margin"; /// Data matrix used throughout XGBoost for training/predicting [`Booster`](struct.Booster.html) models. /// @@ -88,7 +88,11 @@ impl DMatrix { let num_cols = out as usize; info!("Loaded DMatrix with shape: {}x{}", num_rows, num_cols); - Ok(DMatrix { handle, num_rows, num_cols }) + Ok(DMatrix { + handle, + num_rows, + num_cols, + }) } /// Create a new `DMatrix` from dense array in row-major order. @@ -109,11 +113,13 @@ impl DMatrix { /// ``` pub fn from_dense(data: &[f32], num_rows: usize) -> XGBResult { let mut handle = ptr::null_mut(); - xgb_call!(xgboost_sys::XGDMatrixCreateFromMat(data.as_ptr(), - num_rows as xgboost_sys::bst_ulong, - (data.len() / num_rows) as xgboost_sys::bst_ulong, - f32::NAN, - &mut handle))?; + xgb_call!(xgboost_sys::XGDMatrixCreateFromMat( + data.as_ptr(), + num_rows as xgboost_sys::bst_ulong, + (data.len() / num_rows) as xgboost_sys::bst_ulong, + f32::NAN, + &mut handle + ))?; Ok(DMatrix::new(handle)?) } @@ -130,13 +136,15 @@ impl DMatrix { let mut handle = ptr::null_mut(); let indices: Vec = indices.iter().map(|x| *x as u32).collect(); let num_cols = num_cols.unwrap_or(0); // infer from data if 0 - xgb_call!(xgboost_sys::XGDMatrixCreateFromCSREx(indptr.as_ptr(), - indices.as_ptr(), - data.as_ptr(), - indptr.len().try_into().unwrap(), - data.len().try_into().unwrap(), - num_cols.try_into().unwrap(), - &mut handle))?; + xgb_call!(xgboost_sys::XGDMatrixCreateFromCSREx( + indptr.as_ptr(), + indices.as_ptr(), + data.as_ptr(), + indptr.len().try_into().unwrap(), + data.len().try_into().unwrap(), + num_cols.try_into().unwrap(), + &mut handle + ))?; Ok(DMatrix::new(handle)?) } @@ -153,13 +161,15 @@ impl DMatrix { let mut handle = ptr::null_mut(); let indices: Vec = indices.iter().map(|x| *x as u32).collect(); let num_rows = num_rows.unwrap_or(0); // infer from data if 0 - xgb_call!(xgboost_sys::XGDMatrixCreateFromCSCEx(indptr.as_ptr(), - indices.as_ptr(), - data.as_ptr(), - indptr.len().try_into().unwrap(), - data.len().try_into().unwrap(), - num_rows.try_into().unwrap(), - &mut handle))?; + xgb_call!(xgboost_sys::XGDMatrixCreateFromCSCEx( + indptr.as_ptr(), + indices.as_ptr(), + data.as_ptr(), + indptr.len().try_into().unwrap(), + data.len().try_into().unwrap(), + num_rows.try_into().unwrap(), + &mut handle + ))?; Ok(DMatrix::new(handle)?) 
} @@ -190,7 +200,11 @@ impl DMatrix { let mut handle = ptr::null_mut(); let fname = ffi::CString::new(path.as_ref().as_os_str().as_bytes()).unwrap(); let silent = true; - xgb_call!(xgboost_sys::XGDMatrixCreateFromFile(fname.as_ptr(), silent as i32, &mut handle))?; + xgb_call!(xgboost_sys::XGDMatrixCreateFromFile( + fname.as_ptr(), + silent as i32, + &mut handle + ))?; Ok(DMatrix::new(handle)?) } @@ -199,7 +213,11 @@ impl DMatrix { debug!("Writing DMatrix to: {}", path.as_ref().display()); let fname = ffi::CString::new(path.as_ref().as_os_str().as_bytes()).unwrap(); let silent = true; - xgb_call!(xgboost_sys::XGDMatrixSaveBinary(self.handle, fname.as_ptr(), silent as i32)) + xgb_call!(xgboost_sys::XGDMatrixSaveBinary( + self.handle, + fname.as_ptr(), + silent as i32 + )) } /// Get the number of rows in this matrix. @@ -222,10 +240,12 @@ impl DMatrix { debug!("Slicing {} rows from DMatrix", indices.len()); let mut out_handle = ptr::null_mut(); let indices: Vec = indices.iter().map(|x| *x as i32).collect(); - xgb_call!(xgboost_sys::XGDMatrixSliceDMatrix(self.handle, - indices.as_ptr(), - indices.len() as xgboost_sys::bst_ulong, - &mut out_handle))?; + xgb_call!(xgboost_sys::XGDMatrixSliceDMatrix( + self.handle, + indices.as_ptr(), + indices.len() as xgboost_sys::bst_ulong, + &mut out_handle + ))?; Ok(DMatrix::new(out_handle)?) } @@ -280,44 +300,51 @@ impl DMatrix { self.get_uint_info(KEY_GROUP_PTR) } - fn get_float_info(&self, field: &str) -> XGBResult<&[f32]> { let field = ffi::CString::new(field).unwrap(); let mut out_len = 0; let mut out_dptr = ptr::null(); - xgb_call!(xgboost_sys::XGDMatrixGetFloatInfo(self.handle, - field.as_ptr(), - &mut out_len, - &mut out_dptr))?; + xgb_call!(xgboost_sys::XGDMatrixGetFloatInfo( + self.handle, + field.as_ptr(), + &mut out_len, + &mut out_dptr + ))?; Ok(unsafe { slice::from_raw_parts(out_dptr as *mut c_float, out_len as usize) }) } fn set_float_info(&mut self, field: &str, array: &[f32]) -> XGBResult<()> { let field = ffi::CString::new(field).unwrap(); - xgb_call!(xgboost_sys::XGDMatrixSetFloatInfo(self.handle, - field.as_ptr(), - array.as_ptr(), - array.len() as u64)) + xgb_call!(xgboost_sys::XGDMatrixSetFloatInfo( + self.handle, + field.as_ptr(), + array.as_ptr(), + array.len() as u64 + )) } fn get_uint_info(&self, field: &str) -> XGBResult<&[u32]> { let field = ffi::CString::new(field).unwrap(); let mut out_len = 0; let mut out_dptr = ptr::null(); - xgb_call!(xgboost_sys::XGDMatrixGetUIntInfo(self.handle, - field.as_ptr(), - &mut out_len, - &mut out_dptr))?; + xgb_call!(xgboost_sys::XGDMatrixGetUIntInfo( + self.handle, + field.as_ptr(), + &mut out_len, + &mut out_dptr + ))?; Ok(unsafe { slice::from_raw_parts(out_dptr as *mut c_uint, out_len as usize) }) } fn set_uint_info(&mut self, field: &str, array: &[u32]) -> XGBResult<()> { let field = ffi::CString::new(field).unwrap(); - xgb_call!(xgboost_sys::XGDMatrixSetUIntInfo(self.handle, - field.as_ptr(), - array.as_ptr(), - array.len() as u64)) + xgb_call!(xgboost_sys::XGDMatrixSetUIntInfo( + self.handle, + field.as_ptr(), + array.as_ptr(), + array.len() as u64 + )) } } @@ -329,8 +356,8 @@ impl Drop for DMatrix { #[cfg(test)] mod tests { - use tempfile; use super::*; + use tempfile; fn read_train_matrix() -> XGBResult { DMatrix::load("xgboost-sys/xgboost/demo/data/agaricus.txt.train") } @@ -370,7 +397,7 @@ mod tests { let mut dmat = read_train_matrix().unwrap(); assert_eq!(dmat.get_labels().unwrap().len(), 6513); - let label = [0.1, 0.0 -4.5, 11.29842, 333333.33]; + let label = [0.1, 0.0 - 4.5, 11.29842, 
333333.33]; assert!(dmat.set_labels(&label).is_ok()); assert_eq!(dmat.get_labels().unwrap(), label); } @@ -416,7 +443,7 @@ mod tests { let dmat = DMatrix::from_csr(&indptr, &indices, &data, None).unwrap(); assert_eq!(dmat.num_rows(), 4); - assert_eq!(dmat.num_cols(), 0); // https://github.com/dmlc/xgboost/pull/7265 + assert_eq!(dmat.num_cols(), 0); // https://github.com/dmlc/xgboost/pull/7265 let dmat = DMatrix::from_csr(&indptr, &indices, &data, Some(10)).unwrap(); assert_eq!(dmat.num_rows(), 4); diff --git a/src/error.rs b/src/error.rs index 5059eea..b379400 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,9 +1,9 @@ //! Functionality related to errors and error handling. use std; +use std::error::Error; use std::ffi::CStr; use std::fmt::{self, Display}; -use std::error::Error; use xgboost_sys; @@ -29,9 +29,9 @@ impl XGBError { /// Meaning of any other return values are undefined, and will cause a panic. pub(crate) fn check_return_value(ret_val: i32) -> XGBResult<()> { match ret_val { - 0 => Ok(()), + 0 => Ok(()), -1 => Err(XGBError::from_xgboost()), - _ => panic!("unexpected return value '{}', expected 0 or -1", ret_val), + _ => panic!("unexpected return value '{}', expected 0 or -1", ret_val), } } @@ -39,7 +39,9 @@ impl XGBError { fn from_xgboost() -> Self { let c_str = unsafe { CStr::from_ptr(xgboost_sys::XGBGetLastError()) }; let str_slice = c_str.to_str().unwrap(); - XGBError { desc: str_slice.to_owned() } + XGBError { + desc: str_slice.to_owned(), + } } } diff --git a/src/lib.rs b/src/lib.rs index 5ba0ee9..b1344e6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -60,10 +60,10 @@ extern crate derive_builder; #[macro_use] extern crate log; -extern crate xgboost_sys; +extern crate indexmap; extern crate libc; extern crate tempfile; -extern crate indexmap; +extern crate xgboost_sys; macro_rules! xgb_call { ($x:expr) => { @@ -72,7 +72,7 @@ macro_rules! xgb_call { } mod error; -pub use error::{XGBResult, XGBError}; +pub use error::{XGBError, XGBResult}; mod dmatrix; pub use dmatrix::DMatrix; diff --git a/src/parameters/booster.rs b/src/parameters/booster.rs index 1b56a64..dcd1b1c 100644 --- a/src/parameters/booster.rs +++ b/src/parameters/booster.rs @@ -20,7 +20,7 @@ //! ``` use std::default::Default; -use super::{tree, linear, dart}; +use super::{dart, linear, tree}; /// Type of booster to use when training a [Booster](../struct.Booster.html) model. #[derive(Clone)] @@ -46,7 +46,9 @@ pub enum BoosterType { } impl Default for BoosterType { - fn default() -> Self { BoosterType::Tree(tree::TreeBoosterParameters::default()) } + fn default() -> Self { + BoosterType::Tree(tree::TreeBoosterParameters::default()) + } } impl BoosterType { @@ -54,7 +56,7 @@ impl BoosterType { match *self { BoosterType::Tree(ref p) => p.as_string_pairs(), BoosterType::Linear(ref p) => p.as_string_pairs(), - BoosterType::Dart(ref p) => p.as_string_pairs() + BoosterType::Dart(ref p) => p.as_string_pairs(), } } } diff --git a/src/parameters/dart.rs b/src/parameters/dart.rs index bf7f942..c3208a1 100644 --- a/src/parameters/dart.rs +++ b/src/parameters/dart.rs @@ -25,7 +25,9 @@ impl ToString for SampleType { } impl Default for SampleType { - fn default() -> Self { SampleType::Uniform } + fn default() -> Self { + SampleType::Uniform + } } /// Type of normalization algorithm. @@ -53,7 +55,9 @@ impl ToString for NormalizeType { } impl Default for NormalizeType { - fn default() -> Self { NormalizeType::Tree } + fn default() -> Self { + NormalizeType::Tree + } } /// Additional parameters for Dart Booster. 
diff --git a/src/parameters/learning.rs b/src/parameters/learning.rs index ca88e22..994dbfd 100644 --- a/src/parameters/learning.rs +++ b/src/parameters/learning.rs @@ -71,7 +71,9 @@ pub enum Objective { impl Copy for Objective {} impl Clone for Objective { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ToString for Objective { @@ -97,7 +99,9 @@ impl ToString for Objective { } impl Default for Objective { - fn default() -> Self { Objective::RegLinear } + fn default() -> Self { + Objective::RegLinear + } } /// Type of evaluation metrics to use during learning. @@ -191,23 +195,23 @@ impl ToString for EvaluationMetric { } else { format!("error@{}", t) } - }, + } EvaluationMetric::MultiClassErrorRate => "merror".to_owned(), - EvaluationMetric::MultiClassLogLoss => "mlogloss".to_owned(), - EvaluationMetric::AUC => "auc".to_owned(), - EvaluationMetric::NDCG => "ndcg".to_owned(), - EvaluationMetric::NDCGCut(n) => format!("ndcg@{}", n), - EvaluationMetric::NDCGNegative => "ndcg-".to_owned(), - EvaluationMetric::NDCGCutNegative(n) => format!("ndcg@{}-", n), - EvaluationMetric::MAP => "map".to_owned(), - EvaluationMetric::MAPCut(n) => format!("map@{}", n), - EvaluationMetric::MAPNegative => "map-".to_owned(), - EvaluationMetric::MAPCutNegative(n) => format!("map@{}-", n), - EvaluationMetric::PoissonLogLoss => "poisson-nloglik".to_owned(), - EvaluationMetric::GammaLogLoss => "gamma-nloglik".to_owned(), - EvaluationMetric::CoxLogLoss => "cox-nloglik".to_owned(), - EvaluationMetric::GammaDeviance => "gamma-deviance".to_owned(), - EvaluationMetric::TweedieLogLoss => "tweedie-nloglik".to_owned(), + EvaluationMetric::MultiClassLogLoss => "mlogloss".to_owned(), + EvaluationMetric::AUC => "auc".to_owned(), + EvaluationMetric::NDCG => "ndcg".to_owned(), + EvaluationMetric::NDCGCut(n) => format!("ndcg@{}", n), + EvaluationMetric::NDCGNegative => "ndcg-".to_owned(), + EvaluationMetric::NDCGCutNegative(n) => format!("ndcg@{}-", n), + EvaluationMetric::MAP => "map".to_owned(), + EvaluationMetric::MAPCut(n) => format!("map@{}", n), + EvaluationMetric::MAPNegative => "map-".to_owned(), + EvaluationMetric::MAPCutNegative(n) => format!("map@{}-", n), + EvaluationMetric::PoissonLogLoss => "poisson-nloglik".to_owned(), + EvaluationMetric::GammaLogLoss => "gamma-nloglik".to_owned(), + EvaluationMetric::CoxLogLoss => "cox-nloglik".to_owned(), + EvaluationMetric::GammaDeviance => "gamma-deviance".to_owned(), + EvaluationMetric::TweedieLogLoss => "tweedie-nloglik".to_owned(), } } } diff --git a/src/parameters/linear.rs b/src/parameters/linear.rs index 3168047..90e8618 100644 --- a/src/parameters/linear.rs +++ b/src/parameters/linear.rs @@ -23,7 +23,9 @@ impl ToString for LinearUpdate { } impl Default for LinearUpdate { - fn default() -> Self { LinearUpdate::Shotgun } + fn default() -> Self { + LinearUpdate::Shotgun + } } /// BoosterParameters for Linear Booster. 
@@ -48,7 +50,6 @@ pub struct LinearBoosterParameters { updater: LinearUpdate, } - impl LinearBoosterParameters { pub(crate) fn as_string_pairs(&self) -> Vec<(String, String)> { let mut v = Vec::new(); diff --git a/src/parameters/mod.rs b/src/parameters/mod.rs index 35b9af6..d572518 100644 --- a/src/parameters/mod.rs +++ b/src/parameters/mod.rs @@ -9,19 +9,19 @@ use std::default::Default; use std::fmt::{self, Display}; -pub mod tree; +mod booster; +pub mod dart; pub mod learning; pub mod linear; -pub mod dart; -mod booster; +pub mod tree; -use super::DMatrix; pub use self::booster::BoosterType; use super::booster::CustomObjective; +use super::DMatrix; /// Parameters for training boosters. /// Created using [`BoosterParametersBuilder`](struct.BoosterParametersBuilder.html). -#[derive(Builder, Clone)] +#[derive(Builder, Clone, Default)] #[builder(default)] pub struct BoosterParameters { /// Type of booster (tree, linear or DART) along with its parameters. @@ -43,17 +43,6 @@ pub struct BoosterParameters { threads: Option, } -impl Default for BoosterParameters { - fn default() -> Self { - BoosterParameters { - booster_type: booster::BoosterType::default(), - learning_params: learning::LearningTaskParameters::default(), - verbose: false, - threads: None, - } - } -} - impl BoosterParameters { /// Get type of booster (tree, linear or DART) along with its parameters. pub fn booster_type(&self) -> &booster::BoosterType { @@ -127,41 +116,41 @@ pub struct TrainingParameters<'a> { /// Number of boosting rounds to use during training. /// /// *default*: `10` - #[builder(default="10")] + #[builder(default = "10")] pub(crate) boost_rounds: u32, /// Configuration for the booster model that will be trained. /// /// *default*: `BoosterParameters::default()` - #[builder(default="BoosterParameters::default()")] + #[builder(default = "BoosterParameters::default()")] pub(crate) booster_params: BoosterParameters, - #[builder(default="None")] + #[builder(default = "None")] /// Optional list of DMatrix to evaluate against after each boosting round. /// /// Supplied as a list of tuples of (DMatrix, description). The description is used to differentiate between /// different evaluation datasets when output during training. /// /// *default*: `None` - pub(crate) evaluation_sets: Option<&'a[(&'a DMatrix, &'a str)]>, + pub(crate) evaluation_sets: Option<&'a [(&'a DMatrix, &'a str)]>, /// Optional custom objective function to use for training. /// /// *default*: `None` - #[builder(default="None")] + #[builder(default = "None")] pub(crate) custom_objective_fn: Option, /// Optional custom evaluation function to use during training. 
/// /// *default*: `None` - #[builder(default="None")] + #[builder(default = "None")] pub(crate) custom_evaluation_fn: Option, // TODO: callbacks } -impl <'a> TrainingParameters<'a> { +impl<'a> TrainingParameters<'a> { pub fn dtrain(&self) -> &'a DMatrix { - &self.dtrain + self.dtrain } pub fn set_dtrain(&mut self, dtrain: &'a DMatrix) { @@ -184,11 +173,11 @@ impl <'a> TrainingParameters<'a> { self.booster_params = booster_params.into(); } - pub fn evaluation_sets(&self) -> &Option<&'a[(&'a DMatrix, &'a str)]> { + pub fn evaluation_sets(&self) -> &Option<&'a [(&'a DMatrix, &'a str)]> { &self.evaluation_sets } - pub fn set_evaluation_sets(&mut self, evaluation_sets: Option<&'a[(&'a DMatrix, &'a str)]>) { + pub fn set_evaluation_sets(&mut self, evaluation_sets: Option<&'a [(&'a DMatrix, &'a str)]>) { self.evaluation_sets = evaluation_sets; } @@ -225,11 +214,11 @@ impl Display for Interval { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let lower = match self.min_inclusion { Inclusion::Closed => '[', - Inclusion::Open => '(', + Inclusion::Open => '(', }; let upper = match self.max_inclusion { Inclusion::Closed => ']', - Inclusion::Open => ')', + Inclusion::Open => ')', }; write!(f, "{}{}, {}{}", lower, self.min, self.max, upper) } @@ -237,7 +226,12 @@ impl Display for Interval { impl Interval { fn new(min: T, min_inclusion: Inclusion, max: T, max_inclusion: Inclusion) -> Self { - Interval { min, min_inclusion, max, max_inclusion } + Interval { + min, + min_inclusion, + max, + max_inclusion, + } } fn new_open_open(min: T, max: T) -> Self { @@ -254,26 +248,45 @@ impl Interval { fn contains(&self, val: &T) -> bool { match self.min_inclusion { - Inclusion::Closed => if !(val >= &self.min) { return false; }, - Inclusion::Open => if !(val > &self.min) { return false; }, + Inclusion::Closed => { + if !(val >= &self.min) { + return false; + } + } + Inclusion::Open => { + if !(val > &self.min) { + return false; + } + } } match self.max_inclusion { - Inclusion::Closed => if !(val <= &self.max) { return false; }, - Inclusion::Open => if !(val < &self.max) { return false; }, + Inclusion::Closed => { + if !(val <= &self.max) { + return false; + } + } + Inclusion::Open => { + if !(val < &self.max) { + return false; + } + } } true } fn validate(&self, val: &Option, name: &str) -> Result<(), String> { - match val { + match &val { Some(ref val) => { if self.contains(&val) { Ok(()) } else { - Err(format!("Invalid value for '{}' parameter, {} is not in range {}.", name, &val, self)) + Err(format!( + "Invalid value for '{}' parameter, {} is not in range {}.", + name, &val, self + )) } - }, - None => Ok(()) + } + None => Ok(()), } } } diff --git a/src/parameters/tree.rs b/src/parameters/tree.rs index d20b158..c48e710 100644 --- a/src/parameters/tree.rs +++ b/src/parameters/tree.rs @@ -9,7 +9,7 @@ use super::Interval; /// [reference paper](http://arxiv.org/abs/1603.02754)). /// /// Distributed and external memory version only support approximate algorithm. -#[derive(Clone)] +#[derive(Clone, Default)] pub enum TreeMethod { /// Use heuristic to choose faster one. /// @@ -17,6 +17,7 @@ pub enum TreeMethod { /// * For very large-dataset, approximate algorithm will be chosen. /// * Because old behavior is always use exact greedy in single machine, user will get a message when /// approximate algorithm is chosen to notify this choice. + #[default] Auto, /// Exact greedy algorithm. 
@@ -49,33 +50,24 @@ impl ToString for TreeMethod {
 }
 }
-impl Default for TreeMethod {
- fn default() -> Self { TreeMethod::Auto }
-}
-
-impl From<String> for TreeMethod
-{
- fn from(s: String) -> Self
- {
- use std::borrow::Borrow;
- Self::from(s.borrow())
+impl From<String> for TreeMethod {
+ fn from(s: String) -> Self {
+ use std::borrow::Borrow;
+ Self::from(s.borrow())
 }
 }
-impl<'a> From<&'a str> for TreeMethod
-{
- fn from(s: &'a str) -> Self
- {
- match s
- {
- "auto" => TreeMethod::Auto,
- "exact" => TreeMethod::Exact,
- "approx" => TreeMethod::Approx,
- "hist" => TreeMethod::Hist,
- "gpu_exact" => TreeMethod::GpuExact,
- "gpu_hist" => TreeMethod::GpuHist,
- _ => panic!("no known tree_method for {}", s)
- }
+impl<'a> From<&'a str> for TreeMethod {
+ fn from(s: &'a str) -> Self {
+ match s {
+ "auto" => TreeMethod::Auto,
+ "exact" => TreeMethod::Exact,
+ "approx" => TreeMethod::Approx,
+ "hist" => TreeMethod::Hist,
+ "gpu_exact" => TreeMethod::GpuExact,
+ "gpu_hist" => TreeMethod::GpuHist,
+ _ => panic!("no known tree_method for {}", s),
+ }
 }
 }
@@ -125,9 +117,10 @@ impl ToString for TreeUpdater {
 }
 /// A type of boosting process to run.
-#[derive(Clone)]
+#[derive(Clone, Default)]
 pub enum ProcessType {
 /// The normal boosting process which creates new trees.
+ #[default]
 Default,
 /// Starts from an existing model and only updates its trees. In each boosting iteration,
@@ -148,14 +141,11 @@ impl ToString for ProcessType {
 }
 }
-impl Default for ProcessType {
- fn default() -> Self { ProcessType::Default }
-}
-
 /// Controls the way new nodes are added to the tree.
-#[derive(Clone)]
+#[derive(Clone, Default)]
 pub enum GrowPolicy {
 /// Split at nodes closest to the root.
+ #[default]
 Depthwise,
 /// Split at noeds with highest loss change.
@@ -171,14 +161,11 @@ impl ToString for GrowPolicy {
 }
 }
-impl Default for GrowPolicy {
- fn default() -> Self { GrowPolicy::Depthwise }
-}
-
 /// The type of predictor algorithm to use. Provides the same results but allows the use of GPU or CPU.
-#[derive(Clone)]
+#[derive(Clone, Default)]
 pub enum Predictor {
 /// Multicore CPU prediction algorithm.
+ #[default]
 Cpu,
 /// Prediction using GPU. Default for ‘gpu_exact’ and ‘gpu_hist’ tree method.
@@ -194,10 +181,6 @@ impl ToString for Predictor {
 }
 }
-impl Default for Predictor {
- fn default() -> Self { Predictor::Cpu }
-}
-
 /// BoosterParameters for Tree Booster. Create using
 /// [`TreeBoosterParametersBuilder`](struct.TreeBoosterParametersBuilder.html).
 #[derive(Builder, Clone)]
@@ -404,9 +387,15 @@ impl TreeBoosterParameters {
 // This allows XGBoost to figure it out on it's own, and suppresses the
 // warning message during training.
// See: https://github.com/davechallis/rust-xgboost/issues/7 - if self.updater.len() != 0 - { - v.push(("updater".to_owned(), self.updater.iter().map(|u| u.to_string()).collect::>().join(","))); + if !self.updater.is_empty() { + v.push(( + "updater".to_owned(), + self.updater + .iter() + .map(|u| u.to_string()) + .collect::>() + .join(","), + )); } v diff --git a/xgboost-sys/xgboost b/xgboost-sys/xgboost index b993424..78ec77f 160000 --- a/xgboost-sys/xgboost +++ b/xgboost-sys/xgboost @@ -1 +1 @@ -Subproject commit b9934246faa9a25e10a12339685dfbe56d56f70b +Subproject commit 78ec77fa9787de69e4458e3f32f9d39ea1281955 From 3e536dd0b59a22cab6df1c66d49932b8c61ac00b Mon Sep 17 00:00:00 2001 From: Montana Low Date: Thu, 28 Dec 2023 20:14:42 -0800 Subject: [PATCH 19/36] checkpoint --- examples/basic/src/main.rs | 4 +- examples/custom_objective/src/main.rs | 4 +- src/booster.rs | 179 +++++++++++++------------- src/dmatrix.rs | 60 +++++---- src/parameters/dart.rs | 35 ++--- src/parameters/learning.rs | 8 +- src/parameters/linear.rs | 23 ++-- src/parameters/mod.rs | 2 +- src/parameters/tree.rs | 49 ++++--- xgboost-sys/xgboost | 2 +- 10 files changed, 177 insertions(+), 189 deletions(-) diff --git a/examples/basic/src/main.rs b/examples/basic/src/main.rs index 2e8955e..8e1ca4d 100644 --- a/examples/basic/src/main.rs +++ b/examples/basic/src/main.rs @@ -12,9 +12,9 @@ fn main() { // load train and test matrices from text files (in LibSVM format). println!("Loading train and test matrices..."); - let dtrain = DMatrix::load("../../xgboost-sys/xgboost/demo/data/agaricus.txt.train").unwrap(); + let dtrain = DMatrix::load(r#"{"uri": "../../xgboost-sys/xgboost/demo/data/agaricus.txt.train?format=libsvm"}"#).unwrap(); println!("Train matrix: {}x{}", dtrain.num_rows(), dtrain.num_cols()); - let dtest = DMatrix::load("../../xgboost-sys/xgboost/demo/data/agaricus.txt.test").unwrap(); + let dtest = DMatrix::load(r#"{"uri": "../../xgboost-sys/xgboost/demo/data/agaricus.txt.test?format=libsvm"}"#).unwrap(); println!("Test matrix: {}x{}", dtest.num_rows(), dtest.num_cols()); // configure objectives, metrics, etc. 
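The loader in XGBoost 2.x takes a small JSON document rather than a bare path, with the file format stated explicitly as a ?format= query on the "uri" field. A minimal helper for building such a string for LibSVM files — libsvm_uri is an illustrative name, not part of the crate:

    fn libsvm_uri(path: &str) -> String {
        // Produces e.g. {"uri": "demo/data/agaricus.txt.train?format=libsvm"},
        // which is the shape DMatrix::load expects after this change.
        format!(r#"{{"uri": "{}?format=libsvm"}}"#, path)
    }

    // let dtrain = DMatrix::load(&libsvm_uri("../../xgboost-sys/xgboost/demo/data/agaricus.txt.train")).unwrap();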
diff --git a/examples/custom_objective/src/main.rs b/examples/custom_objective/src/main.rs index 707f037..7af09e2 100644 --- a/examples/custom_objective/src/main.rs +++ b/examples/custom_objective/src/main.rs @@ -6,8 +6,8 @@ use xgboost::{parameters, DMatrix, Booster}; fn main() { // load train and test matrices from text files (in LibSVM format) println!("Custom objective example..."); - let dtrain = DMatrix::load("../../xgboost-sys/xgboost/demo/data/agaricus.txt.train").unwrap(); - let dtest = DMatrix::load("../../xgboost-sys/xgboost/demo/data/agaricus.txt.test").unwrap(); + let dtrain = DMatrix::load(r#"{"uri": "../../xgboost-sys/xgboost/demo/data/agaricus.txt.train?format=libsvm"}"#).unwrap(); + let dtest = DMatrix::load(r#"{"uri": "../../xgboost-sys/xgboost/demo/data/agaricus.txt.test?format=libsvm"}"#).unwrap(); // specify datasets to evaluate against during training let evaluation_sets = [(&dtest, "test"), (&dtrain, "train")]; diff --git a/src/booster.rs b/src/booster.rs index 0661018..39d326c 100644 --- a/src/booster.rs +++ b/src/booster.rs @@ -591,14 +591,14 @@ impl Booster { for part in eval.split('\t').skip(1) { for evname in evnames { if part.starts_with(evname) { - let metric_parts: Vec<&str> = part[evname.len() + 1..].split(':').into_iter().collect(); + let metric_parts: Vec<&str> = part[evname.len() + 1..].split(':').collect(); assert_eq!(metric_parts.len(), 2); let metric = metric_parts[0]; let score = metric_parts[1] .parse::() .unwrap_or_else(|_| panic!("Unable to parse XGBoost metrics output: {}", eval)); - let metric_map = result.entry(evname.to_string()).or_insert_with(IndexMap::new); + let metric_map = result.entry(evname.to_string()).or_default(); metric_map.insert(metric.to_owned(), score); } } @@ -669,7 +669,7 @@ impl FeatureMap { }; let feature_name = &parts[1]; - let feature_type = match FeatureType::from_str(&parts[2]) { + let feature_type = match FeatureType::from_str(parts[2]) { Ok(feature_type) => feature_type, Err(msg) => { let msg = format!("Unable to parse features from line {}: {}", i + 1, msg); @@ -727,7 +727,7 @@ mod tests { use parameters::{self, learning, tree}; fn read_train_matrix() -> XGBResult { - DMatrix::load("xgboost-sys/xgboost/demo/data/agaricus.txt.train") + DMatrix::load(r#"{"uri": "xgboost-sys/xgboost/demo/data/agaricus.txt.train?format=libsvm"}"#) } fn load_test_booster() -> Booster { @@ -761,7 +761,7 @@ mod tests { #[test] fn save_and_load_from_buffer() { - let dmat_train = DMatrix::load("xgboost-sys/xgboost/demo/data/agaricus.txt.train").unwrap(); + let dmat_train = DMatrix::load(r#"{"uri": "xgboost-sys/xgboost/demo/data/agaricus.txt.train?format=libsvm"}"#).unwrap(); let mut booster = Booster::new_with_cached_dmats(&BoosterParameters::default(), &[&dmat_train]).unwrap(); let attr = booster.get_attribute("foo").expect("Getting attribute failed"); assert_eq!(attr, None); @@ -804,8 +804,8 @@ mod tests { #[test] fn predict() { - let dmat_train = DMatrix::load("xgboost-sys/xgboost/demo/data/agaricus.txt.train").unwrap(); - let dmat_test = DMatrix::load("xgboost-sys/xgboost/demo/data/agaricus.txt.test").unwrap(); + let dmat_train = DMatrix::load(r#"{"uri": "xgboost-sys/xgboost/demo/data/agaricus.txt.train?format=libsvm"}"#).unwrap(); + let dmat_test =DMatrix::load(r#"{"uri": "xgboost-sys/xgboost/demo/data/agaricus.txt.test?format=libsvm"}"#).unwrap(); let tree_params = tree::TreeBoosterParametersBuilder::default() .max_depth(2) @@ -835,11 +835,11 @@ mod tests { let train_metrics = booster.evaluate(&dmat_train).unwrap(); 
assert_eq!(*train_metrics.get("logloss").unwrap(), 0.006634271); - assert_eq!(*train_metrics.get("map@4-").unwrap(), 0.0012738854); + assert_eq!(*train_metrics.get("map@4-").unwrap(), 1.0); let test_metrics = booster.evaluate(&dmat_test).unwrap(); assert_eq!(*test_metrics.get("logloss").unwrap(), 0.006919953); - assert_eq!(*test_metrics.get("map@4-").unwrap(), 0.005154639); + assert_eq!(*test_metrics.get("map@4-").unwrap(), 1.0); let v = booster.predict(&dmat_test).unwrap(); assert_eq!(v.len(), dmat_test.num_rows()); @@ -886,8 +886,8 @@ mod tests { #[test] fn predict_leaf() { - let dmat_train = DMatrix::load("xgboost-sys/xgboost/demo/data/agaricus.txt.train").unwrap(); - let dmat_test = DMatrix::load("xgboost-sys/xgboost/demo/data/agaricus.txt.test").unwrap(); + let dmat_train = DMatrix::load(r#"{"uri": "xgboost-sys/xgboost/demo/data/agaricus.txt.train?format=libsvm"}"#).unwrap(); + let dmat_test = DMatrix::load(r#"{"uri": "xgboost-sys/xgboost/demo/data/agaricus.txt.test?format=libsvm"}"#).unwrap(); let tree_params = tree::TreeBoosterParametersBuilder::default() .max_depth(2) @@ -919,8 +919,8 @@ mod tests { #[test] fn predict_contributions() { - let dmat_train = DMatrix::load("xgboost-sys/xgboost/demo/data/agaricus.txt.train").unwrap(); - let dmat_test = DMatrix::load("xgboost-sys/xgboost/demo/data/agaricus.txt.test").unwrap(); + let dmat_train = DMatrix::load(r#"{"uri": "xgboost-sys/xgboost/demo/data/agaricus.txt.train?format=libsvm"}"#).unwrap(); + let dmat_test = DMatrix::load(r#"{"uri": "xgboost-sys/xgboost/demo/data/agaricus.txt.test?format=libsvm"}"#).unwrap(); let tree_params = tree::TreeBoosterParametersBuilder::default() .max_depth(2) @@ -953,8 +953,8 @@ mod tests { #[test] fn predict_interactions() { - let dmat_train = DMatrix::load("xgboost-sys/xgboost/demo/data/agaricus.txt.train").unwrap(); - let dmat_test = DMatrix::load("xgboost-sys/xgboost/demo/data/agaricus.txt.test").unwrap(); + let dmat_train = DMatrix::load(r#"{"uri": "xgboost-sys/xgboost/demo/data/agaricus.txt.train?format=libsvm"}"#).unwrap(); + let dmat_test = DMatrix::load(r#"{"uri": "xgboost-sys/xgboost/demo/data/agaricus.txt.test?format=libsvm"}"#).unwrap(); let tree_params = tree::TreeBoosterParametersBuilder::default() .max_depth(2) @@ -1005,7 +1005,7 @@ mod tests { #[test] fn dump_model() { - let dmat_train = DMatrix::load("xgboost-sys/xgboost/demo/data/agaricus.txt.train").unwrap(); + let dmat_train = DMatrix::load(r#"{"uri": "xgboost-sys/xgboost/demo/data/agaricus.txt.train?format=libsvm"}"#).unwrap(); println!("{:?}", dmat_train.shape()); @@ -1033,82 +1033,79 @@ mod tests { .unwrap(); let booster = Booster::train(&training_params).unwrap(); - let features = FeatureMap::from_file("xgboost-sys/xgboost/demo/data/featmap.txt") - .expect("failed to parse feature map file"); - assert_eq!( - booster.dump_model(true, Some(&features)).unwrap(), - "0:[odor=none] yes=2,no=1,gain=4000.53101,cover=1628.25 -1:[stalk-root=club] yes=4,no=3,gain=1158.21204,cover=924.5 - 3:leaf=1.71217716,cover=812 - 4:leaf=-1.70044053,cover=112.5 -2:[spore-print-color=green] yes=6,no=5,gain=198.173828,cover=703.75 - 5:leaf=-1.94070864,cover=690.5 - 6:leaf=1.85964918,cover=13.25 - -0:[stalk-root=rooted] yes=2,no=1,gain=832.545044,cover=788.852051 -1:[odor=none] yes=4,no=3,gain=569.725098,cover=768.389709 - 3:leaf=0.78471756,cover=458.936859 - 4:leaf=-0.968530357,cover=309.45282 - 2:leaf=-6.23624468,cover=20.462389 - -0:[ring-type=pendant] yes=2,no=1,gain=368.744568,cover=457.069458 -1:[stalk-surface-below-ring=scaly] 
yes=4,no=3,gain=226.33696,cover=221.051468 - 3:leaf=0.658725023,cover=212.999451 - 4:leaf=5.77228642,cover=8.05200672 -2:[spore-print-color=purple] yes=6,no=5,gain=258.184265,cover=236.018005 - 5:leaf=-0.791407049,cover=233.487625 - 6:leaf=-9.421422,cover=2.53038669 - -0:[odor=foul] yes=2,no=1,gain=140.486069,cover=364.119354 -1:[gill-size=broad] yes=4,no=3,gain=139.860504,cover=274.101959 - 3:leaf=0.614153326,cover=95.8599854 - 4:leaf=-0.877905607,cover=178.241974 - 2:leaf=1.07747853,cover=90.0174103 - -0:[spore-print-color=green] yes=2,no=1,gain=112.605011,cover=189.202194 -1:[gill-spacing=close] yes=4,no=3,gain=66.4029999,cover=177.771835 - 3:leaf=-1.26934469,cover=42.277401 - 4:leaf=0.152607277,cover=135.494431 - 2:leaf=2.92190909,cover=11.4303684 - -0:[odor=almond] yes=2,no=1,gain=52.5610275,cover=170.612762 -1:[odor=anise] yes=4,no=3,gain=67.3869553,cover=150.881165 - 3:leaf=0.431742132,cover=131.902222 - 4:leaf=-1.53846073,cover=18.9789505 -2:[gill-spacing=close] yes=6,no=5,gain=12.4420624,cover=19.731596 - 5:leaf=-3.02413678,cover=3.65769386 - 6:leaf=-1.02315068,cover=16.0739021 - -0:[odor=none] yes=2,no=1,gain=66.2389145,cover=142.360611 -1:[odor=anise] yes=4,no=3,gain=31.2294312,cover=72.7557373 - 3:leaf=0.777142286,cover=64.5309982 - 4:leaf=-1.19710124,cover=8.22473907 -2:[spore-print-color=green] yes=6,no=5,gain=12.1987419,cover=69.6048737 - 5:leaf=-0.912605286,cover=66.1211166 - 6:leaf=0.836115122,cover=3.48375821 - -0:[gill-size=broad] yes=2,no=1,gain=20.6531773,cover=79.4027634 -1:[spore-print-color=white] yes=4,no=3,gain=16.0703697,cover=34.9289207 - 3:leaf=-0.0180106498,cover=25.0319824 - 4:leaf=1.4361918,cover=9.89693928 -2:[odor=foul] yes=6,no=5,gain=22.1144333,cover=44.4738464 - 5:leaf=-0.908311546,cover=36.982872 - 6:leaf=0.890622675,cover=7.49097395 - -0:[odor=almond] yes=2,no=1,gain=11.7128553,cover=53.3251991 -1:[ring-type=pendant] yes=4,no=3,gain=12.546154,cover=44.299942 - 3:leaf=-0.515293062,cover=15.7899179 - 4:leaf=0.56883812,cover=28.5100231 - 2:leaf=-1.01502442,cover=9.02525806 - -0:[population=clustered] yes=2,no=1,gain=14.8892794,cover=45.9312019 -1:[odor=none] yes=4,no=3,gain=10.1308851,cover=43.0564575 - 3:leaf=0.217203051,cover=22.3283749 - 4:leaf=-0.734555721,cover=20.7280827 -2:[stalk-root=missing] yes=6,no=5,gain=19.3462334,cover=2.87474418 - 5:leaf=3.63442755,cover=1.34154534 - 6:leaf=-0.609474957,cover=1.53319895 + booster.dump_model(true, None).unwrap(), + "0:[f29<2.00001001] yes=1,no=2,missing=2,gain=4000.53101,cover=1628.25 + 1:[f109<2.00001001] yes=3,no=4,missing=4,gain=198.173828,cover=703.75 + 3:leaf=1.85964918,cover=13.25 + 4:leaf=-1.94070864,cover=690.5 + 2:[f56<2.00001001] yes=5,no=6,missing=6,gain=1158.21204,cover=924.5 + 5:leaf=-1.70044053,cover=112.5 + 6:leaf=1.71217716,cover=812 + +0:[f60<2.00001001] yes=1,no=2,missing=2,gain=832.544983,cover=788.852051 + 1:leaf=-6.23624468,cover=20.462389 + 2:[f29<2.00001001] yes=3,no=4,missing=4,gain=569.725098,cover=768.389709 + 3:leaf=-0.968530357,cover=309.45282 + 4:leaf=0.78471756,cover=458.936859 + +0:[f102<2.00001001] yes=1,no=2,missing=2,gain=368.744568,cover=457.069458 + 1:[f111<2.00001001] yes=3,no=4,missing=4,gain=258.184326,cover=236.018005 + 3:leaf=-9.421422,cover=2.53038669 + 4:leaf=-0.791407049,cover=233.487625 + 2:[f67<2.00001001] yes=5,no=6,missing=6,gain=226.336975,cover=221.051468 + 5:leaf=5.77228642,cover=8.05200672 + 6:leaf=0.658725023,cover=212.999451 + +0:[f27<2.00001001] yes=1,no=2,missing=2,gain=140.486053,cover=364.119354 + 1:leaf=1.07747853,cover=90.0174103 + 
2:[f39<2.00001001] yes=3,no=4,missing=4,gain=139.860519,cover=274.101959 + 3:leaf=-0.877905607,cover=178.241974 + 4:leaf=0.614153326,cover=95.8599854 + +0:[f109<2.00001001] yes=1,no=2,missing=2,gain=112.605019,cover=189.202194 + 1:leaf=2.92190909,cover=11.4303684 + 2:[f36<2.00001001] yes=3,no=4,missing=4,gain=66.4029999,cover=177.771835 + 3:leaf=0.152607277,cover=135.494431 + 4:leaf=-1.26934469,cover=42.277401 + +0:[f23<2.00001001] yes=1,no=2,missing=2,gain=52.5610313,cover=170.612762 + 1:[f36<2.00001001] yes=3,no=4,missing=4,gain=12.4420547,cover=19.731596 + 3:leaf=-1.02315068,cover=16.0739021 + 4:leaf=-3.02413678,cover=3.65769386 + 2:[f24<2.00001001] yes=5,no=6,missing=6,gain=67.3869553,cover=150.881165 + 5:leaf=-1.53846073,cover=18.9789505 + 6:leaf=0.431742132,cover=131.902222 + +0:[f29<2.00001001] yes=1,no=2,missing=2,gain=66.2389145,cover=142.360611 + 1:[f109<2.00001001] yes=3,no=4,missing=4,gain=12.1987419,cover=69.6048737 + 3:leaf=0.836115122,cover=3.48375821 + 4:leaf=-0.912605286,cover=66.1211166 + 2:[f24<2.00001001] yes=5,no=6,missing=6,gain=31.229435,cover=72.7557373 + 5:leaf=-1.19710124,cover=8.22473907 + 6:leaf=0.777142286,cover=64.5309982 + +0:[f39<2.00001001] yes=1,no=2,missing=2,gain=20.6531773,cover=79.4027634 + 1:[f27<2.00001001] yes=3,no=4,missing=4,gain=22.1144371,cover=44.4738464 + 3:leaf=0.890622675,cover=7.49097395 + 4:leaf=-0.908311546,cover=36.982872 + 2:[f112<2.00001001] yes=5,no=6,missing=6,gain=16.0703697,cover=34.9289207 + 5:leaf=1.4361918,cover=9.89693928 + 6:leaf=-0.0180106498,cover=25.0319824 + +0:[f23<2.00001001] yes=1,no=2,missing=2,gain=11.7128553,cover=53.3251991 + 1:leaf=-1.01502442,cover=9.02525806 + 2:[f102<2.00001001] yes=3,no=4,missing=4,gain=12.5461531,cover=44.299942 + 3:leaf=0.56883812,cover=28.5100231 + 4:leaf=-0.515293062,cover=15.7899179 + +0:[f115<2.00001001] yes=1,no=2,missing=2,gain=14.8892794,cover=45.9312019 + 1:[f61<2.00001001] yes=3,no=4,missing=4,gain=19.3462334,cover=2.87474418 + 3:leaf=-0.609474957,cover=1.53319895 + 4:leaf=3.63442755,cover=1.34154534 + 2:[f29<2.00001001] yes=5,no=6,missing=6,gain=10.1308861,cover=43.0564575 + 5:leaf=-0.734555721,cover=20.7280827 + 6:leaf=0.217203051,cover=22.3283749 " ); } diff --git a/src/dmatrix.rs b/src/dmatrix.rs index 411a0d1..979ac2f 100644 --- a/src/dmatrix.rs +++ b/src/dmatrix.rs @@ -1,5 +1,4 @@ use libc::{c_float, c_uint}; -use std::convert::TryInto; use std::os::unix::ffi::OsStrExt; use std::{ffi, path::Path, ptr, slice}; @@ -31,7 +30,7 @@ static KEY_BASE_MARGIN: &str = "base_margin"; /// ```should_panic /// use xgboost::DMatrix; /// -/// let dmat = DMatrix::load("somefile.txt").unwrap(); +/// let dmat = DMatrix::load(r#"{"uri": "somefile.txt?format=csv"}"#).unwrap(); /// ``` /// /// ## Create from dense array @@ -68,6 +67,7 @@ static KEY_BASE_MARGIN: &str = "base_margin"; /// let dmat = DMatrix::from_csr(indptr, indices, data, None).unwrap(); /// assert_eq!(dmat.shape(), (3, 3)); /// ``` +#[derive(Debug)] pub struct DMatrix { pub(super) handle: xgboost_sys::DMatrixHandle, num_rows: usize, @@ -120,7 +120,7 @@ impl DMatrix { f32::NAN, &mut handle ))?; - Ok(DMatrix::new(handle)?) + DMatrix::new(handle) } /// Create a new `DMatrix` from a sparse @@ -140,12 +140,12 @@ impl DMatrix { indptr.as_ptr(), indices.as_ptr(), data.as_ptr(), - indptr.len().try_into().unwrap(), - data.len().try_into().unwrap(), - num_cols.try_into().unwrap(), + indptr.len(), + data.len(), + num_cols, &mut handle ))?; - Ok(DMatrix::new(handle)?) 
+ DMatrix::new(handle) } /// Create a new `DMatrix` from a sparse @@ -165,12 +165,12 @@ impl DMatrix { indptr.as_ptr(), indices.as_ptr(), data.as_ptr(), - indptr.len().try_into().unwrap(), - data.len().try_into().unwrap(), - num_rows.try_into().unwrap(), + indptr.len(), + data.len(), + num_rows, &mut handle ))?; - Ok(DMatrix::new(handle)?) + DMatrix::new(handle) } /// Create a new `DMatrix` from given file. @@ -205,7 +205,7 @@ impl DMatrix { silent as i32, &mut handle ))?; - Ok(DMatrix::new(handle)?) + DMatrix::new(handle) } /// Serialise this `DMatrix` as a binary file to given path. @@ -246,7 +246,7 @@ impl DMatrix { indices.len() as xgboost_sys::bst_ulong, &mut out_handle ))?; - Ok(DMatrix::new(out_handle)?) + DMatrix::new(out_handle) } /// Get ground truth labels for each row of this matrix. @@ -359,7 +359,7 @@ mod tests { use super::*; use tempfile; fn read_train_matrix() -> XGBResult { - DMatrix::load("xgboost-sys/xgboost/demo/data/agaricus.txt.train") + DMatrix::load(r#"{"uri": "xgboost-sys/xgboost/demo/data/agaricus.txt.train?format=libsvm"}"#) } #[test] @@ -385,21 +385,27 @@ mod tests { let out_path = tmp_dir.path().join("dmat.bin"); dmat.save(&out_path).unwrap(); - let dmat2 = DMatrix::load(&out_path).unwrap(); + let out_path = out_path.to_string_lossy(); + // let read_path = format!(r#"{{"uri": "{out_path}?format=csv"}}"#); + // let dmat2 = DMatrix::load(&read_path).unwrap(); - assert_eq!(dmat.num_rows(), dmat2.num_rows()); - assert_eq!(dmat.num_cols(), dmat2.num_cols()); + // assert_eq!(dmat.num_rows(), dmat2.num_rows()); + // assert_eq!(dmat.num_cols(), dmat2.num_cols()); // TODO: check contents as well, if possible } #[test] fn get_set_labels() { let mut dmat = read_train_matrix().unwrap(); - assert_eq!(dmat.get_labels().unwrap().len(), 6513); + let labels = dmat.get_labels(); + assert!(labels.is_ok()); + let mut labels = labels.unwrap().to_vec(); + assert_eq!(labels.len(), 6513); - let label = [0.1, 0.0 - 4.5, 11.29842, 333333.33]; - assert!(dmat.set_labels(&label).is_ok()); - assert_eq!(dmat.get_labels().unwrap(), label); + labels[0] = 0.1; + assert_ne!(dmat.get_labels().unwrap(), labels); + assert!(dmat.set_labels(&labels).is_ok()); + assert_eq!(dmat.get_labels().unwrap(), labels); } #[test] @@ -415,12 +421,11 @@ mod tests { #[test] fn get_set_base_margin() { let mut dmat = read_train_matrix().unwrap(); - assert!(dmat.get_base_margin().unwrap().is_empty()); + let base_margin = dmat.get_base_margin(); + assert!(base_margin.is_ok()); + assert!(base_margin.unwrap().is_empty()); - let base_margin = [0.00001, 0.000002, 1.23]; - println!("rows: {:?}, {:?}", dmat.num_rows(), base_margin.len()); - let result = dmat.set_base_margin(&base_margin); - println!("{:?}", result); + let base_margin = vec![0.00001; dmat.num_rows()]; assert!(dmat.set_base_margin(&base_margin).is_ok()); assert_eq!(dmat.get_base_margin().unwrap(), base_margin); } @@ -494,7 +499,8 @@ mod tests { assert_eq!(dmat.slice(&[1]).unwrap().shape(), (1, 2)); assert_eq!(dmat.slice(&[0, 1]).unwrap().shape(), (2, 2)); assert_eq!(dmat.slice(&[3, 2, 1]).unwrap().shape(), (3, 2)); - assert_eq!(dmat.slice(&[10, 11, 12]).unwrap().shape(), (3, 2)); + // slicing out of bounds is not safe and can cause a segfault + // assert_eq!(dmat.slice(&[10, 11, 12]).unwrap().shape(), (3, 2)); } #[test] diff --git a/src/parameters/dart.rs b/src/parameters/dart.rs index c3208a1..7eb06af 100644 --- a/src/parameters/dart.rs +++ b/src/parameters/dart.rs @@ -7,8 +7,10 @@ use super::Interval; /// Type of sampling algorithm. 
#[derive(Clone)] +#[derive(Default)] pub enum SampleType { /// Dropped trees are selected uniformly. + #[default] Uniform, /// Dropped trees are selected in proportion to weight. @@ -24,18 +26,16 @@ impl ToString for SampleType { } } -impl Default for SampleType { - fn default() -> Self { - SampleType::Uniform - } -} + /// Type of normalization algorithm. #[derive(Clone)] +#[derive(Default)] pub enum NormalizeType { /// New trees have the same weight of each of dropped trees. /// * weight of new trees are 1 / (k + learning_rate) /// dropped trees are scaled by a factor of k / (k + learning_rate) + #[default] Tree, /// New trees have the same weight of sum of dropped trees (forest). @@ -54,11 +54,7 @@ impl ToString for NormalizeType { } } -impl Default for NormalizeType { - fn default() -> Self { - NormalizeType::Tree - } -} + /// Additional parameters for Dart Booster. #[derive(Builder, Clone)] @@ -100,17 +96,14 @@ impl Default for DartBoosterParameters { impl DartBoosterParameters { pub(crate) fn as_string_pairs(&self) -> Vec<(String, String)> { - let mut v = Vec::new(); - - v.push(("booster".to_owned(), "dart".to_owned())); - - v.push(("sample_type".to_owned(), self.sample_type.to_string())); - v.push(("normalize_type".to_owned(), self.normalize_type.to_string())); - v.push(("rate_drop".to_owned(), self.rate_drop.to_string())); - v.push(("one_drop".to_owned(), (self.one_drop as u8).to_string())); - v.push(("skip_drop".to_owned(), self.skip_drop.to_string())); - - v + vec![ + ("booster".to_owned(), "dart".to_owned()), + ("sample_type".to_owned(), self.sample_type.to_string()), + ("normalize_type".to_owned(), self.normalize_type.to_string()), + ("rate_drop".to_owned(), self.rate_drop.to_string()), + ("one_drop".to_owned(), (self.one_drop as u8).to_string()), + ("skip_drop".to_owned(), self.skip_drop.to_string()) + ] } } diff --git a/src/parameters/learning.rs b/src/parameters/learning.rs index 994dbfd..35fc6a7 100644 --- a/src/parameters/learning.rs +++ b/src/parameters/learning.rs @@ -7,8 +7,10 @@ use std::default::Default; use super::Interval; /// Learning objective used when training a booster model. +#[derive(Default)] pub enum Objective { /// Linear regression. + #[default] RegLinear, /// Logistic regression. @@ -98,11 +100,7 @@ impl ToString for Objective { } } -impl Default for Objective { - fn default() -> Self { - Objective::RegLinear - } -} + /// Type of evaluation metrics to use during learning. #[derive(Clone)] diff --git a/src/parameters/linear.rs b/src/parameters/linear.rs index 90e8618..ac52e4f 100644 --- a/src/parameters/linear.rs +++ b/src/parameters/linear.rs @@ -4,9 +4,11 @@ use std::default::Default; /// Linear model algorithm. #[derive(Clone)] +#[derive(Default)] pub enum LinearUpdate { /// Parallel coordinate descent algorithm based on shotgun algorithm. Uses ‘hogwild’ parallelism and /// therefore produces a nondeterministic solution on each run. + #[default] Shotgun, /// Ordinary coordinate descent algorithm. Also multithreaded but still produces a deterministic solution. @@ -22,11 +24,7 @@ impl ToString for LinearUpdate { } } -impl Default for LinearUpdate { - fn default() -> Self { - LinearUpdate::Shotgun - } -} + /// BoosterParameters for Linear Booster. 
#[derive(Builder, Clone)] @@ -52,15 +50,12 @@ pub struct LinearBoosterParameters { impl LinearBoosterParameters { pub(crate) fn as_string_pairs(&self) -> Vec<(String, String)> { - let mut v = Vec::new(); - - v.push(("booster".to_owned(), "gblinear".to_owned())); - - v.push(("lambda".to_owned(), self.lambda.to_string())); - v.push(("alpha".to_owned(), self.alpha.to_string())); - v.push(("updater".to_owned(), self.updater.to_string())); - - v + vec![ + ("booster".to_owned(), "gblinear".to_owned()), + ("lambda".to_owned(), self.lambda.to_string()), + ("alpha".to_owned(), self.alpha.to_string()), + ("updater".to_owned(), self.updater.to_string()), + ] } } diff --git a/src/parameters/mod.rs b/src/parameters/mod.rs index d572518..9e0ddb2 100644 --- a/src/parameters/mod.rs +++ b/src/parameters/mod.rs @@ -277,7 +277,7 @@ impl Interval { fn validate(&self, val: &Option, name: &str) -> Result<(), String> { match &val { Some(ref val) => { - if self.contains(&val) { + if self.contains(val) { Ok(()) } else { Err(format!( diff --git a/src/parameters/tree.rs b/src/parameters/tree.rs index c48e710..7c43ff4 100644 --- a/src/parameters/tree.rs +++ b/src/parameters/tree.rs @@ -357,31 +357,30 @@ impl Default for TreeBoosterParameters { impl TreeBoosterParameters { pub(crate) fn as_string_pairs(&self) -> Vec<(String, String)> { - let mut v = Vec::new(); - - v.push(("booster".to_owned(), "gbtree".to_owned())); - - v.push(("eta".to_owned(), self.eta.to_string())); - v.push(("gamma".to_owned(), self.gamma.to_string())); - v.push(("max_depth".to_owned(), self.max_depth.to_string())); - v.push(("min_child_weight".to_owned(), self.min_child_weight.to_string())); - v.push(("max_delta_step".to_owned(), self.max_delta_step.to_string())); - v.push(("subsample".to_owned(), self.subsample.to_string())); - v.push(("colsample_bytree".to_owned(), self.colsample_bytree.to_string())); - v.push(("colsample_bylevel".to_owned(), self.colsample_bylevel.to_string())); - v.push(("colsample_bynode".to_owned(), self.colsample_bynode.to_string())); - v.push(("lambda".to_owned(), self.lambda.to_string())); - v.push(("alpha".to_owned(), self.alpha.to_string())); - v.push(("tree_method".to_owned(), self.tree_method.to_string())); - v.push(("sketch_eps".to_owned(), self.sketch_eps.to_string())); - v.push(("scale_pos_weight".to_owned(), self.scale_pos_weight.to_string())); - v.push(("refresh_leaf".to_owned(), (self.refresh_leaf as u8).to_string())); - v.push(("process_type".to_owned(), self.process_type.to_string())); - v.push(("grow_policy".to_owned(), self.grow_policy.to_string())); - v.push(("max_leaves".to_owned(), self.max_leaves.to_string())); - v.push(("max_bin".to_owned(), self.max_bin.to_string())); - v.push(("num_parallel_tree".to_owned(), self.num_parallel_tree.to_string())); - v.push(("predictor".to_owned(), self.predictor.to_string())); + let mut v = vec! 
[ + ("booster".to_owned(), "gbtree".to_owned()), + ("eta".to_owned(), self.eta.to_string()), + ("gamma".to_owned(), self.gamma.to_string()), + ("max_depth".to_owned(), self.max_depth.to_string()), + ("min_child_weight".to_owned(), self.min_child_weight.to_string()), + ("max_delta_step".to_owned(), self.max_delta_step.to_string()), + ("subsample".to_owned(), self.subsample.to_string()), + ("colsample_bytree".to_owned(), self.colsample_bytree.to_string()), + ("colsample_bylevel".to_owned(), self.colsample_bylevel.to_string()), + ("colsample_bynode".to_owned(), self.colsample_bynode.to_string()), + ("lambda".to_owned(), self.lambda.to_string()), + ("alpha".to_owned(), self.alpha.to_string()), + ("tree_method".to_owned(), self.tree_method.to_string()), + ("sketch_eps".to_owned(), self.sketch_eps.to_string()), + ("scale_pos_weight".to_owned(), self.scale_pos_weight.to_string()), + ("refresh_leaf".to_owned(), (self.refresh_leaf as u8).to_string()), + ("process_type".to_owned(), self.process_type.to_string()), + ("grow_policy".to_owned(), self.grow_policy.to_string()), + ("max_leaves".to_owned(), self.max_leaves.to_string()), + ("max_bin".to_owned(), self.max_bin.to_string()), + ("num_parallel_tree".to_owned(), self.num_parallel_tree.to_string()), + ("predictor".to_owned(), self.predictor.to_string()), + ]; // Don't pass anything to XGBoost if the user didn't specify anything. // This allows XGBoost to figure it out on it's own, and suppresses the diff --git a/xgboost-sys/xgboost b/xgboost-sys/xgboost index 78ec77f..82d846b 160000 --- a/xgboost-sys/xgboost +++ b/xgboost-sys/xgboost @@ -1 +1 @@ -Subproject commit 78ec77fa9787de69e4458e3f32f9d39ea1281955 +Subproject commit 82d846bbeb83c652a0b1dff0e3519e67569c4a3d From e33922d37c3227b83d5415b2e47c49b037616280 Mon Sep 17 00:00:00 2001 From: Montana Low Date: Thu, 28 Dec 2023 21:42:07 -0800 Subject: [PATCH 20/36] wut wut --- src/dmatrix.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/dmatrix.rs b/src/dmatrix.rs index 979ac2f..97bb609 100644 --- a/src/dmatrix.rs +++ b/src/dmatrix.rs @@ -61,10 +61,10 @@ static KEY_BASE_MARGIN: &str = "base_margin"; /// ``` /// use xgboost::DMatrix; /// -/// let indptr = &[0, 2, 3, 6]; +/// let indptr = &[0, 1, 2, 6]; /// let indices = &[0, 2, 2, 0, 1, 2]; /// let data = &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]; -/// let dmat = DMatrix::from_csr(indptr, indices, data, None).unwrap(); +/// let dmat = DMatrix::from_csc(indptr, indices, data, None).unwrap(); /// assert_eq!(dmat.shape(), (3, 3)); /// ``` #[derive(Debug)] @@ -199,10 +199,8 @@ impl DMatrix { debug!("Loading DMatrix from: {}", path.as_ref().display()); let mut handle = ptr::null_mut(); let fname = ffi::CString::new(path.as_ref().as_os_str().as_bytes()).unwrap(); - let silent = true; - xgb_call!(xgboost_sys::XGDMatrixCreateFromFile( + xgb_call!(xgboost_sys::XGDMatrixCreateFromURI( fname.as_ptr(), - silent as i32, &mut handle ))?; DMatrix::new(handle) From 247b2629556f54f6ca18600cd1d8cc0a27257599 Mon Sep 17 00:00:00 2001 From: Montana Low Date: Thu, 28 Dec 2023 22:15:29 -0800 Subject: [PATCH 21/36] binary --- src/dmatrix.rs | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/src/dmatrix.rs b/src/dmatrix.rs index 97bb609..d43f461 100644 --- a/src/dmatrix.rs +++ b/src/dmatrix.rs @@ -206,6 +206,19 @@ impl DMatrix { DMatrix::new(handle) } + + pub fn load_binary>(path: P) -> XGBResult { + debug!("Loading DMatrix from: {}", path.as_ref().display()); + let mut handle = ptr::null_mut(); + let fname = 
ffi::CString::new(path.as_ref().as_os_str().as_bytes()).unwrap(); + xgb_call!(xgboost_sys::XGDMatrixCreateFromFile( + fname.as_ptr(), + 1, + &mut handle + )).unwrap(); + DMatrix::new(handle) + } + /// Serialise this `DMatrix` as a binary file to given path. pub fn save>(&self, path: P) -> XGBResult<()> { debug!("Writing DMatrix to: {}", path.as_ref().display()); @@ -383,12 +396,10 @@ mod tests { let out_path = tmp_dir.path().join("dmat.bin"); dmat.save(&out_path).unwrap(); - let out_path = out_path.to_string_lossy(); - // let read_path = format!(r#"{{"uri": "{out_path}?format=csv"}}"#); - // let dmat2 = DMatrix::load(&read_path).unwrap(); + let dmat2 = DMatrix::load_binary(out_path).unwrap(); - // assert_eq!(dmat.num_rows(), dmat2.num_rows()); - // assert_eq!(dmat.num_cols(), dmat2.num_cols()); + assert_eq!(dmat.num_rows(), dmat2.num_rows()); + assert_eq!(dmat.num_cols(), dmat2.num_cols()); // TODO: check contents as well, if possible } From 60074af11763d09acc67a7be6daf66bbf9debc0e Mon Sep 17 00:00:00 2001 From: Montana Low Date: Thu, 28 Dec 2023 22:38:25 -0800 Subject: [PATCH 22/36] tests pass --- src/booster.rs | 30 ++++++++++++++++++++---------- src/dmatrix.rs | 12 ++---------- src/parameters/dart.rs | 12 +++--------- src/parameters/learning.rs | 2 -- src/parameters/linear.rs | 5 +---- src/parameters/tree.rs | 2 +- 6 files changed, 27 insertions(+), 36 deletions(-) diff --git a/src/booster.rs b/src/booster.rs index 39d326c..6471bd0 100644 --- a/src/booster.rs +++ b/src/booster.rs @@ -761,7 +761,8 @@ mod tests { #[test] fn save_and_load_from_buffer() { - let dmat_train = DMatrix::load(r#"{"uri": "xgboost-sys/xgboost/demo/data/agaricus.txt.train?format=libsvm"}"#).unwrap(); + let dmat_train = + DMatrix::load(r#"{"uri": "xgboost-sys/xgboost/demo/data/agaricus.txt.train?format=libsvm"}"#).unwrap(); let mut booster = Booster::new_with_cached_dmats(&BoosterParameters::default(), &[&dmat_train]).unwrap(); let attr = booster.get_attribute("foo").expect("Getting attribute failed"); assert_eq!(attr, None); @@ -804,8 +805,10 @@ mod tests { #[test] fn predict() { - let dmat_train = DMatrix::load(r#"{"uri": "xgboost-sys/xgboost/demo/data/agaricus.txt.train?format=libsvm"}"#).unwrap(); - let dmat_test =DMatrix::load(r#"{"uri": "xgboost-sys/xgboost/demo/data/agaricus.txt.test?format=libsvm"}"#).unwrap(); + let dmat_train = + DMatrix::load(r#"{"uri": "xgboost-sys/xgboost/demo/data/agaricus.txt.train?format=libsvm"}"#).unwrap(); + let dmat_test = + DMatrix::load(r#"{"uri": "xgboost-sys/xgboost/demo/data/agaricus.txt.test?format=libsvm"}"#).unwrap(); let tree_params = tree::TreeBoosterParametersBuilder::default() .max_depth(2) @@ -886,8 +889,10 @@ mod tests { #[test] fn predict_leaf() { - let dmat_train = DMatrix::load(r#"{"uri": "xgboost-sys/xgboost/demo/data/agaricus.txt.train?format=libsvm"}"#).unwrap(); - let dmat_test = DMatrix::load(r#"{"uri": "xgboost-sys/xgboost/demo/data/agaricus.txt.test?format=libsvm"}"#).unwrap(); + let dmat_train = + DMatrix::load(r#"{"uri": "xgboost-sys/xgboost/demo/data/agaricus.txt.train?format=libsvm"}"#).unwrap(); + let dmat_test = + DMatrix::load(r#"{"uri": "xgboost-sys/xgboost/demo/data/agaricus.txt.test?format=libsvm"}"#).unwrap(); let tree_params = tree::TreeBoosterParametersBuilder::default() .max_depth(2) @@ -919,8 +924,10 @@ mod tests { #[test] fn predict_contributions() { - let dmat_train = DMatrix::load(r#"{"uri": "xgboost-sys/xgboost/demo/data/agaricus.txt.train?format=libsvm"}"#).unwrap(); - let dmat_test = DMatrix::load(r#"{"uri": 
"xgboost-sys/xgboost/demo/data/agaricus.txt.test?format=libsvm"}"#).unwrap(); + let dmat_train = + DMatrix::load(r#"{"uri": "xgboost-sys/xgboost/demo/data/agaricus.txt.train?format=libsvm"}"#).unwrap(); + let dmat_test = + DMatrix::load(r#"{"uri": "xgboost-sys/xgboost/demo/data/agaricus.txt.test?format=libsvm"}"#).unwrap(); let tree_params = tree::TreeBoosterParametersBuilder::default() .max_depth(2) @@ -953,8 +960,10 @@ mod tests { #[test] fn predict_interactions() { - let dmat_train = DMatrix::load(r#"{"uri": "xgboost-sys/xgboost/demo/data/agaricus.txt.train?format=libsvm"}"#).unwrap(); - let dmat_test = DMatrix::load(r#"{"uri": "xgboost-sys/xgboost/demo/data/agaricus.txt.test?format=libsvm"}"#).unwrap(); + let dmat_train = + DMatrix::load(r#"{"uri": "xgboost-sys/xgboost/demo/data/agaricus.txt.train?format=libsvm"}"#).unwrap(); + let dmat_test = + DMatrix::load(r#"{"uri": "xgboost-sys/xgboost/demo/data/agaricus.txt.test?format=libsvm"}"#).unwrap(); let tree_params = tree::TreeBoosterParametersBuilder::default() .max_depth(2) @@ -1005,7 +1014,8 @@ mod tests { #[test] fn dump_model() { - let dmat_train = DMatrix::load(r#"{"uri": "xgboost-sys/xgboost/demo/data/agaricus.txt.train?format=libsvm"}"#).unwrap(); + let dmat_train = + DMatrix::load(r#"{"uri": "xgboost-sys/xgboost/demo/data/agaricus.txt.train?format=libsvm"}"#).unwrap(); println!("{:?}", dmat_train.shape()); diff --git a/src/dmatrix.rs b/src/dmatrix.rs index d43f461..98cf30a 100644 --- a/src/dmatrix.rs +++ b/src/dmatrix.rs @@ -199,23 +199,15 @@ impl DMatrix { debug!("Loading DMatrix from: {}", path.as_ref().display()); let mut handle = ptr::null_mut(); let fname = ffi::CString::new(path.as_ref().as_os_str().as_bytes()).unwrap(); - xgb_call!(xgboost_sys::XGDMatrixCreateFromURI( - fname.as_ptr(), - &mut handle - ))?; + xgb_call!(xgboost_sys::XGDMatrixCreateFromURI(fname.as_ptr(), &mut handle))?; DMatrix::new(handle) } - pub fn load_binary>(path: P) -> XGBResult { debug!("Loading DMatrix from: {}", path.as_ref().display()); let mut handle = ptr::null_mut(); let fname = ffi::CString::new(path.as_ref().as_os_str().as_bytes()).unwrap(); - xgb_call!(xgboost_sys::XGDMatrixCreateFromFile( - fname.as_ptr(), - 1, - &mut handle - )).unwrap(); + xgb_call!(xgboost_sys::XGDMatrixCreateFromFile(fname.as_ptr(), 1, &mut handle)).unwrap(); DMatrix::new(handle) } diff --git a/src/parameters/dart.rs b/src/parameters/dart.rs index 7eb06af..42f254e 100644 --- a/src/parameters/dart.rs +++ b/src/parameters/dart.rs @@ -6,8 +6,7 @@ use std::default::Default; use super::Interval; /// Type of sampling algorithm. -#[derive(Clone)] -#[derive(Default)] +#[derive(Clone, Default)] pub enum SampleType { /// Dropped trees are selected uniformly. #[default] @@ -26,11 +25,8 @@ impl ToString for SampleType { } } - - /// Type of normalization algorithm. -#[derive(Clone)] -#[derive(Default)] +#[derive(Clone, Default)] pub enum NormalizeType { /// New trees have the same weight of each of dropped trees. /// * weight of new trees are 1 / (k + learning_rate) @@ -54,8 +50,6 @@ impl ToString for NormalizeType { } } - - /// Additional parameters for Dart Booster. 
#[derive(Builder, Clone)] #[builder(build_fn(validate = "Self::validate"))] @@ -102,7 +96,7 @@ impl DartBoosterParameters { ("normalize_type".to_owned(), self.normalize_type.to_string()), ("rate_drop".to_owned(), self.rate_drop.to_string()), ("one_drop".to_owned(), (self.one_drop as u8).to_string()), - ("skip_drop".to_owned(), self.skip_drop.to_string()) + ("skip_drop".to_owned(), self.skip_drop.to_string()), ] } } diff --git a/src/parameters/learning.rs b/src/parameters/learning.rs index 35fc6a7..5ce9808 100644 --- a/src/parameters/learning.rs +++ b/src/parameters/learning.rs @@ -100,8 +100,6 @@ impl ToString for Objective { } } - - /// Type of evaluation metrics to use during learning. #[derive(Clone)] pub enum Metrics { diff --git a/src/parameters/linear.rs b/src/parameters/linear.rs index ac52e4f..562905d 100644 --- a/src/parameters/linear.rs +++ b/src/parameters/linear.rs @@ -3,8 +3,7 @@ use std::default::Default; /// Linear model algorithm. -#[derive(Clone)] -#[derive(Default)] +#[derive(Clone, Default)] pub enum LinearUpdate { /// Parallel coordinate descent algorithm based on shotgun algorithm. Uses ‘hogwild’ parallelism and /// therefore produces a nondeterministic solution on each run. @@ -24,8 +23,6 @@ impl ToString for LinearUpdate { } } - - /// BoosterParameters for Linear Booster. #[derive(Builder, Clone)] #[builder(default)] diff --git a/src/parameters/tree.rs b/src/parameters/tree.rs index 7c43ff4..6c7343c 100644 --- a/src/parameters/tree.rs +++ b/src/parameters/tree.rs @@ -357,7 +357,7 @@ impl Default for TreeBoosterParameters { impl TreeBoosterParameters { pub(crate) fn as_string_pairs(&self) -> Vec<(String, String)> { - let mut v = vec! [ + let mut v = vec![ ("booster".to_owned(), "gbtree".to_owned()), ("eta".to_owned(), self.eta.to_string()), ("gamma".to_owned(), self.gamma.to_string()), From 0307a92960290bbe74b69cdeff2dd9090201f346 Mon Sep 17 00:00:00 2001 From: Montana Low Date: Mon, 29 Apr 2024 11:54:50 -0700 Subject: [PATCH 23/36] bump version, fix c++ stdlib bindgen (un)support --- xgboost-sys/Cargo.toml | 2 +- xgboost-sys/README.md | 2 +- xgboost-sys/build.rs | 1 + xgboost-sys/xgboost | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/xgboost-sys/Cargo.toml b/xgboost-sys/Cargo.toml index 941790f..b4603ff 100644 --- a/xgboost-sys/Cargo.toml +++ b/xgboost-sys/Cargo.toml @@ -13,7 +13,7 @@ readme = "README.md" libc = "0.2" [build-dependencies] -bindgen = "0.68" +bindgen = "0.69" cmake = "0.1" [features] diff --git a/xgboost-sys/README.md b/xgboost-sys/README.md index df39717..4a42bcc 100644 --- a/xgboost-sys/README.md +++ b/xgboost-sys/README.md @@ -3,4 +3,4 @@ FFI bindings to [XGBoost](https://xgboost.readthedocs.io/), generated at compile time with [bindgen](https://github.com/rust-lang-nursery/rust-bindgen). -Currently uses XGBoost v0.81. +Currently uses XGBoost v2.0. 
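bindgen has trouble translating C++ standard-library internals, so bindings against XGBoost 2.x are typically generated with those items blocklisted, leaving only the C API surface. A self-contained build-script sketch of that idea, assuming a wrapper.h that includes xgboost/c_api.h (the header name and the omitted -I include paths are placeholders, not the crate's actual configuration):

    // build.rs — illustrative sketch only
    use std::{env, path::PathBuf};

    fn main() {
        let bindings = bindgen::Builder::default()
            .header("wrapper.h")
            // C++ stdlib items are not well supported by bindgen; skip them.
            .blocklist_item("std::.*")
            .clang_args(&["-x", "c++", "-std=c++17"])
            .generate()
            .expect("unable to generate bindings");
        bindings
            .write_to_file(PathBuf::from(env::var("OUT_DIR").unwrap()).join("bindings.rs"))
            .expect("couldn't write bindings");
    }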
diff --git a/xgboost-sys/build.rs b/xgboost-sys/build.rs index c48cf83..93ed420 100644 --- a/xgboost-sys/build.rs +++ b/xgboost-sys/build.rs @@ -50,6 +50,7 @@ fn main() { let bindings = bindgen::Builder::default() .header("wrapper.h") + .blocklist_item("std::.*")// stdlib is not well supported by bindgen .clang_args(&["-x", "c++", "-std=c++11"]) .clang_arg(format!("-I{}", xgb_root.join("include").display())) .clang_arg(format!("-I{}", xgb_root.join("rabit/include").display())) diff --git a/xgboost-sys/xgboost b/xgboost-sys/xgboost index 82d846b..5e64276 160000 --- a/xgboost-sys/xgboost +++ b/xgboost-sys/xgboost @@ -1 +1 @@ -Subproject commit 82d846bbeb83c652a0b1dff0e3519e67569c4a3d +Subproject commit 5e64276a9b95df57e6dd8f9e63347636f4e5d331 From ffa1b735c6e0ca853e65dfb4b6055f54fe79c3a2 Mon Sep 17 00:00:00 2001 From: Montana Low Date: Mon, 29 Apr 2024 11:56:14 -0700 Subject: [PATCH 24/36] removed upstream dmlc/xgboost/pull/6505 --- src/booster.rs | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/src/booster.rs b/src/booster.rs index 6471bd0..e4b175c 100644 --- a/src/booster.rs +++ b/src/booster.rs @@ -152,7 +152,7 @@ impl Booster { //let num_parallel_tree = 1; // load distributed code checkpoint from rabit - let version = bst.load_rabit_checkpoint()?; + let version = unsafe { xgboost_sys::RabitVersionNumber() }; debug!("Loaded Rabit checkpoint: version={}", version); assert!(unsafe { xgboost_sys::RabitGetWorldSize() != 1 || version == 0 }); @@ -171,7 +171,6 @@ impl Booster { debug!("Updating in round: {}", i); bst.update(params.dtrain, i)?; } - bst.save_rabit_checkpoint()?; } assert!(unsafe { xgboost_sys::RabitGetWorldSize() == 1 || version == xgboost_sys::RabitVersionNumber() }); @@ -328,7 +327,7 @@ impl Booster { let name = "default"; let mut eval = self.eval_set(&[(dmat, name)], 0)?; let mut result = HashMap::new(); - eval.remove(name).unwrap().into_iter().for_each(|(k, v)| { + eval.swap_remove(name).unwrap().into_iter().for_each(|(k, v)| { result.insert(k.to_owned(), v); }); @@ -564,16 +563,6 @@ impl Booster { Ok(out_vec.join("\n")) } - pub(crate) fn load_rabit_checkpoint(&self) -> XGBResult { - let mut version = 0; - xgb_call!(xgboost_sys::XGBoosterLoadRabitCheckpoint(self.handle, &mut version))?; - Ok(version) - } - - pub(crate) fn save_rabit_checkpoint(&self) -> XGBResult<()> { - xgb_call!(xgboost_sys::XGBoosterSaveRabitCheckpoint(self.handle)) - } - pub fn set_param(&mut self, name: &str, value: &str) -> XGBResult<()> { let name = ffi::CString::new(name).unwrap(); let value = ffi::CString::new(value).unwrap(); @@ -742,11 +731,6 @@ mod tests { assert!(res.is_ok()); } - #[test] - fn load_rabit_version() { - let version = load_test_booster().load_rabit_checkpoint().unwrap(); - assert_eq!(version, 0); - } #[test] fn get_set_attr() { @@ -841,7 +825,7 @@ mod tests { assert_eq!(*train_metrics.get("map@4-").unwrap(), 1.0); let test_metrics = booster.evaluate(&dmat_test).unwrap(); - assert_eq!(*test_metrics.get("logloss").unwrap(), 0.006919953); + assert_eq!(*test_metrics.get("logloss").unwrap(), 0.0069199526); assert_eq!(*test_metrics.get("map@4-").unwrap(), 1.0); let v = booster.predict(&dmat_test).unwrap(); From f81944d4e3a3fea4beae413292ad46cf0878b601 Mon Sep 17 00:00:00 2001 From: Montana Low Date: Mon, 29 Apr 2024 11:56:14 -0700 Subject: [PATCH 25/36] removed upstream dmlc/xgboost/pull/6505 --- src/booster.rs | 29 +---------------------------- 1 file changed, 1 insertion(+), 28 deletions(-) diff --git a/src/booster.rs b/src/booster.rs index 
e4b175c..d33f23b 100644 --- a/src/booster.rs +++ b/src/booster.rs @@ -149,34 +149,7 @@ impl Booster { }; let mut bst = Booster::new_with_cached_dmats(¶ms.booster_params, &cached_dmats)?; - //let num_parallel_tree = 1; - - // load distributed code checkpoint from rabit - let version = unsafe { xgboost_sys::RabitVersionNumber() }; - debug!("Loaded Rabit checkpoint: version={}", version); - assert!(unsafe { xgboost_sys::RabitGetWorldSize() != 1 || version == 0 }); - - let _rank = unsafe { xgboost_sys::RabitGetRank() }; - let start_iteration = version / 2; - //let mut nboost = start_iteration; - - for i in start_iteration..params.boost_rounds as i32 { - // distributed code: need to resume to this point - // skip first update if a recovery step - if version % 2 == 0 { - if let Some(objective_fn) = params.custom_objective_fn { - debug!("Boosting in round: {}", i); - bst.update_custom(params.dtrain, objective_fn)?; - } else { - debug!("Updating in round: {}", i); - bst.update(params.dtrain, i)?; - } - } - - assert!(unsafe { xgboost_sys::RabitGetWorldSize() == 1 || version == xgboost_sys::RabitVersionNumber() }); - - //nboost += 1; - + for i in 0..params.boost_rounds as i32 { if let Some(eval_sets) = params.evaluation_sets { let mut dmat_eval_results = bst.eval_set(eval_sets, i)?; From 1da1c032e37ba4ecdc2808fe16dfd663034bdb71 Mon Sep 17 00:00:00 2001 From: Montana Low Date: Mon, 29 Apr 2024 14:48:45 -0700 Subject: [PATCH 26/36] intel/m1 compatibility --- src/booster.rs | 2 +- xgboost-sys/build.rs | 20 ++++++++++++-------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/booster.rs b/src/booster.rs index d33f23b..b4a36bc 100644 --- a/src/booster.rs +++ b/src/booster.rs @@ -148,7 +148,7 @@ impl Booster { dmats }; - let mut bst = Booster::new_with_cached_dmats(¶ms.booster_params, &cached_dmats)?; + let bst = Booster::new_with_cached_dmats(¶ms.booster_params, &cached_dmats)?; for i in 0..params.boost_rounds as i32 { if let Some(eval_sets) = params.evaluation_sets { let mut dmat_eval_results = bst.eval_set(eval_sets, i)?; diff --git a/xgboost-sys/build.rs b/xgboost-sys/build.rs index 93ed420..81c0a43 100644 --- a/xgboost-sys/build.rs +++ b/xgboost-sys/build.rs @@ -33,17 +33,21 @@ fn main() { #[cfg(not(feature = "cuda"))] let mut dst = Config::new(&xgb_root); - let dst = dst.uses_cxx11() + let mut dst = dst.uses_cxx11() .define("BUILD_STATIC_LIB", "ON"); #[cfg(target_os = "macos")] - let dst = - dst - .define("CMAKE_C_COMPILER", "/opt/homebrew/opt/llvm/bin/clang") - .define("CMAKE_CXX_COMPILER", "/opt/homebrew/opt/llvm/bin/clang++") - .define("OPENMP_LIBRARIES", "/opt/homebrew/opt/llvm/lib") - .define("OPENMP_INCLUDES", "/opt/homebrew/opt/llvm/include"); - + { + let path = PathBuf::from("/opt/homebrew/"); // check for m1 vs intel config + if let Ok(_dir) = std::fs::read_dir(&path) { + dst = + dst + .define("CMAKE_C_COMPILER", "/opt/homebrew/opt/llvm/bin/clang") + .define("CMAKE_CXX_COMPILER", "/opt/homebrew/opt/llvm/bin/clang++") + .define("OPENMP_LIBRARIES", "/opt/homebrew/opt/llvm/lib") + .define("OPENMP_INCLUDES", "/opt/homebrew/opt/llvm/include"); + }; + } let dst = dst.build(); let xgb_root = xgb_root.canonicalize().unwrap(); From 767f0eafcac284afbe787b2f0adac52c876f746e Mon Sep 17 00:00:00 2001 From: Montana Low Date: Mon, 29 Apr 2024 17:59:32 -0500 Subject: [PATCH 27/36] Update README.md Update version --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 009f869..58a348a 100644 --- a/README.md +++ b/README.md @@ -81,7 +81,7 
@@ more detailed examples of different features. Currently in a very early stage of development, so the API is changing as usability issues occur, or new features are supported. -Builds against XGBoost 0.81. +Builds against XGBoost 2.0.3. ### Platforms From 51e606a8cecbccfd2fb85b9eae47ba3882b04569 Mon Sep 17 00:00:00 2001 From: cyccbxhl Date: Tue, 8 Oct 2024 14:00:29 +0800 Subject: [PATCH 28/36] Fix the NaN recall in xgboost training: Update the tag of xgboost to v2.0.3; Add train code in train iteration; Passed all examples. --- examples/basic/src/main.rs | 6 +-- examples/generalised_linear_model/src/main.rs | 4 +- src/booster.rs | 39 ++++++++++++++++++- src/parameters/learning.rs | 4 +- xgboost-sys/build.rs | 5 ++- xgboost-sys/xgboost | 2 +- 6 files changed, 48 insertions(+), 12 deletions(-) diff --git a/examples/basic/src/main.rs b/examples/basic/src/main.rs index 8e1ca4d..eee9713 100644 --- a/examples/basic/src/main.rs +++ b/examples/basic/src/main.rs @@ -66,15 +66,15 @@ fn main() { // save and load model file println!("\nSaving and loading Booster model..."); - booster.save("xgb.model").unwrap(); - let booster = Booster::load("xgb.model").unwrap(); + booster.save("xgb.json").unwrap(); + let booster = Booster::load("xgb.json").unwrap(); let preds2 = booster.predict(&dtest).unwrap(); assert_eq!(preds, preds2); // save and load data matrix file println!("\nSaving and loading matrix data..."); dtest.save("test.dmat").unwrap(); - let dtest2 = DMatrix::load("test.dmat").unwrap(); + let dtest2 = DMatrix::load_binary("test.dmat").unwrap(); assert_eq!(booster.predict(&dtest2).unwrap(), preds); // error handling example diff --git a/examples/generalised_linear_model/src/main.rs b/examples/generalised_linear_model/src/main.rs index a34974c..ceb1022 100644 --- a/examples/generalised_linear_model/src/main.rs +++ b/examples/generalised_linear_model/src/main.rs @@ -12,8 +12,8 @@ fn main() { // load train and test matrices from text files (in LibSVM format) println!("Custom objective example..."); - let dtrain = DMatrix::load("../../xgboost-sys/xgboost/demo/data/agaricus.txt.train").unwrap(); - let dtest = DMatrix::load("../../xgboost-sys/xgboost/demo/data/agaricus.txt.test").unwrap(); + let dtrain = DMatrix::load(r#"{"uri": "../../xgboost-sys/xgboost/demo/data/agaricus.txt.train?format=libsvm"}"#).unwrap(); + let dtest = DMatrix::load(r#"{"uri": "../../xgboost-sys/xgboost/demo/data/agaricus.txt.test?format=libsvm"}"#).unwrap(); // configure objectives, metrics, etc. 
let learning_params = parameters::learning::LearningTaskParametersBuilder::default() diff --git a/src/booster.rs b/src/booster.rs index b4a36bc..4e91689 100644 --- a/src/booster.rs +++ b/src/booster.rs @@ -148,8 +148,29 @@ impl Booster { dmats }; - let bst = Booster::new_with_cached_dmats(¶ms.booster_params, &cached_dmats)?; - for i in 0..params.boost_rounds as i32 { + let mut bst = Booster::new_with_cached_dmats(¶ms.booster_params, &cached_dmats)?; + // load distributed code checkpoint from rabit + let mut version = bst.load_rabit_checkpoint()?; + debug!("Loaded Rabit checkpoint: version={}", version); + assert!(unsafe { xgboost_sys::RabitGetWorldSize() != 1 || version == 0 }); + let start_iteration = version / 2; + for i in start_iteration..params.boost_rounds as i32 { + // distributed code: need to resume to this point + // skip first update if a recovery step + if version % 2 == 0 { + if let Some(objective_fn) = params.custom_objective_fn { + debug!("Boosting in round: {}", i); + bst.update_custom(params.dtrain, objective_fn)?; + } else { + debug!("Updating in round: {}", i); + bst.update(params.dtrain, i)?; + } + let _ = bst.save_rabit_checkpoint()?; + version += 1; + } + + assert!(unsafe { xgboost_sys::RabitGetWorldSize() == 1 || version == xgboost_sys::RabitVersionNumber() }); + if let Some(eval_sets) = params.evaluation_sets { let mut dmat_eval_results = bst.eval_set(eval_sets, i)?; @@ -182,6 +203,10 @@ impl Booster { } println!(); } + + // do checkpoint after evaluation, in case evaluation also updates booster. + let _ = bst.save_rabit_checkpoint(); + version += 1; } Ok(bst) @@ -536,6 +561,16 @@ impl Booster { Ok(out_vec.join("\n")) } + pub(crate) fn load_rabit_checkpoint(&self) -> XGBResult { + let mut version = 0; + xgb_call!(xgboost_sys::XGBoosterLoadRabitCheckpoint(self.handle, &mut version))?; + Ok(version) + } + + pub(crate) fn save_rabit_checkpoint(&self) -> XGBResult<()> { + xgb_call!(xgboost_sys::XGBoosterSaveRabitCheckpoint(self.handle)) + } + pub fn set_param(&mut self, name: &str, value: &str) -> XGBResult<()> { let name = ffi::CString::new(name).unwrap(); let value = ffi::CString::new(value).unwrap(); diff --git a/src/parameters/learning.rs b/src/parameters/learning.rs index 5ce9808..828e70e 100644 --- a/src/parameters/learning.rs +++ b/src/parameters/learning.rs @@ -81,11 +81,11 @@ impl Clone for Objective { impl ToString for Objective { fn to_string(&self) -> String { match *self { - Objective::RegLinear => "reg:linear".to_owned(), + Objective::RegLinear => "reg:squarederror".to_owned(), Objective::RegLogistic => "reg:logistic".to_owned(), Objective::BinaryLogistic => "binary:logistic".to_owned(), Objective::BinaryLogisticRaw => "binary:logitraw".to_owned(), - Objective::GpuRegLinear => "gpu:reg:linear".to_owned(), + Objective::GpuRegLinear => "gpu:reg:squarederror".to_owned(), Objective::GpuRegLogistic => "gpu:reg:logistic".to_owned(), Objective::GpuBinaryLogistic => "gpu:binary:logistic".to_owned(), Objective::GpuBinaryLogisticRaw => "gpu:binary:logitraw".to_owned(), diff --git a/xgboost-sys/build.rs b/xgboost-sys/build.rs index 81c0a43..01610b8 100644 --- a/xgboost-sys/build.rs +++ b/xgboost-sys/build.rs @@ -33,7 +33,7 @@ fn main() { #[cfg(not(feature = "cuda"))] let mut dst = Config::new(&xgb_root); - let mut dst = dst.uses_cxx11() + let dst = dst.uses_cxx11() .define("BUILD_STATIC_LIB", "ON"); #[cfg(target_os = "macos")] @@ -54,7 +54,6 @@ fn main() { let bindings = bindgen::Builder::default() .header("wrapper.h") - .blocklist_item("std::.*")// stdlib is not 
well supported by bindgen .clang_args(&["-x", "c++", "-std=c++11"]) .clang_arg(format!("-I{}", xgb_root.join("include").display())) .clang_arg(format!("-I{}", xgb_root.join("rabit/include").display())) @@ -86,6 +85,8 @@ fn main() { println!("cargo:rustc-link-lib=c++"); println!("cargo:rustc-link-lib=dylib=omp"); } else { + println!("cargo:rustc-cxxflags=-std=c++17"); + println!("cargo:rustc-link-lib=stdc++fs"); println!("cargo:rustc-link-lib=stdc++"); println!("cargo:rustc-link-lib=dylib=gomp"); } diff --git a/xgboost-sys/xgboost b/xgboost-sys/xgboost index 5e64276..82d846b 160000 --- a/xgboost-sys/xgboost +++ b/xgboost-sys/xgboost @@ -1 +1 @@ -Subproject commit 5e64276a9b95df57e6dd8f9e63347636f4e5d331 +Subproject commit 82d846bbeb83c652a0b1dff0e3519e67569c4a3d From cc976b2c430d15e4287c4764f7848383f80072ea Mon Sep 17 00:00:00 2001 From: cyccbxhl Date: Tue, 8 Oct 2024 17:48:47 +0800 Subject: [PATCH 29/36] Tested successfully on MAC as well. --- xgboost-sys/build.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/xgboost-sys/build.rs b/xgboost-sys/build.rs index 01610b8..71988bf 100644 --- a/xgboost-sys/build.rs +++ b/xgboost-sys/build.rs @@ -33,7 +33,8 @@ fn main() { #[cfg(not(feature = "cuda"))] let mut dst = Config::new(&xgb_root); - let dst = dst.uses_cxx11() + #[allow(unused_mut)] + let mut dst = dst.uses_cxx11() .define("BUILD_STATIC_LIB", "ON"); #[cfg(target_os = "macos")] @@ -54,16 +55,12 @@ fn main() { let bindings = bindgen::Builder::default() .header("wrapper.h") + .blocklist_item("std::__1.*") .clang_args(&["-x", "c++", "-std=c++11"]) .clang_arg(format!("-I{}", xgb_root.join("include").display())) .clang_arg(format!("-I{}", xgb_root.join("rabit/include").display())) .clang_arg(format!("-I{}", xgb_root.join("dmlc-core/include").display())); - #[cfg(target_os = "linux")] - let bindings = bindings - .clang_arg(format!("-I/usr/include/c++/11")) - .clang_arg(format!("-I/usr/include/x86_64-linux-gnu/c++/11")); - #[cfg(feature = "cuda")] let bindings = bindings.clang_arg("-I/usr/local/cuda/include"); let bindings = bindings @@ -83,6 +80,7 @@ fn main() { // link to appropriate C++ lib if target.contains("apple") { println!("cargo:rustc-link-lib=c++"); + println!("cargo:rustc-link-search=native=/opt/homebrew/opt/libomp/lib"); println!("cargo:rustc-link-lib=dylib=omp"); } else { println!("cargo:rustc-cxxflags=-std=c++17"); From 244d553b2a2725ce2309e368b0df06ec7e2deab9 Mon Sep 17 00:00:00 2001 From: Silas Marvin <19626586+SilasMarvin@users.noreply.github.com> Date: Thu, 23 Jan 2025 09:46:26 -0800 Subject: [PATCH 30/36] Updated to use c++17 --- xgboost-sys/build.rs | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/xgboost-sys/build.rs b/xgboost-sys/build.rs index 71988bf..d04dac4 100644 --- a/xgboost-sys/build.rs +++ b/xgboost-sys/build.rs @@ -2,9 +2,9 @@ extern crate bindgen; extern crate cmake; use cmake::Config; -use std::process::Command; use std::env; use std::path::{Path, PathBuf}; +use std::process::Command; fn main() { let target = env::var("TARGET").unwrap(); @@ -24,29 +24,27 @@ fn main() { // CMake #[cfg(feature = "cuda")] let dst = Config::new(&xgb_root) - .uses_cxx11() .define("BUILD_STATIC_LIB", "ON") .define("USE_CUDA", "ON") .define("BUILD_WITH_CUDA", "ON") - .define("BUILD_WITH_CUDA_CUB", "ON"); + .define("BUILD_WITH_CUDA_CUB", "ON") + .define("CMAKE_CXX_STANDARD", "17"); #[cfg(not(feature = "cuda"))] let mut dst = Config::new(&xgb_root); #[allow(unused_mut)] - let mut dst = dst.uses_cxx11() - 
.define("BUILD_STATIC_LIB", "ON"); + let mut dst = dst.define("BUILD_STATIC_LIB", "ON").define("CMAKE_CXX_STANDARD", "17"); #[cfg(target_os = "macos")] { let path = PathBuf::from("/opt/homebrew/"); // check for m1 vs intel config if let Ok(_dir) = std::fs::read_dir(&path) { - dst = - dst - .define("CMAKE_C_COMPILER", "/opt/homebrew/opt/llvm/bin/clang") - .define("CMAKE_CXX_COMPILER", "/opt/homebrew/opt/llvm/bin/clang++") - .define("OPENMP_LIBRARIES", "/opt/homebrew/opt/llvm/lib") - .define("OPENMP_INCLUDES", "/opt/homebrew/opt/llvm/include"); + dst = dst + .define("CMAKE_C_COMPILER", "/opt/homebrew/opt/llvm/bin/clang") + .define("CMAKE_CXX_COMPILER", "/opt/homebrew/opt/llvm/bin/clang++") + .define("OPENMP_LIBRARIES", "/opt/homebrew/opt/llvm/lib") + .define("OPENMP_INCLUDES", "/opt/homebrew/opt/llvm/include"); }; } let dst = dst.build(); @@ -56,16 +54,14 @@ fn main() { let bindings = bindgen::Builder::default() .header("wrapper.h") .blocklist_item("std::__1.*") - .clang_args(&["-x", "c++", "-std=c++11"]) + .clang_args(&["-x", "c++", "-std=c++17"]) .clang_arg(format!("-I{}", xgb_root.join("include").display())) .clang_arg(format!("-I{}", xgb_root.join("rabit/include").display())) .clang_arg(format!("-I{}", xgb_root.join("dmlc-core/include").display())); #[cfg(feature = "cuda")] let bindings = bindings.clang_arg("-I/usr/local/cuda/include"); - let bindings = bindings - .generate() - .expect("Unable to generate bindings."); + let bindings = bindings.generate().expect("Unable to generate bindings."); let out_path = PathBuf::from(env::var("OUT_DIR").unwrap()); bindings From 60e832f9fdbccc8840b91b8122498be472de6775 Mon Sep 17 00:00:00 2001 From: Silas Marvin <19626586+SilasMarvin@users.noreply.github.com> Date: Thu, 23 Jan 2025 12:34:45 -0800 Subject: [PATCH 31/36] Cleanup --- xgboost-sys/build.rs | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/xgboost-sys/build.rs b/xgboost-sys/build.rs index d04dac4..e9f909c 100644 --- a/xgboost-sys/build.rs +++ b/xgboost-sys/build.rs @@ -21,27 +21,22 @@ fn main() { }); } + let mut dst = Config::new(&xgb_root); + dst.define("BUILD_STATIC_LIB", "ON").define("CMAKE_CXX_STANDARD", "17"); + // CMake #[cfg(feature = "cuda")] - let dst = Config::new(&xgb_root) - .define("BUILD_STATIC_LIB", "ON") + dst.define("BUILD_STATIC_LIB", "ON") .define("USE_CUDA", "ON") .define("BUILD_WITH_CUDA", "ON") .define("BUILD_WITH_CUDA_CUB", "ON") .define("CMAKE_CXX_STANDARD", "17"); - #[cfg(not(feature = "cuda"))] - let mut dst = Config::new(&xgb_root); - - #[allow(unused_mut)] - let mut dst = dst.define("BUILD_STATIC_LIB", "ON").define("CMAKE_CXX_STANDARD", "17"); - #[cfg(target_os = "macos")] { let path = PathBuf::from("/opt/homebrew/"); // check for m1 vs intel config if let Ok(_dir) = std::fs::read_dir(&path) { - dst = dst - .define("CMAKE_C_COMPILER", "/opt/homebrew/opt/llvm/bin/clang") + dst.define("CMAKE_C_COMPILER", "/opt/homebrew/opt/llvm/bin/clang") .define("CMAKE_CXX_COMPILER", "/opt/homebrew/opt/llvm/bin/clang++") .define("OPENMP_LIBRARIES", "/opt/homebrew/opt/llvm/lib") .define("OPENMP_INCLUDES", "/opt/homebrew/opt/llvm/include"); From 07a6a4a4a71111731e643569ed72fc7ca7e19808 Mon Sep 17 00:00:00 2001 From: Silas Marvin <19626586+SilasMarvin@users.noreply.github.com> Date: Thu, 23 Jan 2025 12:38:23 -0800 Subject: [PATCH 32/36] Cleanup --- xgboost-sys/build.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/xgboost-sys/build.rs b/xgboost-sys/build.rs index e9f909c..c6ba1a6 100644 --- a/xgboost-sys/build.rs +++ 
b/xgboost-sys/build.rs @@ -26,11 +26,9 @@ fn main() { // CMake #[cfg(feature = "cuda")] - dst.define("BUILD_STATIC_LIB", "ON") - .define("USE_CUDA", "ON") + dst.define("USE_CUDA", "ON") .define("BUILD_WITH_CUDA", "ON") - .define("BUILD_WITH_CUDA_CUB", "ON") - .define("CMAKE_CXX_STANDARD", "17"); + .define("BUILD_WITH_CUDA_CUB", "ON"); #[cfg(target_os = "macos")] { From d851631de9f77a45c79463baad3da32a0a2cbdf1 Mon Sep 17 00:00:00 2001 From: Kevin Zimmerman <4733573+kczimm@users.noreply.github.com> Date: Mon, 27 Jan 2025 11:50:19 -0600 Subject: [PATCH 33/36] add reqs to README --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 58a348a..c408a4c 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,12 @@ Rust bindings for the [XGBoost](https://xgboost.ai) gradient boosting library. +## Requirements + +- Clang v16.0.0 + +## Documentation + * [Documentation](https://docs.rs/xgboost) Basic usage example: From ec745a45da735d0a1cfaa54e2bd350f45d4ca8af Mon Sep 17 00:00:00 2001 From: Montana Low Date: Tue, 28 Jan 2025 17:08:41 -0800 Subject: [PATCH 34/36] update for 2021 edition --- Cargo.toml | 7 ++-- src/booster.rs | 45 +++++++++++++--------- src/dmatrix.rs | 6 ++- xgboost-sys/.cargo/{config => config.toml} | 0 xgboost-sys/Cargo.toml | 3 +- xgboost-sys/build.rs | 15 ++++++-- xgboost-sys/xgboost | 2 +- 7 files changed, 49 insertions(+), 29 deletions(-) rename xgboost-sys/.cargo/{config => config.toml} (100%) diff --git a/Cargo.toml b/Cargo.toml index de79cf2..2a68045 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,14 +8,15 @@ homepage = "https://github.com/davechallis/rust-xgboost" description = "Machine learning using XGBoost" documentation = "https://docs.rs/xgboost" readme = "README.md" +edition = "2021" [dependencies] xgboost-sys = { path = "xgboost-sys" } libc = "0.2" -derive_builder = "0.12" +derive_builder = "0.20" log = "0.4" -tempfile = "3.9" -indexmap = "2.1" +tempfile = "3.15" +indexmap = "2.7" [features] cuda = ["xgboost-sys/cuda"] diff --git a/src/booster.rs b/src/booster.rs index 4e91689..33b0ffc 100644 --- a/src/booster.rs +++ b/src/booster.rs @@ -1,5 +1,5 @@ -use dmatrix::DMatrix; -use error::XGBError; +use crate::dmatrix::DMatrix; +use crate::error::XGBError; use libc; use std::collections::{BTreeMap, HashMap}; use std::io::{self, BufRead, BufReader, Write}; @@ -13,7 +13,7 @@ use tempfile; use xgboost_sys; use super::XGBResult; -use parameters::{BoosterParameters, TrainingParameters}; +use crate::parameters::{BoosterParameters, TrainingParameters}; pub type CustomObjective = fn(&[f32], &DMatrix) -> (Vec, Vec); @@ -365,13 +365,16 @@ impl Booster { let mut out_len = 0; let mut out = ptr::null_mut(); xgb_call!(xgboost_sys::XGBoosterGetAttrNames(self.handle, &mut out_len, &mut out))?; - - let out_ptr_slice = unsafe { slice::from_raw_parts(out, out_len as usize) }; - let out_vec = out_ptr_slice - .iter() - .map(|str_ptr| unsafe { ffi::CStr::from_ptr(*str_ptr).to_str().unwrap().to_owned() }) - .collect(); - Ok(out_vec) + if out_len > 0 { + let out_ptr_slice = unsafe { slice::from_raw_parts(out, out_len as usize) }; + let out_vec = out_ptr_slice + .iter() + .map(|str_ptr| unsafe { ffi::CStr::from_ptr(*str_ptr).to_str().unwrap().to_owned() }) + .collect(); + Ok(out_vec) + } else { + Ok(Vec::new()) + } } /// Predict results for given data. 
@@ -517,7 +520,7 @@ impl Booster { Err(err) => return Err(XGBError::new(err.to_string())), }; - let file_path = tmp_dir.path().join("fmap.txt"); + let file_path = tmp_dir.path().join("fmap.json"); let mut file: File = match File::create(&file_path) { Ok(f) => f, Err(err) => return Err(XGBError::new(err.to_string())), @@ -551,14 +554,18 @@ impl Booster { &mut out_dump_array ))?; - let out_ptr_slice = unsafe { slice::from_raw_parts(out_dump_array, out_len as usize) }; - let out_vec: Vec = out_ptr_slice - .iter() - .map(|str_ptr| unsafe { ffi::CStr::from_ptr(*str_ptr).to_str().unwrap().to_owned() }) - .collect(); + if out_len > 0 { + let out_ptr_slice = unsafe { slice::from_raw_parts(out_dump_array, out_len as usize) }; + let out_vec: Vec = out_ptr_slice + .iter() + .map(|str_ptr| unsafe { ffi::CStr::from_ptr(*str_ptr).to_str().unwrap().to_owned() }) + .collect(); - assert_eq!(out_len as usize, out_vec.len()); - Ok(out_vec.join("\n")) + assert_eq!(out_len as usize, out_vec.len()); + Ok(out_vec.join("\n")) + } else { + Ok(String::new()) + } } pub(crate) fn load_rabit_checkpoint(&self) -> XGBResult { @@ -721,7 +728,7 @@ impl fmt::Display for FeatureType { #[cfg(test)] mod tests { use super::*; - use parameters::{self, learning, tree}; + use crate::parameters::{self, learning, tree}; fn read_train_matrix() -> XGBResult { DMatrix::load(r#"{"uri": "xgboost-sys/xgboost/demo/data/agaricus.txt.train?format=libsvm"}"#) diff --git a/src/dmatrix.rs b/src/dmatrix.rs index 98cf30a..3905b55 100644 --- a/src/dmatrix.rs +++ b/src/dmatrix.rs @@ -314,7 +314,11 @@ impl DMatrix { &mut out_dptr ))?; - Ok(unsafe { slice::from_raw_parts(out_dptr as *mut c_float, out_len as usize) }) + if out_len > 0 { + Ok(unsafe { slice::from_raw_parts(out_dptr as *mut c_float, out_len as usize) }) + } else { + Err(XGBError::new( "error")) + } } fn set_float_info(&mut self, field: &str, array: &[f32]) -> XGBResult<()> { diff --git a/xgboost-sys/.cargo/config b/xgboost-sys/.cargo/config.toml similarity index 100% rename from xgboost-sys/.cargo/config rename to xgboost-sys/.cargo/config.toml diff --git a/xgboost-sys/Cargo.toml b/xgboost-sys/Cargo.toml index b4603ff..b9749af 100644 --- a/xgboost-sys/Cargo.toml +++ b/xgboost-sys/Cargo.toml @@ -8,12 +8,13 @@ license = "MIT" repository = "https://github.com/davechallis/rust-xgboost" description = "Native bindings to the xgboost library" readme = "README.md" +edition = "2021" [dependencies] libc = "0.2" [build-dependencies] -bindgen = "0.69" +bindgen = "0.71" cmake = "0.1" [features] diff --git a/xgboost-sys/build.rs b/xgboost-sys/build.rs index c6ba1a6..7fc9a9a 100644 --- a/xgboost-sys/build.rs +++ b/xgboost-sys/build.rs @@ -25,8 +25,12 @@ fn main() { dst.define("BUILD_STATIC_LIB", "ON").define("CMAKE_CXX_STANDARD", "17"); // CMake + let mut dst = Config::new(&xgb_root); + let mut dst = dst.define("BUILD_STATIC_LIB", "ON"); + #[cfg(feature = "cuda")] - dst.define("USE_CUDA", "ON") + let mut dst = dst + .define("USE_CUDA", "ON") .define("BUILD_WITH_CUDA", "ON") .define("BUILD_WITH_CUDA_CUB", "ON"); @@ -34,7 +38,8 @@ fn main() { { let path = PathBuf::from("/opt/homebrew/"); // check for m1 vs intel config if let Ok(_dir) = std::fs::read_dir(&path) { - dst.define("CMAKE_C_COMPILER", "/opt/homebrew/opt/llvm/bin/clang") + dst = dst + .define("CMAKE_C_COMPILER", "/opt/homebrew/opt/llvm/bin/clang") .define("CMAKE_CXX_COMPILER", "/opt/homebrew/opt/llvm/bin/clang++") .define("OPENMP_LIBRARIES", "/opt/homebrew/opt/llvm/lib") .define("OPENMP_INCLUDES", "/opt/homebrew/opt/llvm/include"); @@ -54,9 
+59,11 @@ fn main() { #[cfg(feature = "cuda")] let bindings = bindings.clang_arg("-I/usr/local/cuda/include"); - let bindings = bindings.generate().expect("Unable to generate bindings."); + let bindings = bindings + .generate() + .expect("Unable to generate bindings."); - let out_path = PathBuf::from(env::var("OUT_DIR").unwrap()); + let out_path = PathBuf::from(out_dir); bindings .write_to_file(out_path.join("bindings.rs")) .expect("Couldn't write bindings."); diff --git a/xgboost-sys/xgboost b/xgboost-sys/xgboost index 82d846b..5e64276 160000 --- a/xgboost-sys/xgboost +++ b/xgboost-sys/xgboost @@ -1 +1 @@ -Subproject commit 82d846bbeb83c652a0b1dff0e3519e67569c4a3d +Subproject commit 5e64276a9b95df57e6dd8f9e63347636f4e5d331 From 98604a006d8959f9c605deb122c0beb507dfd5c8 Mon Sep 17 00:00:00 2001 From: Montana Low Date: Tue, 28 Jan 2025 17:52:49 -0800 Subject: [PATCH 35/36] rabit checkpoints removed upstream --- src/booster.rs | 39 ++------------------------------------- 1 file changed, 2 insertions(+), 37 deletions(-) diff --git a/src/booster.rs b/src/booster.rs index 33b0ffc..814101a 100644 --- a/src/booster.rs +++ b/src/booster.rs @@ -148,29 +148,8 @@ impl Booster { dmats }; - let mut bst = Booster::new_with_cached_dmats(¶ms.booster_params, &cached_dmats)?; - // load distributed code checkpoint from rabit - let mut version = bst.load_rabit_checkpoint()?; - debug!("Loaded Rabit checkpoint: version={}", version); - assert!(unsafe { xgboost_sys::RabitGetWorldSize() != 1 || version == 0 }); - let start_iteration = version / 2; - for i in start_iteration..params.boost_rounds as i32 { - // distributed code: need to resume to this point - // skip first update if a recovery step - if version % 2 == 0 { - if let Some(objective_fn) = params.custom_objective_fn { - debug!("Boosting in round: {}", i); - bst.update_custom(params.dtrain, objective_fn)?; - } else { - debug!("Updating in round: {}", i); - bst.update(params.dtrain, i)?; - } - let _ = bst.save_rabit_checkpoint()?; - version += 1; - } - - assert!(unsafe { xgboost_sys::RabitGetWorldSize() == 1 || version == xgboost_sys::RabitVersionNumber() }); - + let bst = Booster::new_with_cached_dmats(¶ms.booster_params, &cached_dmats)?; + for i in 0..params.boost_rounds as i32 { if let Some(eval_sets) = params.evaluation_sets { let mut dmat_eval_results = bst.eval_set(eval_sets, i)?; @@ -203,10 +182,6 @@ impl Booster { } println!(); } - - // do checkpoint after evaluation, in case evaluation also updates booster. 
- let _ = bst.save_rabit_checkpoint(); - version += 1; } Ok(bst) @@ -568,16 +543,6 @@ impl Booster { } } - pub(crate) fn load_rabit_checkpoint(&self) -> XGBResult { - let mut version = 0; - xgb_call!(xgboost_sys::XGBoosterLoadRabitCheckpoint(self.handle, &mut version))?; - Ok(version) - } - - pub(crate) fn save_rabit_checkpoint(&self) -> XGBResult<()> { - xgb_call!(xgboost_sys::XGBoosterSaveRabitCheckpoint(self.handle)) - } - pub fn set_param(&mut self, name: &str, value: &str) -> XGBResult<()> { let name = ffi::CString::new(name).unwrap(); let value = ffi::CString::new(value).unwrap(); From 4f0f3589974b7d7f142cab11efb8b43cccb0d3cb Mon Sep 17 00:00:00 2001 From: Montana Low Date: Tue, 28 Jan 2025 17:53:22 -0800 Subject: [PATCH 36/36] fmt --- src/booster.rs | 1 - src/dmatrix.rs | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/booster.rs b/src/booster.rs index 814101a..a965b6d 100644 --- a/src/booster.rs +++ b/src/booster.rs @@ -711,7 +711,6 @@ mod tests { assert!(res.is_ok()); } - #[test] fn get_set_attr() { let mut booster = load_test_booster(); diff --git a/src/dmatrix.rs b/src/dmatrix.rs index 3905b55..4c0b959 100644 --- a/src/dmatrix.rs +++ b/src/dmatrix.rs @@ -317,7 +317,7 @@ impl DMatrix { if out_len > 0 { Ok(unsafe { slice::from_raw_parts(out_dptr as *mut c_float, out_len as usize) }) } else { - Err(XGBError::new( "error")) + Err(XGBError::new("error")) } }
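
Taken together, patches 34-36 move the crate to the 2021 edition, switch the tests to the JSON "uri" form of DMatrix::load, and drop the rabit checkpoint round-trips from Booster::train. For orientation, here is a minimal downstream usage sketch; it is not part of the patches, it assumes the crate's long-standing public API (DMatrix, Booster, parameters::TrainingParametersBuilder) is otherwise unchanged, and it reuses the agaricus LibSVM demo files that the crate's own tests load.

// Hypothetical downstream sketch, not taken from the patch series.
// Assumes the rust-xgboost public API shown in the crate README and tests:
// DMatrix::load with a JSON uri, TrainingParametersBuilder, Booster::train.
use xgboost::{parameters, Booster, DMatrix};

fn main() {
    // Patch 34 switches the tests to this JSON "uri" syntax for LibSVM input.
    let dtrain = DMatrix::load(
        r#"{"uri": "xgboost-sys/xgboost/demo/data/agaricus.txt.train?format=libsvm"}"#,
    )
    .unwrap();
    let dtest = DMatrix::load(
        r#"{"uri": "xgboost-sys/xgboost/demo/data/agaricus.txt.test?format=libsvm"}"#,
    )
    .unwrap();

    // Evaluation sets are reported once per boosting round; with the rabit
    // checkpoints removed (patch 35), Booster::train simply iterates over
    // boost_rounds.
    let eval_sets = &[(&dtrain, "train"), (&dtest, "test")];
    let params = parameters::TrainingParametersBuilder::default()
        .dtrain(&dtrain)
        .evaluation_sets(Some(eval_sets))
        .build()
        .unwrap();

    let booster = Booster::train(&params).unwrap();
    println!("predictions: {:?}", booster.predict(&dtest).unwrap());
}

To build the same code against a GPU-enabled libxgboost, a consumer would enable the crate's cuda feature (features = ["cuda"] on the xgboost dependency), which forwards to xgboost-sys/cuda as declared in Cargo.toml.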
