diff --git a/pgml-extension/Cargo.lock b/pgml-extension/Cargo.lock index c9db39e9b..1bec18628 100644 --- a/pgml-extension/Cargo.lock +++ b/pgml-extension/Cargo.lock @@ -1753,6 +1753,7 @@ dependencies = [ "heapless", "indexmap 2.1.0", "itertools 0.12.0", + "lazy_static", "lightgbm", "linfa", "linfa-linear", diff --git a/pgml-extension/Cargo.toml b/pgml-extension/Cargo.toml index 7aea7ba7c..f6514c134 100644 --- a/pgml-extension/Cargo.toml +++ b/pgml-extension/Cargo.toml @@ -49,6 +49,7 @@ serde = { version = "1.0" } serde_json = { version = "1.0", features = ["preserve_order"] } typetag = "0.2" xgboost = { git = "https://github.com/postgresml/rust-xgboost", branch = "master" } +lazy_static = "1.4.0" [dev-dependencies] pgrx-tests = "=0.11.3" diff --git a/pgml-extension/src/bindings/python/mod.rs b/pgml-extension/src/bindings/python/mod.rs index ba59bef8e..2e48052f3 100644 --- a/pgml-extension/src/bindings/python/mod.rs +++ b/pgml-extension/src/bindings/python/mod.rs @@ -6,11 +6,9 @@ use pgrx::*; use pyo3::prelude::*; use pyo3::types::PyTuple; -use crate::config::get_config; +use crate::config::PGML_VENV; use crate::create_pymodule; -static CONFIG_NAME: &str = "pgml.venv"; - create_pymodule!("/src/bindings/python/python.py"); pub fn activate_venv(venv: &str) -> Result { @@ -23,8 +21,8 @@ pub fn activate_venv(venv: &str) -> Result { } pub fn activate() -> Result { - match get_config(CONFIG_NAME) { - Some(venv) => activate_venv(&venv), + match PGML_VENV.1.get() { + Some(venv) => activate_venv(&venv.to_string_lossy()), None => Ok(false), } } diff --git a/pgml-extension/src/bindings/transformers/whitelist.rs b/pgml-extension/src/bindings/transformers/whitelist.rs index 0194180c0..ac7f3bf4d 100644 --- a/pgml-extension/src/bindings/transformers/whitelist.rs +++ b/pgml-extension/src/bindings/transformers/whitelist.rs @@ -1,13 +1,11 @@ use anyhow::{bail, Error}; +use pgrx::GucSetting; #[cfg(any(test, feature = "pg_test"))] use pgrx::{pg_schema, pg_test}; use serde_json::Value; +use std::ffi::CStr; -use crate::config::get_config; - -static CONFIG_HF_WHITELIST: &str = "pgml.huggingface_whitelist"; -static CONFIG_HF_TRUST_REMOTE_CODE_BOOL: &str = "pgml.huggingface_trust_remote_code"; -static CONFIG_HF_TRUST_WHITELIST: &str = "pgml.huggingface_trust_remote_code_whitelist"; +use crate::config::{PGML_HF_TRUST_REMOTE_CODE, PGML_HF_TRUST_WHITELIST, PGML_HF_WHITELIST}; /// Verify that the model in the task JSON is allowed based on the huggingface whitelists. pub fn verify_task(task: &Value) -> Result<(), Error> { @@ -15,33 +13,42 @@ pub fn verify_task(task: &Value) -> Result<(), Error> { Some(model) => model.to_string(), None => return Ok(()), }; - let whitelisted_models = config_csv_list(CONFIG_HF_WHITELIST); + let whitelisted_models = config_csv_list(&PGML_HF_WHITELIST.1); let model_is_allowed = whitelisted_models.is_empty() || whitelisted_models.contains(&task_model); if !model_is_allowed { - bail!("model {task_model} is not whitelisted. Consider adding to {CONFIG_HF_WHITELIST} in postgresql.conf"); + bail!( + "model {} is not whitelisted. Consider adding to {} in postgresql.conf", + task_model, + PGML_HF_WHITELIST.0 + ); } let task_trust = get_trust_remote_code(task); - let trust_remote_code = get_config(CONFIG_HF_TRUST_REMOTE_CODE_BOOL) - .map(|v| v == "true") - .unwrap_or(true); + let trust_remote_code = PGML_HF_TRUST_REMOTE_CODE.1.get(); - let trusted_models = config_csv_list(CONFIG_HF_TRUST_WHITELIST); + let trusted_models = config_csv_list(&PGML_HF_TRUST_WHITELIST.1); let model_is_trusted = trusted_models.is_empty() || trusted_models.contains(&task_model); let remote_code_allowed = trust_remote_code && model_is_trusted; if !remote_code_allowed && task_trust == Some(true) { - bail!("model {task_model} is not trusted to run remote code. Consider setting {CONFIG_HF_TRUST_REMOTE_CODE_BOOL} = 'true' or adding {task_model} to {CONFIG_HF_TRUST_WHITELIST}"); + bail!( + "model {} is not trusted to run remote code. Consider setting {} = 'true' or adding {} to {}", + task_model, + PGML_HF_TRUST_REMOTE_CODE.0, + task_model, + PGML_HF_TRUST_WHITELIST.0 + ); } Ok(()) } -fn config_csv_list(name: &str) -> Vec { - match get_config(name) { +fn config_csv_list(csv_list: &GucSetting>) -> Vec { + match csv_list.get() { Some(value) => value + .to_string_lossy() .trim_matches('"') .split(',') .filter_map(|s| if s.is_empty() { None } else { Some(s.to_string()) }) @@ -122,7 +129,7 @@ mod tests { #[pg_test] fn test_empty_whitelist() { let model = "Salesforce/xgen-7b-8k-inst"; - set_config(CONFIG_HF_WHITELIST, "").unwrap(); + set_config(PGML_HF_WHITELIST.0, "").unwrap(); let task_json = format!(json_template!(), model, false); let task: Value = serde_json::from_str(&task_json).unwrap(); assert!(verify_task(&task).is_ok()); @@ -131,12 +138,12 @@ mod tests { #[pg_test] fn test_nonempty_whitelist() { let model = "Salesforce/xgen-7b-8k-inst"; - set_config(CONFIG_HF_WHITELIST, model).unwrap(); + set_config(PGML_HF_WHITELIST.0, model).unwrap(); let task_json = format!(json_template!(), model, false); let task: Value = serde_json::from_str(&task_json).unwrap(); assert!(verify_task(&task).is_ok()); - set_config(CONFIG_HF_WHITELIST, "other_model").unwrap(); + set_config(PGML_HF_WHITELIST.0, "other_model").unwrap(); let task_json = format!(json_template!(), model, false); let task: Value = serde_json::from_str(&task_json).unwrap(); assert!(verify_task(&task).is_err()); @@ -145,8 +152,8 @@ mod tests { #[pg_test] fn test_trusted_model() { let model = "Salesforce/xgen-7b-8k-inst"; - set_config(CONFIG_HF_WHITELIST, model).unwrap(); - set_config(CONFIG_HF_TRUST_WHITELIST, model).unwrap(); + set_config(PGML_HF_WHITELIST.0, model).unwrap(); + set_config(PGML_HF_TRUST_WHITELIST.0, model).unwrap(); let task_json = format!(json_template!(), model, false); let task: Value = serde_json::from_str(&task_json).unwrap(); @@ -154,9 +161,9 @@ mod tests { let task_json = format!(json_template!(), model, true); let task: Value = serde_json::from_str(&task_json).unwrap(); - assert!(verify_task(&task).is_ok()); + assert!(verify_task(&task).is_err()); - set_config(CONFIG_HF_TRUST_REMOTE_CODE_BOOL, "true").unwrap(); + set_config(PGML_HF_TRUST_REMOTE_CODE.0, "true").unwrap(); let task_json = format!(json_template!(), model, false); let task: Value = serde_json::from_str(&task_json).unwrap(); assert!(verify_task(&task).is_ok()); @@ -169,8 +176,8 @@ mod tests { #[pg_test] fn test_untrusted_model() { let model = "Salesforce/xgen-7b-8k-inst"; - set_config(CONFIG_HF_WHITELIST, model).unwrap(); - set_config(CONFIG_HF_TRUST_WHITELIST, "other_model").unwrap(); + set_config(PGML_HF_WHITELIST.0, model).unwrap(); + set_config(PGML_HF_TRUST_WHITELIST.0, "other_model").unwrap(); let task_json = format!(json_template!(), model, false); let task: Value = serde_json::from_str(&task_json).unwrap(); @@ -180,7 +187,7 @@ mod tests { let task: Value = serde_json::from_str(&task_json).unwrap(); assert!(verify_task(&task).is_err()); - set_config(CONFIG_HF_TRUST_REMOTE_CODE_BOOL, "true").unwrap(); + set_config(PGML_HF_TRUST_REMOTE_CODE.0, "true").unwrap(); let task_json = format!(json_template!(), model, false); let task: Value = serde_json::from_str(&task_json).unwrap(); assert!(verify_task(&task).is_ok()); diff --git a/pgml-extension/src/config.rs b/pgml-extension/src/config.rs index 8f9ade29a..b1f5f43ac 100644 --- a/pgml-extension/src/config.rs +++ b/pgml-extension/src/config.rs @@ -1,16 +1,58 @@ +use lazy_static::lazy_static; +use pgrx::{GucContext, GucFlags, GucRegistry, GucSetting}; use std::ffi::CStr; #[cfg(any(test, feature = "pg_test"))] use pgrx::{pg_schema, pg_test}; -use pgrx_pg_sys::AsPgCStr; -pub fn get_config(name: &str) -> Option { - // SAFETY: name is not null because it is a Rust reference. - let ptr = unsafe { pgrx_pg_sys::GetConfigOption(name.as_pg_cstr(), true, false) }; - (!ptr.is_null()).then(move || { - // SAFETY: assuming pgrx_pg_sys is providing a valid, null terminated pointer. - unsafe { CStr::from_ptr(ptr) }.to_string_lossy().to_string() - }) +lazy_static! { + pub static ref PGML_VENV: (&'static str, GucSetting>) = + ("pgml.venv", GucSetting::>::new(None)); + pub static ref PGML_HF_WHITELIST: (&'static str, GucSetting>) = ( + "pgml.huggingface_whitelist", + GucSetting::>::new(None), + ); + pub static ref PGML_HF_TRUST_REMOTE_CODE: (&'static str, GucSetting) = + ("pgml.huggingface_trust_remote_code", GucSetting::::new(false)); + pub static ref PGML_HF_TRUST_WHITELIST: (&'static str, GucSetting>) = ( + "pgml.huggingface_trust_remote_code_whitelist", + GucSetting::>::new(None), + ); +} + +pub fn initialize_server_params() { + GucRegistry::define_string_guc( + PGML_VENV.0, + "Python's virtual environment path", + "", + &PGML_VENV.1, + GucContext::Userset, + GucFlags::default(), + ); + GucRegistry::define_string_guc( + PGML_HF_WHITELIST.0, + "Models allowed to be downloaded from huggingface", + "", + &PGML_HF_WHITELIST.1, + GucContext::Userset, + GucFlags::default(), + ); + GucRegistry::define_bool_guc( + PGML_HF_TRUST_REMOTE_CODE.0, + "Whether model can execute remote codes", + "", + &PGML_HF_TRUST_REMOTE_CODE.1, + GucContext::Userset, + GucFlags::default(), + ); + GucRegistry::define_string_guc( + PGML_HF_TRUST_WHITELIST.0, + "Models allowed to execute remote codes when pgml.hugging_face_trust_remote_code = 'on'", + "", + &PGML_HF_TRUST_WHITELIST.1, + GucContext::Userset, + GucFlags::default(), + ); } #[cfg(any(test, feature = "pg_test"))] @@ -26,17 +68,11 @@ pub fn set_config(name: &str, value: &str) -> Result<(), pgrx::spi::Error> { mod tests { use super::*; - #[pg_test] - fn read_config_max_connections() { - let name = "max_connections"; - assert_eq!(get_config(name), Some("100".into())); - } - #[pg_test] fn read_pgml_huggingface_whitelist() { let name = "pgml.huggingface_whitelist"; let value = "meta-llama/Llama-2-7b"; set_config(name, value).unwrap(); - assert_eq!(get_config(name), Some(value.into())); + assert_eq!(PGML_HF_WHITELIST.1.get().unwrap().to_string_lossy(), value); } } diff --git a/pgml-extension/src/lib.rs b/pgml-extension/src/lib.rs index 6c2884cee..4cc27322e 100644 --- a/pgml-extension/src/lib.rs +++ b/pgml-extension/src/lib.rs @@ -24,6 +24,7 @@ extension_sql_file!("../sql/schema.sql", name = "schema"); #[cfg(not(feature = "use_as_lib"))] #[pg_guard] pub extern "C" fn _PG_init() { + config::initialize_server_params(); bindings::python::activate().expect("Error setting python venv"); orm::project::init(); } pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy