From c72cb30655216db4818ec4e3e3f175b55b5b6cd7 Mon Sep 17 00:00:00 2001 From: SilasMarvin <19626586+SilasMarvin@users.noreply.github.com> Date: Wed, 24 Apr 2024 15:59:30 -0700 Subject: [PATCH 01/19] Disorganized but working c --- pgml-sdks/pgml/.gitignore | 5 + pgml-sdks/pgml/Cargo.lock | 2 +- pgml-sdks/pgml/Cargo.toml | 4 + pgml-sdks/pgml/go/Makefile | 31 ++ pgml-sdks/pgml/go/go.mod | 3 + pgml-sdks/pgml/go/pgml.go | 23 + pgml-sdks/pgml/go/test.c | 37 ++ pgml-sdks/pgml/src/builtins.rs | 5 +- pgml-sdks/pgml/src/collection.rs | 32 ++ pgml-sdks/pgml/src/languages/c.rs | 18 + pgml-sdks/pgml/src/languages/mod.rs | 3 + pgml-sdks/pgml/src/model.rs | 5 +- pgml-sdks/pgml/src/open_source_ai.rs | 17 +- pgml-sdks/pgml/src/pipeline.rs | 3 + pgml-sdks/pgml/src/query_runner.rs | 21 +- pgml-sdks/pgml/src/splitter.rs | 5 +- pgml-sdks/pgml/src/transformer_pipeline.rs | 5 +- .../rust-bridge/rust-bridge-macros/src/c.rs | 448 ++++++++++++++++++ .../rust-bridge/rust-bridge-macros/src/lib.rs | 5 + .../rust-bridge/rust-bridge-traits/src/c.rs | 157 ++++++ .../rust-bridge/rust-bridge-traits/src/lib.rs | 4 +- 21 files changed, 804 insertions(+), 29 deletions(-) create mode 100644 pgml-sdks/pgml/go/Makefile create mode 100644 pgml-sdks/pgml/go/go.mod create mode 100644 pgml-sdks/pgml/go/pgml.go create mode 100644 pgml-sdks/pgml/go/test.c create mode 100644 pgml-sdks/pgml/src/languages/c.rs create mode 100644 pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs create mode 100644 pgml-sdks/rust-bridge/rust-bridge-traits/src/c.rs diff --git a/pgml-sdks/pgml/.gitignore b/pgml-sdks/pgml/.gitignore index 2d5a692e0..e82a5d1fb 100644 --- a/pgml-sdks/pgml/.gitignore +++ b/pgml-sdks/pgml/.gitignore @@ -167,3 +167,8 @@ cython_debug/ # local scratch pad scratch.sql scratch.py + +# Some SDK specific things +expanded.rs +test +pgml.h diff --git a/pgml-sdks/pgml/Cargo.lock b/pgml-sdks/pgml/Cargo.lock index 11128b907..202436a71 100644 --- a/pgml-sdks/pgml/Cargo.lock +++ b/pgml-sdks/pgml/Cargo.lock @@ -1531,7 +1531,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "pgml" -version = "1.0.0" +version = "1.0.2" dependencies = [ "anyhow", "async-trait", diff --git a/pgml-sdks/pgml/Cargo.toml b/pgml-sdks/pgml/Cargo.toml index e78e7413a..89c2f6275 100644 --- a/pgml-sdks/pgml/Cargo.toml +++ b/pgml-sdks/pgml/Cargo.toml @@ -13,6 +13,9 @@ keywords = ["postgres", "machine learning", "vector databases", "embeddings"] name = "pgml" crate-type = ["lib", "cdylib"] +[rust-analyzer.checkOnSave] +extraArgs = ["--target-dir", "/path/to/proect/target/check"] + [dependencies] rust_bridge = {path = "../rust-bridge/rust-bridge", version = "0.1.0"} sqlx = { version = "0.7.3", features = [ "runtime-tokio-rustls", "postgres", "json", "time", "uuid"] } @@ -50,3 +53,4 @@ once_cell = "1.19.0" default = [] python = ["dep:pyo3", "dep:pyo3-asyncio"] javascript = ["dep:neon"] +c = [] diff --git a/pgml-sdks/pgml/go/Makefile b/pgml-sdks/pgml/go/Makefile new file mode 100644 index 000000000..a8d614023 --- /dev/null +++ b/pgml-sdks/pgml/go/Makefile @@ -0,0 +1,31 @@ +BINARY_NAME=pgml + +build: + cargo build --features c + cargo expand --features c > expanded.rs + cbindgen --lang C -o pgml.h expanded.rs + # GOARCH=amd64 GOOS=darwin go build -o ${BINARY_NAME}-darwin main.go + GOARCH=amd64 GOOS=linux go build -o ${BINARY_NAME}-linux pgml.go + # GOARCH=amd64 GOOS=windows go build -o ${BINARY_NAME}-windows main.go + +build_test: + cargo build --features c + cargo expand --features c > expanded.rs + cbindgen --lang C -o pgml.h expanded.rs + gcc test.c -o test -l pgml -L ./../target/debug + +test: build_test + LD_LIBRARY_PATH=./../target/debug ./test + +test_c: + gcc test.c -o test -l pgml -L ./../target/debug + LD_LIBRARY_PATH=./../target/debug ./test + +run: build + LD_LIBRARY_PATH=./../target/debug ./${BINARY_NAME}-linux + +clean: + go clean + # rm ${BINARY_NAME}-darwin + rm ${BINARY_NAME}-linux + # rm ${BINARY_NAME}-windows diff --git a/pgml-sdks/pgml/go/go.mod b/pgml-sdks/pgml/go/go.mod new file mode 100644 index 000000000..6b1511192 --- /dev/null +++ b/pgml-sdks/pgml/go/go.mod @@ -0,0 +1,3 @@ +module pgml + +go 1.22.2 diff --git a/pgml-sdks/pgml/go/pgml.go b/pgml-sdks/pgml/go/pgml.go new file mode 100644 index 000000000..e22b91dd6 --- /dev/null +++ b/pgml-sdks/pgml/go/pgml.go @@ -0,0 +1,23 @@ +package main + +/* +#cgo LDFLAGS: -l pgml -L ./../target/debug +#include "pgml.h" +*/ +import "C" + +import ( + "unsafe" +) + +type Collection struct { + collection *C.CollectionC +} + +func main() { + c_string_p := C.CString("Test CString") + defer C.free(unsafe.Pointer(c_string_p)) + collection := C.new_collection(c_string_p) + C.test_collection(collection) + defer C.free_collection(collection) +} diff --git a/pgml-sdks/pgml/go/test.c b/pgml-sdks/pgml/go/test.c new file mode 100644 index 000000000..9992fd19d --- /dev/null +++ b/pgml-sdks/pgml/go/test.c @@ -0,0 +1,37 @@ +#include + +#include "pgml.h" + +int main() { + // Create the Collection and Pipeline + CollectionC * collection = CollectionC_new("test_c", NULL); + PipelineC * pipeline = PipelineC_new("test_c", "{\"text\": {\"splitter\": {\"model\": \"recursive_character\"},\"semantic_search\": {\"model\": \"intfloat/e5-small\"}}}"); + + // Add the Pipeline to the Collection + CollectionC_add_pipeline(collection, pipeline); + + // Upsert the documents + char * documents_to_upsert[2] = {"{\"id\": \"doc1\", \"text\": \"test1\"}", "{\"id\": \"doc2\", \"text\": \"test2\"}"}; + CollectionC_upsert_documents(collection, documents_to_upsert, 2, NULL); + + // Retrieve the documents + unsigned long r_size = 0; + char** documents = CollectionC_get_documents(collection, NULL, &r_size); + + // Print the documents + printf("\n\nPrinting documents:\n"); + int i; + for (i = 0; i < r_size; i++) { + printf("Document %u -> %s\n", i, documents[i]); + } + + // Search over the documents + r_size = 0; + char** results = CollectionC_vector_search(collection, "{\"query\": {\"fields\": {\"text\": {\"query\": \"Test query!\"}}}, \"limit\": 5}", pipeline, &r_size); + printf("\n\nPrinting results:\n"); + for (i = 0; i < r_size; i++) { + printf("Result %u -> %s\n", i, results[i]); + } + + return 0; +} diff --git a/pgml-sdks/pgml/src/builtins.rs b/pgml-sdks/pgml/src/builtins.rs index 652bf0b8c..b23f4d699 100644 --- a/pgml-sdks/pgml/src/builtins.rs +++ b/pgml-sdks/pgml/src/builtins.rs @@ -3,7 +3,8 @@ use sqlx::Row; use tracing::instrument; /// Provides access to builtin database methods -#[derive(alias, Debug, Clone)] +// #[derive(alias, Debug, Clone)] +#[derive(Debug, Clone)] pub struct Builtins { database_url: Option, } @@ -13,7 +14,7 @@ use crate::{get_or_initialize_pool, query_runner::QueryRunner, types::Json}; #[cfg(feature = "python")] use crate::{query_runner::QueryRunnerPython, types::JsonPython}; -#[alias_methods(new, query, transform)] +// #[alias_methods(new, query, transform)] impl Builtins { pub fn new(database_url: Option) -> Self { Self { database_url } diff --git a/pgml-sdks/pgml/src/collection.rs b/pgml-sdks/pgml/src/collection.rs index f8107d050..ba24420ab 100644 --- a/pgml-sdks/pgml/src/collection.rs +++ b/pgml-sdks/pgml/src/collection.rs @@ -33,6 +33,9 @@ use crate::{ #[cfg(feature = "python")] use crate::{pipeline::PipelinePython, query_builder::QueryBuilderPython, types::JsonPython}; +#[cfg(feature = "c")] +use crate::{languages::c::JsonC, pipeline::PipelineC, query_builder::QueryBuilderC}; + /// Our project tasks #[derive(Debug, Clone)] pub enum ProjectTask { @@ -99,6 +102,35 @@ pub(crate) struct CollectionDatabaseData { pub project_info: ProjectInfo, } +// #[repr(C)] +// pub struct CollectionC { +// pub collection: *mut Collection, +// } + +// #[no_mangle] +// pub unsafe extern "C" fn new_collection(name: *const std::ffi::c_char) -> *mut CollectionC { +// let name = std::ffi::CStr::from_ptr(name).to_str().unwrap(); +// println!("Nice one Silas: {}", name); +// let collection = Box::into_raw(Box::new(Collection::new(name, None).unwrap())); +// Box::into_raw(Box::new(CollectionC { collection })) +// } + +// #[no_mangle] +// pub unsafe extern "C" fn free_collection(collection: *mut CollectionC) { +// if collection.is_null() { +// return; +// } +// drop(Box::from_raw(collection)); +// } + +// #[no_mangle] +// pub unsafe extern "C" fn test_collection(collection: *mut CollectionC) { +// let collection: *mut Collection = (*collection).collection; +// let collection: Collection = (*collection).clone(); +// println!("Nice one Silas x two: {}", collection.name); +// println!("test"); +// } + /// A collection of documents #[derive(alias, Debug, Clone)] pub struct Collection { diff --git a/pgml-sdks/pgml/src/languages/c.rs b/pgml-sdks/pgml/src/languages/c.rs new file mode 100644 index 000000000..3babf097b --- /dev/null +++ b/pgml-sdks/pgml/src/languages/c.rs @@ -0,0 +1,18 @@ +use crate::types::{DateTime, GeneralJsonAsyncIterator, GeneralJsonIterator, Json}; +use rust_bridge::c::CustomInto; + +pub type JsonC = std::ffi::c_char; + +unsafe impl CustomInto for *mut JsonC { + unsafe fn custom_into(self) -> Json { + let s = std::ffi::CStr::from_ptr(self).to_str().unwrap(); + serde_json::from_str::(s).unwrap().into() + } +} + +unsafe impl CustomInto<*mut JsonC> for Json { + unsafe fn custom_into(self) -> *mut JsonC { + let s = serde_json::to_string(&self).unwrap(); + std::ffi::CString::new(s).unwrap().into_raw() + } +} diff --git a/pgml-sdks/pgml/src/languages/mod.rs b/pgml-sdks/pgml/src/languages/mod.rs index dda671ec1..43340b02b 100644 --- a/pgml-sdks/pgml/src/languages/mod.rs +++ b/pgml-sdks/pgml/src/languages/mod.rs @@ -3,3 +3,6 @@ pub mod javascript; #[cfg(feature = "python")] pub mod python; + +#[cfg(feature = "c")] +pub mod c; diff --git a/pgml-sdks/pgml/src/model.rs b/pgml-sdks/pgml/src/model.rs index 432654298..a5eb75552 100644 --- a/pgml-sdks/pgml/src/model.rs +++ b/pgml-sdks/pgml/src/model.rs @@ -52,7 +52,8 @@ pub(crate) struct ModelDatabaseData { } /// A model used for embedding, inference, etc... -#[derive(alias, Debug, Clone)] +// #[derive(alias, Debug, Clone)] +#[derive(Debug, Clone)] pub struct Model { pub(crate) name: String, pub(crate) runtime: ModelRuntime, @@ -66,7 +67,7 @@ impl Default for Model { } } -#[alias_methods(new, transform)] +// #[alias_methods(new, transform)] impl Model { /// Creates a new [Model] pub fn new(name: Option, source: Option, parameters: Option) -> Self { diff --git a/pgml-sdks/pgml/src/open_source_ai.rs b/pgml-sdks/pgml/src/open_source_ai.rs index e21397a31..7687c289f 100644 --- a/pgml-sdks/pgml/src/open_source_ai.rs +++ b/pgml-sdks/pgml/src/open_source_ai.rs @@ -14,7 +14,8 @@ use crate::{ use crate::types::{GeneralJsonAsyncIteratorPython, GeneralJsonIteratorPython, JsonPython}; /// A drop in replacement for OpenAI -#[derive(alias, Debug, Clone)] +// #[derive(alias, Debug, Clone)] +#[derive(Debug, Clone)] pub struct OpenSourceAI { database_url: Option, } @@ -162,13 +163,13 @@ impl Iterator for AsyncToSyncJsonIterator { } } -#[alias_methods( - new, - chat_completions_create, - chat_completions_create_async, - chat_completions_create_stream, - chat_completions_create_stream_async -)] +// #[alias_methods( +// new, +// chat_completions_create, +// chat_completions_create_async, +// chat_completions_create_stream, +// chat_completions_create_stream_async +// )] impl OpenSourceAI { /// Creates a new [OpenSourceAI] /// diff --git a/pgml-sdks/pgml/src/pipeline.rs b/pgml-sdks/pgml/src/pipeline.rs index 02b059db3..e082e9e4b 100644 --- a/pgml-sdks/pgml/src/pipeline.rs +++ b/pgml-sdks/pgml/src/pipeline.rs @@ -19,6 +19,9 @@ use crate::{ #[cfg(feature = "python")] use crate::types::JsonPython; +#[cfg(feature = "c")] +use crate::languages::c::JsonC; + type ParsedSchema = HashMap; #[derive(Deserialize)] diff --git a/pgml-sdks/pgml/src/query_runner.rs b/pgml-sdks/pgml/src/query_runner.rs index 623a09662..e4c8df750 100644 --- a/pgml-sdks/pgml/src/query_runner.rs +++ b/pgml-sdks/pgml/src/query_runner.rs @@ -17,22 +17,23 @@ enum BindValue { Json(Json), } -#[derive(alias, Clone, Debug)] +// #[derive(alias, Clone, Debug)] +#[derive(Clone, Debug)] pub struct QueryRunner { query: String, bind_values: Vec, database_url: Option, } -#[alias_methods( - fetch_all, - execute, - bind_string, - bind_int, - bind_float, - bind_bool, - bind_json -)] +// #[alias_methods( +// fetch_all, +// execute, +// bind_string, +// bind_int, +// bind_float, +// bind_bool, +// bind_json +// )] impl QueryRunner { pub fn new(query: &str, database_url: Option) -> Self { Self { diff --git a/pgml-sdks/pgml/src/splitter.rs b/pgml-sdks/pgml/src/splitter.rs index a0847c879..091e1a21a 100644 --- a/pgml-sdks/pgml/src/splitter.rs +++ b/pgml-sdks/pgml/src/splitter.rs @@ -19,7 +19,8 @@ pub(crate) struct SplitterDatabaseData { } /// A text splitter -#[derive(alias, Debug, Clone)] +// #[derive(alias, Debug, Clone)] +#[derive(Debug, Clone)] pub struct Splitter { pub(crate) name: String, pub(crate) parameters: Json, @@ -32,7 +33,7 @@ impl Default for Splitter { } } -#[alias_methods(new)] +// #[alias_methods(new)] impl Splitter { /// Creates a new [Splitter] /// diff --git a/pgml-sdks/pgml/src/transformer_pipeline.rs b/pgml-sdks/pgml/src/transformer_pipeline.rs index 7a6141675..bd50844c2 100644 --- a/pgml-sdks/pgml/src/transformer_pipeline.rs +++ b/pgml-sdks/pgml/src/transformer_pipeline.rs @@ -4,7 +4,8 @@ use sqlx::Row; use tracing::instrument; /// Provides access to builtin database methods -#[derive(alias, Debug, Clone)] +// #[derive(alias, Debug, Clone)] +#[derive(Debug, Clone)] pub struct TransformerPipeline { task: Json, database_url: Option, @@ -16,7 +17,7 @@ use crate::{get_or_initialize_pool, types::Json}; #[cfg(feature = "python")] use crate::types::{GeneralJsonAsyncIteratorPython, JsonPython}; -#[alias_methods(new, transform, transform_stream)] +// #[alias_methods(new, transform, transform_stream)] impl TransformerPipeline { /// Creates a new [TransformerPipeline] /// diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs new file mode 100644 index 000000000..4cd6921cf --- /dev/null +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs @@ -0,0 +1,448 @@ +use proc_macro2::Ident; +use quote::{format_ident, quote, ToTokens}; +use std::{ + io::{Read, Write}, + str::FromStr, +}; +use syn::{visit::Visit, DeriveInput, ItemImpl, Type}; + +use crate::{ + common::{AttributeArgs, GetImplMethod}, + types::{OutputType, SupportedType}, +}; + +pub fn generate_c_alias(parsed: DeriveInput) -> proc_macro::TokenStream { + let name_ident = format_ident!("{}C", parsed.ident); + let wrapped_type_ident = parsed.ident; + let wrapped_type_name = wrapped_type_ident.to_string(); + + let expanded = quote! { + #[repr(C)] + #[cfg(feature = "c")] + pub struct #name_ident { + pub wrapped: *mut #wrapped_type_ident + } + + #[cfg(feature = "c")] + unsafe impl rust_bridge::c::CustomInto<*mut #name_ident> for #wrapped_type_ident { + unsafe fn custom_into(self) -> *mut #name_ident { + Box::into_raw(Box::new( + #name_ident { + wrapped: Box::into_raw(Box::new(self)) + } + )) + } + } + + #[cfg(feature = "c")] + unsafe impl rust_bridge::c::CustomInto<&'static mut #wrapped_type_ident> for *mut #name_ident { + unsafe fn custom_into(self) -> &'static mut #wrapped_type_ident { + let c = Box::leak(Box::from_raw(self)); + Box::leak(Box::from_raw(c.wrapped)) + } + } + + #[cfg(feature = "c")] + unsafe impl rust_bridge::c::CustomInto<&'static #wrapped_type_ident> for *mut #name_ident { + unsafe fn custom_into(self) -> &'static #wrapped_type_ident { + let c = Box::leak(Box::from_raw(self)); + &*Box::leak(Box::from_raw(c.wrapped)) + } + } + }; + + proc_macro::TokenStream::from(expanded) +} + +pub fn generate_c_methods( + parsed: ItemImpl, + attribute_args: &AttributeArgs, +) -> proc_macro::TokenStream { + let mut methods = Vec::new(); + + let wrapped_type_ident = match *parsed.self_ty { + Type::Path(p) => p.path.segments.first().unwrap().ident.clone(), + _ => panic!("Error getting struct ident for impl block"), + }; + let name_ident = format_ident!("{}C", wrapped_type_ident); + + for item in parsed.items { + // We only create methods for functions listed in the attribute args + match &item { + syn::ImplItem::Fn(f) => { + let method_name = f.sig.ident.to_string(); + if !attribute_args.args.contains(&method_name) { + continue; + } + } + _ => continue, + } + + // Get ImplMethod details - see: https://docs.rs/syn/latest/syn/visit/index.html + let mut method = GetImplMethod::default(); + method.visit_impl_item(&item); + if !method.exists { + continue; + } + let method_ident = method.method_ident.clone(); + + let ( + go_function_arguments, + go_arguments_prep, + mut c_function_arguments, + c_argument_prep, + rust_function_arguments, + ) = get_method_arguments(&wrapped_type_ident, &name_ident, &method); + + let method_name = format_ident!("{}_{}", name_ident, method_ident); + + let (return_part, augment_r_size) = + rust_output_to_c_output(&wrapped_type_ident, &method.output_type); + + if augment_r_size { + c_function_arguments.extend(quote! { + , r_size: *mut std::ffi::c_ulong + }) + } + + let async_part = if method.is_async { + quote! { .await } + } else { + quote! {} + }; + + let (ret_part, augment_part) = if augment_r_size { + ( + quote! { let (ret, ar_size) }, + quote! {*r_size = ar_size as std::ffi::c_ulong; }, + ) + } else { + (quote! { let ret }, quote! {}) + }; + + let rust_call_part = match &method.output_type { + crate::types::OutputType::Result(_) => { + quote! { + #ret_part = #wrapped_type_ident::#method_ident(#rust_function_arguments)#async_part.unwrap().custom_into(); + #augment_part + ret + } + } + crate::types::OutputType::Default => quote! { + #wrapped_type_ident::#method_ident(#rust_function_arguments)#async_part; + }, + crate::types::OutputType::Other(_) => quote! { + #ret_part = #wrapped_type_ident::#method_ident(#rust_function_arguments)#async_part.custom_into(); + #augment_part + ret + }, + }; + + let method = if method.is_async { + quote! { + #[cfg(feature = "c")] + #[no_mangle] + pub unsafe extern "C" fn #method_name(#c_function_arguments) #return_part { + use rust_bridge::c::CustomInto; + use rust_bridge::c::CustomIntoVec; + crate::get_or_set_runtime().block_on(async move { + #c_argument_prep + #rust_call_part + }) + } + } + } else { + quote! { + #[cfg(feature = "c")] + #[no_mangle] + pub unsafe extern "C" fn #method_name(#c_function_arguments) #return_part { + use rust_bridge::c::CustomInto; + use rust_bridge::c::CustomIntoVec; + #c_argument_prep + #rust_call_part + } + } + }; + + methods.push(method); + } + + proc_macro::TokenStream::from(quote! { + #(#methods)* + }) +} + +fn get_method_arguments( + wrapped_type_ident: &Ident, + name_ident: &Ident, + method: &GetImplMethod, +) -> ( + proc_macro2::TokenStream, + proc_macro2::TokenStream, + proc_macro2::TokenStream, + proc_macro2::TokenStream, + proc_macro2::TokenStream, +) { + let mut go_function_arguments = Vec::new(); + let mut go_arguments_prep = Vec::new(); + let mut c_function_arguments = Vec::new(); + let mut c_argument_prep = Vec::new(); + let mut rust_function_arguments = Vec::new(); + + if let Some(_receiver) = &method.receiver { + c_function_arguments.push(format!("s: *mut {name_ident}")); + c_argument_prep.push(format!( + "let s: &mut {wrapped_type_ident} = s.custom_into();" + )); + rust_function_arguments.push("s".to_string()); + } + + for (argument_name, argument_type) in &method.method_arguments { + let ( + go_function_arguments_, + go_arguments_prep_, + c_function_arguments_, + c_argument_prep_, + rust_function_arguments_, + ) = get_c_types(argument_name, argument_type); + + go_function_arguments.push(go_function_arguments_); + go_arguments_prep.push(go_arguments_prep_); + c_function_arguments.push(c_function_arguments_); + c_argument_prep.push(c_argument_prep_); + rust_function_arguments.push(rust_function_arguments_); + } + + ( + proc_macro2::TokenStream::from_str(&go_function_arguments.join("\n")).unwrap(), + proc_macro2::TokenStream::from_str(&go_arguments_prep.join("\n")).unwrap(), + proc_macro2::TokenStream::from_str(&c_function_arguments.join(",")).unwrap(), + proc_macro2::TokenStream::from_str(&c_argument_prep.join("\n")).unwrap(), + proc_macro2::TokenStream::from_str(&rust_function_arguments.join(",")).unwrap(), + ) +} + +// Need: +// - go function arguments +// - go function argument prep for calling c function +// - go conversion from c returned value - For custom types this is always a wrapper for everything else this is a primitve type +// - c function arguments +// - c function arguments prep for calling rust function +// - arguments to call rust function with +// - c conversion from rust returned value - This is done with the into trait +fn get_c_types( + argument_name: &str, + ty: &SupportedType, +) -> (String, String, String, String, String) { + let t = ty.to_language_string(&None); + let c_to_rust = format!("let {argument_name}: {t} = {argument_name}.custom_into();"); + match ty { + SupportedType::Reference(r) => { + let ( + go_function_arguments, + go_argument_prep, + c_function_arguments, + c_argument_prep, + rust_function_arguments, + ) = get_c_types(argument_name, &r.ty); + ( + "".to_string(), + "".to_string(), + c_function_arguments, + c_to_rust, + argument_name.to_string(), + ) + } + SupportedType::str | SupportedType::String => ( + "".to_string(), + "".to_string(), + format!("{argument_name}: *mut std::ffi::c_char"), + c_to_rust, + argument_name.to_string(), + ), + SupportedType::Option(r) => { + let ( + go_function_arguments, + go_argument_prep, + mut c_function_arguments, + c_argument_prep, + rust_function_arguments, + ) = get_c_types(argument_name, &r); + + ( + "".to_string(), + "".to_string(), + c_function_arguments, + c_to_rust, + argument_name.to_string(), + ) + } + SupportedType::bool => ( + "".to_string(), + "".to_string(), + "bool".to_string(), + "".to_string(), + argument_name.to_string(), + ), + SupportedType::Vec(v) => { + let ( + go_function_arguments, + go_argument_prep, + mut c_function_arguments, + mut c_argument_prep, + rust_function_arguments, + ) = get_c_types(argument_name, v); + + let mut c_function_arguments = c_function_arguments.replacen("*mut", "*mut *mut", 1); + c_function_arguments.push_str(", v_size: std::ffi::c_ulong"); + c_argument_prep = "let v_size: usize = v_size as usize;".to_string(); + let c_to_rust = + format!("{c_argument_prep}\nlet {argument_name}: {t} = {argument_name}.custom_into_vec(v_size);"); + + ( + "".to_string(), + "".to_string(), + c_function_arguments, + c_to_rust, + argument_name.to_string(), + ) + } + SupportedType::HashMap(_) => panic!("HashMap arguments not supported in c"), + SupportedType::Tuple(_) => panic!("Tuple arguments not supported in c"), + SupportedType::S => unreachable!(), + SupportedType::i64 => ( + "".to_string(), + "".to_string(), + format!("{argument_name}: std::ffi::c_longlong"), + format!("let {argument_name}: {t} = {argument_name} as {t};"), + argument_name.to_string(), + ), + SupportedType::u64 => ( + "".to_string(), + "".to_string(), + format!("{argument_name}: std::ffi::c_ulonglong"), + format!("let {argument_name}: {t} = {argument_name} as {t};"), + argument_name.to_string(), + ), + SupportedType::i32 => ( + "".to_string(), + "".to_string(), + format!("{argument_name}: std::ffi::c_long"), + format!("let {argument_name}: {t} = {argument_name} as {t};"), + argument_name.to_string(), + ), + SupportedType::f64 => ( + "".to_string(), + "".to_string(), + format!("{argument_name}: std::ffi::c_double"), + format!("let {argument_name}: {t} = {argument_name} as {t};"), + argument_name.to_string(), + ), + SupportedType::CustomType(s) => ( + "".to_string(), + "".to_string(), + format!("{argument_name}: *mut {s}C"), + c_to_rust, + argument_name.to_string(), + ), + _ => todo!(), + } +} + +// fn get_c_types(argument_name: &str, ty: &SupportedType) -> (String, Option) { +// match ty { +// SupportedType::Reference(r) => get_c_types(&r.ty), +// SupportedType::str | SupportedType::String => ("*mut std::ffi::c_char".to_string(), None), +// SupportedType::bool => ("bool".to_string(), None), +// SupportedType::Vec(v) => { +// let mut v = get_c_types(v); +// if !v.0.contains('*') { +// v.0 = format!("*mut {}", v.0); +// } +// if v.1.is_some() { +// panic!("Vec> not supported in c"); +// } +// (v.0, Some("std::ffi::c_ulong".to_string())) +// } +// SupportedType::HashMap(_) => panic!("HashMap arguments not supported in c"), +// SupportedType::Option(r) => { +// let mut t = get_c_types(r); +// if !t.0.contains('*') { +// t.0 = format!("*mut {}", t.0); +// } +// t +// } +// SupportedType::Tuple(_) => panic!("Tuple arguments not supported in c"), +// SupportedType::S => unreachable!(), +// SupportedType::i64 => ("std::ffi::c_longlong".to_string(), None), +// SupportedType::u64 => ("std::ffi::c_ulonglong".to_string(), None), +// SupportedType::i32 => ("std::ffi::c_long".to_string(), None), +// SupportedType::f64 => ("std::ffi::c_double".to_string(), None), +// SupportedType::CustomType(s) => (format!("*mut {s}"), None), +// } +// } + +fn rust_type_to_c_type( + wrapped_type_ident: &Ident, + ty: &SupportedType, +) -> Option<(proc_macro2::TokenStream, bool)> { + match ty { + // SupportedType::Reference(r) => rust_type_to_c_type(wrapped_type_ident, &r.ty), + SupportedType::str | SupportedType::String => Some((quote! {*mut std::ffi::c_char}, false)), + SupportedType::bool => Some((quote! { bool }, false)), + SupportedType::Vec(v) => { + let (ty, _) = rust_type_to_c_type(wrapped_type_ident, v).unwrap(); + Some((quote! { *mut #ty }, true)) + } + // SupportedType::HashMap(_) => panic!("HashMap arguments not supported in c"), + // SupportedType::Option(r) => { + // let mut t = get_c_types(r); + // if !t.0.contains('*') { + // t.0 = format!("*mut {}", t.0); + // } + // t + // } + SupportedType::Tuple(t) => { + if !t.is_empty() { + panic!("Tuple arguments not supported in c") + } else { + None + } + } + SupportedType::S => { + let ty = format_ident!("{wrapped_type_ident}C"); + Some((quote! { *mut #ty }, false)) + } // SupportedType::i64 => ("std::ffi::c_longlong".to_string(), None), + // SupportedType::u64 => ("std::ffi::c_ulonglong".to_string(), None), + // SupportedType::i32 => ("std::ffi::c_long".to_string(), None), + // SupportedType::f64 => ("std::ffi::c_double".to_string(), None), + SupportedType::CustomType(s) => { + let ty = format_ident!("{s}C"); + Some((quote! {*mut #ty}, false)) + } + _ => panic!("rust_type_to_c_type not implemented for {:?}", ty), + } +} + +fn rust_output_to_c_output( + wrapped_type_ident: &Ident, + output: &OutputType, +) -> (proc_macro2::TokenStream, bool) { + match output { + crate::types::OutputType::Result(r) => { + if let Some((ty, augment_r_size)) = rust_type_to_c_type(wrapped_type_ident, r) { + (quote! { -> #ty }, augment_r_size) + } else { + (quote! {}, false) + } + } + crate::types::OutputType::Default => (quote! {}, false), + crate::types::OutputType::Other(r) => { + if let Some((ty, augment_r_size)) = rust_type_to_c_type(wrapped_type_ident, r) { + (quote! { -> #ty }, augment_r_size) + } else { + (quote! {}, false) + } + } + } +} diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/lib.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/lib.rs index e6dc81c73..467fcf08f 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-macros/src/lib.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/lib.rs @@ -1,5 +1,6 @@ use syn::{parse_macro_input, DeriveInput, ItemImpl}; +mod c; mod common; mod javascript; mod python; @@ -11,9 +12,11 @@ pub fn alias(input: proc_macro::TokenStream) -> proc_macro::TokenStream { let parsed = parse_macro_input!(input as DeriveInput); let python_tokens = python::generate_python_alias(parsed.clone()); + let c_tokens = c::generate_c_alias(parsed.clone()); let javascript_tokens = javascript::generate_javascript_alias(parsed); output.extend(python_tokens); + output.extend(c_tokens); output.extend(javascript_tokens); output } @@ -29,9 +32,11 @@ pub fn alias_methods( let parsed: ItemImpl = syn::parse(input).unwrap(); let python_tokens = python::generate_python_methods(parsed.clone(), &attribute_args); + let c_tokens = c::generate_c_methods(parsed.clone(), &attribute_args); let javascript_tokens = javascript::generate_javascript_methods(parsed, &attribute_args); output.extend(python_tokens); + output.extend(c_tokens); output.extend(javascript_tokens); output } diff --git a/pgml-sdks/rust-bridge/rust-bridge-traits/src/c.rs b/pgml-sdks/rust-bridge/rust-bridge-traits/src/c.rs new file mode 100644 index 000000000..ddb7f3650 --- /dev/null +++ b/pgml-sdks/rust-bridge/rust-bridge-traits/src/c.rs @@ -0,0 +1,157 @@ +use std::collections::HashMap; + +/// Very similar to the `Into` trait, but we can implement it on foreign types. +pub unsafe trait CustomInto { + unsafe fn custom_into(self) -> T; +} + +pub unsafe trait CustomIntoVec { + unsafe fn custom_into_vec(self, size: usize) -> Vec; +} + +// unsafe impl> CustomIntoVec for *mut T2 { +unsafe impl CustomIntoVec for *mut *mut T2 +where + *mut T2: CustomInto, +{ + unsafe fn custom_into_vec(self, size: usize) -> Vec { + let mut result = vec![]; + let strings = std::slice::from_raw_parts_mut(self, size); + for s in strings { + let res = s.custom_into(); + result.push(res) + } + result + } +} + +unsafe impl<'a> CustomInto<&'a str> for *mut std::ffi::c_char { + unsafe fn custom_into(self) -> &'a str { + std::ffi::CStr::from_ptr(self).to_str().unwrap() + } +} + +unsafe impl CustomInto for *mut std::ffi::c_char { + unsafe fn custom_into(self) -> String { + std::ffi::CStr::from_ptr(self).to_str().unwrap().to_string() + } +} + +unsafe impl CustomInto<*mut std::ffi::c_char> for String { + unsafe fn custom_into(self) -> *mut std::ffi::c_char { + std::ffi::CString::new(self).unwrap().into_raw() + } +} + +unsafe impl CustomInto> for *mut T2 +where + *mut T2: CustomInto, +{ + unsafe fn custom_into(self) -> Option { + if self.is_null() { + None + } else { + Some(self.custom_into()) + } + } +} + +unsafe impl CustomInto<(*mut T1, usize)> for Vec +where + T2: CustomInto, +{ + unsafe fn custom_into(self) -> (*mut T1, usize) { + let size = self.len(); + let v: Vec = self.into_iter().map(|v| v.custom_into()).collect(); + (v.leak().as_mut_ptr(), size) + } +} + +macro_rules! gen_custom_into { + ($t1:ty) => { + unsafe impl CustomInto<$t1> for $t1 { + unsafe fn custom_into(self) -> $t1 { + self + } + } + }; // (($($T1:ident),+), ($($T2:ident),+), ($($C:tt),+)) => { + // impl<$($T1, $T2: CustomInto<$T1>),+> CustomInto<($($T1),+,)> for ($($T2),+,) { + // fn custom_into(self) -> ($($T1),+,) { + // ($(self.$C.custom_into()),+,) + // } + // } + // } +} + +gen_custom_into!(()); +gen_custom_into!(bool); + +// impl> CustomInto> for Option { +// fn custom_into(self) -> Option { +// self.map(|s| s.custom_into()) +// } +// } + +unsafe impl> CustomInto> for Vec { + unsafe fn custom_into(self) -> Vec { + self.into_iter().map(|x| x.custom_into()).collect() + } +} + +// impl, T2: CustomInto> +// CustomInto> for HashMap +// { +// fn custom_into(self) -> HashMap { +// self.into_iter() +// .map(|(k, v)| (k.custom_into(), v.custom_into())) +// .collect() +// } +// } + +// impl CustomInto<&'static str> for &str { +// fn custom_into(self) -> &'static str { +// // This is how we get around the liftime checker +// unsafe { +// let ptr = self as *const str; +// let ptr = ptr as *mut str; +// let boxed = Box::from_raw(ptr); +// Box::leak(boxed) +// } +// } +// } + +// gen_custom_into!((T1), (TT2), (0)); +// gen_custom_into!((T1, T2), (TT1, TT2), (0, 1)); +// gen_custom_into!((T1, T2, T3), (TT1, TT2, TT3), (0, 1, 2)); +// gen_custom_into!((T1, T2, T3, T4), (TT1, TT2, TT3, TT4), (0, 1, 2, 3)); +// gen_custom_into!( +// (T1, T2, T3, T4, T5), +// (TT1, TT2, TT3, TT4, TT5), +// (0, 1, 2, 3, 4) +// ); +// gen_custom_into!( +// (T1, T2, T3, T4, T5, T6), +// (TT1, TT2, TT3, TT4, TT5, TT6), +// (0, 1, 2, 3, 4, 5) +// ); + +// // There are some restrictions I cannot figure out around conflicting trait +// // implimentations so this is my solution for now +// gen_custom_into!(String); + +// gen_custom_into!(()); + +// gen_custom_into!(bool); + +// gen_custom_into!(i8); +// gen_custom_into!(i16); +// gen_custom_into!(i32); +// gen_custom_into!(i64); + +// gen_custom_into!(u8); +// gen_custom_into!(u16); +// gen_custom_into!(u32); +// gen_custom_into!(u64); + +// gen_custom_into!(f32); +// gen_custom_into!(f64); diff --git a/pgml-sdks/rust-bridge/rust-bridge-traits/src/lib.rs b/pgml-sdks/rust-bridge/rust-bridge-traits/src/lib.rs index 351c28c06..7cba7c727 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-traits/src/lib.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-traits/src/lib.rs @@ -1,3 +1,3 @@ -pub mod python; - +pub mod c; pub mod javascript; +pub mod python; From 5d276fc4f419b122850152c094ef0404f9255253 Mon Sep 17 00:00:00 2001 From: SilasMarvin <19626586+SilasMarvin@users.noreply.github.com> Date: Thu, 25 Apr 2024 15:22:29 -0700 Subject: [PATCH 02/19] Working C --- pgml-sdks/pgml/go/test.c | 8 ++ pgml-sdks/pgml/src/builtins.rs | 8 +- pgml-sdks/pgml/src/languages/c.rs | 82 +++++++++++++ pgml-sdks/pgml/src/model.rs | 8 +- pgml-sdks/pgml/src/open_source_ai.rs | 23 ++-- pgml-sdks/pgml/src/query_builder.rs | 5 +- pgml-sdks/pgml/src/query_runner.rs | 24 ++-- pgml-sdks/pgml/src/splitter.rs | 8 +- pgml-sdks/pgml/src/transformer_pipeline.rs | 8 +- .../rust-bridge/rust-bridge-macros/src/c.rs | 112 +++++++++-------- .../rust-bridge-macros/src/common.rs | 115 ++++++++++++++++-- .../rust-bridge-macros/src/javascript.rs | 5 +- .../rust-bridge-macros/src/python.rs | 4 +- .../rust-bridge/rust-bridge-traits/src/c.rs | 13 +- 14 files changed, 327 insertions(+), 96 deletions(-) diff --git a/pgml-sdks/pgml/go/test.c b/pgml-sdks/pgml/go/test.c index 9992fd19d..3fc1d053f 100644 --- a/pgml-sdks/pgml/go/test.c +++ b/pgml-sdks/pgml/go/test.c @@ -33,5 +33,13 @@ int main() { printf("Result %u -> %s\n", i, results[i]); } + // Test the TransformerPipeline + TransformerPipelineC * t_pipeline = TransformerPipelineC_new("text-generation", "TheBloke/zephyr-7B-beta-GPTQ", "{\"revision\": \"main\"}", "postgres://pg:ml@sql.cloud.postgresml.org:38042/pgml"); + GeneralJsonAsyncIteratorC * t_pipeline_iter = TransformerPipelineC_transform_stream(t_pipeline, "\"AI is going to\"", "{\"max_new_tokens\": 100}", NULL); + while (!GeneralJsonAsyncIteratorC_done(t_pipeline_iter)) { + char * res = GeneralJsonAsyncIteratorC_next(t_pipeline_iter); + printf("Token -> %s\n", res); + } + return 0; } diff --git a/pgml-sdks/pgml/src/builtins.rs b/pgml-sdks/pgml/src/builtins.rs index b23f4d699..638e63353 100644 --- a/pgml-sdks/pgml/src/builtins.rs +++ b/pgml-sdks/pgml/src/builtins.rs @@ -3,8 +3,7 @@ use sqlx::Row; use tracing::instrument; /// Provides access to builtin database methods -// #[derive(alias, Debug, Clone)] -#[derive(Debug, Clone)] +#[derive(alias, Debug, Clone)] pub struct Builtins { database_url: Option, } @@ -14,7 +13,10 @@ use crate::{get_or_initialize_pool, query_runner::QueryRunner, types::Json}; #[cfg(feature = "python")] use crate::{query_runner::QueryRunnerPython, types::JsonPython}; -// #[alias_methods(new, query, transform)] +#[cfg(feature = "c")] +use crate::{languages::c::JsonC, query_runner::QueryRunnerC}; + +#[alias_methods(new, query, transform)] impl Builtins { pub fn new(database_url: Option) -> Self { Self { database_url } diff --git a/pgml-sdks/pgml/src/languages/c.rs b/pgml-sdks/pgml/src/languages/c.rs index 3babf097b..78bafd858 100644 --- a/pgml-sdks/pgml/src/languages/c.rs +++ b/pgml-sdks/pgml/src/languages/c.rs @@ -1,11 +1,15 @@ use crate::types::{DateTime, GeneralJsonAsyncIterator, GeneralJsonIterator, Json}; +use futures::pin_mut; +use futures::stream::Stream; use rust_bridge::c::CustomInto; +use std::pin::Pin; pub type JsonC = std::ffi::c_char; unsafe impl CustomInto for *mut JsonC { unsafe fn custom_into(self) -> Json { let s = std::ffi::CStr::from_ptr(self).to_str().unwrap(); + eprintln!("\nABOU TO DECODE: {}\n", s); serde_json::from_str::(s).unwrap().into() } } @@ -16,3 +20,81 @@ unsafe impl CustomInto<*mut JsonC> for Json { std::ffi::CString::new(s).unwrap().into_raw() } } + +#[repr(C)] +pub struct GeneralJsonIteratorC { + pub wrapped: + *mut std::iter::Peekable> + Send>>, +} + +unsafe impl CustomInto<*mut GeneralJsonIteratorC> for GeneralJsonIterator { + unsafe fn custom_into(self) -> *mut GeneralJsonIteratorC { + Box::into_raw(Box::new(GeneralJsonIteratorC { + wrapped: Box::into_raw(Box::new(self.0.peekable())), + })) + } +} + +#[no_mangle] +pub unsafe extern "C" fn GeneralJsonIteratorC_done(iterator: *mut GeneralJsonIteratorC) -> bool { + let mut c = Box::leak(Box::from_raw(iterator)); + if let Some(_) = (*c.wrapped).peek() { + false + } else { + true + } +} + +#[no_mangle] +pub unsafe extern "C" fn GeneralJsonIteratorC_next( + iterator: *mut GeneralJsonIteratorC, +) -> *mut JsonC { + let c = Box::leak(Box::from_raw(iterator)); + let b = Box::leak(Box::from_raw(c.wrapped)); + (*b).next().unwrap().unwrap().custom_into() +} + +#[repr(C)] +pub struct GeneralJsonAsyncIteratorC { + pub wrapped: *mut futures::stream::Peekable< + Pin> + Send>>, + >, +} + +unsafe impl CustomInto<*mut GeneralJsonAsyncIteratorC> for GeneralJsonAsyncIterator { + unsafe fn custom_into(self) -> *mut GeneralJsonAsyncIteratorC { + use futures::stream::StreamExt; + Box::into_raw(Box::new(GeneralJsonAsyncIteratorC { + wrapped: Box::into_raw(Box::new(self.0.peekable())), + })) + } +} + +#[no_mangle] +pub unsafe extern "C" fn GeneralJsonAsyncIteratorC_done( + iterator: *mut GeneralJsonAsyncIteratorC, +) -> bool { + crate::get_or_set_runtime().block_on(async move { + use futures::stream::StreamExt; + let c = Box::leak(Box::from_raw(iterator)); + let s = Box::leak(Box::from_raw(c.wrapped)); + let mut s = Pin::new(s); + let res = s.as_mut().peek_mut().await; + if let Some(res) = res { + false + } else { + true + } + }) +} + +#[no_mangle] +pub unsafe extern "C" fn GeneralJsonAsyncIteratorC_next( + iterator: *mut GeneralJsonAsyncIteratorC, +) -> *mut JsonC { + crate::get_or_set_runtime().block_on(async move { + use futures::stream::StreamExt; + let mut c = Box::leak(Box::from_raw(iterator)); + (*c.wrapped).next().await.unwrap().unwrap().custom_into() + }) +} diff --git a/pgml-sdks/pgml/src/model.rs b/pgml-sdks/pgml/src/model.rs index a5eb75552..a361f577b 100644 --- a/pgml-sdks/pgml/src/model.rs +++ b/pgml-sdks/pgml/src/model.rs @@ -11,6 +11,9 @@ use crate::{ #[cfg(feature = "python")] use crate::types::JsonPython; +#[cfg(feature = "c")] +use crate::languages::c::JsonC; + /// A few notes on the following enums: /// - Sqlx does provide type derivation for enums, but it's not very good /// - Queries using these enums require a number of additional queries to get their oids and @@ -52,8 +55,7 @@ pub(crate) struct ModelDatabaseData { } /// A model used for embedding, inference, etc... -// #[derive(alias, Debug, Clone)] -#[derive(Debug, Clone)] +#[derive(alias, Debug, Clone)] pub struct Model { pub(crate) name: String, pub(crate) runtime: ModelRuntime, @@ -67,7 +69,7 @@ impl Default for Model { } } -// #[alias_methods(new, transform)] +#[alias_methods(new, transform)] impl Model { /// Creates a new [Model] pub fn new(name: Option, source: Option, parameters: Option) -> Self { diff --git a/pgml-sdks/pgml/src/open_source_ai.rs b/pgml-sdks/pgml/src/open_source_ai.rs index 7687c289f..5ee32bc88 100644 --- a/pgml-sdks/pgml/src/open_source_ai.rs +++ b/pgml-sdks/pgml/src/open_source_ai.rs @@ -13,9 +13,14 @@ use crate::{ #[cfg(feature = "python")] use crate::types::{GeneralJsonAsyncIteratorPython, GeneralJsonIteratorPython, JsonPython}; +#[cfg(feature = "c")] +use crate::{ + languages::c::JsonC, + languages::c::{GeneralJsonAsyncIteratorC, GeneralJsonIteratorC}, +}; + /// A drop in replacement for OpenAI -// #[derive(alias, Debug, Clone)] -#[derive(Debug, Clone)] +#[derive(alias, Debug, Clone)] pub struct OpenSourceAI { database_url: Option, } @@ -163,13 +168,13 @@ impl Iterator for AsyncToSyncJsonIterator { } } -// #[alias_methods( -// new, -// chat_completions_create, -// chat_completions_create_async, -// chat_completions_create_stream, -// chat_completions_create_stream_async -// )] +#[alias_methods( + new, + chat_completions_create, + chat_completions_create_async, + chat_completions_create_stream, + chat_completions_create_stream_async +)] impl OpenSourceAI { /// Creates a new [OpenSourceAI] /// diff --git a/pgml-sdks/pgml/src/query_builder.rs b/pgml-sdks/pgml/src/query_builder.rs index 4250f9db1..ed35c08c3 100644 --- a/pgml-sdks/pgml/src/query_builder.rs +++ b/pgml-sdks/pgml/src/query_builder.rs @@ -12,6 +12,9 @@ use crate::{pipeline::Pipeline, types::Json, Collection}; #[cfg(feature = "python")] use crate::{pipeline::PipelinePython, types::JsonPython}; +#[cfg(feature = "c")] +use crate::{languages::c::JsonC, pipeline::PipelineC}; + #[derive(alias, Clone, Debug)] pub struct QueryBuilder { collection: Collection, @@ -19,7 +22,7 @@ pub struct QueryBuilder { pipeline: Option, } -#[alias_methods(limit, filter, vector_recall, to_full_string, fetch_all)] +#[alias_methods(limit, filter, vector_recall, to_full_string, fetch_all(skip = "C"))] impl QueryBuilder { pub fn new(collection: Collection) -> Self { let query = json!({ diff --git a/pgml-sdks/pgml/src/query_runner.rs b/pgml-sdks/pgml/src/query_runner.rs index e4c8df750..cb5ba77cd 100644 --- a/pgml-sdks/pgml/src/query_runner.rs +++ b/pgml-sdks/pgml/src/query_runner.rs @@ -8,6 +8,9 @@ use crate::{get_or_initialize_pool, types::Json}; #[cfg(feature = "python")] use crate::types::JsonPython; +#[cfg(feature = "c")] +use crate::languages::c::JsonC; + #[derive(Clone, Debug)] enum BindValue { String(String), @@ -17,23 +20,22 @@ enum BindValue { Json(Json), } -// #[derive(alias, Clone, Debug)] -#[derive(Clone, Debug)] +#[derive(alias, Clone, Debug)] pub struct QueryRunner { query: String, bind_values: Vec, database_url: Option, } -// #[alias_methods( -// fetch_all, -// execute, -// bind_string, -// bind_int, -// bind_float, -// bind_bool, -// bind_json -// )] +#[alias_methods( + fetch_all, + execute, + bind_string, + bind_int, + bind_float, + bind_bool, + bind_json +)] impl QueryRunner { pub fn new(query: &str, database_url: Option) -> Self { Self { diff --git a/pgml-sdks/pgml/src/splitter.rs b/pgml-sdks/pgml/src/splitter.rs index 091e1a21a..b7dd6c74d 100644 --- a/pgml-sdks/pgml/src/splitter.rs +++ b/pgml-sdks/pgml/src/splitter.rs @@ -11,6 +11,9 @@ use crate::{ #[cfg(feature = "python")] use crate::types::JsonPython; +#[cfg(feature = "c")] +use crate::languages::c::JsonC; + #[allow(dead_code)] #[derive(Debug, Clone)] pub(crate) struct SplitterDatabaseData { @@ -19,8 +22,7 @@ pub(crate) struct SplitterDatabaseData { } /// A text splitter -// #[derive(alias, Debug, Clone)] -#[derive(Debug, Clone)] +#[derive(alias, Debug, Clone)] pub struct Splitter { pub(crate) name: String, pub(crate) parameters: Json, @@ -33,7 +35,7 @@ impl Default for Splitter { } } -// #[alias_methods(new)] +#[alias_methods(new)] impl Splitter { /// Creates a new [Splitter] /// diff --git a/pgml-sdks/pgml/src/transformer_pipeline.rs b/pgml-sdks/pgml/src/transformer_pipeline.rs index bd50844c2..7210aa6e2 100644 --- a/pgml-sdks/pgml/src/transformer_pipeline.rs +++ b/pgml-sdks/pgml/src/transformer_pipeline.rs @@ -4,8 +4,7 @@ use sqlx::Row; use tracing::instrument; /// Provides access to builtin database methods -// #[derive(alias, Debug, Clone)] -#[derive(Debug, Clone)] +#[derive(alias, Debug, Clone)] pub struct TransformerPipeline { task: Json, database_url: Option, @@ -17,7 +16,10 @@ use crate::{get_or_initialize_pool, types::Json}; #[cfg(feature = "python")] use crate::types::{GeneralJsonAsyncIteratorPython, JsonPython}; -// #[alias_methods(new, transform, transform_stream)] +#[cfg(feature = "c")] +use crate::{languages::c::GeneralJsonAsyncIteratorC, languages::c::JsonC}; + +#[alias_methods(new, transform, transform_stream)] impl TransformerPipeline { /// Creates a new [TransformerPipeline] /// diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs index 4cd6921cf..d9eb18913 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs @@ -7,14 +7,13 @@ use std::{ use syn::{visit::Visit, DeriveInput, ItemImpl, Type}; use crate::{ - common::{AttributeArgs, GetImplMethod}, + common::{AttributeArgs, GetImplMethod, SupportedLanguage}, types::{OutputType, SupportedType}, }; pub fn generate_c_alias(parsed: DeriveInput) -> proc_macro::TokenStream { let name_ident = format_ident!("{}C", parsed.ident); let wrapped_type_ident = parsed.ident; - let wrapped_type_name = wrapped_type_ident.to_string(); let expanded = quote! { #[repr(C)] @@ -34,6 +33,15 @@ pub fn generate_c_alias(parsed: DeriveInput) -> proc_macro::TokenStream { } } + #[cfg(feature = "c")] + unsafe impl rust_bridge::c::CustomInto<#wrapped_type_ident> for *mut #name_ident { + unsafe fn custom_into(self) -> #wrapped_type_ident { + let c = Box::leak(Box::from_raw(self)); + let s = Box::leak(Box::from_raw(c.wrapped)); + s.clone() + } + } + #[cfg(feature = "c")] unsafe impl rust_bridge::c::CustomInto<&'static mut #wrapped_type_ident> for *mut #name_ident { unsafe fn custom_into(self) -> &'static mut #wrapped_type_ident { @@ -71,7 +79,7 @@ pub fn generate_c_methods( match &item { syn::ImplItem::Fn(f) => { let method_name = f.sig.ident.to_string(); - if !attribute_args.args.contains(&method_name) { + if !attribute_args.should_alias_method(&method_name, SupportedLanguage::C) { continue; } } @@ -164,6 +172,8 @@ pub fn generate_c_methods( } }; + eprintln!("\n\n{}\n\n", method); + methods.push(method); } @@ -189,22 +199,35 @@ fn get_method_arguments( let mut c_argument_prep = Vec::new(); let mut rust_function_arguments = Vec::new(); - if let Some(_receiver) = &method.receiver { + if let Some(receiver) = &method.receiver { c_function_arguments.push(format!("s: *mut {name_ident}")); - c_argument_prep.push(format!( - "let s: &mut {wrapped_type_ident} = s.custom_into();" - )); + if receiver.to_string().contains('&') { + c_argument_prep.push(format!( + "let s: &mut {wrapped_type_ident} = s.custom_into();" + )); + } else { + c_argument_prep.push(format!("let s: {wrapped_type_ident} = s.custom_into();")); + } rust_function_arguments.push("s".to_string()); } for (argument_name, argument_type) in &method.method_arguments { + let argument_name_without_mut = argument_name.replacen("mut", "", 1); let ( go_function_arguments_, go_arguments_prep_, c_function_arguments_, + c_function_argument_types, c_argument_prep_, rust_function_arguments_, - ) = get_c_types(argument_name, argument_type); + ) = get_c_types(&argument_name_without_mut, argument_type); + + let c_function_arguments_ = c_function_arguments_ + .into_iter() + .zip(c_function_argument_types) + .map(|(argument_name, argument_type)| format!("{argument_name}: {argument_type}")) + .collect::>() + .join(","); go_function_arguments.push(go_function_arguments_); go_arguments_prep.push(go_arguments_prep_); @@ -233,7 +256,7 @@ fn get_method_arguments( fn get_c_types( argument_name: &str, ty: &SupportedType, -) -> (String, String, String, String, String) { +) -> (String, String, Vec, Vec, String, String) { let t = ty.to_language_string(&None); let c_to_rust = format!("let {argument_name}: {t} = {argument_name}.custom_into();"); match ty { @@ -242,6 +265,7 @@ fn get_c_types( go_function_arguments, go_argument_prep, c_function_arguments, + c_function_argument_types, c_argument_prep, rust_function_arguments, ) = get_c_types(argument_name, &r.ty); @@ -249,6 +273,7 @@ fn get_c_types( "".to_string(), "".to_string(), c_function_arguments, + c_function_argument_types, c_to_rust, argument_name.to_string(), ) @@ -256,7 +281,8 @@ fn get_c_types( SupportedType::str | SupportedType::String => ( "".to_string(), "".to_string(), - format!("{argument_name}: *mut std::ffi::c_char"), + vec![format!("{argument_name}")], + vec!["*mut std::ffi::c_char".to_string()], c_to_rust, argument_name.to_string(), ), @@ -265,14 +291,21 @@ fn get_c_types( go_function_arguments, go_argument_prep, mut c_function_arguments, + mut c_function_argument_types, c_argument_prep, rust_function_arguments, ) = get_c_types(argument_name, &r); + let v = c_function_argument_types.last_mut().unwrap(); + if !v.starts_with('*') { + *v = format!("*mut {v}"); + } + ( "".to_string(), "".to_string(), c_function_arguments, + c_function_argument_types, c_to_rust, argument_name.to_string(), ) @@ -280,7 +313,8 @@ fn get_c_types( SupportedType::bool => ( "".to_string(), "".to_string(), - "bool".to_string(), + vec![format!("{argument_name}")], + vec!["bool".to_string()], "".to_string(), argument_name.to_string(), ), @@ -289,12 +323,15 @@ fn get_c_types( go_function_arguments, go_argument_prep, mut c_function_arguments, + mut c_function_argument_types, mut c_argument_prep, rust_function_arguments, ) = get_c_types(argument_name, v); - let mut c_function_arguments = c_function_arguments.replacen("*mut", "*mut *mut", 1); - c_function_arguments.push_str(", v_size: std::ffi::c_ulong"); + let v = c_function_argument_types.last_mut().unwrap(); + *v = v.replacen("*mut", "*mut *mut", 1); + c_function_arguments.push("v_size".to_string()); + c_function_argument_types.push("std::ffi::c_ulong".to_string()); c_argument_prep = "let v_size: usize = v_size as usize;".to_string(); let c_to_rust = format!("{c_argument_prep}\nlet {argument_name}: {t} = {argument_name}.custom_into_vec(v_size);"); @@ -303,6 +340,7 @@ fn get_c_types( "".to_string(), "".to_string(), c_function_arguments, + c_function_argument_types, c_to_rust, argument_name.to_string(), ) @@ -313,35 +351,40 @@ fn get_c_types( SupportedType::i64 => ( "".to_string(), "".to_string(), - format!("{argument_name}: std::ffi::c_longlong"), + vec![format!("{argument_name}")], + vec!["std::ffi::c_long".to_string()], format!("let {argument_name}: {t} = {argument_name} as {t};"), argument_name.to_string(), ), SupportedType::u64 => ( "".to_string(), "".to_string(), - format!("{argument_name}: std::ffi::c_ulonglong"), + vec![format!("{argument_name}")], + vec!["std::ffi::c_ulong".to_string()], format!("let {argument_name}: {t} = {argument_name} as {t};"), argument_name.to_string(), ), SupportedType::i32 => ( "".to_string(), "".to_string(), - format!("{argument_name}: std::ffi::c_long"), + vec![format!("{argument_name}")], + vec!["std::ffi::c_int".to_string()], format!("let {argument_name}: {t} = {argument_name} as {t};"), argument_name.to_string(), ), SupportedType::f64 => ( "".to_string(), "".to_string(), - format!("{argument_name}: std::ffi::c_double"), + vec![format!("{argument_name}")], + vec!["std::ffi::c_double".to_string()], format!("let {argument_name}: {t} = {argument_name} as {t};"), argument_name.to_string(), ), SupportedType::CustomType(s) => ( "".to_string(), "".to_string(), - format!("{argument_name}: *mut {s}C"), + vec![format!("{argument_name}")], + vec![format!("*mut {s}C")], c_to_rust, argument_name.to_string(), ), @@ -349,39 +392,6 @@ fn get_c_types( } } -// fn get_c_types(argument_name: &str, ty: &SupportedType) -> (String, Option) { -// match ty { -// SupportedType::Reference(r) => get_c_types(&r.ty), -// SupportedType::str | SupportedType::String => ("*mut std::ffi::c_char".to_string(), None), -// SupportedType::bool => ("bool".to_string(), None), -// SupportedType::Vec(v) => { -// let mut v = get_c_types(v); -// if !v.0.contains('*') { -// v.0 = format!("*mut {}", v.0); -// } -// if v.1.is_some() { -// panic!("Vec> not supported in c"); -// } -// (v.0, Some("std::ffi::c_ulong".to_string())) -// } -// SupportedType::HashMap(_) => panic!("HashMap arguments not supported in c"), -// SupportedType::Option(r) => { -// let mut t = get_c_types(r); -// if !t.0.contains('*') { -// t.0 = format!("*mut {}", t.0); -// } -// t -// } -// SupportedType::Tuple(_) => panic!("Tuple arguments not supported in c"), -// SupportedType::S => unreachable!(), -// SupportedType::i64 => ("std::ffi::c_longlong".to_string(), None), -// SupportedType::u64 => ("std::ffi::c_ulonglong".to_string(), None), -// SupportedType::i32 => ("std::ffi::c_long".to_string(), None), -// SupportedType::f64 => ("std::ffi::c_double".to_string(), None), -// SupportedType::CustomType(s) => (format!("*mut {s}"), None), -// } -// } - fn rust_type_to_c_type( wrapped_type_ident: &Ident, ty: &SupportedType, diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/common.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/common.rs index f17b4b63a..17d875fbe 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-macros/src/common.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/common.rs @@ -1,30 +1,131 @@ -use proc_macro2::Ident; +use proc_macro2::{Group, Ident}; use quote::{format_ident, ToTokens}; use syn::{ - parse::Parser, + parenthesized, + parse::{Parse, Parser}, punctuated::Punctuated, + token, visit::{self, Visit}, - ImplItemFn, ReturnType, Token, Visibility, + Expr, ExprAssign, ImplItemFn, Lit, ReturnType, Token, Visibility, }; use crate::types::{GetOutputType, GetSupportedType, OutputType, SupportedType}; +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum SupportedLanguage { + C, + Python, + JavaScript, +} + +impl From<&str> for SupportedLanguage { + fn from(value: &str) -> Self { + match value { + "C" => SupportedLanguage::C, + "Python" => SupportedLanguage::Python, + "JavaScript" => SupportedLanguage::JavaScript, + _ => panic!("Cannot convert {value} to SupportedLanguage"), + } + } +} + pub struct AttributeArgs { - pub args: Vec, + pub args: Vec, +} + +#[derive(Debug, Clone)] +struct Item { + method: String, + language_exceptions: Vec, +} + +#[derive(Debug)] +enum AdditionalAttribute { + Skip(SupportedLanguage), +} + +impl From<&ExprAssign> for AdditionalAttribute { + fn from(value: &ExprAssign) -> Self { + let a_ty = match &*value.left { + Expr::Path(p) => p.into_token_stream().to_string(), + _ => panic!( + r#"Getting left value - Expected additional attributes to look something like: #[alias_methods(new(skip = "c"))]"# + ), + }; + match a_ty.as_str() { + "skip" => { + let skip_method = match &*value.right { + Expr::Lit(l) => match &l.lit { + Lit::Str(l) => l.value().as_str().into(), + _ => { + panic!( + r#"Getting Lit value - Expected additional attributes to look something like: #[alias_methods(new(skip = "c"))]"# + ) + } + }, + _ => panic!( + r#"Getting Lit - Expected additional attributes to look something like: #[alias_methods(new(skip = "c"))]"# + ), + }; + AdditionalAttribute::Skip(skip_method) + } + _ => panic!("Currently only skip additional attributes are supported"), + } + } +} + +impl Parse for Item { + fn parse(input: syn::parse::ParseStream) -> syn::Result { + let method: Ident = input.parse()?; + let lookahead = input.lookahead1(); + if !lookahead.peek(token::Paren) { + Ok(Self { + method: method.to_string(), + language_exceptions: Vec::new(), + }) + } else { + let group: Group = input.parse()?; + let group_parser = Punctuated::::parse_terminated; + let parsed_group = group_parser + .parse(group.stream().into()) + .expect("Error parsing attributes for custom_methods macro"); + let a_atts: Vec = parsed_group + .into_pairs() + .map(|p| p.value().into()) + .collect(); + // Update this part as needed + let mut language_exceptions = Vec::new(); + for att in a_atts { + match att { + AdditionalAttribute::Skip(a) => language_exceptions.push(a), + } + } + Ok(Self { + method: method.to_string(), + language_exceptions, + }) + } + } } impl AttributeArgs { pub fn new(attributes: proc_macro::TokenStream) -> Self { - let attribute_parser = Punctuated::::parse_terminated; + let attribute_parser = Punctuated::::parse_terminated; let parsed_attributes = attribute_parser .parse(attributes) .expect("Error parsing attributes for custom_methods macro"); - let args: Vec = parsed_attributes + let args: Vec = parsed_attributes .into_pairs() - .map(|p| p.value().to_string()) + .map(|p| p.value().clone()) .collect(); Self { args } } + + pub fn should_alias_method(&self, method_name: &str, language: SupportedLanguage) -> bool { + self.args + .iter() + .any(|item| item.method == method_name && !item.language_exceptions.contains(&language)) + } } #[derive(Debug)] diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/javascript.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/javascript.rs index 6aa5cf667..76ccea7c6 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-macros/src/javascript.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/javascript.rs @@ -3,7 +3,7 @@ use std::fs::OpenOptions; use std::io::{Read, Write}; use syn::{visit::Visit, DeriveInput, ItemImpl, Type}; -use crate::common::{AttributeArgs, GetImplMethod}; +use crate::common::{AttributeArgs, GetImplMethod, SupportedLanguage}; use crate::types::{OutputType, SupportedType}; pub fn generate_javascript_alias(parsed: DeriveInput) -> proc_macro::TokenStream { @@ -112,7 +112,8 @@ pub fn generate_javascript_methods( match &item { syn::ImplItem::Fn(f) => { let method_name = f.sig.ident.to_string(); - if !attribute_args.args.contains(&method_name) { + if !attribute_args.should_alias_method(&method_name, SupportedLanguage::JavaScript) + { continue; } } diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/python.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/python.rs index a453bf14f..87d1c8c4f 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-macros/src/python.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/python.rs @@ -3,7 +3,7 @@ use std::fs::OpenOptions; use std::io::{Read, Write}; use syn::{visit::Visit, DeriveInput, ItemImpl, Type}; -use crate::common::{AttributeArgs, GetImplMethod}; +use crate::common::{AttributeArgs, GetImplMethod, SupportedLanguage}; use crate::types::{OutputType, SupportedType}; const STUB_TOP: &str = r#" @@ -192,7 +192,7 @@ pub fn generate_python_methods( match &item { syn::ImplItem::Fn(f) => { let method_name = f.sig.ident.to_string(); - if !attribute_args.args.contains(&method_name) { + if !attribute_args.should_alias_method(&method_name, SupportedLanguage::Python) { continue; } } diff --git a/pgml-sdks/rust-bridge/rust-bridge-traits/src/c.rs b/pgml-sdks/rust-bridge/rust-bridge-traits/src/c.rs index ddb7f3650..c74edf4bd 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-traits/src/c.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-traits/src/c.rs @@ -9,7 +9,6 @@ pub unsafe trait CustomIntoVec { unsafe fn custom_into_vec(self, size: usize) -> Vec; } -// unsafe impl> CustomIntoVec for *mut T2 { unsafe impl CustomIntoVec for *mut *mut T2 where *mut T2: CustomInto, @@ -43,6 +42,18 @@ unsafe impl CustomInto<*mut std::ffi::c_char> for String { } } +unsafe impl CustomInto for *mut std::ffi::c_int { + unsafe fn custom_into(self) -> i32 { + *self + } +} + +unsafe impl CustomInto for *mut std::ffi::c_double { + unsafe fn custom_into(self) -> f64 { + *self + } +} + unsafe impl CustomInto> for *mut T2 where *mut T2: CustomInto, From f252d77f8fbde74011969ec8d033da9a3486d4a2 Mon Sep 17 00:00:00 2001 From: Silas Marvin <19626586+SilasMarvin@users.noreply.github.com> Date: Sat, 4 May 2024 10:09:36 -0700 Subject: [PATCH 03/19] Moved to c --- pgml-sdks/pgml/{go => c}/Makefile | 0 pgml-sdks/pgml/{ => c}/go/go.mod | 0 pgml-sdks/pgml/{ => c}/go/pgml.go | 0 pgml-sdks/pgml/{go => c}/test.c | 4 +- pgml-sdks/pgml/c/zig/build.zig | 78 +++++++++++++++++++++++++++++++ pgml-sdks/pgml/c/zig/src/main.zig | 37 +++++++++++++++ pgml-sdks/pgml/src/collection.rs | 1 + pgml-sdks/pgml/src/languages/c.rs | 1 - 8 files changed, 118 insertions(+), 3 deletions(-) rename pgml-sdks/pgml/{go => c}/Makefile (100%) rename pgml-sdks/pgml/{ => c}/go/go.mod (100%) rename pgml-sdks/pgml/{ => c}/go/pgml.go (100%) rename pgml-sdks/pgml/{go => c}/test.c (96%) create mode 100644 pgml-sdks/pgml/c/zig/build.zig create mode 100644 pgml-sdks/pgml/c/zig/src/main.zig diff --git a/pgml-sdks/pgml/go/Makefile b/pgml-sdks/pgml/c/Makefile similarity index 100% rename from pgml-sdks/pgml/go/Makefile rename to pgml-sdks/pgml/c/Makefile diff --git a/pgml-sdks/pgml/go/go.mod b/pgml-sdks/pgml/c/go/go.mod similarity index 100% rename from pgml-sdks/pgml/go/go.mod rename to pgml-sdks/pgml/c/go/go.mod diff --git a/pgml-sdks/pgml/go/pgml.go b/pgml-sdks/pgml/c/go/pgml.go similarity index 100% rename from pgml-sdks/pgml/go/pgml.go rename to pgml-sdks/pgml/c/go/pgml.go diff --git a/pgml-sdks/pgml/go/test.c b/pgml-sdks/pgml/c/test.c similarity index 96% rename from pgml-sdks/pgml/go/test.c rename to pgml-sdks/pgml/c/test.c index 3fc1d053f..a7538e705 100644 --- a/pgml-sdks/pgml/go/test.c +++ b/pgml-sdks/pgml/c/test.c @@ -21,7 +21,7 @@ int main() { // Print the documents printf("\n\nPrinting documents:\n"); int i; - for (i = 0; i < r_size; i++) { + for (i = 0; i < r_size; ++i) { printf("Document %u -> %s\n", i, documents[i]); } @@ -29,7 +29,7 @@ int main() { r_size = 0; char** results = CollectionC_vector_search(collection, "{\"query\": {\"fields\": {\"text\": {\"query\": \"Test query!\"}}}, \"limit\": 5}", pipeline, &r_size); printf("\n\nPrinting results:\n"); - for (i = 0; i < r_size; i++) { + for (i = 0; i < r_size; ++i) { printf("Result %u -> %s\n", i, results[i]); } diff --git a/pgml-sdks/pgml/c/zig/build.zig b/pgml-sdks/pgml/c/zig/build.zig new file mode 100644 index 000000000..300954738 --- /dev/null +++ b/pgml-sdks/pgml/c/zig/build.zig @@ -0,0 +1,78 @@ +const std = @import("std"); + +// Although this function looks imperative, note that its job is to +// declaratively construct a build graph that will be executed by an external +// runner. +pub fn build(b: *std.Build) void { + // Standard target options allows the person running `zig build` to choose + // what target to build for. Here we do not override the defaults, which + // means any target is allowed, and the default is native. Other options + // for restricting supported target set are available. + const target = b.standardTargetOptions(.{}); + + // Standard optimization options allow the person running `zig build` to select + // between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall. Here we do not + // set a preferred release mode, allowing the user to decide how to optimize. + const optimize = b.standardOptimizeOption(.{}); + + const exe = b.addExecutable(.{ + .name = "zig", + // In this case the main source file is merely a path, however, in more + // complicated build scripts, this could be a generated file. + .root_source_file = .{ .path = "src/main.zig" }, + .target = target, + .optimize = optimize, + }); + + // Need to link our Rust pgml library + exe.addLibraryPath(.{ .path = "./../../target/debug" }); + exe.linkSystemLibrary("pgml"); + + // This declares intent for the executable to be installed into the + // standard location when the user invokes the "install" step (the default + // step when running `zig build`). + b.installArtifact(exe); + + // This *creates* a Run step in the build graph, to be executed when another + // step is evaluated that depends on it. The next line below will establish + // such a dependency. + const run_cmd = b.addRunArtifact(exe); + + // By making the run step depend on the install step, it will be run from the + // installation directory rather than directly from within the cache directory. + // This is not necessary, however, if the application depends on other installed + // files, this ensures they will be present and in the expected location. + run_cmd.step.dependOn(b.getInstallStep()); + + // This allows the user to pass arguments to the application in the build + // command itself, like this: `zig build run -- arg1 arg2 etc` + if (b.args) |args| { + run_cmd.addArgs(args); + } + + // This creates a build step. It will be visible in the `zig build --help` menu, + // and can be selected like this: `zig build run` + // This will evaluate the `run` step rather than the default, which is "install". + const run_step = b.step("run", "Run the app"); + run_step.dependOn(&run_cmd.step); + + // Creates a step for unit testing. This only builds the test executable + // but does not run it. + const unit_tests = b.addTest(.{ + .root_source_file = .{ .path = "src/main.zig" }, + .target = target, + .optimize = optimize, + }); + + // Need to link our Rust pgml library + exe.addLibraryPath(.{ .path = "./../../target/debug" }); + unit_tests.linkSystemLibrary("pgml"); + + const run_unit_tests = b.addRunArtifact(unit_tests); + + // Similar to creating the run step earlier, this exposes a `test` step to + // the `zig build --help` menu, providing a way for the user to request + // running the unit tests. + const test_step = b.step("test", "Run unit tests"); + test_step.dependOn(&run_unit_tests.step); +} diff --git a/pgml-sdks/pgml/c/zig/src/main.zig b/pgml-sdks/pgml/c/zig/src/main.zig new file mode 100644 index 000000000..af806dda1 --- /dev/null +++ b/pgml-sdks/pgml/c/zig/src/main.zig @@ -0,0 +1,37 @@ +const pgml = @cImport({ + // See https://github.com/ziglang/zig/issues/515 + // @cDefine("_NO_CRT_STDIO_INLINE", "1"); + // @cInclude("./../pgml.h"); + @cInclude("./../pgml.h"); +}); + +pub fn main() void { + // Create the Collection and Pipeline + var collection: *pgml.CollectionC = pgml.CollectionC_new(@constCast("test_c"), null); + var pipeline: *pgml.PipelineC = pgml.PipelineC_new(@constCast("test_c"), @constCast("{\"text\": {\"splitter\": {\"model\": \"recursive_character\"},\"semantic_search\": {\"model\": \"intfloat/e5-small\"}}}")); + + // Add the Pipeline to the Collection + pgml.CollectionC_add_pipeline(collection, pipeline); + + // Upsert the documents + // const documents_to_upsert: [2][]const u8 = .{ "{\"id\": \"doc1\", \"text\": \"test1\"}", "{\"id\": \"doc2\", \"text\": \"test2\"}" }; + // const c_documents_to_upsert: [*c][*c]pgml.JsonC = @as([*c][*c]pgml.JsonC, @ptrCast(@constCast(documents_to_upsert[0..2].ptr))); + // pgml.CollectionC_upsert_documents(collection, c_documents_to_upsert, 2, null); +} + +// test "simple test" { +// // Create the Collection and Pipeline +// var collection: *pgml.CollectionC = pgml.CollectionC_new(@constCast("test_c"), null); +// var pipeline: *pgml.PipelineC = pgml.PipelineC_new(@constCast("test_c"), @constCast("{\"text\": {\"splitter\": {\"model\": \"recursive_character\"},\"semantic_search\": {\"model\": \"intfloat/e5-small\"}}}")); + +// // Add the Pipeline to the Collection +// pgml.CollectionC_add_pipeline(collection, pipeline); + +// // Upsert the documents +// // char * documents_to_upsert[2] = {"{\"id\": \"doc1\", \"text\": \"test1\"}", "{\"id\": \"doc2\", \"text\": \"test2\"}"}; +// // CollectionC_upsert_documents(collection, documents_to_upsert, 2, NULL); + +// // // Retrieve the documents +// // unsigned long r_size = 0; +// // char** documents = CollectionC_get_documents(collection, NULL, &r_size); +// } diff --git a/pgml-sdks/pgml/src/collection.rs b/pgml-sdks/pgml/src/collection.rs index ba24420ab..0209f1c40 100644 --- a/pgml-sdks/pgml/src/collection.rs +++ b/pgml-sdks/pgml/src/collection.rs @@ -518,6 +518,7 @@ impl Collection { documents: Vec, args: Option, ) -> anyhow::Result<()> { + eprintln!("IN THE UPSERT DOCUMENTS FUNCTION"); // The flow for this function // 1. Create the collection if it does not exist // 2. Get all pipelines where ACTIVE = TRUE diff --git a/pgml-sdks/pgml/src/languages/c.rs b/pgml-sdks/pgml/src/languages/c.rs index 78bafd858..a9c42cf5b 100644 --- a/pgml-sdks/pgml/src/languages/c.rs +++ b/pgml-sdks/pgml/src/languages/c.rs @@ -9,7 +9,6 @@ pub type JsonC = std::ffi::c_char; unsafe impl CustomInto for *mut JsonC { unsafe fn custom_into(self) -> Json { let s = std::ffi::CStr::from_ptr(self).to_str().unwrap(); - eprintln!("\nABOU TO DECODE: {}\n", s); serde_json::from_str::(s).unwrap().into() } } From a67b104218605db78684ca96d80d0f304bf48ccd Mon Sep 17 00:00:00 2001 From: Kevin Zimmerman <4733573+kczimm@users.noreply.github.com> Date: Tue, 21 May 2024 13:35:07 -0500 Subject: [PATCH 04/19] remove #[repr(C)] --- pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs index d9eb18913..f823b9432 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs @@ -16,7 +16,6 @@ pub fn generate_c_alias(parsed: DeriveInput) -> proc_macro::TokenStream { let wrapped_type_ident = parsed.ident; let expanded = quote! { - #[repr(C)] #[cfg(feature = "c")] pub struct #name_ident { pub wrapped: *mut #wrapped_type_ident From 7dc364de979d3afe8f45b17ee39215512d3b6925 Mon Sep 17 00:00:00 2001 From: Kevin Zimmerman <4733573+kczimm@users.noreply.github.com> Date: Tue, 21 May 2024 13:36:19 -0500 Subject: [PATCH 05/19] remove unused imports --- pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs index f823b9432..84a0fc70c 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs @@ -1,9 +1,6 @@ use proc_macro2::Ident; -use quote::{format_ident, quote, ToTokens}; -use std::{ - io::{Read, Write}, - str::FromStr, -}; +use quote::{format_ident, quote}; +use std::str::FromStr; use syn::{visit::Visit, DeriveInput, ItemImpl, Type}; use crate::{ From daf5510d99ef47bc45cb95a2bb39aa071d4c031e Mon Sep 17 00:00:00 2001 From: Kevin Zimmerman <4733573+kczimm@users.noreply.github.com> Date: Tue, 21 May 2024 14:14:15 -0500 Subject: [PATCH 06/19] only Box once; add destructor --- .../rust-bridge/rust-bridge-macros/src/c.rs | 27 ++++++++++++++----- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs index 84a0fc70c..68692078a 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs @@ -15,7 +15,7 @@ pub fn generate_c_alias(parsed: DeriveInput) -> proc_macro::TokenStream { let expanded = quote! { #[cfg(feature = "c")] pub struct #name_ident { - pub wrapped: *mut #wrapped_type_ident + pub wrapped: #wrapped_type_ident } #[cfg(feature = "c")] @@ -23,7 +23,7 @@ pub fn generate_c_alias(parsed: DeriveInput) -> proc_macro::TokenStream { unsafe fn custom_into(self) -> *mut #name_ident { Box::into_raw(Box::new( #name_ident { - wrapped: Box::into_raw(Box::new(self)) + wrapped: self } )) } @@ -32,9 +32,8 @@ pub fn generate_c_alias(parsed: DeriveInput) -> proc_macro::TokenStream { #[cfg(feature = "c")] unsafe impl rust_bridge::c::CustomInto<#wrapped_type_ident> for *mut #name_ident { unsafe fn custom_into(self) -> #wrapped_type_ident { - let c = Box::leak(Box::from_raw(self)); - let s = Box::leak(Box::from_raw(c.wrapped)); - s.clone() + let c = Box::from_raw(self); + c.wrapped } } @@ -42,7 +41,7 @@ pub fn generate_c_alias(parsed: DeriveInput) -> proc_macro::TokenStream { unsafe impl rust_bridge::c::CustomInto<&'static mut #wrapped_type_ident> for *mut #name_ident { unsafe fn custom_into(self) -> &'static mut #wrapped_type_ident { let c = Box::leak(Box::from_raw(self)); - Box::leak(Box::from_raw(c.wrapped)) + &mut c.wrapped } } @@ -50,11 +49,13 @@ pub fn generate_c_alias(parsed: DeriveInput) -> proc_macro::TokenStream { unsafe impl rust_bridge::c::CustomInto<&'static #wrapped_type_ident> for *mut #name_ident { unsafe fn custom_into(self) -> &'static #wrapped_type_ident { let c = Box::leak(Box::from_raw(self)); - &*Box::leak(Box::from_raw(c.wrapped)) + &c.wrapped } } }; + eprintln!("\n\n{expanded}\n\n"); + proc_macro::TokenStream::from(expanded) } @@ -173,6 +174,18 @@ pub fn generate_c_methods( methods.push(method); } + let method_name = format_ident!("{name_ident}_delete"); + let destructor = quote! { + #[cfg(feature = "c")] + #[no_mangle] + pub unsafe extern "C" fn #method_name(ptr: *mut #name_ident) { + drop(Box::from_raw(ptr)) + } + }; + + eprintln!("\n\n{destructor}\n\n"); + methods.push(destructor); + proc_macro::TokenStream::from(quote! { #(#methods)* }) From eb6b83b95c9df33ca1ea62dab63214fec93f41b8 Mon Sep 17 00:00:00 2001 From: Kevin Zimmerman <4733573+kczimm@users.noreply.github.com> Date: Tue, 21 May 2024 15:15:36 -0500 Subject: [PATCH 07/19] lowercase function names; prefix pgml --- pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs index 68692078a..139c6d476 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs @@ -99,7 +99,11 @@ pub fn generate_c_methods( rust_function_arguments, ) = get_method_arguments(&wrapped_type_ident, &name_ident, &method); - let method_name = format_ident!("{}_{}", name_ident, method_ident); + let method_name = format_ident!( + "pgml_{}_{}", + name_ident.to_string().to_lowercase(), + method_ident + ); let (return_part, augment_r_size) = rust_output_to_c_output(&wrapped_type_ident, &method.output_type); @@ -174,7 +178,7 @@ pub fn generate_c_methods( methods.push(method); } - let method_name = format_ident!("{name_ident}_delete"); + let method_name = format_ident!("pgml_{}_delete", name_ident.to_string().to_lowercase()); let destructor = quote! { #[cfg(feature = "c")] #[no_mangle] From 81920ce45941fd97848915643073761edfabf585 Mon Sep 17 00:00:00 2001 From: Kevin Zimmerman <4733573+kczimm@users.noreply.github.com> Date: Tue, 21 May 2024 21:08:58 -0500 Subject: [PATCH 08/19] fix clippy lints --- pgml-sdks/pgml/src/builtins.rs | 2 +- pgml-sdks/pgml/src/transformer_pipeline.rs | 2 +- pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs | 1 - pgml-sdks/rust-bridge/rust-bridge-macros/src/common.rs | 3 +-- pgml-sdks/rust-bridge/rust-bridge-macros/src/javascript.rs | 1 - pgml-sdks/rust-bridge/rust-bridge-macros/src/python.rs | 1 - pgml-sdks/rust-bridge/rust-bridge-macros/src/types.rs | 6 +++--- pgml-sdks/rust-bridge/rust-bridge-traits/src/c.rs | 2 -- 8 files changed, 6 insertions(+), 12 deletions(-) diff --git a/pgml-sdks/pgml/src/builtins.rs b/pgml-sdks/pgml/src/builtins.rs index 638e63353..37923efb1 100644 --- a/pgml-sdks/pgml/src/builtins.rs +++ b/pgml-sdks/pgml/src/builtins.rs @@ -87,7 +87,7 @@ impl Builtins { query.bind(task.0) }; let results = query.bind(inputs).bind(args).fetch_all(&pool).await?; - let results = results.get(0).unwrap().get::(0); + let results = results.first().unwrap().get::(0); Ok(Json(results)) } } diff --git a/pgml-sdks/pgml/src/transformer_pipeline.rs b/pgml-sdks/pgml/src/transformer_pipeline.rs index 7210aa6e2..860c5543c 100644 --- a/pgml-sdks/pgml/src/transformer_pipeline.rs +++ b/pgml-sdks/pgml/src/transformer_pipeline.rs @@ -88,7 +88,7 @@ impl TransformerPipeline { .fetch_all(&pool) .await? }; - let results = results.get(0).unwrap().get::(0); + let results = results.first().unwrap().get::(0); Ok(Json(results)) } diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs index 139c6d476..206217234 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs @@ -401,7 +401,6 @@ fn get_c_types( c_to_rust, argument_name.to_string(), ), - _ => todo!(), } } diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/common.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/common.rs index 17d875fbe..dc9ec066b 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-macros/src/common.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/common.rs @@ -1,7 +1,6 @@ use proc_macro2::{Group, Ident}; use quote::{format_ident, ToTokens}; use syn::{ - parenthesized, parse::{Parse, Parser}, punctuated::Punctuated, token, @@ -30,7 +29,7 @@ impl From<&str> for SupportedLanguage { } pub struct AttributeArgs { - pub args: Vec, + args: Vec, } #[derive(Debug, Clone)] diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/javascript.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/javascript.rs index 76ccea7c6..41b1396d9 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-macros/src/javascript.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/javascript.rs @@ -301,7 +301,6 @@ pub fn generate_javascript_methods( if let Ok(path) = path { let mut file = OpenOptions::new() .create(true) - .write(true) .append(true) .read(true) .open(path) diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/python.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/python.rs index 87d1c8c4f..835303f12 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-macros/src/python.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/python.rs @@ -373,7 +373,6 @@ pub fn generate_python_methods( if let Ok(path) = path { let mut file = OpenOptions::new() .create(true) - .write(true) .append(true) .read(true) .open(path) diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/types.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/types.rs index 99947b1da..6629995a3 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-macros/src/types.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/types.rs @@ -37,9 +37,9 @@ pub enum SupportedType { CustomType(String), } -impl ToString for SupportedType { - fn to_string(&self) -> String { - self.to_language_string(&None) +impl std::fmt::Display for SupportedType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.to_language_string(&None)) } } diff --git a/pgml-sdks/rust-bridge/rust-bridge-traits/src/c.rs b/pgml-sdks/rust-bridge/rust-bridge-traits/src/c.rs index c74edf4bd..76cc80ee1 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-traits/src/c.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-traits/src/c.rs @@ -1,5 +1,3 @@ -use std::collections::HashMap; - /// Very similar to the `Into` trait, but we can implement it on foreign types. pub unsafe trait CustomInto { unsafe fn custom_into(self) -> T; From fc46c1a4e7803c76dafafbaafd6f6ff37709272d Mon Sep 17 00:00:00 2001 From: Kevin Zimmerman <4733573+kczimm@users.noreply.github.com> Date: Tue, 21 May 2024 21:16:30 -0500 Subject: [PATCH 09/19] more clippy --- pgml-sdks/pgml/Cargo.toml | 2 +- pgml-sdks/pgml/src/languages/c.rs | 27 +++++++++------------------ 2 files changed, 10 insertions(+), 19 deletions(-) diff --git a/pgml-sdks/pgml/Cargo.toml b/pgml-sdks/pgml/Cargo.toml index 89c2f6275..a4e3af03d 100644 --- a/pgml-sdks/pgml/Cargo.toml +++ b/pgml-sdks/pgml/Cargo.toml @@ -50,7 +50,7 @@ parking_lot = "0.12.1" once_cell = "1.19.0" [features] -default = [] +default = ["c"] python = ["dep:pyo3", "dep:pyo3-asyncio"] javascript = ["dep:neon"] c = [] diff --git a/pgml-sdks/pgml/src/languages/c.rs b/pgml-sdks/pgml/src/languages/c.rs index a9c42cf5b..1538bd369 100644 --- a/pgml-sdks/pgml/src/languages/c.rs +++ b/pgml-sdks/pgml/src/languages/c.rs @@ -1,5 +1,4 @@ -use crate::types::{DateTime, GeneralJsonAsyncIterator, GeneralJsonIterator, Json}; -use futures::pin_mut; +use crate::types::{GeneralJsonAsyncIterator, GeneralJsonIterator, Json}; use futures::stream::Stream; use rust_bridge::c::CustomInto; use std::pin::Pin; @@ -36,12 +35,8 @@ unsafe impl CustomInto<*mut GeneralJsonIteratorC> for GeneralJsonIterator { #[no_mangle] pub unsafe extern "C" fn GeneralJsonIteratorC_done(iterator: *mut GeneralJsonIteratorC) -> bool { - let mut c = Box::leak(Box::from_raw(iterator)); - if let Some(_) = (*c.wrapped).peek() { - false - } else { - true - } + let c = Box::leak(Box::from_raw(iterator)); + (*c.wrapped).peek().is_none() } #[no_mangle] @@ -53,11 +48,12 @@ pub unsafe extern "C" fn GeneralJsonIteratorC_next( (*b).next().unwrap().unwrap().custom_into() } +type PeekableStream = + futures::stream::Peekable> + Send>>>; + #[repr(C)] pub struct GeneralJsonAsyncIteratorC { - pub wrapped: *mut futures::stream::Peekable< - Pin> + Send>>, - >, + pub wrapped: *mut PeekableStream, } unsafe impl CustomInto<*mut GeneralJsonAsyncIteratorC> for GeneralJsonAsyncIterator { @@ -74,16 +70,11 @@ pub unsafe extern "C" fn GeneralJsonAsyncIteratorC_done( iterator: *mut GeneralJsonAsyncIteratorC, ) -> bool { crate::get_or_set_runtime().block_on(async move { - use futures::stream::StreamExt; let c = Box::leak(Box::from_raw(iterator)); let s = Box::leak(Box::from_raw(c.wrapped)); let mut s = Pin::new(s); let res = s.as_mut().peek_mut().await; - if let Some(res) = res { - false - } else { - true - } + res.is_none() }) } @@ -93,7 +84,7 @@ pub unsafe extern "C" fn GeneralJsonAsyncIteratorC_next( ) -> *mut JsonC { crate::get_or_set_runtime().block_on(async move { use futures::stream::StreamExt; - let mut c = Box::leak(Box::from_raw(iterator)); + let c = Box::leak(Box::from_raw(iterator)); (*c.wrapped).next().await.unwrap().unwrap().custom_into() }) } From efbceed7571db3b55b9fe9abf0590373a52e330c Mon Sep 17 00:00:00 2001 From: Kevin Zimmerman <4733573+kczimm@users.noreply.github.com> Date: Tue, 21 May 2024 21:22:10 -0500 Subject: [PATCH 10/19] even more clippy lints --- .../rust-bridge/rust-bridge-macros/src/c.rs | 73 +++---------------- 1 file changed, 10 insertions(+), 63 deletions(-) diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs index 206217234..d4af6a5c2 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs @@ -91,13 +91,8 @@ pub fn generate_c_methods( } let method_ident = method.method_ident.clone(); - let ( - go_function_arguments, - go_arguments_prep, - mut c_function_arguments, - c_argument_prep, - rust_function_arguments, - ) = get_method_arguments(&wrapped_type_ident, &name_ident, &method); + let (mut c_function_arguments, c_argument_prep, rust_function_arguments) = + get_method_arguments(&wrapped_type_ident, &name_ident, &method); let method_name = format_ident!( "pgml_{}_{}", @@ -203,11 +198,7 @@ fn get_method_arguments( proc_macro2::TokenStream, proc_macro2::TokenStream, proc_macro2::TokenStream, - proc_macro2::TokenStream, - proc_macro2::TokenStream, ) { - let mut go_function_arguments = Vec::new(); - let mut go_arguments_prep = Vec::new(); let mut c_function_arguments = Vec::new(); let mut c_argument_prep = Vec::new(); let mut rust_function_arguments = Vec::new(); @@ -227,8 +218,6 @@ fn get_method_arguments( for (argument_name, argument_type) in &method.method_arguments { let argument_name_without_mut = argument_name.replacen("mut", "", 1); let ( - go_function_arguments_, - go_arguments_prep_, c_function_arguments_, c_function_argument_types, c_argument_prep_, @@ -242,16 +231,12 @@ fn get_method_arguments( .collect::>() .join(","); - go_function_arguments.push(go_function_arguments_); - go_arguments_prep.push(go_arguments_prep_); c_function_arguments.push(c_function_arguments_); c_argument_prep.push(c_argument_prep_); rust_function_arguments.push(rust_function_arguments_); } ( - proc_macro2::TokenStream::from_str(&go_function_arguments.join("\n")).unwrap(), - proc_macro2::TokenStream::from_str(&go_arguments_prep.join("\n")).unwrap(), proc_macro2::TokenStream::from_str(&c_function_arguments.join(",")).unwrap(), proc_macro2::TokenStream::from_str(&c_argument_prep.join("\n")).unwrap(), proc_macro2::TokenStream::from_str(&rust_function_arguments.join(",")).unwrap(), @@ -269,22 +254,14 @@ fn get_method_arguments( fn get_c_types( argument_name: &str, ty: &SupportedType, -) -> (String, String, Vec, Vec, String, String) { +) -> (Vec, Vec, String, String) { let t = ty.to_language_string(&None); let c_to_rust = format!("let {argument_name}: {t} = {argument_name}.custom_into();"); match ty { SupportedType::Reference(r) => { - let ( - go_function_arguments, - go_argument_prep, - c_function_arguments, - c_function_argument_types, - c_argument_prep, - rust_function_arguments, - ) = get_c_types(argument_name, &r.ty); + let (c_function_arguments, c_function_argument_types, _, _) = + get_c_types(argument_name, &r.ty); ( - "".to_string(), - "".to_string(), c_function_arguments, c_function_argument_types, c_to_rust, @@ -292,22 +269,14 @@ fn get_c_types( ) } SupportedType::str | SupportedType::String => ( - "".to_string(), - "".to_string(), vec![format!("{argument_name}")], vec!["*mut std::ffi::c_char".to_string()], c_to_rust, argument_name.to_string(), ), SupportedType::Option(r) => { - let ( - go_function_arguments, - go_argument_prep, - mut c_function_arguments, - mut c_function_argument_types, - c_argument_prep, - rust_function_arguments, - ) = get_c_types(argument_name, &r); + let (c_function_arguments, mut c_function_argument_types, _, _) = + get_c_types(argument_name, r); let v = c_function_argument_types.last_mut().unwrap(); if !v.starts_with('*') { @@ -315,8 +284,6 @@ fn get_c_types( } ( - "".to_string(), - "".to_string(), c_function_arguments, c_function_argument_types, c_to_rust, @@ -324,34 +291,24 @@ fn get_c_types( ) } SupportedType::bool => ( - "".to_string(), - "".to_string(), vec![format!("{argument_name}")], vec!["bool".to_string()], "".to_string(), argument_name.to_string(), ), SupportedType::Vec(v) => { - let ( - go_function_arguments, - go_argument_prep, - mut c_function_arguments, - mut c_function_argument_types, - mut c_argument_prep, - rust_function_arguments, - ) = get_c_types(argument_name, v); + let (mut c_function_arguments, mut c_function_argument_types, _, _) = + get_c_types(argument_name, v); let v = c_function_argument_types.last_mut().unwrap(); *v = v.replacen("*mut", "*mut *mut", 1); c_function_arguments.push("v_size".to_string()); c_function_argument_types.push("std::ffi::c_ulong".to_string()); - c_argument_prep = "let v_size: usize = v_size as usize;".to_string(); + let c_argument_prep = "let v_size: usize = v_size as usize;".to_string(); let c_to_rust = format!("{c_argument_prep}\nlet {argument_name}: {t} = {argument_name}.custom_into_vec(v_size);"); ( - "".to_string(), - "".to_string(), c_function_arguments, c_function_argument_types, c_to_rust, @@ -362,40 +319,30 @@ fn get_c_types( SupportedType::Tuple(_) => panic!("Tuple arguments not supported in c"), SupportedType::S => unreachable!(), SupportedType::i64 => ( - "".to_string(), - "".to_string(), vec![format!("{argument_name}")], vec!["std::ffi::c_long".to_string()], format!("let {argument_name}: {t} = {argument_name} as {t};"), argument_name.to_string(), ), SupportedType::u64 => ( - "".to_string(), - "".to_string(), vec![format!("{argument_name}")], vec!["std::ffi::c_ulong".to_string()], format!("let {argument_name}: {t} = {argument_name} as {t};"), argument_name.to_string(), ), SupportedType::i32 => ( - "".to_string(), - "".to_string(), vec![format!("{argument_name}")], vec!["std::ffi::c_int".to_string()], format!("let {argument_name}: {t} = {argument_name} as {t};"), argument_name.to_string(), ), SupportedType::f64 => ( - "".to_string(), - "".to_string(), vec![format!("{argument_name}")], vec!["std::ffi::c_double".to_string()], format!("let {argument_name}: {t} = {argument_name} as {t};"), argument_name.to_string(), ), SupportedType::CustomType(s) => ( - "".to_string(), - "".to_string(), vec![format!("{argument_name}")], vec![format!("*mut {s}C")], c_to_rust, From 2b181deca72b9d850fe31c1173f98be116343a49 Mon Sep 17 00:00:00 2001 From: Kevin Zimmerman <4733573+kczimm@users.noreply.github.com> Date: Thu, 23 May 2024 14:02:35 -0500 Subject: [PATCH 11/19] tweak C Makefile and example --- pgml-sdks/pgml/Cargo.toml | 5 +- pgml-sdks/pgml/c/Makefile | 34 +++------ pgml-sdks/pgml/c/cbindgen.toml | 117 ++++++++++++++++++++++++++++++ pgml-sdks/pgml/c/example/main.c | 45 ++++++++++++ pgml-sdks/pgml/c/go/go.mod | 3 - pgml-sdks/pgml/c/go/pgml.go | 23 ------ pgml-sdks/pgml/c/test.c | 45 ------------ pgml-sdks/pgml/c/zig/build.zig | 78 -------------------- pgml-sdks/pgml/c/zig/src/main.zig | 37 ---------- pgml-sdks/pgml/src/collection.rs | 1 - 10 files changed, 173 insertions(+), 215 deletions(-) create mode 100644 pgml-sdks/pgml/c/cbindgen.toml create mode 100644 pgml-sdks/pgml/c/example/main.c delete mode 100644 pgml-sdks/pgml/c/go/go.mod delete mode 100644 pgml-sdks/pgml/c/go/pgml.go delete mode 100644 pgml-sdks/pgml/c/test.c delete mode 100644 pgml-sdks/pgml/c/zig/build.zig delete mode 100644 pgml-sdks/pgml/c/zig/src/main.zig diff --git a/pgml-sdks/pgml/Cargo.toml b/pgml-sdks/pgml/Cargo.toml index a4e3af03d..b2d1c7c5f 100644 --- a/pgml-sdks/pgml/Cargo.toml +++ b/pgml-sdks/pgml/Cargo.toml @@ -13,9 +13,6 @@ keywords = ["postgres", "machine learning", "vector databases", "embeddings"] name = "pgml" crate-type = ["lib", "cdylib"] -[rust-analyzer.checkOnSave] -extraArgs = ["--target-dir", "/path/to/proect/target/check"] - [dependencies] rust_bridge = {path = "../rust-bridge/rust-bridge", version = "0.1.0"} sqlx = { version = "0.7.3", features = [ "runtime-tokio-rustls", "postgres", "json", "time", "uuid"] } @@ -50,7 +47,7 @@ parking_lot = "0.12.1" once_cell = "1.19.0" [features] -default = ["c"] +default = [] python = ["dep:pyo3", "dep:pyo3-asyncio"] javascript = ["dep:neon"] c = [] diff --git a/pgml-sdks/pgml/c/Makefile b/pgml-sdks/pgml/c/Makefile index a8d614023..b7a0724c0 100644 --- a/pgml-sdks/pgml/c/Makefile +++ b/pgml-sdks/pgml/c/Makefile @@ -1,31 +1,17 @@ BINARY_NAME=pgml +HEADER=include/${BINARY_NAME}.h +PGML_LIB=../target/debug/ -build: - cargo build --features c - cargo expand --features c > expanded.rs - cbindgen --lang C -o pgml.h expanded.rs - # GOARCH=amd64 GOOS=darwin go build -o ${BINARY_NAME}-darwin main.go - GOARCH=amd64 GOOS=linux go build -o ${BINARY_NAME}-linux pgml.go - # GOARCH=amd64 GOOS=windows go build -o ${BINARY_NAME}-windows main.go +bindings: + cargo b --features c + RUST_TOOLCHAIN=$(dirname $(rustup +nightly which rustc)) cbindgen --config cbindgen.toml --output ${HEADER} ../ -build_test: - cargo build --features c - cargo expand --features c > expanded.rs - cbindgen --lang C -o pgml.h expanded.rs - gcc test.c -o test -l pgml -L ./../target/debug - -test: build_test - LD_LIBRARY_PATH=./../target/debug ./test - -test_c: - gcc test.c -o test -l pgml -L ./../target/debug - LD_LIBRARY_PATH=./../target/debug ./test +build: bindings + gcc -Wall -o build/example -Iinclude/ -L${PGML_LIB} -l ${BINARY_NAME} example/main.c run: build - LD_LIBRARY_PATH=./../target/debug ./${BINARY_NAME}-linux + LD_LIBRARY_PATH=${PGML_LIB} ./build/example clean: - go clean - # rm ${BINARY_NAME}-darwin - rm ${BINARY_NAME}-linux - # rm ${BINARY_NAME}-windows + rm ${HEADER} + diff --git a/pgml-sdks/pgml/c/cbindgen.toml b/pgml-sdks/pgml/c/cbindgen.toml new file mode 100644 index 000000000..4efcf2453 --- /dev/null +++ b/pgml-sdks/pgml/c/cbindgen.toml @@ -0,0 +1,117 @@ +language = "C" + + +############## Options for Wrapping the Contents of the Header ################# + +# header = "/* Text to put at the beginning of the generated file. Probably a license. */" +# trailer = "/* Text to put at the end of the generated file */" +# include_guard = "my_bindings_h" +# pragma_once = true +# autogen_warning = "/* Warning, this file is autogenerated by cbindgen. Don't modify this manually. */" +include_version = false +# namespace = "my_namespace" +namespaces = [] +using_namespaces = [] +sys_includes = [] +includes = [] +no_includes = false +# cpp_compat = true +after_includes = "" + + +############################ Code Style Options ################################ + +braces = "SameLine" +line_length = 100 +tab_width = 2 +documentation = true +documentation_style = "auto" +documentation_length = "full" +line_endings = "LF" # also "CR", "CRLF", "Native" + + +############################# Codegen Options ################################## + +style = "both" +sort_by = "Name" # default for `fn.sort_by` and `const.sort_by` +usize_is_size_t = true + +[defines] +# "target_os = freebsd" = "DEFINE_FREEBSD" +# "feature = serde" = "DEFINE_SERDE" + +[export] +include = [] +exclude = [] +# prefix = "CAPI_" +item_types = [] +renaming_overrides_prefixing = false + +[export.rename] + +[export.body] + +[export.mangle] + +[fn] +rename_args = "None" +# must_use = "MUST_USE_FUNC" +# deprecated = "DEPRECATED_FUNC" +# deprecated_with_note = "DEPRECATED_FUNC_WITH_NOTE" +# no_return = "NO_RETURN" +# prefix = "START_FUNC" +# postfix = "END_FUNC" +args = "auto" +sort_by = "Name" + +[struct] +rename_fields = "None" +# must_use = "MUST_USE_STRUCT" +# deprecated = "DEPRECATED_STRUCT" +# deprecated_with_note = "DEPRECATED_STRUCT_WITH_NOTE" +derive_constructor = false +derive_eq = false +derive_neq = false +derive_lt = false +derive_lte = false +derive_gt = false +derive_gte = false + +[enum] +rename_variants = "None" +# must_use = "MUST_USE_ENUM" +# deprecated = "DEPRECATED_ENUM" +# deprecated_with_note = "DEPRECATED_ENUM_WITH_NOTE" +add_sentinel = false +prefix_with_name = false +derive_helper_methods = false +derive_const_casts = false +derive_mut_casts = false +# cast_assert_name = "ASSERT" +derive_tagged_enum_destructor = false +derive_tagged_enum_copy_constructor = false +enum_class = true +private_default_tagged_enum_constructor = false + +[const] +allow_static_const = true +allow_constexpr = false +sort_by = "Name" + +[macro_expansion] +bitflags = false + +############## Options for How Your Rust library Should Be Parsed ############## + +[parse] +parse_deps = false +# include = [] +exclude = [] +clean = false +extra_bindings = [] + +[parse.expand] +crates = ["pgml"] +all_features = false +default_features = true +features = ["c"] diff --git a/pgml-sdks/pgml/c/example/main.c b/pgml-sdks/pgml/c/example/main.c new file mode 100644 index 000000000..092547f2c --- /dev/null +++ b/pgml-sdks/pgml/c/example/main.c @@ -0,0 +1,45 @@ +#include + +#include "pgml.h" + +int main() { + // Create the Collection and Pipeline + CollectionC * collection = pgml_collectionc_new("test_c", NULL); + PipelineC * pipeline = pgml_pipelinec_new("test_c", "{\"text\": {\"splitter\": {\"model\": \"recursive_character\"},\"semantic_search\": {\"model\": \"intfloat/e5-small\"}}}"); + + // Add the Pipeline to the Collection + pgml_collectionc_add_pipeline(collection, pipeline); + + // Upsert the documents + char * documents_to_upsert[2] = {"{\"id\": \"doc1\", \"text\": \"test1\"}", "{\"id\": \"doc2\", \"text\": \"test2\"}"}; + pgml_collectionc_upsert_documents(collection, documents_to_upsert, 2, NULL); + + // Retrieve the documents + unsigned long r_size = 0; + char** documents = pgml_collectionc_get_documents(collection, NULL, &r_size); + + // Print the documents + printf("\n\nPrinting documents:\n"); + int i; + for (i = 0; i < r_size; ++i) { + printf("Document %u -> %s\n", i, documents[i]); + } + + // Search over the documents + r_size = 0; + char** results = pgml_collectionc_vector_search(collection, "{\"query\": {\"fields\": {\"text\": {\"query\": \"Test query!\"}}}, \"limit\": 5}", pipeline, &r_size); + printf("\n\nPrinting results:\n"); + for (i = 0; i < r_size; ++i) { + printf("Result %u -> %s\n", i, results[i]); + } + + // Test the TransformerPipeline + TransformerPipelineC * t_pipeline = pgml_transformerpipelinec_new("text-generation", "meta-llama/Meta-Llama-3-8B-Instruct", NULL, NULL); + GeneralJsonAsyncIteratorC * t_pipeline_iter = pgml_transformerpipelinec_transform_stream(t_pipeline, "\"AI is going to\"", "{\"max_new_tokens\": 100}", NULL); + while (!GeneralJsonAsyncIteratorC_done(t_pipeline_iter)) { + char * res = GeneralJsonAsyncIteratorC_next(t_pipeline_iter); + printf("Token -> %s\n", res); + } + + return 0; +} diff --git a/pgml-sdks/pgml/c/go/go.mod b/pgml-sdks/pgml/c/go/go.mod deleted file mode 100644 index 6b1511192..000000000 --- a/pgml-sdks/pgml/c/go/go.mod +++ /dev/null @@ -1,3 +0,0 @@ -module pgml - -go 1.22.2 diff --git a/pgml-sdks/pgml/c/go/pgml.go b/pgml-sdks/pgml/c/go/pgml.go deleted file mode 100644 index e22b91dd6..000000000 --- a/pgml-sdks/pgml/c/go/pgml.go +++ /dev/null @@ -1,23 +0,0 @@ -package main - -/* -#cgo LDFLAGS: -l pgml -L ./../target/debug -#include "pgml.h" -*/ -import "C" - -import ( - "unsafe" -) - -type Collection struct { - collection *C.CollectionC -} - -func main() { - c_string_p := C.CString("Test CString") - defer C.free(unsafe.Pointer(c_string_p)) - collection := C.new_collection(c_string_p) - C.test_collection(collection) - defer C.free_collection(collection) -} diff --git a/pgml-sdks/pgml/c/test.c b/pgml-sdks/pgml/c/test.c deleted file mode 100644 index a7538e705..000000000 --- a/pgml-sdks/pgml/c/test.c +++ /dev/null @@ -1,45 +0,0 @@ -#include - -#include "pgml.h" - -int main() { - // Create the Collection and Pipeline - CollectionC * collection = CollectionC_new("test_c", NULL); - PipelineC * pipeline = PipelineC_new("test_c", "{\"text\": {\"splitter\": {\"model\": \"recursive_character\"},\"semantic_search\": {\"model\": \"intfloat/e5-small\"}}}"); - - // Add the Pipeline to the Collection - CollectionC_add_pipeline(collection, pipeline); - - // Upsert the documents - char * documents_to_upsert[2] = {"{\"id\": \"doc1\", \"text\": \"test1\"}", "{\"id\": \"doc2\", \"text\": \"test2\"}"}; - CollectionC_upsert_documents(collection, documents_to_upsert, 2, NULL); - - // Retrieve the documents - unsigned long r_size = 0; - char** documents = CollectionC_get_documents(collection, NULL, &r_size); - - // Print the documents - printf("\n\nPrinting documents:\n"); - int i; - for (i = 0; i < r_size; ++i) { - printf("Document %u -> %s\n", i, documents[i]); - } - - // Search over the documents - r_size = 0; - char** results = CollectionC_vector_search(collection, "{\"query\": {\"fields\": {\"text\": {\"query\": \"Test query!\"}}}, \"limit\": 5}", pipeline, &r_size); - printf("\n\nPrinting results:\n"); - for (i = 0; i < r_size; ++i) { - printf("Result %u -> %s\n", i, results[i]); - } - - // Test the TransformerPipeline - TransformerPipelineC * t_pipeline = TransformerPipelineC_new("text-generation", "TheBloke/zephyr-7B-beta-GPTQ", "{\"revision\": \"main\"}", "postgres://pg:ml@sql.cloud.postgresml.org:38042/pgml"); - GeneralJsonAsyncIteratorC * t_pipeline_iter = TransformerPipelineC_transform_stream(t_pipeline, "\"AI is going to\"", "{\"max_new_tokens\": 100}", NULL); - while (!GeneralJsonAsyncIteratorC_done(t_pipeline_iter)) { - char * res = GeneralJsonAsyncIteratorC_next(t_pipeline_iter); - printf("Token -> %s\n", res); - } - - return 0; -} diff --git a/pgml-sdks/pgml/c/zig/build.zig b/pgml-sdks/pgml/c/zig/build.zig deleted file mode 100644 index 300954738..000000000 --- a/pgml-sdks/pgml/c/zig/build.zig +++ /dev/null @@ -1,78 +0,0 @@ -const std = @import("std"); - -// Although this function looks imperative, note that its job is to -// declaratively construct a build graph that will be executed by an external -// runner. -pub fn build(b: *std.Build) void { - // Standard target options allows the person running `zig build` to choose - // what target to build for. Here we do not override the defaults, which - // means any target is allowed, and the default is native. Other options - // for restricting supported target set are available. - const target = b.standardTargetOptions(.{}); - - // Standard optimization options allow the person running `zig build` to select - // between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall. Here we do not - // set a preferred release mode, allowing the user to decide how to optimize. - const optimize = b.standardOptimizeOption(.{}); - - const exe = b.addExecutable(.{ - .name = "zig", - // In this case the main source file is merely a path, however, in more - // complicated build scripts, this could be a generated file. - .root_source_file = .{ .path = "src/main.zig" }, - .target = target, - .optimize = optimize, - }); - - // Need to link our Rust pgml library - exe.addLibraryPath(.{ .path = "./../../target/debug" }); - exe.linkSystemLibrary("pgml"); - - // This declares intent for the executable to be installed into the - // standard location when the user invokes the "install" step (the default - // step when running `zig build`). - b.installArtifact(exe); - - // This *creates* a Run step in the build graph, to be executed when another - // step is evaluated that depends on it. The next line below will establish - // such a dependency. - const run_cmd = b.addRunArtifact(exe); - - // By making the run step depend on the install step, it will be run from the - // installation directory rather than directly from within the cache directory. - // This is not necessary, however, if the application depends on other installed - // files, this ensures they will be present and in the expected location. - run_cmd.step.dependOn(b.getInstallStep()); - - // This allows the user to pass arguments to the application in the build - // command itself, like this: `zig build run -- arg1 arg2 etc` - if (b.args) |args| { - run_cmd.addArgs(args); - } - - // This creates a build step. It will be visible in the `zig build --help` menu, - // and can be selected like this: `zig build run` - // This will evaluate the `run` step rather than the default, which is "install". - const run_step = b.step("run", "Run the app"); - run_step.dependOn(&run_cmd.step); - - // Creates a step for unit testing. This only builds the test executable - // but does not run it. - const unit_tests = b.addTest(.{ - .root_source_file = .{ .path = "src/main.zig" }, - .target = target, - .optimize = optimize, - }); - - // Need to link our Rust pgml library - exe.addLibraryPath(.{ .path = "./../../target/debug" }); - unit_tests.linkSystemLibrary("pgml"); - - const run_unit_tests = b.addRunArtifact(unit_tests); - - // Similar to creating the run step earlier, this exposes a `test` step to - // the `zig build --help` menu, providing a way for the user to request - // running the unit tests. - const test_step = b.step("test", "Run unit tests"); - test_step.dependOn(&run_unit_tests.step); -} diff --git a/pgml-sdks/pgml/c/zig/src/main.zig b/pgml-sdks/pgml/c/zig/src/main.zig deleted file mode 100644 index af806dda1..000000000 --- a/pgml-sdks/pgml/c/zig/src/main.zig +++ /dev/null @@ -1,37 +0,0 @@ -const pgml = @cImport({ - // See https://github.com/ziglang/zig/issues/515 - // @cDefine("_NO_CRT_STDIO_INLINE", "1"); - // @cInclude("./../pgml.h"); - @cInclude("./../pgml.h"); -}); - -pub fn main() void { - // Create the Collection and Pipeline - var collection: *pgml.CollectionC = pgml.CollectionC_new(@constCast("test_c"), null); - var pipeline: *pgml.PipelineC = pgml.PipelineC_new(@constCast("test_c"), @constCast("{\"text\": {\"splitter\": {\"model\": \"recursive_character\"},\"semantic_search\": {\"model\": \"intfloat/e5-small\"}}}")); - - // Add the Pipeline to the Collection - pgml.CollectionC_add_pipeline(collection, pipeline); - - // Upsert the documents - // const documents_to_upsert: [2][]const u8 = .{ "{\"id\": \"doc1\", \"text\": \"test1\"}", "{\"id\": \"doc2\", \"text\": \"test2\"}" }; - // const c_documents_to_upsert: [*c][*c]pgml.JsonC = @as([*c][*c]pgml.JsonC, @ptrCast(@constCast(documents_to_upsert[0..2].ptr))); - // pgml.CollectionC_upsert_documents(collection, c_documents_to_upsert, 2, null); -} - -// test "simple test" { -// // Create the Collection and Pipeline -// var collection: *pgml.CollectionC = pgml.CollectionC_new(@constCast("test_c"), null); -// var pipeline: *pgml.PipelineC = pgml.PipelineC_new(@constCast("test_c"), @constCast("{\"text\": {\"splitter\": {\"model\": \"recursive_character\"},\"semantic_search\": {\"model\": \"intfloat/e5-small\"}}}")); - -// // Add the Pipeline to the Collection -// pgml.CollectionC_add_pipeline(collection, pipeline); - -// // Upsert the documents -// // char * documents_to_upsert[2] = {"{\"id\": \"doc1\", \"text\": \"test1\"}", "{\"id\": \"doc2\", \"text\": \"test2\"}"}; -// // CollectionC_upsert_documents(collection, documents_to_upsert, 2, NULL); - -// // // Retrieve the documents -// // unsigned long r_size = 0; -// // char** documents = CollectionC_get_documents(collection, NULL, &r_size); -// } diff --git a/pgml-sdks/pgml/src/collection.rs b/pgml-sdks/pgml/src/collection.rs index 0209f1c40..ba24420ab 100644 --- a/pgml-sdks/pgml/src/collection.rs +++ b/pgml-sdks/pgml/src/collection.rs @@ -518,7 +518,6 @@ impl Collection { documents: Vec, args: Option, ) -> anyhow::Result<()> { - eprintln!("IN THE UPSERT DOCUMENTS FUNCTION"); // The flow for this function // 1. Create the collection if it does not exist // 2. Get all pipelines where ACTIVE = TRUE From 43c6773bea51cea71bad522b76ff6f7c5aa0fc5d Mon Sep 17 00:00:00 2001 From: Kevin Zimmerman <4733573+kczimm@users.noreply.github.com> Date: Thu, 23 May 2024 14:10:50 -0500 Subject: [PATCH 12/19] renaming --- pgml-sdks/pgml/c/Makefile | 7 ++++--- pgml-sdks/pgml/c/{example/main.c => example.c} | 0 2 files changed, 4 insertions(+), 3 deletions(-) rename pgml-sdks/pgml/c/{example/main.c => example.c} (100%) diff --git a/pgml-sdks/pgml/c/Makefile b/pgml-sdks/pgml/c/Makefile index b7a0724c0..3890e0661 100644 --- a/pgml-sdks/pgml/c/Makefile +++ b/pgml-sdks/pgml/c/Makefile @@ -1,5 +1,5 @@ BINARY_NAME=pgml -HEADER=include/${BINARY_NAME}.h +HEADER=${BINARY_NAME}.h PGML_LIB=../target/debug/ bindings: @@ -7,11 +7,12 @@ bindings: RUST_TOOLCHAIN=$(dirname $(rustup +nightly which rustc)) cbindgen --config cbindgen.toml --output ${HEADER} ../ build: bindings - gcc -Wall -o build/example -Iinclude/ -L${PGML_LIB} -l ${BINARY_NAME} example/main.c + gcc -Wall -o ./example -Iinclude/ -L${PGML_LIB} -l ${BINARY_NAME} example.c run: build - LD_LIBRARY_PATH=${PGML_LIB} ./build/example + LD_LIBRARY_PATH=${PGML_LIB} ./example clean: rm ${HEADER} + rm -rf ./example diff --git a/pgml-sdks/pgml/c/example/main.c b/pgml-sdks/pgml/c/example.c similarity index 100% rename from pgml-sdks/pgml/c/example/main.c rename to pgml-sdks/pgml/c/example.c From e50294b092af433955e7346b460888950cdef133 Mon Sep 17 00:00:00 2001 From: Kevin Zimmerman <4733573+kczimm@users.noreply.github.com> Date: Thu, 23 May 2024 14:19:40 -0500 Subject: [PATCH 13/19] rename fns; only forward declare iterators --- pgml-sdks/pgml/c/Makefile | 4 +++- pgml-sdks/pgml/c/example.c | 4 ++-- pgml-sdks/pgml/src/languages/c.rs | 17 ++++++++--------- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/pgml-sdks/pgml/c/Makefile b/pgml-sdks/pgml/c/Makefile index 3890e0661..192766cfe 100644 --- a/pgml-sdks/pgml/c/Makefile +++ b/pgml-sdks/pgml/c/Makefile @@ -4,7 +4,9 @@ PGML_LIB=../target/debug/ bindings: cargo b --features c - RUST_TOOLCHAIN=$(dirname $(rustup +nightly which rustc)) cbindgen --config cbindgen.toml --output ${HEADER} ../ + rustup default nightly + cbindgen --config cbindgen.toml --output ${HEADER} ../ + rustup default stable build: bindings gcc -Wall -o ./example -Iinclude/ -L${PGML_LIB} -l ${BINARY_NAME} example.c diff --git a/pgml-sdks/pgml/c/example.c b/pgml-sdks/pgml/c/example.c index 092547f2c..8dd6bd3a7 100644 --- a/pgml-sdks/pgml/c/example.c +++ b/pgml-sdks/pgml/c/example.c @@ -36,8 +36,8 @@ int main() { // Test the TransformerPipeline TransformerPipelineC * t_pipeline = pgml_transformerpipelinec_new("text-generation", "meta-llama/Meta-Llama-3-8B-Instruct", NULL, NULL); GeneralJsonAsyncIteratorC * t_pipeline_iter = pgml_transformerpipelinec_transform_stream(t_pipeline, "\"AI is going to\"", "{\"max_new_tokens\": 100}", NULL); - while (!GeneralJsonAsyncIteratorC_done(t_pipeline_iter)) { - char * res = GeneralJsonAsyncIteratorC_next(t_pipeline_iter); + while (!pgml_generaljsonasynciteratorc_done(t_pipeline_iter)) { + char * res = pgml_generaljsonasynciteratorc_next(t_pipeline_iter); printf("Token -> %s\n", res); } diff --git a/pgml-sdks/pgml/src/languages/c.rs b/pgml-sdks/pgml/src/languages/c.rs index 1538bd369..9118b0cd4 100644 --- a/pgml-sdks/pgml/src/languages/c.rs +++ b/pgml-sdks/pgml/src/languages/c.rs @@ -19,10 +19,8 @@ unsafe impl CustomInto<*mut JsonC> for Json { } } -#[repr(C)] pub struct GeneralJsonIteratorC { - pub wrapped: - *mut std::iter::Peekable> + Send>>, + wrapped: *mut std::iter::Peekable> + Send>>, } unsafe impl CustomInto<*mut GeneralJsonIteratorC> for GeneralJsonIterator { @@ -34,13 +32,15 @@ unsafe impl CustomInto<*mut GeneralJsonIteratorC> for GeneralJsonIterator { } #[no_mangle] -pub unsafe extern "C" fn GeneralJsonIteratorC_done(iterator: *mut GeneralJsonIteratorC) -> bool { +pub unsafe extern "C" fn pgml_generaljsoniteratorc_done( + iterator: *mut GeneralJsonIteratorC, +) -> bool { let c = Box::leak(Box::from_raw(iterator)); (*c.wrapped).peek().is_none() } #[no_mangle] -pub unsafe extern "C" fn GeneralJsonIteratorC_next( +pub unsafe extern "C" fn pgml_generaljsoniteratorc_next( iterator: *mut GeneralJsonIteratorC, ) -> *mut JsonC { let c = Box::leak(Box::from_raw(iterator)); @@ -51,9 +51,8 @@ pub unsafe extern "C" fn GeneralJsonIteratorC_next( type PeekableStream = futures::stream::Peekable> + Send>>>; -#[repr(C)] pub struct GeneralJsonAsyncIteratorC { - pub wrapped: *mut PeekableStream, + wrapped: *mut PeekableStream, } unsafe impl CustomInto<*mut GeneralJsonAsyncIteratorC> for GeneralJsonAsyncIterator { @@ -66,7 +65,7 @@ unsafe impl CustomInto<*mut GeneralJsonAsyncIteratorC> for GeneralJsonAsyncItera } #[no_mangle] -pub unsafe extern "C" fn GeneralJsonAsyncIteratorC_done( +pub unsafe extern "C" fn pgml_generaljsonasynciteratorc_done( iterator: *mut GeneralJsonAsyncIteratorC, ) -> bool { crate::get_or_set_runtime().block_on(async move { @@ -79,7 +78,7 @@ pub unsafe extern "C" fn GeneralJsonAsyncIteratorC_done( } #[no_mangle] -pub unsafe extern "C" fn GeneralJsonAsyncIteratorC_next( +pub unsafe extern "C" fn pgml_generaljsonasynciteratorc_next( iterator: *mut GeneralJsonAsyncIteratorC, ) -> *mut JsonC { crate::get_or_set_runtime().block_on(async move { From 77ccc3af585e8212389b558a9c69bf92bfaf63a8 Mon Sep 17 00:00:00 2001 From: Kevin Zimmerman <4733573+kczimm@users.noreply.github.com> Date: Thu, 23 May 2024 14:23:58 -0500 Subject: [PATCH 14/19] call destructors in example; remove eprintln --- pgml-sdks/pgml/c/example.c | 5 +++++ pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs | 5 ----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pgml-sdks/pgml/c/example.c b/pgml-sdks/pgml/c/example.c index 8dd6bd3a7..fc85d6523 100644 --- a/pgml-sdks/pgml/c/example.c +++ b/pgml-sdks/pgml/c/example.c @@ -41,5 +41,10 @@ int main() { printf("Token -> %s\n", res); } + // cleanup + pgml_transformerpipelinec_delete(t_pipeline); + pgml_pipelinec_delete(pipeline); + pgml_collectionc_delete(collection); + return 0; } diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs index d4af6a5c2..642d67238 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs @@ -54,8 +54,6 @@ pub fn generate_c_alias(parsed: DeriveInput) -> proc_macro::TokenStream { } }; - eprintln!("\n\n{expanded}\n\n"); - proc_macro::TokenStream::from(expanded) } @@ -168,8 +166,6 @@ pub fn generate_c_methods( } }; - eprintln!("\n\n{}\n\n", method); - methods.push(method); } @@ -182,7 +178,6 @@ pub fn generate_c_methods( } }; - eprintln!("\n\n{destructor}\n\n"); methods.push(destructor); proc_macro::TokenStream::from(quote! { From f03d19b0cc18a2a091d80166049a0c1ffcba822c Mon Sep 17 00:00:00 2001 From: Kevin Zimmerman <4733573+kczimm@users.noreply.github.com> Date: Thu, 23 May 2024 14:36:30 -0500 Subject: [PATCH 15/19] add guard and comment to header gen --- pgml-sdks/pgml/c/cbindgen.toml | 115 +-------------------------------- 1 file changed, 3 insertions(+), 112 deletions(-) diff --git a/pgml-sdks/pgml/c/cbindgen.toml b/pgml-sdks/pgml/c/cbindgen.toml index 4efcf2453..07d509008 100644 --- a/pgml-sdks/pgml/c/cbindgen.toml +++ b/pgml-sdks/pgml/c/cbindgen.toml @@ -1,117 +1,8 @@ language = "C" - - -############## Options for Wrapping the Contents of the Header ################# - -# header = "/* Text to put at the beginning of the generated file. Probably a license. */" -# trailer = "/* Text to put at the end of the generated file */" -# include_guard = "my_bindings_h" -# pragma_once = true -# autogen_warning = "/* Warning, this file is autogenerated by cbindgen. Don't modify this manually. */" -include_version = false -# namespace = "my_namespace" -namespaces = [] -using_namespaces = [] -sys_includes = [] -includes = [] -no_includes = false -# cpp_compat = true -after_includes = "" - - -############################ Code Style Options ################################ - -braces = "SameLine" -line_length = 100 -tab_width = 2 -documentation = true -documentation_style = "auto" -documentation_length = "full" -line_endings = "LF" # also "CR", "CRLF", "Native" - - -############################# Codegen Options ################################## - -style = "both" -sort_by = "Name" # default for `fn.sort_by` and `const.sort_by` -usize_is_size_t = true - -[defines] -# "target_os = freebsd" = "DEFINE_FREEBSD" -# "feature = serde" = "DEFINE_SERDE" - -[export] -include = [] -exclude = [] -# prefix = "CAPI_" -item_types = [] -renaming_overrides_prefixing = false - -[export.rename] - -[export.body] - -[export.mangle] - -[fn] -rename_args = "None" -# must_use = "MUST_USE_FUNC" -# deprecated = "DEPRECATED_FUNC" -# deprecated_with_note = "DEPRECATED_FUNC_WITH_NOTE" -# no_return = "NO_RETURN" -# prefix = "START_FUNC" -# postfix = "END_FUNC" -args = "auto" -sort_by = "Name" - -[struct] -rename_fields = "None" -# must_use = "MUST_USE_STRUCT" -# deprecated = "DEPRECATED_STRUCT" -# deprecated_with_note = "DEPRECATED_STRUCT_WITH_NOTE" -derive_constructor = false -derive_eq = false -derive_neq = false -derive_lt = false -derive_lte = false -derive_gt = false -derive_gte = false - -[enum] -rename_variants = "None" -# must_use = "MUST_USE_ENUM" -# deprecated = "DEPRECATED_ENUM" -# deprecated_with_note = "DEPRECATED_ENUM_WITH_NOTE" -add_sentinel = false -prefix_with_name = false -derive_helper_methods = false -derive_const_casts = false -derive_mut_casts = false -# cast_assert_name = "ASSERT" -derive_tagged_enum_destructor = false -derive_tagged_enum_copy_constructor = false -enum_class = true -private_default_tagged_enum_constructor = false - -[const] -allow_static_const = true -allow_constexpr = false -sort_by = "Name" - -[macro_expansion] -bitflags = false - -############## Options for How Your Rust library Should Be Parsed ############## - -[parse] -parse_deps = false -# include = [] -exclude = [] -clean = false -extra_bindings = [] +header = "// Copyright (c) 2024 PostgresML Team" +include_guard = "PGML_H" +autogen_warning = "/* Warning, this file is autogenerated by cbindgen. Don't modify this manually. */" [parse.expand] crates = ["pgml"] -all_features = false -default_features = true features = ["c"] From 8fe774b2c14825ea8b02c97504e94497046df87d Mon Sep 17 00:00:00 2001 From: Kevin Zimmerman <4733573+kczimm@users.noreply.github.com> Date: Fri, 24 May 2024 11:27:17 -0500 Subject: [PATCH 16/19] use typedefs in generated header --- pgml-sdks/pgml/c/cbindgen.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pgml-sdks/pgml/c/cbindgen.toml b/pgml-sdks/pgml/c/cbindgen.toml index 07d509008..bc8424a26 100644 --- a/pgml-sdks/pgml/c/cbindgen.toml +++ b/pgml-sdks/pgml/c/cbindgen.toml @@ -2,6 +2,7 @@ language = "C" header = "// Copyright (c) 2024 PostgresML Team" include_guard = "PGML_H" autogen_warning = "/* Warning, this file is autogenerated by cbindgen. Don't modify this manually. */" +style = "type" [parse.expand] crates = ["pgml"] From bfe5e5ee65ef40ba56ce0b5b1cc3e410a3a786a7 Mon Sep 17 00:00:00 2001 From: Kevin Zimmerman <4733573+kczimm@users.noreply.github.com> Date: Fri, 24 May 2024 13:58:50 -0500 Subject: [PATCH 17/19] remove ignored files and some commented code --- pgml-sdks/pgml/.gitignore | 2 -- pgml-sdks/pgml/src/collection.rs | 29 ----------------------------- 2 files changed, 31 deletions(-) diff --git a/pgml-sdks/pgml/.gitignore b/pgml-sdks/pgml/.gitignore index e82a5d1fb..a20f70eac 100644 --- a/pgml-sdks/pgml/.gitignore +++ b/pgml-sdks/pgml/.gitignore @@ -169,6 +169,4 @@ scratch.sql scratch.py # Some SDK specific things -expanded.rs -test pgml.h diff --git a/pgml-sdks/pgml/src/collection.rs b/pgml-sdks/pgml/src/collection.rs index ba24420ab..676fe6f0c 100644 --- a/pgml-sdks/pgml/src/collection.rs +++ b/pgml-sdks/pgml/src/collection.rs @@ -102,35 +102,6 @@ pub(crate) struct CollectionDatabaseData { pub project_info: ProjectInfo, } -// #[repr(C)] -// pub struct CollectionC { -// pub collection: *mut Collection, -// } - -// #[no_mangle] -// pub unsafe extern "C" fn new_collection(name: *const std::ffi::c_char) -> *mut CollectionC { -// let name = std::ffi::CStr::from_ptr(name).to_str().unwrap(); -// println!("Nice one Silas: {}", name); -// let collection = Box::into_raw(Box::new(Collection::new(name, None).unwrap())); -// Box::into_raw(Box::new(CollectionC { collection })) -// } - -// #[no_mangle] -// pub unsafe extern "C" fn free_collection(collection: *mut CollectionC) { -// if collection.is_null() { -// return; -// } -// drop(Box::from_raw(collection)); -// } - -// #[no_mangle] -// pub unsafe extern "C" fn test_collection(collection: *mut CollectionC) { -// let collection: *mut Collection = (*collection).collection; -// let collection: Collection = (*collection).clone(); -// println!("Nice one Silas x two: {}", collection.name); -// println!("test"); -// } - /// A collection of documents #[derive(alias, Debug, Clone)] pub struct Collection { From 561f8fb7533222386f2885661279b0e2d8581088 Mon Sep 17 00:00:00 2001 From: Kevin Zimmerman <4733573+kczimm@users.noreply.github.com> Date: Mon, 3 Jun 2024 14:32:05 -0500 Subject: [PATCH 18/19] remove commented code --- .../rust-bridge/rust-bridge-traits/src/c.rs | 72 +------------------ 1 file changed, 1 insertion(+), 71 deletions(-) diff --git a/pgml-sdks/rust-bridge/rust-bridge-traits/src/c.rs b/pgml-sdks/rust-bridge/rust-bridge-traits/src/c.rs index 76cc80ee1..c06ac59d8 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-traits/src/c.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-traits/src/c.rs @@ -83,84 +83,14 @@ macro_rules! gen_custom_into { self } } - }; // (($($T1:ident),+), ($($T2:ident),+), ($($C:tt),+)) => { - // impl<$($T1, $T2: CustomInto<$T1>),+> CustomInto<($($T1),+,)> for ($($T2),+,) { - // fn custom_into(self) -> ($($T1),+,) { - // ($(self.$C.custom_into()),+,) - // } - // } - // } + }; } gen_custom_into!(()); gen_custom_into!(bool); -// impl> CustomInto> for Option { -// fn custom_into(self) -> Option { -// self.map(|s| s.custom_into()) -// } -// } - unsafe impl> CustomInto> for Vec { unsafe fn custom_into(self) -> Vec { self.into_iter().map(|x| x.custom_into()).collect() } } - -// impl, T2: CustomInto> -// CustomInto> for HashMap -// { -// fn custom_into(self) -> HashMap { -// self.into_iter() -// .map(|(k, v)| (k.custom_into(), v.custom_into())) -// .collect() -// } -// } - -// impl CustomInto<&'static str> for &str { -// fn custom_into(self) -> &'static str { -// // This is how we get around the liftime checker -// unsafe { -// let ptr = self as *const str; -// let ptr = ptr as *mut str; -// let boxed = Box::from_raw(ptr); -// Box::leak(boxed) -// } -// } -// } - -// gen_custom_into!((T1), (TT2), (0)); -// gen_custom_into!((T1, T2), (TT1, TT2), (0, 1)); -// gen_custom_into!((T1, T2, T3), (TT1, TT2, TT3), (0, 1, 2)); -// gen_custom_into!((T1, T2, T3, T4), (TT1, TT2, TT3, TT4), (0, 1, 2, 3)); -// gen_custom_into!( -// (T1, T2, T3, T4, T5), -// (TT1, TT2, TT3, TT4, TT5), -// (0, 1, 2, 3, 4) -// ); -// gen_custom_into!( -// (T1, T2, T3, T4, T5, T6), -// (TT1, TT2, TT3, TT4, TT5, TT6), -// (0, 1, 2, 3, 4, 5) -// ); - -// // There are some restrictions I cannot figure out around conflicting trait -// // implimentations so this is my solution for now -// gen_custom_into!(String); - -// gen_custom_into!(()); - -// gen_custom_into!(bool); - -// gen_custom_into!(i8); -// gen_custom_into!(i16); -// gen_custom_into!(i32); -// gen_custom_into!(i64); - -// gen_custom_into!(u8); -// gen_custom_into!(u16); -// gen_custom_into!(u32); -// gen_custom_into!(u64); - -// gen_custom_into!(f32); -// gen_custom_into!(f64); From 1dff030af2c107ffae3d300d5a132d7f27defc54 Mon Sep 17 00:00:00 2001 From: Kevin Zimmerman <4733573+kczimm@users.noreply.github.com> Date: Mon, 3 Jun 2024 14:39:04 -0500 Subject: [PATCH 19/19] remove comment --- pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs index 642d67238..3f83b66ad 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs @@ -238,14 +238,6 @@ fn get_method_arguments( ) } -// Need: -// - go function arguments -// - go function argument prep for calling c function -// - go conversion from c returned value - For custom types this is always a wrapper for everything else this is a primitve type -// - c function arguments -// - c function arguments prep for calling rust function -// - arguments to call rust function with -// - c conversion from rust returned value - This is done with the into trait fn get_c_types( argument_name: &str, ty: &SupportedType, pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy