
From 24c8f25881336b161315a60922426f95a0115cc4 Mon Sep 17 00:00:00 2001
From: SilasMarvin <19626586+SilasMarvin@users.noreply.github.com>
Date: Wed, 13 Mar 2024 15:04:30 -0700
Subject: [PATCH] Added some rust documentation

---
 pgml-sdks/pgml/src/builtins.rs             |   2 +-
 pgml-sdks/pgml/src/collection.rs           | 389 +++++++++++++++++++--
 pgml-sdks/pgml/src/lib.rs                  |   2 +-
 pgml-sdks/pgml/src/model.rs                |  21 +-
 pgml-sdks/pgml/src/open_source_ai.rs       |  19 +
 pgml-sdks/pgml/src/pipeline.rs             |  14 +-
 pgml-sdks/pgml/src/splitter.rs             |   4 +-
 pgml-sdks/pgml/src/transformer_pipeline.rs |  15 +
 pgml-sdks/pgml/src/types.rs                |   7 +-
 9 files changed, 415 insertions(+), 58 deletions(-)

diff --git a/pgml-sdks/pgml/src/builtins.rs b/pgml-sdks/pgml/src/builtins.rs
index db023b951..652bf0b8c 100644
--- a/pgml-sdks/pgml/src/builtins.rs
+++ b/pgml-sdks/pgml/src/builtins.rs
@@ -5,7 +5,7 @@ use tracing::instrument;
 /// Provides access to builtin database methods
 #[derive(alias, Debug, Clone)]
 pub struct Builtins {
-    pub database_url: Option<String>,
+    database_url: Option<String>,
 }

 use crate::{get_or_initialize_pool, query_runner::QueryRunner, types::Json};
diff --git a/pgml-sdks/pgml/src/collection.rs b/pgml-sdks/pgml/src/collection.rs
index a343920b1..69dd8574a 100644
--- a/pgml-sdks/pgml/src/collection.rs
+++ b/pgml-sdks/pgml/src/collection.rs
@@ -102,12 +102,10 @@ pub(crate) struct CollectionDatabaseData {
 /// A collection of documents
 #[derive(alias, Debug, Clone)]
 pub struct Collection {
-    pub name: String,
-    pub database_url: Option<String>,
-    pub pipelines_table_name: String,
-    pub documents_table_name: String,
-    pub chunks_table_name: String,
-    pub documents_tsvectors_table_name: String,
+    pub(crate) name: String,
+    pub(crate) database_url: Option<String>,
+    pub(crate) pipelines_table_name: String,
+    pub(crate) documents_table_name: String,
     pub(crate) database_data: Option<CollectionDatabaseData>,
 }

@@ -137,16 +135,21 @@ impl Collection {
     /// Creates a new [Collection]
     ///
     /// # Arguments
-    ///
     /// * `name` - The name of the collection.
     /// * `database_url` - An optional database_url. If passed, this url will be used instead of
-    /// the `DATABASE_URL` environment variable.
+    /// the `PGML_DATABASE_URL` environment variable.
     ///
-    /// # Example
+    /// # Errors
+    /// * If the `name` is not composed of alphanumeric characters, whitespace, or '-' and '_'
     ///
+    /// # Example
     /// ```
     /// use pgml::Collection;
-    /// let collection = Collection::new("my_collection", None);
+    /// use anyhow::Result;
+    /// async fn doc() -> Result<()> {
+    ///     let mut collection = Collection::new("my_collection", None)?;
+    ///     Ok(())
+    /// }
     /// ```
     pub fn new(name: &str, database_url: Option<String>) -> anyhow::Result<Self> {
         if !name
@@ -157,19 +160,12 @@ impl Collection {
                 "Name must only consist of letters, numebers, white space, and '-' or '_'"
             )
         }
-        let (
-            pipelines_table_name,
-            documents_table_name,
-            chunks_table_name,
-            documents_tsvectors_table_name,
-        ) = Self::generate_table_names(name);
+        let (pipelines_table_name, documents_table_name) = Self::generate_table_names(name);
         Ok(Self {
             name: name.to_string(),
             database_url,
             pipelines_table_name,
             documents_table_name,
-            chunks_table_name,
-            documents_tsvectors_table_name,
             database_data: None,
         })
     }
@@ -261,6 +257,26 @@ impl Collection {
     }

     /// Adds a new [Pipeline] to the [Collection]
+    ///
+    /// # Arguments
+    /// * `pipeline` - The [Pipeline] to add to the [Collection]
+    ///
+    /// # Errors
+    /// * If the [Pipeline] does not have schema
+    ///
+    /// # Example
+    /// ```
+    /// use pgml::Collection;
+    /// use pgml::Pipeline;
+    /// use anyhow::Result;
+    /// use serde_json::json;
+    /// async fn doc() -> Result<()> {
+    ///     let mut collection = Collection::new("my_collection", None)?;
+    ///     let mut pipeline = Pipeline::new("my_pipeline", Some(json!({}).into()))?;
+    ///     collection.add_pipeline(&mut pipeline).await?;
+    ///     Ok(())
+    /// }
+    /// ```
     #[instrument(skip(self))]
     pub async fn add_pipeline(&mut self, pipeline: &mut Pipeline) -> anyhow::Result<()> {
         // The flow for this function:
@@ -305,6 +321,23 @@ impl Collection {
     }

     /// Removes a [Pipeline] from the [Collection]
+    ///
+    /// # Arguments
+    /// * `pipeline` - The [Pipeline] to remove from the [Collection]
+    ///
+    /// # Example
+    /// ```
+    /// use pgml::Collection;
+    /// use pgml::Pipeline;
+    /// use anyhow::Result;
+    /// use serde_json::json;
+    /// async fn doc() -> Result<()> {
+    ///     let mut collection = Collection::new("my_collection", None)?;
+    ///     let mut pipeline = Pipeline::new("my_pipeline", None)?;
+    ///     collection.remove_pipeline(&mut pipeline).await?;
+    ///     Ok(())
+    /// }
+    /// ```
     #[instrument(skip(self))]
     pub async fn remove_pipeline(&mut self, pipeline: &Pipeline) -> anyhow::Result<()> {
         // The flow for this function:
@@ -334,6 +367,26 @@ impl Collection {
     }

     /// Enables a [Pipeline] on the [Collection]
+    ///
+    /// # Arguments
+    /// * `pipeline` - The [Pipeline] to enable
+    ///
+    /// # Errors
+    /// * If the pipeline has not already been added to the [Collection]
+    ///
+    /// # Example
+    /// ```
+    /// use pgml::Collection;
+    /// use pgml::Pipeline;
+    /// use anyhow::Result;
+    /// use serde_json::json;
+    /// async fn doc() -> Result<()> {
+    ///     let mut collection = Collection::new("my_collection", None)?;
+    ///     let mut pipeline = Pipeline::new("my_pipeline", None)?;
+    ///     collection.enable_pipeline(&mut pipeline).await?;
+    ///     Ok(())
+    /// }
+    /// ```
     #[instrument(skip(self))]
     pub async fn enable_pipeline(&mut self, pipeline: &mut Pipeline) -> anyhow::Result<()> {
         // The flow for this function:
@@ -356,6 +409,26 @@ impl Collection {
     }

     /// Disables a [Pipeline] on the [Collection]
+    ///
+    /// # Arguments
+    /// * `pipeline` - The [Pipeline] to remove
+    ///
+    /// # Errors
+    /// * If the pipeline has not already been added to the [Collection]
+    ///
+    /// # Example
+    /// ```
+    /// use pgml::Collection;
+    /// use pgml::Pipeline;
+    /// use anyhow::Result;
+    /// use serde_json::json;
+    /// async fn doc() -> Result<()> {
+    ///     let mut collection = Collection::new("my_collection", None)?;
+    ///     let mut pipeline = Pipeline::new("my_pipeline", None)?;
+    ///     collection.disable_pipeline(&pipeline).await?;
+    ///     Ok(())
+    /// }
+    /// ```
     #[instrument(skip(self))]
     pub async fn disable_pipeline(&self, pipeline: &Pipeline) -> anyhow::Result<()> {
         // The flow for this function:
@@ -390,7 +463,23 @@ impl Collection {
         Ok(())
     }

-    /// Upserts documents into the database
+    /// Upserts documents into [Collection]
+    ///
+    /// # Arguments
+    /// * `documents` - A vector of [Json] documents to upsert
+    /// * `args` - A [Json] object containing arguments for the upsert
+    ///
+    /// # Example
+    /// ```
+    /// use pgml::Collection;
+    /// use anyhow::Result;
+    /// use serde_json::json;
+    /// async fn doc() -> Result<()> {
+    ///     let mut collection = Collection::new("my_collection", None)?;
+    ///     collection.upsert_documents(vec![json!({"id": "1", "name": "one"}).into()], None).await?;
+    ///     Ok(())
+    /// }
+    /// ```
     #[instrument(skip(self, documents))]
     pub async fn upsert_documents(
         &mut self,
@@ -558,6 +647,31 @@ impl Collection {
     }

     /// Gets the documents on a [Collection]
+    ///
+    /// # Arguments
+    ///
+    /// * `args` - A JSON object containing the following keys:
+    ///   * `limit` - The maximum number of documents to return. Defaults to 1000.
+    ///   * `order_by` - A JSON array of objects that specify the order of the documents to return.
+    ///     Each object must have a `field` key with the name of the field to order by, and a `direction`
+    ///     key with the value `asc` or `desc`.
+    ///   * `last_row_id` - The id of the last document returned
+    ///   * `offset` - The number of documents to skip before returning results.
+    ///   * `filter` - A JSON object specifying the filter to apply to the documents.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use pgml::Collection;
+    /// use serde_json::json;
+    /// use anyhow::Result;
+    /// async fn run() -> anyhow::Result<()> {
+    ///     let collection = Collection::new("my_collection", None)?;
+    ///     let documents = collection.get_documents(Some(json!({
+    ///         "limit": 2,
+    ///     }).into()));
+    ///     Ok(())
+    /// }
     #[instrument(skip(self))]
     pub async fn get_documents(&self, args: Option<Json>) -> anyhow::Result<Vec<Json>> {
         let pool = get_or_initialize_pool(&self.database_url).await?;
@@ -617,6 +731,26 @@ impl Collection {
     }

     /// Deletes documents in a [Collection]
+    ///
+    /// # Arguments
+    ///
+    /// * `filter` - A JSON object specifying the filter to apply to the documents.
+    ///
+    /// # Example
+    /// ```
+    /// use pgml::Collection;
+    /// use serde_json::json;
+    /// use anyhow::Result;
+    /// async fn run() -> anyhow::Result<()> {
+    ///     let collection = Collection::new("my_collection", None)?;
+    ///     collection.delete_documents(json!({
+    ///         "id": {
+    ///             "$eq": 1
+    ///         }
+    ///     }).into());
+    ///     Ok(())
+    /// }
+    /// ```
     #[instrument(skip(self))]
     pub async fn delete_documents(&self, filter: Json) -> anyhow::Result<()> {
         let pool = get_or_initialize_pool(&self.database_url).await?;
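Complementing the doc examples in the hunks above, here is a minimal, hedged sketch of a full document round trip with the methods documented so far (`upsert_documents`, `get_documents`, `delete_documents`). It assumes a PostgresML database reachable through `PGML_DATABASE_URL`; the collection name and document fields are placeholders.

```rust
use anyhow::Result;
use pgml::Collection;
use serde_json::json;

// Placeholder collection name and documents; requires a PostgresML database
// reachable through PGML_DATABASE_URL (or an explicit database_url).
async fn document_round_trip() -> Result<()> {
    let mut collection = Collection::new("doc_demo", None)?;

    // Upsert two documents keyed by `id`.
    collection
        .upsert_documents(
            vec![
                json!({"id": "1", "title": "First document"}).into(),
                json!({"id": "2", "title": "Second document"}).into(),
            ],
            None,
        )
        .await?;

    // Fetch up to two documents back.
    let documents = collection
        .get_documents(Some(json!({"limit": 2}).into()))
        .await?;
    println!("fetched {} documents", documents.len());

    // Delete by filter.
    collection
        .delete_documents(json!({"id": {"$eq": "1"}}).into())
        .await?;
    Ok(())
}
```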
@@ -633,6 +767,34 @@ impl Collection {
     }

     #[instrument(skip(self))]
+    /// Performs search over the documents in a [Collection]
+    ///
+    /// # Arguments
+    ///
+    /// * `query` - A JSON object specifying the query to perform.
+    /// * `pipeline` - The [Pipeline] to use for the search.
+    ///
+    /// # Example
+    /// ```
+    /// use pgml::Collection;
+    /// use pgml::Pipeline;
+    /// use serde_json::json;
+    /// use anyhow::Result;
+    /// async fn run() -> anyhow::Result<()> {
+    ///     let mut collection = Collection::new("my_collection", None)?;
+    ///     let mut pipeline = Pipeline::new("my_pipeline", None)?;
+    ///     let results = collection.search(json!({
+    ///         "query": {
+    ///             "semantic_search": {
+    ///                 "title": {
+    ///                     "query": "This is a an example query string",
+    ///                 },
+    ///             }
+    ///         }
+    ///     }).into(), &mut pipeline).await?;
+    ///     Ok(())
+    /// }
+    /// ```
     pub async fn search(&mut self, query: Json, pipeline: &mut Pipeline) -> anyhow::Result<Json> {
         let pool = get_or_initialize_pool(&self.database_url).await?;
         let (built_query, values) = build_search_query(self, query.clone(), pipeline).await?;
@@ -676,6 +838,7 @@ impl Collection {
     }

     #[instrument(skip(self))]
+    /// Same as search but the [Collection] is not mutable. This will not work with [Pipeline]s that use remote embeddings
     pub async fn search_local(&self, query: Json, pipeline: &Pipeline) -> anyhow::Result<Json> {
         let pool = get_or_initialize_pool(&self.database_url).await?;
         let (built_query, values) = build_search_query(self, query.clone(), pipeline).await?;
@@ -689,6 +852,29 @@ impl Collection {
         Ok(results)
     }

+    /// Adds a search event to the database
+    ///
+    /// # Arguments
+    ///
+    /// * `search_id` - The id of the search
+    /// * `search_result` - The index of the search result
+    /// * `event` - The event to add
+    /// * `pipeline` - The [Pipeline] used for the search
+    ///
+    /// # Example
+    /// ```
+    /// use pgml::Collection;
+    /// use pgml::Pipeline;
+    /// use serde_json::json;
+    /// use anyhow::Result;
+    /// async fn run() -> anyhow::Result<()> {
+    ///     let mut collection = Collection::new("my_collection", None)?;
+    ///     let mut pipeline = Pipeline::new("my_pipeline", None)?;
+    ///     collection.add_search_event(1, 1, json!({
+    ///         "event": "click",
+    ///     }).into(), &mut pipeline).await?;
+    ///     Ok(())
+    /// }
     #[instrument(skip(self))]
     pub async fn add_search_event(
         &self,
@@ -723,6 +909,31 @@ impl Collection {
     }

     /// Performs vector search on the [Collection]
+    ///
+    /// # Arguments
+    /// * `query` - The query to search for
+    /// * `pipeline` - The [Pipeline] to use for the search
+    ///
+    /// # Example
+    /// ```
+    /// use pgml::Collection;
+    /// use pgml::Pipeline;
+    /// use serde_json::json;
+    /// use anyhow::Result;
+    /// async fn run() -> anyhow::Result<()> {
+    ///     let mut collection = Collection::new("my_collection", None)?;
+    ///     let mut pipeline = Pipeline::new("my_pipeline", None)?;
+    ///     let results = collection.vector_search(json!({
+    ///         "query": {
+    ///             "fields": {
+    ///                 "title": {
+    ///                     "query": "This is an example query string"
+    ///                 }
+    ///             }
+    ///         }
+    ///     }).into(), &mut pipeline).await?;
+    ///     Ok(())
+    /// }
     #[instrument(skip(self))]
     #[allow(clippy::type_complexity)]
     pub async fn vector_search(
@@ -784,6 +995,20 @@ impl Collection {
         }
     }

+    /// Archives a [Collection]
+    /// This will free up the name to be reused. It does not delete it.
+    ///
+    /// # Example
+    /// ```
+    /// use pgml::Collection;
+    /// use pgml::Pipeline;
+    /// use serde_json::json;
+    /// use anyhow::Result;
+    /// async fn run() -> anyhow::Result<()> {
+    ///     let mut collection = Collection::new("my_collection", None)?;
+    ///     collection.archive().await?;
+    ///     Ok(())
+    /// }
     #[instrument(skip(self))]
     pub async fn archive(&mut self) -> anyhow::Result<()> {
         let pool = get_or_initialize_pool(&self.database_url).await?;
@@ -822,12 +1047,26 @@ impl Collection {
         Ok(())
     }

+    /// A legacy query builder.
+    #[deprecated(since = "1.0.0", note = "please use `vector_search` instead")]
     #[instrument(skip(self))]
     pub fn query(&self) -> QueryBuilder {
         QueryBuilder::new(self.clone())
     }

     /// Gets all pipelines for the [Collection]
+    ///
+    /// # Example
+    /// ```
+    /// use pgml::Collection;
+    /// use pgml::Pipeline;
+    /// use serde_json::json;
+    /// use anyhow::Result;
+    /// async fn run() -> anyhow::Result<()> {
+    ///     let mut collection = Collection::new("my_collection", None)?;
+    ///     let pipelines = collection.get_pipelines().await?;
+    ///     Ok(())
+    /// }
     #[instrument(skip(self))]
     pub async fn get_pipelines(&mut self) -> anyhow::Result<Vec<Pipeline>> {
         self.verify_in_database(false).await?;
@@ -842,6 +1081,21 @@ impl Collection {
     }

     /// Gets a [Pipeline] by name
+    ///
+    /// # Arguments
+    /// * `name` - The name of the [Pipeline]
+    ///
+    /// # Example
+    /// ```
+    /// use pgml::Collection;
+    /// use pgml::Pipeline;
+    /// use serde_json::json;
+    /// use anyhow::Result;
+    /// async fn run() -> anyhow::Result<()> {
+    ///     let mut collection = Collection::new("my_collection", None)?;
+    ///     let pipeline = collection.get_pipeline("my_pipeline").await?;
+    ///     Ok(())
+    /// }
     #[instrument(skip(self))]
     pub async fn get_pipeline(&mut self, name: &str) -> anyhow::Result<Pipeline> {
         self.verify_in_database(false).await?;
@@ -857,6 +1111,18 @@ impl Collection {
     }

     /// Check if the [Collection] exists in the database
+    ///
+    /// # Example
+    /// ```
+    /// use pgml::Collection;
+    /// use pgml::Pipeline;
+    /// use serde_json::json;
+    /// use anyhow::Result;
+    /// async fn run() -> anyhow::Result<()> {
+    ///     let mut collection = Collection::new("my_collection", None)?;
+    ///     let exists = collection.exists().await?;
+    ///     Ok(())
+    /// }
     #[instrument(skip(self))]
     pub async fn exists(&self) -> anyhow::Result<bool> {
         let pool = get_or_initialize_pool(&self.database_url).await?;
@@ -869,6 +1135,29 @@ impl Collection {
         Ok(collection.is_some())
     }

+    /// Upsert all files in a directory that match the file_types
+    ///
+    /// # Arguments
+    /// * `path` - The path to the directory to upsert
+    /// * `args` - A [Json](serde_json::Value) object with the following keys:
+    ///   * `file_types` - An array of file extensions to match. E.G. ['md', 'txt']
+    ///   * `file_batch_size` - The number of files to upsert at a time. Defaults to 10.
+    ///   * `follow_links` - Whether to follow symlinks. Defaults to false.
+    ///   * `ignore_paths` - An array of regexes to ignore. E.G. ['.*ignore.*']
+    ///
+    /// # Example
+    /// ```
+    /// use pgml::Collection;
+    /// use pgml::Pipeline;
+    /// use serde_json::json;
+    /// use anyhow::Result;
+    /// async fn run() -> anyhow::Result<()> {
+    ///     let mut collection = Collection::new("my_collection", None)?;
+    ///     collection.upsert_directory("/path/to/my/files", json!({
+    ///         "file_types": ["md", "txt"]
+    ///     }).into()).await?;
+    ///     Ok(())
+    /// }
     #[instrument(skip(self))]
     pub async fn upsert_directory(&mut self, path: &str, args: Json) -> anyhow::Result<()> {
         self.verify_in_database(false).await?;
@@ -944,6 +1233,22 @@ impl Collection {
         Ok(())
     }

+    /// Gets the sync status of a [Pipeline]
+    ///
+    /// # Arguments
+    /// * `pipeline` - The [Pipeline] to get the sync status of
+    ///
+    /// # Example
+    /// ```
+    /// use pgml::Collection;
+    /// use pgml::Pipeline;
+    /// use anyhow::Result;
+    /// async fn run() -> anyhow::Result<()> {
+    ///     let mut collection = Collection::new("my_collection", None)?;
+    ///     let mut pipeline = Pipeline::new("my_pipeline", None)?;
+    ///     let status = collection.get_pipeline_status(&mut pipeline).await?;
+    ///     Ok(())
+    /// }
     #[instrument(skip(self))]
     pub async fn get_pipeline_status(&mut self, pipeline: &mut Pipeline) -> anyhow::Result<Json> {
         self.verify_in_database(false).await?;
@@ -952,6 +1257,20 @@ impl Collection {
         pipeline.get_status(project_info, &pool).await
     }

+    #[instrument(skip(self))]
+    /// Generates a PlantUML ER Diagram for a [Collection] and [Pipeline] tables
+    ///
+    /// # Example
+    /// ```
+    /// use pgml::Collection;
+    /// use pgml::Pipeline;
+    /// use anyhow::Result;
+    /// async fn run() -> anyhow::Result<()> {
+    ///     let mut collection = Collection::new("my_collection", None)?;
+    ///     let mut pipeline = Pipeline::new("my_pipeline", None)?;
+    ///     let er_diagram = collection.generate_er_diagram(&mut pipeline).await?;
+    ///     Ok(())
+    /// }
     #[instrument(skip(self))]
     pub async fn generate_er_diagram(&mut self, pipeline: &mut Pipeline) -> anyhow::Result<String> {
         self.verify_in_database(false).await?;
@@ -1074,6 +1393,21 @@ entity "{schema}.{key}_tsvectors" as {nice_name_key}_tsvectors {{
         Ok(uml_entites)
     }

+    /// Upserts a file into a [Collection]
+    ///
+    /// # Arguments
+    /// * `path` - The path to the file to upsert
+    ///
+    /// # Example
+    /// ```
+    /// use pgml::Collection;
+    /// use anyhow::Result;
+    /// async fn run() -> anyhow::Result<()> {
+    ///     let mut collection = Collection::new("my_collection", None)?;
+    ///     collection.upsert_file("my_file.txt").await?;
+    ///     Ok(())
+    /// }
+    #[instrument(skip(self))]
     pub async fn upsert_file(&mut self, path: &str) -> anyhow::Result<()> {
         self.verify_in_database(false).await?;
         let path = Path::new(path);
@@ -1085,16 +1419,11 @@ entity "{schema}.{key}_tsvectors" as {nice_name_key}_tsvectors {{
         self.upsert_documents(vec![document.into()], None).await
     }

-    fn generate_table_names(name: &str) -> (String, String, String, String) {
-        [
-            ".pipelines",
-            ".documents",
-            ".chunks",
-            ".documents_tsvectors",
-        ]
-        .into_iter()
-        .map(|s| format!("{}{}", name, s))
-        .collect_tuple()
-        .unwrap()
+    fn generate_table_names(name: &str) -> (String, String) {
+        [".pipelines", ".documents"]
+            .into_iter()
+            .map(|s| format!("{}{}", name, s))
+            .collect_tuple()
+            .unwrap()
     }
 }
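To tie the pipeline and search methods above together, here is a hedged end-to-end sketch: it registers a `Pipeline`, upserts a document, and runs `vector_search` with the query shape shown in the doc comment. The schema keys (`semantic_search`, the model name) are assumptions based on the PostgresML SDK docs rather than anything this hunk guarantees.

```rust
use anyhow::Result;
use pgml::{Collection, Pipeline};
use serde_json::json;

// Sketch only: the schema shape ({"<field>": {"semantic_search": {"model": ...}}})
// is an assumption; adapt the field names to your own documents.
async fn semantic_search_demo() -> Result<()> {
    let mut collection = Collection::new("search_demo", None)?;
    let mut pipeline = Pipeline::new(
        "demo_pipeline",
        Some(json!({
            "title": {
                "semantic_search": {"model": "intfloat/e5-small"}
            }
        }).into()),
    )?;
    collection.add_pipeline(&mut pipeline).await?;

    collection
        .upsert_documents(
            vec![json!({"id": "1", "title": "PostgresML brings machine learning to Postgres"}).into()],
            None,
        )
        .await?;

    // Query shape mirrors the `vector_search` doc example above.
    let results = collection
        .vector_search(
            json!({
                "query": {
                    "fields": {
                        "title": {"query": "machine learning inside the database"}
                    }
                }
            }).into(),
            &mut pipeline,
        )
        .await?;
    println!("{results:?}");
    Ok(())
}
```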
diff --git a/pgml-sdks/pgml/src/lib.rs b/pgml-sdks/pgml/src/lib.rs
index 50665ed93..34b02ce53 100644
--- a/pgml-sdks/pgml/src/lib.rs
+++ b/pgml-sdks/pgml/src/lib.rs
@@ -20,7 +20,7 @@ mod filter_builder;
 mod languages;
 pub mod migrations;
 mod model;
-pub mod models;
+mod models;
 mod open_source_ai;
 mod order_by_builder;
 mod pipeline;
diff --git a/pgml-sdks/pgml/src/model.rs b/pgml-sdks/pgml/src/model.rs
index ff320c0de..432654298 100644
--- a/pgml-sdks/pgml/src/model.rs
+++ b/pgml-sdks/pgml/src/model.rs
@@ -54,10 +54,10 @@ pub(crate) struct ModelDatabaseData {
 /// A model used for embedding, inference, etc...
 #[derive(alias, Debug, Clone)]
 pub struct Model {
-    pub name: String,
-    pub runtime: ModelRuntime,
-    pub parameters: Json,
-    pub(crate) database_data: Option<ModelDatabaseData>,
+    pub(crate) name: String,
+    pub(crate) runtime: ModelRuntime,
+    pub(crate) parameters: Json,
+    database_data: Option<ModelDatabaseData>,
 }

 impl Default for Model {
@@ -69,19 +69,6 @@ impl Default for Model {
 #[alias_methods(new, transform)]
 impl Model {
     /// Creates a new [Model]
-    ///
-    /// # Arguments
-    ///
-    /// * `name` - The name of the model.
-    /// * `source` - The source of the model. Defaults to `pgml`, but can be set to providers like `openai`.
-    /// * `parameters` - The parameters to the model. Defaults to None
-    ///
-    /// # Example
-    ///
-    /// ```
-    /// use pgml::Model;
-    /// let model = Model::new(Some("intfloat/e5-small".to_string()), None, None, None);
-    /// ```
     pub fn new(name: Option<String>, source: Option<String>, parameters: Option<Json>) -> Self {
         let name = name.unwrap_or("intfloat/e5-small".to_string());
         let parameters = parameters.unwrap_or(Json(serde_json::json!({})));
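The doc example removed above passed four arguments to `Model::new`, which takes only the three shown in the remaining signature (`name`, `source`, `parameters`). Below is a corrected sketch under that signature; the parameter values are purely illustrative.

```rust
use pgml::Model;
use serde_json::json;

fn build_models() {
    // With all defaults: name falls back to "intfloat/e5-small", parameters to {}.
    let default_model = Model::new(None, None, None);

    // Explicit name plus illustrative parameters.
    let custom_model = Model::new(
        Some("intfloat/e5-small".to_string()),
        None,
        Some(json!({"device": "cpu"}).into()),
    );
    let _ = (default_model, custom_model);
}
```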
.into_iter() - .map(|s| format!("{}{}", name, s)) - .collect_tuple() - .unwrap() + fn generate_table_names(name: &str) -> (String, String) { + [".pipelines", ".documents"] + .into_iter() + .map(|s| format!("{}{}", name, s)) + .collect_tuple() + .unwrap() } } diff --git a/pgml-sdks/pgml/src/lib.rs b/pgml-sdks/pgml/src/lib.rs index 50665ed93..34b02ce53 100644 --- a/pgml-sdks/pgml/src/lib.rs +++ b/pgml-sdks/pgml/src/lib.rs @@ -20,7 +20,7 @@ mod filter_builder; mod languages; pub mod migrations; mod model; -pub mod models; +mod models; mod open_source_ai; mod order_by_builder; mod pipeline; diff --git a/pgml-sdks/pgml/src/model.rs b/pgml-sdks/pgml/src/model.rs index ff320c0de..432654298 100644 --- a/pgml-sdks/pgml/src/model.rs +++ b/pgml-sdks/pgml/src/model.rs @@ -54,10 +54,10 @@ pub(crate) struct ModelDatabaseData { //github.com/ A model used for embedding, inference, etc... #[derive(alias, Debug, Clone)] pub struct Model { - pub name: String, - pub runtime: ModelRuntime, - pub parameters: Json, - pub(crate) database_data: Option, + pub(crate) name: String, + pub(crate) runtime: ModelRuntime, + pub(crate) parameters: Json, + database_data: Option, } impl Default for Model { @@ -69,19 +69,6 @@ impl Default for Model { #[alias_methods(new, transform)] impl Model { //github.com/ Creates a new [Model] - //github.com/ - //github.com/ # Arguments - //github.com/ - //github.com/ * `name` - The name of the model. - //github.com/ * `source` - The source of the model. Defaults to `pgml`, but can be set to providers like `openai`. - //github.com/ * `parameters` - The parameters to the model. Defaults to None - //github.com/ - //github.com/ # Example - //github.com/ - //github.com/ ``` - //github.com/ use pgml::Model; - //github.com/ let model = Model::new(Some("intfloat/e5-small".to_string()), None, None, None); - //github.com/ ``` pub fn new(name: Option, source: Option, parameters: Option) -> Self { let name = name.unwrap_or("intfloat/e5-small".to_string()); let parameters = parameters.unwrap_or(Json(serde_json::json!({}))); diff --git a/pgml-sdks/pgml/src/open_source_ai.rs b/pgml-sdks/pgml/src/open_source_ai.rs index d4c02215e..e21397a31 100644 --- a/pgml-sdks/pgml/src/open_source_ai.rs +++ b/pgml-sdks/pgml/src/open_source_ai.rs @@ -13,6 +13,7 @@ use crate::{ #[cfg(feature = "python")] use crate::types::{GeneralJsonAsyncIteratorPython, GeneralJsonIteratorPython, JsonPython}; +//github.com/ A drop in replacement for OpenAI #[derive(alias, Debug, Clone)] pub struct OpenSourceAI { database_url: Option, @@ -169,6 +170,20 @@ impl Iterator for AsyncToSyncJsonIterator { chat_completions_create_stream_async )] impl OpenSourceAI { + //github.com/ Creates a new [OpenSourceAI] + //github.com/ + //github.com/ # Arguments + //github.com/ + //github.com/ * `database_url`: The database url to use. If `None`, `PGML_DATABASE_URL` environment variable will be used. 
diff --git a/pgml-sdks/pgml/src/pipeline.rs b/pgml-sdks/pgml/src/pipeline.rs
index 6dada5159..02b059db3 100644
--- a/pgml-sdks/pgml/src/pipeline.rs
+++ b/pgml-sdks/pgml/src/pipeline.rs
@@ -175,11 +175,12 @@ pub struct PipelineDatabaseData {
     pub created_at: DateTime,
 }

+/// A pipeline that describes transformations to documents
 #[derive(alias, Debug, Clone)]
 pub struct Pipeline {
-    pub name: String,
-    pub schema: Option<Json>,
-    pub parsed_schema: Option<ParsedSchema>,
+    pub(crate) name: String,
+    pub(crate) schema: Option<Json>,
+    pub(crate) parsed_schema: Option<ParsedSchema>,
     database_data: Option<PipelineDatabaseData>,
 }

@@ -203,6 +204,11 @@ fn json_to_schema(schema: &Json) -> anyhow::Result<ParsedSchema> {

 #[alias_methods(new)]
 impl Pipeline {
+    /// Creates a [Pipeline]
+    ///
+    /// # Arguments
+    /// * `name` - The name of the pipeline
+    /// * `schema` - The schema of the pipeline. This is a JSON object where the keys are the field names and the values are the field actions.
     pub fn new(name: &str, schema: Option<Json>) -> anyhow::Result<Self> {
         let parsed_schema = schema.as_ref().map(json_to_schema).transpose()?;
         Ok(Self {
@@ -215,7 +221,7 @@ impl Pipeline {

     /// Gets the status of the [Pipeline]
     #[instrument(skip(self))]
-    pub async fn get_status(
+    pub(crate) async fn get_status(
         &mut self,
         project_info: &ProjectInfo,
         pool: &Pool<Postgres>,
diff --git a/pgml-sdks/pgml/src/splitter.rs b/pgml-sdks/pgml/src/splitter.rs
index 96b1ed9da..a0847c879 100644
--- a/pgml-sdks/pgml/src/splitter.rs
+++ b/pgml-sdks/pgml/src/splitter.rs
@@ -21,8 +21,8 @@ pub(crate) struct SplitterDatabaseData {
 /// A text splitter
 #[derive(alias, Debug, Clone)]
 pub struct Splitter {
-    pub name: String,
-    pub parameters: Json,
+    pub(crate) name: String,
+    pub(crate) parameters: Json,
     pub(crate) database_data: Option<SplitterDatabaseData>,
 }
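Here is a hedged sketch of `Pipeline::new` with the per-field schema described above ("keys are the field names and the values are the field actions"). The specific action keys (`splitter`, `semantic_search`, `full_text_search`) and their options are assumptions drawn from the PostgresML SDK docs, not from this hunk.

```rust
use anyhow::Result;
use pgml::Pipeline;
use serde_json::json;

// Field actions shown here (splitter, semantic_search, full_text_search) are
// assumptions; the hunk above only fixes the general shape of the schema.
fn build_pipeline() -> Result<Pipeline> {
    Pipeline::new(
        "body_pipeline",
        Some(json!({
            "body": {
                "splitter": {"model": "recursive_character"},
                "semantic_search": {"model": "intfloat/e5-small"},
                "full_text_search": {"configuration": "english"}
            }
        }).into()),
    )
}
```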
diff --git a/pgml-sdks/pgml/src/transformer_pipeline.rs b/pgml-sdks/pgml/src/transformer_pipeline.rs
index d20089463..43154615b 100644
--- a/pgml-sdks/pgml/src/transformer_pipeline.rs
+++ b/pgml-sdks/pgml/src/transformer_pipeline.rs
@@ -117,6 +117,13 @@ impl Stream for TransformerStream {

 #[alias_methods(new, transform, transform_stream)]
 impl TransformerPipeline {
+    /// Creates a new [TransformerPipeline]
+    ///
+    /// # Arguments
+    /// * `task` - The task to run
+    /// * `model` - The model to use
+    /// * `args` - The arguments to pass to the task
+    /// * `database_url` - The database url to use. If None, the `PGML_DATABASE_URL` environment variable will be used
     pub fn new(
         task: &str,
         model: Option<String>,
@@ -141,6 +148,11 @@ impl TransformerPipeline {
         }
     }

+    /// Calls transform
+    ///
+    /// # Arguments
+    /// * `inputs` - The inputs to the task
+    /// * `args` - The arguments to pass to the task
     #[instrument(skip(self))]
     pub async fn transform(&self, inputs: Vec<Json>, args: Option<Json>) -> anyhow::Result<Json> {
         let pool = get_or_initialize_pool(&self.database_url).await?;
@@ -176,6 +188,9 @@ impl TransformerPipeline {
         Ok(Json(results))
     }

+    /// Calls transform
+    /// The same as transformer but it returns an iterator
+    /// The `batch_size` argument can be used to control the number of results returned in each batch
     #[instrument(skip(self))]
     pub async fn transform_stream(
         &self,
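A short sketch of the constructor and `transform` documented above. The task name, model, and inputs are illustrative; any task/model combination your PostgresML instance can load should work the same way, and passing `None` for `database_url` falls back to `PGML_DATABASE_URL`.

```rust
use anyhow::Result;
use pgml::TransformerPipeline;
use serde_json::json;

async fn classify() -> Result<()> {
    // Illustrative task and model.
    let pipeline = TransformerPipeline::new(
        "text-classification",
        Some("distilbert-base-uncased-finetuned-sst-2-english".to_string()),
        None, // task args
        None, // database_url -> PGML_DATABASE_URL
    );
    let output = pipeline
        .transform(vec![json!("PostgresML makes machine learning in SQL easy").into()], None)
        .await?;
    println!("{output:?}");
    Ok(())
}
```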
diff --git a/pgml-sdks/pgml/src/types.rs b/pgml-sdks/pgml/src/types.rs
index 1a51e4f20..34d93be5c 100644
--- a/pgml-sdks/pgml/src/types.rs
+++ b/pgml-sdks/pgml/src/types.rs
@@ -6,8 +6,7 @@ use sea_query::Iden;
 use serde::{Deserialize, Serialize};
 use std::ops::{Deref, DerefMut};

-/// A wrapper around serde_json::Value
-// #[derive(sqlx::Type, sqlx::FromRow, Debug)]
+/// A wrapper around `serde_json::Value`
 #[derive(alias_manual, sqlx::Type, Debug, Clone, Deserialize, PartialEq, Eq)]
 #[sqlx(transparent)]
 pub struct Json(pub serde_json::Value);
@@ -80,7 +79,7 @@ impl TryToNumeric for serde_json::Value {
     }
 }

-/// A wrapper around sqlx::types::PrimitiveDateTime
+/// A wrapper around `sqlx::types::PrimitiveDateTime`
 #[derive(sqlx::Type, Debug, Clone)]
 #[sqlx(transparent)]
 pub struct DateTime(pub sqlx::types::time::PrimitiveDateTime);
@@ -124,6 +123,7 @@ impl IntoTableNameAndSchema for String {
     }
 }

+/// A wrapper around `std::pin::Pin<Box<dyn Stream<Item = anyhow::Result<Json>> + Send>>`
 #[derive(alias_manual)]
 pub struct GeneralJsonAsyncIterator(
     pub std::pin::Pin<Box<dyn Stream<Item = anyhow::Result<Json>> + Send>>,
 );
@@ -140,6 +140,7 @@ impl Stream for GeneralJsonAsyncIterator {
     }
 }

+/// A wrapper around `Box<dyn Iterator<Item = anyhow::Result<Json>> + Send>`
 #[derive(alias_manual)]
 pub struct GeneralJsonIterator(pub Box<dyn Iterator<Item = anyhow::Result<Json>> + Send>);
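A hedged sketch of how these wrapper types are consumed: `Json` exposes its inner `serde_json::Value` through the public tuple field, and `GeneralJsonAsyncIterator` is polled as a `Stream` of `anyhow::Result<Json>` (what the streaming chat methods hand back). The `pgml::types` import path and the `futures` crate are assumptions about the caller's setup.

```rust
use anyhow::Result;
use futures::StreamExt; // assumed available to drive the Stream impl
use pgml::types::GeneralJsonAsyncIterator; // assumed public path to the wrapper

// Drain a streaming response chunk by chunk; each item is an anyhow::Result<Json>.
async fn drain(mut stream: GeneralJsonAsyncIterator) -> Result<()> {
    while let Some(chunk) = stream.next().await {
        let chunk = chunk?;
        // Json is a thin wrapper around serde_json::Value (field 0).
        println!("{}", chunk.0);
    }
    Ok(())
}
```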







