From 24c8f25881336b161315a60922426f95a0115cc4 Mon Sep 17 00:00:00 2001
From: SilasMarvin <19626586+SilasMarvin@users.noreply.github.com>
Date: Wed, 13 Mar 2024 15:04:30 -0700
Subject: [PATCH] Added some rust documentation
---
pgml-sdks/pgml/src/builtins.rs | 2 +-
pgml-sdks/pgml/src/collection.rs | 389 +++++++++++++++++++--
pgml-sdks/pgml/src/lib.rs | 2 +-
pgml-sdks/pgml/src/model.rs | 21 +-
pgml-sdks/pgml/src/open_source_ai.rs | 19 +
pgml-sdks/pgml/src/pipeline.rs | 14 +-
pgml-sdks/pgml/src/splitter.rs | 4 +-
pgml-sdks/pgml/src/transformer_pipeline.rs | 15 +
pgml-sdks/pgml/src/types.rs | 7 +-
9 files changed, 415 insertions(+), 58 deletions(-)
diff --git a/pgml-sdks/pgml/src/builtins.rs b/pgml-sdks/pgml/src/builtins.rs
index db023b951..652bf0b8c 100644
--- a/pgml-sdks/pgml/src/builtins.rs
+++ b/pgml-sdks/pgml/src/builtins.rs
@@ -5,7 +5,7 @@ use tracing::instrument;
/// Provides access to builtin database methods
#[derive(alias, Debug, Clone)]
pub struct Builtins {
- pub database_url: Option,
+ database_url: Option,
}
use crate::{get_or_initialize_pool, query_runner::QueryRunner, types::Json};
diff --git a/pgml-sdks/pgml/src/collection.rs b/pgml-sdks/pgml/src/collection.rs
index a343920b1..69dd8574a 100644
--- a/pgml-sdks/pgml/src/collection.rs
+++ b/pgml-sdks/pgml/src/collection.rs
@@ -102,12 +102,10 @@ pub(crate) struct CollectionDatabaseData {
/// A collection of documents
#[derive(alias, Debug, Clone)]
pub struct Collection {
- pub name: String,
- pub database_url: Option,
- pub pipelines_table_name: String,
- pub documents_table_name: String,
- pub chunks_table_name: String,
- pub documents_tsvectors_table_name: String,
+ pub(crate) name: String,
+ pub(crate) database_url: Option,
+ pub(crate) pipelines_table_name: String,
+ pub(crate) documents_table_name: String,
pub(crate) database_data: Option,
}
@@ -137,16 +135,21 @@ impl Collection {
/// Creates a new [Collection]
///
/// # Arguments
- ///
/// * `name` - The name of the collection.
/// * `database_url` - An optional database_url. If passed, this url will be used instead of
- /// the `DATABASE_URL` environment variable.
+ /// the `PGML_DATABASE_URL` environment variable.
///
- /// # Example
+ /// # Errors
+ /// * If the `name` is not composed of alphanumeric characters, whitespace, or '-' and '_'
///
+ /// # Example
/// ```
/// use pgml::Collection;
- /// let collection = Collection::new("my_collection", None);
+ /// use anyhow::Result;
+ /// async fn doc() -> Result<()> {
+ /// let mut collection = Collection::new("my_collection", None)?;
+ /// Ok(())
+ /// }
/// ```
pub fn new(name: &str, database_url: Option) -> anyhow::Result {
if !name
@@ -157,19 +160,12 @@ impl Collection {
"Name must only consist of letters, numebers, white space, and '-' or '_'"
)
}
- let (
- pipelines_table_name,
- documents_table_name,
- chunks_table_name,
- documents_tsvectors_table_name,
- ) = Self::generate_table_names(name);
+ let (pipelines_table_name, documents_table_name) = Self::generate_table_names(name);
Ok(Self {
name: name.to_string(),
database_url,
pipelines_table_name,
documents_table_name,
- chunks_table_name,
- documents_tsvectors_table_name,
database_data: None,
})
}
@@ -261,6 +257,26 @@ impl Collection {
}
/// Adds a new [Pipeline] to the [Collection]
+ ///
+ /// # Arguments
+ /// * `pipeline` - The [Pipeline] to add to the [Collection]
+ ///
+ /// # Errors
+ /// * If the [Pipeline] does not have a schema
+ ///
+ /// # Example
+ /// ```
+ /// use pgml::Collection;
+ /// use pgml::Pipeline;
+ /// use anyhow::Result;
+ /// use serde_json::json;
+ /// async fn doc() -> Result<()> {
+ /// let mut collection = Collection::new("my_collection", None)?;
+ /// let mut pipeline = Pipeline::new("my_pipeline", Some(json!({}).into()))?;
+ /// collection.add_pipeline(&mut pipeline).await?;
+ /// Ok(())
+ /// }
+ /// ```
#[instrument(skip(self))]
pub async fn add_pipeline(&mut self, pipeline: &mut Pipeline) -> anyhow::Result<()> {
// The flow for this function:
@@ -305,6 +321,23 @@ impl Collection {
}
/// Removes a [Pipeline] from the [Collection]
+ ///
+ /// # Arguments
+ /// * `pipeline` - The [Pipeline] to remove from the [Collection]
+ ///
+ /// # Example
+ /// ```
+ /// use pgml::Collection;
+ /// use pgml::Pipeline;
+ /// use anyhow::Result;
+ /// use serde_json::json;
+ /// async fn doc() -> Result<()> {
+ /// let mut collection = Collection::new("my_collection", None)?;
+ /// let mut pipeline = Pipeline::new("my_pipeline", None)?;
+ /// collection.remove_pipeline(&mut pipeline).await?;
+ /// Ok(())
+ /// }
+ /// ```
#[instrument(skip(self))]
pub async fn remove_pipeline(&mut self, pipeline: &Pipeline) -> anyhow::Result<()> {
// The flow for this function:
@@ -334,6 +367,26 @@ impl Collection {
}
/// Enables a [Pipeline] on the [Collection]
+ ///
+ /// # Arguments
+ /// * `pipeline` - The [Pipeline] to enable
+ ///
+ /// # Errors
+ /// * If the pipeline has not already been added to the [Collection]
+ ///
+ /// # Example
+ /// ```
+ /// use pgml::Collection;
+ /// use pgml::Pipeline;
+ /// use anyhow::Result;
+ /// use serde_json::json;
+ /// async fn doc() -> Result<()> {
+ /// let mut collection = Collection::new("my_collection", None)?;
+ /// let mut pipeline = Pipeline::new("my_pipeline", None)?;
+ /// collection.enable_pipeline(&mut pipeline).await?;
+ /// Ok(())
+ /// }
+ /// ```
#[instrument(skip(self))]
pub async fn enable_pipeline(&mut self, pipeline: &mut Pipeline) -> anyhow::Result<()> {
// The flow for this function:
@@ -356,6 +409,26 @@ impl Collection {
}
/// Disables a [Pipeline] on the [Collection]
+ ///
+ /// # Arguments
+ /// * `pipeline` - The [Pipeline] to disable
+ ///
+ /// # Errors
+ /// * If the pipeline has not already been added to the [Collection]
+ ///
+ /// # Example
+ /// ```
+ /// use pgml::Collection;
+ /// use pgml::Pipeline;
+ /// use anyhow::Result;
+ /// use serde_json::json;
+ /// async fn doc() -> Result<()> {
+ /// let mut collection = Collection::new("my_collection", None)?;
+ /// let mut pipeline = Pipeline::new("my_pipeline", None)?;
+ /// collection.disable_pipeline(&pipeline).await?;
+ /// Ok(())
+ /// }
+ /// ```
#[instrument(skip(self))]
pub async fn disable_pipeline(&self, pipeline: &Pipeline) -> anyhow::Result<()> {
// The flow for this function:
@@ -390,7 +463,23 @@ impl Collection {
Ok(())
}
- /// Upserts documents into the database
+ /// Upserts documents into a [Collection]
+ ///
+ /// # Arguments
+ /// * `documents` - A vector of [Json] documents to upsert
+ /// * `args` - A [Json] object containing arguments for the upsert
+ ///
+ /// # Example
+ /// ```
+ /// use pgml::Collection;
+ /// use anyhow::Result;
+ /// use serde_json::json;
+ /// async fn doc() -> Result<()> {
+ /// let mut collection = Collection::new("my_collection", None)?;
+ /// collection.upsert_documents(vec![json!({"id": "1", "name": "one"}).into()], None).await?;
+ /// Ok(())
+ /// }
+ /// ```
#[instrument(skip(self, documents))]
pub async fn upsert_documents(
&mut self,
@@ -558,6 +647,31 @@ impl Collection {
}
/// Gets the documents on a [Collection]
+ ///
+ /// # Arguments
+ ///
+ /// * `args` - A JSON object containing the following keys:
+ /// * `limit` - The maximum number of documents to return. Defaults to 1000.
+ /// * `order_by` - A JSON array of objects that specify the order of the documents to return.
+ /// Each object must have a `field` key with the name of the field to order by, and a `direction`
+ /// key with the value `asc` or `desc`.
+ /// * `last_row_id` - The id of the last document returned
+ /// * `offset` - The number of documents to skip before returning results.
+ /// * `filter` - A JSON object specifying the filter to apply to the documents.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use pgml::Collection;
+ /// use serde_json::json;
+ /// use anyhow::Result;
+ /// async fn run() -> anyhow::Result<()> {
+ /// let collection = Collection::new("my_collection", None)?;
+ /// let documents = collection.get_documents(Some(json!({
+ /// "limit": 2,
+ /// }).into())).await?;
+ /// Ok(())
+ /// }
+ /// ```
#[instrument(skip(self))]
pub async fn get_documents(&self, args: Option) -> anyhow::Result> {
let pool = get_or_initialize_pool(&self.database_url).await?;
@@ -617,6 +731,26 @@ impl Collection {
}
/// Deletes documents in a [Collection]
+ ///
+ /// # Arguments
+ ///
+ /// * `filter` - A JSON object specifying the filter to apply to the documents.
+ ///
+ /// # Example
+ /// ```
+ /// use pgml::Collection;
+ /// use serde_json::json;
+ /// use anyhow::Result;
+ /// async fn run() -> anyhow::Result<()> {
+ /// let collection = Collection::new("my_collection", None)?;
+ /// collection.delete_documents(json!({
+ /// "id": {
+ /// "$eq": 1
+ /// }
+ /// }).into()).await?;
+ /// Ok(())
+ /// }
+ /// ```
#[instrument(skip(self))]
pub async fn delete_documents(&self, filter: Json) -> anyhow::Result<()> {
let pool = get_or_initialize_pool(&self.database_url).await?;
@@ -633,6 +767,34 @@ impl Collection {
}
#[instrument(skip(self))]
+ /// Performs search over the documents in a [Collection]
+ ///
+ /// # Arguments
+ ///
+ /// * `query` - A JSON object specifying the query to perform.
+ /// * `pipeline` - The [Pipeline] to use for the search.
+ ///
+ /// # Example
+ /// ```
+ /// use pgml::Collection;
+ /// use pgml::Pipeline;
+ /// use serde_json::json;
+ /// use anyhow::Result;
+ /// async fn run() -> anyhow::Result<()> {
+ /// let mut collection = Collection::new("my_collection", None)?;
+ /// let mut pipeline = Pipeline::new("my_pipeline", None)?;
+ /// let results = collection.search(json!({
+ /// "query": {
+ /// "semantic_search": {
+ /// "title": {
+ /// "query": "This is an example query string",
+ /// },
+ /// }
+ /// }
+ /// }).into(), &mut pipeline).await?;
+ /// Ok(())
+ /// }
+ /// ```
pub async fn search(&mut self, query: Json, pipeline: &mut Pipeline) -> anyhow::Result {
let pool = get_or_initialize_pool(&self.database_url).await?;
let (built_query, values) = build_search_query(self, query.clone(), pipeline).await?;
@@ -676,6 +838,7 @@ impl Collection {
}
#[instrument(skip(self))]
+ /// Same as search but the [Collection] is not mutable. This will not work with [Pipeline]s that use remote embeddings.
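+ ///
+ /// # Example
+ /// A minimal sketch mirroring the `search` example above; the query shape is assumed to be identical.
+ /// ```
+ /// use pgml::Collection;
+ /// use pgml::Pipeline;
+ /// use serde_json::json;
+ /// use anyhow::Result;
+ /// async fn run() -> anyhow::Result<()> {
+ /// let collection = Collection::new("my_collection", None)?;
+ /// let pipeline = Pipeline::new("my_pipeline", None)?;
+ /// // Reuses the query structure from the `search` example; only local embeddings are supported here
+ /// let results = collection.search_local(json!({
+ /// "query": {
+ /// "semantic_search": {
+ /// "title": {
+ /// "query": "This is an example query string",
+ /// },
+ /// }
+ /// }
+ /// }).into(), &pipeline).await?;
+ /// Ok(())
+ /// }
+ /// ```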
pub async fn search_local(&self, query: Json, pipeline: &Pipeline) -> anyhow::Result {
let pool = get_or_initialize_pool(&self.database_url).await?;
let (built_query, values) = build_search_query(self, query.clone(), pipeline).await?;
@@ -689,6 +852,29 @@ impl Collection {
Ok(results)
}
+ /// Adds a search event to the database
+ ///
+ /// # Arguments
+ ///
+ /// * `search_id` - The id of the search
+ /// * `search_result` - The index of the search result
+ /// * `event` - The event to add
+ /// * `pipeline` - The [Pipeline] used for the search
+ ///
+ /// # Example
+ /// ```
+ /// use pgml::Collection;
+ /// use pgml::Pipeline;
+ /// use serde_json::json;
+ /// use anyhow::Result;
+ /// async fn run() -> anyhow::Result<()> {
+ /// let mut collection = Collection::new("my_collection", None)?;
+ /// let mut pipeline = Pipeline::new("my_pipeline", None)?;
+ /// collection.add_search_event(1, 1, json!({
+ /// "event": "click",
+ /// }).into(), &mut pipeline).await?;
+ /// Ok(())
+ /// }
+ /// ```
#[instrument(skip(self))]
pub async fn add_search_event(
&self,
@@ -723,6 +909,31 @@ impl Collection {
}
/// Performs vector search on the [Collection]
+ ///
+ /// # Arguments
+ /// * `query` - The query to search for
+ /// * `pipeline` - The [Pipeline] to use for the search
+ ///
+ /// # Example
+ /// ```
+ /// use pgml::Collection;
+ /// use pgml::Pipeline;
+ /// use serde_json::json;
+ /// use anyhow::Result;
+ /// async fn run() -> anyhow::Result<()> {
+ /// let mut collection = Collection::new("my_collection", None)?;
+ /// let mut pipeline = Pipeline::new("my_pipeline", None)?;
+ /// let results = collection.vector_search(json!({
+ /// "query": {
+ /// "fields": {
+ /// "title": {
+ /// "query": "This is an example query string"
+ /// }
+ /// }
+ /// }
+ /// }).into(), &mut pipeline).await?;
+ /// Ok(())
+ /// }
+ /// ```
#[instrument(skip(self))]
#[allow(clippy::type_complexity)]
pub async fn vector_search(
@@ -784,6 +995,20 @@ impl Collection {
}
}
+ /// Archives a [Collection]
+ /// This will free up the name to be reused. It does not delete it.
+ ///
+ /// # Example
+ /// ```
+ /// use pgml::Collection;
+ /// use pgml::Pipeline;
+ /// use serde_json::json;
+ /// use anyhow::Result;
+ /// async fn run() -> anyhow::Result<()> {
+ /// let mut collection = Collection::new("my_collection", None)?;
+ /// collection.archive().await?;
+ /// Ok(())
+ /// }
+ /// ```
#[instrument(skip(self))]
pub async fn archive(&mut self) -> anyhow::Result<()> {
let pool = get_or_initialize_pool(&self.database_url).await?;
@@ -822,12 +1047,26 @@ impl Collection {
Ok(())
}
+ /// A legacy query builder.
+ #[deprecated(since = "1.0.0", note = "please use `vector_search` instead")]
#[instrument(skip(self))]
pub fn query(&self) -> QueryBuilder {
QueryBuilder::new(self.clone())
}
/// Gets all pipelines for the [Collection]
+ ///
+ /// # Example
+ /// ```
+ /// use pgml::Collection;
+ /// use pgml::Pipeline;
+ /// use serde_json::json;
+ /// use anyhow::Result;
+ /// async fn run() -> anyhow::Result<()> {
+ /// let mut collection = Collection::new("my_collection", None)?;
+ /// let pipelines = collection.get_pipelines().await?;
+ /// Ok(())
+ /// }
+ /// ```
#[instrument(skip(self))]
pub async fn get_pipelines(&mut self) -> anyhow::Result> {
self.verify_in_database(false).await?;
@@ -842,6 +1081,21 @@ impl Collection {
}
/// Gets a [Pipeline] by name
+ ///
+ /// # Arguments
+ /// * `name` - The name of the [Pipeline]
+ ///
+ /// # Example
+ /// ```
+ /// use pgml::Collection;
+ /// use pgml::Pipeline;
+ /// use serde_json::json;
+ /// use anyhow::Result;
+ /// async fn run() -> anyhow::Result<()> {
+ /// let mut collection = Collection::new("my_collection", None)?;
+ /// let pipeline = collection.get_pipeline("my_pipeline").await?;
+ /// Ok(())
+ /// }
+ /// ```
#[instrument(skip(self))]
pub async fn get_pipeline(&mut self, name: &str) -> anyhow::Result {
self.verify_in_database(false).await?;
@@ -857,6 +1111,18 @@ impl Collection {
}
/// Check if the [Collection] exists in the database
+ ///
+ /// # Example
+ /// ```
+ /// use pgml::Collection;
+ /// use pgml::Pipeline;
+ /// use serde_json::json;
+ /// use anyhow::Result;
+ /// async fn run() -> anyhow::Result<()> {
+ /// let mut collection = Collection::new("my_collection", None)?;
+ /// let exists = collection.exists().await?;
+ /// Ok(())
+ /// }
+ /// ```
#[instrument(skip(self))]
pub async fn exists(&self) -> anyhow::Result {
let pool = get_or_initialize_pool(&self.database_url).await?;
@@ -869,6 +1135,29 @@ impl Collection {
Ok(collection.is_some())
}
+ /// Upserts all files in a directory that match the file_types
+ ///
+ /// # Arguments
+ /// * `path` - The path to the directory to upsert
+ /// * `args` - A [Json](serde_json::Value) object with the following keys:
+ /// * `file_types` - An array of file extensions to match, e.g. ['md', 'txt']
+ /// * `file_batch_size` - The number of files to upsert at a time. Defaults to 10.
+ /// * `follow_links` - Whether to follow symlinks. Defaults to false.
+ /// * `ignore_paths` - An array of regexes to ignore, e.g. ['.*ignore.*']
+ ///
+ /// # Example
+ /// ```
+ /// use pgml::Collection;
+ /// use pgml::Pipeline;
+ /// use serde_json::json;
+ /// use anyhow::Result;
+ /// async fn run() -> anyhow::Result<()> {
+ /// let mut collection = Collection::new("my_collection", None)?;
+ /// collection.upsert_directory("/path/to/my/files", json!({
+ /// "file_types": ["md", "txt"]
+ /// }).into()).await?;
+ /// Ok(())
+ /// }
+ /// ```
#[instrument(skip(self))]
pub async fn upsert_directory(&mut self, path: &str, args: Json) -> anyhow::Result<()> {
self.verify_in_database(false).await?;
@@ -944,6 +1233,22 @@ impl Collection {
Ok(())
}
+ /// Gets the sync status of a [Pipeline]
+ ///
+ /// # Arguments
+ /// * `pipeline` - The [Pipeline] to get the sync status of
+ ///
+ /// # Example
+ /// ```
+ /// use pgml::Collection;
+ /// use pgml::Pipeline;
+ /// use anyhow::Result;
+ /// async fn run() -> anyhow::Result<()> {
+ /// let mut collection = Collection::new("my_collection", None)?;
+ /// let mut pipeline = Pipeline::new("my_pipeline", None)?;
+ /// let status = collection.get_pipeline_status(&mut pipeline).await?;
+ /// Ok(())
+ /// }
+ /// ```
#[instrument(skip(self))]
pub async fn get_pipeline_status(&mut self, pipeline: &mut Pipeline) -> anyhow::Result {
self.verify_in_database(false).await?;
@@ -952,6 +1257,20 @@ impl Collection {
pipeline.get_status(project_info, &pool).await
}
+ /// Generates a PlantUML ER diagram of the [Collection] and [Pipeline] tables
+ ///
+ /// # Example
+ /// ```
+ /// use pgml::Collection;
+ /// use pgml::Pipeline;
+ /// use anyhow::Result;
+ /// async fn run() -> anyhow::Result<()> {
+ /// let mut collection = Collection::new("my_collection", None)?;
+ /// let mut pipeline = Pipeline::new("my_pipeline", None)?;
+ /// let er_diagram = collection.generate_er_diagram(&mut pipeline).await?;
+ /// Ok(())
+ /// }
+ /// ```
#[instrument(skip(self))]
pub async fn generate_er_diagram(&mut self, pipeline: &mut Pipeline) -> anyhow::Result {
self.verify_in_database(false).await?;
@@ -1074,6 +1393,21 @@ entity "{schema}.{key}_tsvectors" as {nice_name_key}_tsvectors {{
Ok(uml_entites)
}
+ /// Upserts a file into a [Collection]
+ ///
+ /// # Arguments
+ /// * `path` - The path to the file to upsert
+ ///
+ /// # Example
+ /// ```
+ /// use pgml::Collection;
+ /// use anyhow::Result;
+ /// async fn run() -> anyhow::Result<()> {
+ /// let mut collection = Collection::new("my_collection", None)?;
+ /// collection.upsert_file("my_file.txt").await?;
+ /// Ok(())
+ /// }
+ /// ```
+ #[instrument(skip(self))]
pub async fn upsert_file(&mut self, path: &str) -> anyhow::Result<()> {
self.verify_in_database(false).await?;
let path = Path::new(path);
@@ -1085,16 +1419,11 @@ entity "{schema}.{key}_tsvectors" as {nice_name_key}_tsvectors {{
self.upsert_documents(vec![document.into()], None).await
}
- fn generate_table_names(name: &str) -> (String, String, String, String) {
- [
- ".pipelines",
- ".documents",
- ".chunks",
- ".documents_tsvectors",
- ]
- .into_iter()
- .map(|s| format!("{}{}", name, s))
- .collect_tuple()
- .unwrap()
+ fn generate_table_names(name: &str) -> (String, String) {
+ [".pipelines", ".documents"]
+ .into_iter()
+ .map(|s| format!("{}{}", name, s))
+ .collect_tuple()
+ .unwrap()
}
}
diff --git a/pgml-sdks/pgml/src/lib.rs b/pgml-sdks/pgml/src/lib.rs
index 50665ed93..34b02ce53 100644
--- a/pgml-sdks/pgml/src/lib.rs
+++ b/pgml-sdks/pgml/src/lib.rs
@@ -20,7 +20,7 @@ mod filter_builder;
mod languages;
pub mod migrations;
mod model;
-pub mod models;
+mod models;
mod open_source_ai;
mod order_by_builder;
mod pipeline;
diff --git a/pgml-sdks/pgml/src/model.rs b/pgml-sdks/pgml/src/model.rs
index ff320c0de..432654298 100644
--- a/pgml-sdks/pgml/src/model.rs
+++ b/pgml-sdks/pgml/src/model.rs
@@ -54,10 +54,10 @@ pub(crate) struct ModelDatabaseData {
/// A model used for embedding, inference, etc...
#[derive(alias, Debug, Clone)]
pub struct Model {
- pub name: String,
- pub runtime: ModelRuntime,
- pub parameters: Json,
- pub(crate) database_data: Option,
+ pub(crate) name: String,
+ pub(crate) runtime: ModelRuntime,
+ pub(crate) parameters: Json,
+ database_data: Option,
}
impl Default for Model {
@@ -69,19 +69,6 @@ impl Default for Model {
#[alias_methods(new, transform)]
impl Model {
/// Creates a new [Model]
- ///
- /// # Arguments
- ///
- /// * `name` - The name of the model.
- /// * `source` - The source of the model. Defaults to `pgml`, but can be set to providers like `openai`.
- /// * `parameters` - The parameters to the model. Defaults to None
- ///
- /// # Example
- ///
- /// ```
- /// use pgml::Model;
- /// let model = Model::new(Some("intfloat/e5-small".to_string()), None, None, None);
- /// ```
pub fn new(name: Option, source: Option, parameters: Option) -> Self {
let name = name.unwrap_or("intfloat/e5-small".to_string());
let parameters = parameters.unwrap_or(Json(serde_json::json!({})));
diff --git a/pgml-sdks/pgml/src/open_source_ai.rs b/pgml-sdks/pgml/src/open_source_ai.rs
index d4c02215e..e21397a31 100644
--- a/pgml-sdks/pgml/src/open_source_ai.rs
+++ b/pgml-sdks/pgml/src/open_source_ai.rs
@@ -13,6 +13,7 @@ use crate::{
#[cfg(feature = "python")]
use crate::types::{GeneralJsonAsyncIteratorPython, GeneralJsonIteratorPython, JsonPython};
+/// A drop-in replacement for OpenAI
#[derive(alias, Debug, Clone)]
pub struct OpenSourceAI {
database_url: Option,
@@ -169,6 +170,20 @@ impl Iterator for AsyncToSyncJsonIterator {
chat_completions_create_stream_async
)]
impl OpenSourceAI {
+ /// Creates a new [OpenSourceAI]
+ ///
+ /// # Arguments
+ ///
+ /// * `database_url`: The database url to use. If `None`, the `PGML_DATABASE_URL` environment variable will be used.
+ ///
+ /// # Example
+ /// ```
+ /// use pgml::OpenSourceAI;
+ /// async fn run() -> anyhow::Result<()> {
+ /// let ai = OpenSourceAI::new(None);
+ /// Ok(())
+ /// }
+ /// ```
pub fn new(database_url: Option) -> Self {
Self { database_url }
}
@@ -216,6 +231,7 @@ mistralai/Mistral-7B-v0.1
}
}
+ /// Returns an async iterator of completions
#[allow(clippy::too_many_arguments)]
pub async fn chat_completions_create_stream_async(
&self,
@@ -278,6 +294,7 @@ mistralai/Mistral-7B-v0.1
Ok(GeneralJsonAsyncIterator(Box::pin(iter)))
}
+ /// Returns an iterator of completions
#[allow(clippy::too_many_arguments)]
pub fn chat_completions_create_stream(
&self,
@@ -302,6 +319,7 @@ mistralai/Mistral-7B-v0.1
))))
}
+ /// An async function that returns completions
#[allow(clippy::too_many_arguments)]
pub async fn chat_completions_create_async(
&self,
@@ -371,6 +389,7 @@ mistralai/Mistral-7B-v0.1
.into())
}
+ /// A function that returns completions
#[allow(clippy::too_many_arguments)]
pub fn chat_completions_create(
&self,
diff --git a/pgml-sdks/pgml/src/pipeline.rs b/pgml-sdks/pgml/src/pipeline.rs
index 6dada5159..02b059db3 100644
--- a/pgml-sdks/pgml/src/pipeline.rs
+++ b/pgml-sdks/pgml/src/pipeline.rs
@@ -175,11 +175,12 @@ pub struct PipelineDatabaseData {
pub created_at: DateTime,
}
+/// A pipeline that describes transformations to documents
#[derive(alias, Debug, Clone)]
pub struct Pipeline {
- pub name: String,
- pub schema: Option,
- pub parsed_schema: Option,
+ pub(crate) name: String,
+ pub(crate) schema: Option,
+ pub(crate) parsed_schema: Option,
database_data: Option,
}
@@ -203,6 +204,11 @@ fn json_to_schema(schema: &Json) -> anyhow::Result {
#[alias_methods(new)]
impl Pipeline {
+ /// Creates a [Pipeline]
+ ///
+ /// # Arguments
+ /// * `name` - The name of the pipeline
+ /// * `schema` - The schema of the pipeline. This is a JSON object where the keys are the field names and the values are the field actions.
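+ ///
+ /// # Example
+ /// A minimal sketch; the empty schema mirrors the `add_pipeline` example in collection.rs and is used here purely for illustration.
+ /// ```
+ /// use pgml::Pipeline;
+ /// use serde_json::json;
+ /// use anyhow::Result;
+ /// fn doc() -> Result<()> {
+ /// // Real pipelines describe per-field actions in the schema instead of an empty object
+ /// let mut pipeline = Pipeline::new("my_pipeline", Some(json!({}).into()))?;
+ /// Ok(())
+ /// }
+ /// ```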
pub fn new(name: &str, schema: Option) -> anyhow::Result {
let parsed_schema = schema.as_ref().map(json_to_schema).transpose()?;
Ok(Self {
@@ -215,7 +221,7 @@ impl Pipeline {
/// Gets the status of the [Pipeline]
#[instrument(skip(self))]
- pub async fn get_status(
+ pub(crate) async fn get_status(
&mut self,
project_info: &ProjectInfo,
pool: &Pool,
diff --git a/pgml-sdks/pgml/src/splitter.rs b/pgml-sdks/pgml/src/splitter.rs
index 96b1ed9da..a0847c879 100644
--- a/pgml-sdks/pgml/src/splitter.rs
+++ b/pgml-sdks/pgml/src/splitter.rs
@@ -21,8 +21,8 @@ pub(crate) struct SplitterDatabaseData {
/// A text splitter
#[derive(alias, Debug, Clone)]
pub struct Splitter {
- pub name: String,
- pub parameters: Json,
+ pub(crate) name: String,
+ pub(crate) parameters: Json,
pub(crate) database_data: Option,
}
diff --git a/pgml-sdks/pgml/src/transformer_pipeline.rs b/pgml-sdks/pgml/src/transformer_pipeline.rs
index d20089463..43154615b 100644
--- a/pgml-sdks/pgml/src/transformer_pipeline.rs
+++ b/pgml-sdks/pgml/src/transformer_pipeline.rs
@@ -117,6 +117,13 @@ impl Stream for TransformerStream {
#[alias_methods(new, transform, transform_stream)]
impl TransformerPipeline {
+ /// Creates a new [TransformerPipeline]
+ ///
+ /// # Arguments
+ /// * `task` - The task to run
+ /// * `model` - The model to use
+ /// * `args` - The arguments to pass to the task
+ /// * `database_url` - The database url to use. If None, the `PGML_DATABASE_URL` environment variable will be used
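+ ///
+ /// # Example
+ /// A minimal sketch; it assumes [TransformerPipeline] is re-exported at the crate root like [Collection], and that passing `None` falls back to the defaults described above.
+ /// ```
+ /// use pgml::TransformerPipeline;
+ /// // new() is assumed to return Self directly, as OpenSourceAI::new does
+ /// let pipeline = TransformerPipeline::new("text-generation", None, None, None);
+ /// ```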
pub fn new(
task: &str,
model: Option,
@@ -141,6 +148,11 @@ impl TransformerPipeline {
}
}
+ /// Runs the transformer on the given inputs
+ ///
+ /// # Arguments
+ /// * `inputs` - The inputs to the task
+ /// * `args` - The arguments to pass to the task
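+ ///
+ /// # Example
+ /// A rough sketch; it assumes `inputs` takes [Json] values (as in the other SDK examples) and that the default model for the task is available.
+ /// ```
+ /// use pgml::TransformerPipeline;
+ /// use serde_json::json;
+ /// async fn run() -> anyhow::Result<()> {
+ /// let pipeline = TransformerPipeline::new("text-generation", None, None, None);
+ /// // Hypothetical input; the exact input shape depends on the task being run
+ /// let results = pipeline.transform(vec![json!("AI is going to").into()], None).await?;
+ /// Ok(())
+ /// }
+ /// ```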
#[instrument(skip(self))]
pub async fn transform(&self, inputs: Vec, args: Option) -> anyhow::Result {
let pool = get_or_initialize_pool(&self.database_url).await?;
@@ -176,6 +188,9 @@ impl TransformerPipeline {
Ok(Json(results))
}
+ /// The same as `transform` but returns an iterator of results
+ ///
+ /// The `batch_size` argument can be used to control the number of results returned in each batch
#[instrument(skip(self))]
pub async fn transform_stream(
&self,
diff --git a/pgml-sdks/pgml/src/types.rs b/pgml-sdks/pgml/src/types.rs
index 1a51e4f20..34d93be5c 100644
--- a/pgml-sdks/pgml/src/types.rs
+++ b/pgml-sdks/pgml/src/types.rs
@@ -6,8 +6,7 @@ use sea_query::Iden;
use serde::{Deserialize, Serialize};
use std::ops::{Deref, DerefMut};
-/// A wrapper around serde_json::Value
-// #[derive(sqlx::Type, sqlx::FromRow, Debug)]
+/// A wrapper around `serde_json::Value`
#[derive(alias_manual, sqlx::Type, Debug, Clone, Deserialize, PartialEq, Eq)]
#[sqlx(transparent)]
pub struct Json(pub serde_json::Value);
@@ -80,7 +79,7 @@ impl TryToNumeric for serde_json::Value {
}
}
-/// A wrapper around sqlx::types::PrimitiveDateTime
+/// A wrapper around `sqlx::types::PrimitiveDateTime`
#[derive(sqlx::Type, Debug, Clone)]
#[sqlx(transparent)]
pub struct DateTime(pub sqlx::types::time::PrimitiveDateTime);
@@ -124,6 +123,7 @@ impl IntoTableNameAndSchema for String {
}
}
+/// A wrapper around `std::pin::Pin> + Send>>`
#[derive(alias_manual)]
pub struct GeneralJsonAsyncIterator(
pub std::pin::Pin> + Send>>,
@@ -140,6 +140,7 @@ impl Stream for GeneralJsonAsyncIterator {
}
}
+/// A wrapper around `Box> + Send>`
#[derive(alias_manual)]
pub struct GeneralJsonIterator(pub Box> + Send>);