diff --git a/pgml-cms/docs/product/cloud-database/serverless.md b/pgml-cms/docs/product/cloud-database/serverless.md index e078f0067..fe08972ed 100644 --- a/pgml-cms/docs/product/cloud-database/serverless.md +++ b/pgml-cms/docs/product/cloud-database/serverless.md @@ -19,39 +19,14 @@ Once logged in, select "New Database" from the left menu and choose the Serverle ### Serverless Pricing Storage is charged per GB/mo, and all requests by CPU or GPU millisecond of compute required to perform them. -#### Vector & Relational Database -| NAME | PRICING | -| :--- | ---: | -| Tables & Index Storage | $0.20 GB per month | -| Retrieval, Filtering, Ranking & other Queries | $7.50 per hour | -| Embeddings | Included w/ Queries | -| LLMs | Included w/ Queries | -| Fine Tuning | Included w/ Queries | -| Machine Learning | Included w/ Queries | - + +Loading our current pricing model... + ### Serverless Models Serverless AI engines come with predefined models and a flexible pricing structure -#### Embedding Models -| NAME | PARAMETERS (M) | MAX INPUT TOKENS | DIMENSIONS | STRENGTHS | -| --- | --- | --- | --- | --- | -| intfloat/e5-large-v2 | 33.4 | 512 | 384 | High quality, low latency | -| mixedbread-ai/mxbai-embed-large-v1 | 334 | 512 | 1024 | High quality, higher latency | -| Alibaba-NLP/gte-base-en-v1.5 | 137 | 8192 | 768 | Supports up to 8k input tokens, low latency | -| Alibaba-NLP/gte-large-en-v1.5 | 434 | 8192 | 1024 | Supports up to 8k input tokens, higher latency | - -#### Instruct Models -| NAME | TOTAL PARAMETERS (M) | ACTIVE PARAMETERS (M) | CONTEXT SIZE | STRENGTHS | -| --- | --- | --- | --- | --- | -| meta-llama/Meta-Llama-3-70B-Instruct | 70,000 | 70,000 | 8,000 | High quality | -| meta-llama/Meta-Llama-3-8B-Instruct | 8,000 | 8,000 | 8,000 | High quality, low latency | -| microsoft/Phi-3-mini-128k-instruct | 3,820 | 3,820 | 128,000 | Lowest latency | -| mistralai/Mixtral-8x7B-Instruct-v0.1 | 56,000 | 12,900 | 32,768 | MOE high quality | -| 
mistralai/Mistral-7B-Instruct-v0.2 | 7,000 | 7,000 | 32,768 | High quality, low latency | - -#### Summarization Models -| NAME | PARAMETERS (M) | CONTEXT SIZE | STRENGTHS | -| --- | --- | --- | --- | -| google/pegasus-xsum | 568 | 512 | Efficient summarization | + +Loading our current serverless models offered... + diff --git a/pgml-dashboard/Cargo.lock b/pgml-dashboard/Cargo.lock index 0fe0b481d..59e710ba5 100644 --- a/pgml-dashboard/Cargo.lock +++ b/pgml-dashboard/Cargo.lock @@ -315,6 +315,12 @@ version = "0.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ba43ea6f343b788c8764558649e08df62f86c6ef251fdaeb1ffd010a9ae50a2" +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + [[package]] name = "base64ct" version = "1.6.0" @@ -487,7 +493,7 @@ dependencies = [ "anstream", "anstyle", "clap_lex", - "strsim", + "strsim 0.10.0", "terminal_size", ] @@ -852,8 +858,18 @@ version = "0.14.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b750cb3417fd1b327431a470f388520309479ab0bf5e323505daf0290cd3850" dependencies = [ - "darling_core", - "darling_macro", + "darling_core 0.14.4", + "darling_macro 0.14.4", +] + +[[package]] +name = "darling" +version = "0.20.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83b2eb4d90d12bdda5ed17de686c2acb4c57914f8f921b8da7e112b5a36f3fe1" +dependencies = [ + "darling_core 0.20.9", + "darling_macro 0.20.9", ] [[package]] @@ -866,21 +882,46 @@ dependencies = [ "ident_case", "proc-macro2", "quote", - "strsim", + "strsim 0.10.0", "syn 1.0.109", ] +[[package]] +name = "darling_core" +version = "0.20.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "622687fe0bac72a04e5599029151f5796111b90f1baaa9b544d807a5e31cd120" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + 
"quote", + "strsim 0.11.1", + "syn 2.0.32", +] + [[package]] name = "darling_macro" version = "0.14.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4aab4dbc9f7611d8b55048a3a16d2d010c2c8334e46304b40ac1cc14bf3b48e" dependencies = [ - "darling_core", + "darling_core 0.14.4", "quote", "syn 1.0.109", ] +[[package]] +name = "darling_macro" +version = "0.20.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "733cabb43482b1a1b53eee8583c2b9e8684d592215ea83efd305dd31bc2f0178" +dependencies = [ + "darling_core 0.20.9", + "quote", + "syn 2.0.32", +] + [[package]] name = "data-encoding" version = "2.5.0" @@ -1723,6 +1764,7 @@ checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" dependencies = [ "autocfg", "hashbrown 0.12.3", + "serde", ] [[package]] @@ -2544,7 +2586,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "pgml" -version = "1.0.2" +version = "1.0.4" dependencies = [ "anyhow", "async-trait", @@ -2568,6 +2610,7 @@ dependencies = [ "sea-query-binder", "serde", "serde_json", + "serde_with", "sqlx", "tokio", "tracing", @@ -3500,7 +3543,7 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "878cf3d57f0e5bfacd425cdaccc58b4c06d68a7b71c63fc28710a20c88676808" dependencies = [ - "darling", + "darling 0.14.4", "heck", "quote", "syn 1.0.109", @@ -3775,6 +3818,36 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_with" +version = "3.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ad483d2ab0149d5a5ebcd9972a3852711e0153d863bf5a5d0391d28883c4a20" +dependencies = [ + "base64 0.22.1", + "chrono", + "hex", + "indexmap 1.9.3", + "indexmap 2.0.0", + "serde", + "serde_derive", + "serde_json", + "serde_with_macros", + "time", +] + +[[package]] +name = "serde_with_macros" +version = "3.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"65569b702f41443e8bc8bbb1c5779bd0450bbe723b56198980e80ec45780bce2" +dependencies = [ + "darling 0.20.9", + "proc-macro2", + "quote", + "syn 2.0.32", +] + [[package]] name = "servo_arc" version = "0.3.0" @@ -4225,6 +4298,12 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + [[package]] name = "subtle" version = "2.5.0" diff --git a/pgml-dashboard/src/components/tables/mod.rs b/pgml-dashboard/src/components/tables/mod.rs index efb5303de..4fe33ae05 100644 --- a/pgml-dashboard/src/components/tables/mod.rs +++ b/pgml-dashboard/src/components/tables/mod.rs @@ -4,5 +4,13 @@ // src/components/tables/large pub mod large; +// src/components/tables/serverless_models +pub mod serverless_models; +pub use serverless_models::ServerlessModels; + +// src/components/tables/serverless_pricing +pub mod serverless_pricing; +pub use serverless_pricing::ServerlessPricing; + // src/components/tables/small pub mod small; diff --git a/pgml-dashboard/src/components/tables/serverless_models/mod.rs b/pgml-dashboard/src/components/tables/serverless_models/mod.rs new file mode 100644 index 000000000..b77ead764 --- /dev/null +++ b/pgml-dashboard/src/components/tables/serverless_models/mod.rs @@ -0,0 +1,114 @@ +use crate::components::tables::small::row::Row; +use pgml_components::component; +use pgml_components::Component; +use sailfish::TemplateOnce; + +#[derive(TemplateOnce, Default)] +#[template(path = "tables/serverless_models/template.html")] +pub struct ServerlessModels { + style_type: String, + embedding_models: [Component; 4], + instruct_models: [Component; 5], + summarization_models: [Component; 1], +} + +impl ServerlessModels { + pub fn new() -> ServerlessModels { + ServerlessModels 
{ + style_type: "product".to_string(), + embedding_models: [ + Component::from(Row::new(&[ + "intfloat/e5-small-v2".into(), + "33.4".into(), + "512".into(), + "384".into(), + "Good quality, low latency".into(), + ])), + Component::from(Row::new(&[ + "mixedbread-ai/mxbai-embed-large-v1".into(), + "335".into(), + "512".into(), + "1024".into(), + "High quality, higher latency".into(), + ])), + Component::from(Row::new(&[ + "Alibaba-NLP/gte-base-en-v1.5".into(), + "137".into(), + "8192".into(), + "768".into(), + "Supports up to 8,000 input tokens".into(), + ])), + Component::from(Row::new(&[ + "Alibaba-NLP/gte-large-en-v1.5".into(), + "434".into(), + "8192".into(), + "1024".into(), + "Highest quality, 8,000 input tokens".into(), + ])), + ], + instruct_models: [ + Component::from(Row::new(&[ + "meta-llama/Meta-Llama-3-70B-Instruct".into(), + "70,000".into(), + "70,000".into(), + "8,000".into(), + "Highest quality".into(), + ])), + Component::from(Row::new(&[ + "meta-llama/Meta-Llama-3-8B-Instruct".into(), + "8,000".into(), + "8,000".into(), + "8,000".into(), + "High quality, low latency".into(), + ])), + Component::from(Row::new(&[ + "microsoft/Phi-3-mini-128k-instruct".into(), + "3,820".into(), + "3,820".into(), + "128,000".into(), + "Lowest latency".into(), + ])), + Component::from(Row::new(&[ + "mistralai/Mixtral-8x7B-Instruct-v0.1".into(), + "56,000".into(), + "12,900".into(), + "32,768".into(), + "MOE high quality".into(), + ])), + Component::from(Row::new(&[ + "mistralai/Mistral-7B-Instruct-v0.2".into(), + "7,000".into(), + "7,000".into(), + "32,768".into(), + "High quality, low latency".into(), + ])), + ], + summarization_models: [Component::from(Row::new(&[ + "google/pegasus-xsum".into(), + "568".into(), + "512".into(), + "Efficient summarization".into(), // fourth cell is the "Strengths" column, not a token count + ]))], + } + } + + pub fn set_style_type(mut self, style_type: &str) -> Self { + self.style_type = style_type.to_string(); + self + } +} + +#[derive(TemplateOnce, Default)] +#[template(path =
"tables/serverless_models/turbotemplate.html")] +pub struct ServerlessModelsTurbo { + comp: Component, +} + +impl ServerlessModelsTurbo { + pub fn new(comp: Component) -> ServerlessModelsTurbo { + ServerlessModelsTurbo { comp } + } +} + +component!(ServerlessModels); +component!(ServerlessModelsTurbo); diff --git a/pgml-dashboard/src/components/tables/serverless_models/serverless_models.scss b/pgml-dashboard/src/components/tables/serverless_models/serverless_models.scss new file mode 100644 index 000000000..6c870681a --- /dev/null +++ b/pgml-dashboard/src/components/tables/serverless_models/serverless_models.scss @@ -0,0 +1,7 @@ +div[data-controller="tables-serverless-models"] { + table.table.table-sm thead th, table.table-sm thead th { + vertical-align: top; + padding-top: 8px; + padding-bottom: 8px; + } +} diff --git a/pgml-dashboard/src/components/tables/serverless_models/template.html b/pgml-dashboard/src/components/tables/serverless_models/template.html new file mode 100644 index 000000000..65096e0bc --- /dev/null +++ b/pgml-dashboard/src/components/tables/serverless_models/template.html @@ -0,0 +1,43 @@ +<% + use crate::components::tables::small::*; + + let heading_style = if style_type == "product" { + "text-gradient-green h6 fw-semibold mb-0 " + } else { + "text-white h6 fw-semibold mb-0" + }; +%> + +
+

Embedding Models

+
+ <%+ Table::new(&[ + "Name", + "Parameters (M)", + "Max input tokens", + "Dimensions", + "Strengths", + ], &embedding_models) %> +
+ +

Instruct Models

+
+ <%+ Table::new(&[ + "Name", + "Parameters (M)", + "Active Parameters (M)", + "Context size", + "Strengths", + ], &instruct_models) %> +
+ +

Summarization Models

+
+ <%+ Table::new(&[ + "Name", + "Parameters (M)", + "Context size", + "Strengths", + ], &summarization_models) %> +
+
diff --git a/pgml-dashboard/src/components/tables/serverless_models/turbotemplate.html b/pgml-dashboard/src/components/tables/serverless_models/turbotemplate.html new file mode 100644 index 000000000..e8e8f967e --- /dev/null +++ b/pgml-dashboard/src/components/tables/serverless_models/turbotemplate.html @@ -0,0 +1,3 @@ + + <%+ comp %> + diff --git a/pgml-dashboard/src/components/tables/serverless_pricing/mod.rs b/pgml-dashboard/src/components/tables/serverless_pricing/mod.rs new file mode 100644 index 000000000..e61a2e2f1 --- /dev/null +++ b/pgml-dashboard/src/components/tables/serverless_pricing/mod.rs @@ -0,0 +1,53 @@ +use crate::components::tables::small::row::Row; +use pgml_components::component; +use pgml_components::Component; +use sailfish::TemplateOnce; + +#[derive(TemplateOnce, Default)] +#[template(path = "tables/serverless_pricing/template.html")] +pub struct ServerlessPricing { + style_type: String, + pricing: [Component; 6], +} + +impl ServerlessPricing { + pub fn new() -> ServerlessPricing { + ServerlessPricing { + style_type: "product".to_string(), + pricing: [ + Component::from(Row::new(&[ + "Tables & index storage".into(), + "$0.25/GB per month".into(), + ])), + Component::from(Row::new(&[ + "Retrieval, filtering, ranking & other queries".into(), + "$7.50 per hour".into(), + ])), + Component::from(Row::new(&["Embeddings".into(), "Included w/ queries".into()])), + Component::from(Row::new(&["LLMs".into(), "Included w/ queries".into()])), + Component::from(Row::new(&["Fine tuning".into(), "Included w/ queries".into()])), + Component::from(Row::new(&["Machine learning".into(), "Included w/ queries".into()])), + ], + } + } + + pub fn set_style_type(mut self, style_type: &str) -> ServerlessPricing { + self.style_type = style_type.to_string(); + self + } +} + +#[derive(TemplateOnce, Default)] +#[template(path = "tables/serverless_pricing/turbotemplate.html")] +pub struct ServerlessPricingTurbo { + comp: Component, +} + +impl ServerlessPricingTurbo { + 
pub fn new(comp: Component) -> ServerlessPricingTurbo { + ServerlessPricingTurbo { comp } + } +} + +component!(ServerlessPricing); +component!(ServerlessPricingTurbo); diff --git a/pgml-dashboard/src/components/tables/serverless_pricing/serverless_pricing.scss b/pgml-dashboard/src/components/tables/serverless_pricing/serverless_pricing.scss new file mode 100644 index 000000000..2c5d66cb5 --- /dev/null +++ b/pgml-dashboard/src/components/tables/serverless_pricing/serverless_pricing.scss @@ -0,0 +1,7 @@ +div[data-controller="tables-serverless-pricing"] { + table.table.table-sm thead th, table.table-sm thead th { + vertical-align: top; + padding-top: 8px; + padding-bottom: 8px; + } +} diff --git a/pgml-dashboard/src/components/tables/serverless_pricing/template.html b/pgml-dashboard/src/components/tables/serverless_pricing/template.html new file mode 100644 index 000000000..e6588a0ba --- /dev/null +++ b/pgml-dashboard/src/components/tables/serverless_pricing/template.html @@ -0,0 +1,21 @@ +<% + use crate::components::tables::small::*; + + let heading_style = if style_type == "product" { + "text-gradient-green" + } else { + "text-white" + }; +%> + +
+

+ Vector & Relational Database +

+
+ <%+ Table::new(&[ + "Name", + "Pricing", + ], &pricing) %> +
+
diff --git a/pgml-dashboard/src/components/tables/serverless_pricing/turbotemplate.html b/pgml-dashboard/src/components/tables/serverless_pricing/turbotemplate.html new file mode 100644 index 000000000..84da50d76 --- /dev/null +++ b/pgml-dashboard/src/components/tables/serverless_pricing/turbotemplate.html @@ -0,0 +1,3 @@ + + <%+ comp %> + diff --git a/pgml-dashboard/src/lib.rs b/pgml-dashboard/src/lib.rs index b8fc11a7d..ce582c76f 100644 --- a/pgml-dashboard/src/lib.rs +++ b/pgml-dashboard/src/lib.rs @@ -25,6 +25,8 @@ use guards::Cluster; use responses::{Error, ResponseOk}; use templates::{components::StaticNav, *}; +use crate::components::tables::serverless_models::{ServerlessModels, ServerlessModelsTurbo}; +use crate::components::tables::serverless_pricing::{ServerlessPricing, ServerlessPricingTurbo}; use crate::utils::cookies::Notifications; use crate::utils::urls; use std::collections::hash_map::DefaultHasher; @@ -167,6 +169,18 @@ pub enum NotificationLevel { Feature3, } +#[get("/serverless_models/turboframe?