PostgresMLDocumentation
PostgresML is an open-source database extension that turns Postgres into an end-to-end machine learning platform. Build, train, and deploy ML/AI models directly within your Postgres database without moving data between systems.
From f2e79fbd5230425bef12fb45612d4b512711f055 Mon Sep 17 00:00:00 2001
From: Dan <39170265+chillenberger@users.noreply.github.com>
Date: Sat, 10 Feb 2024 11:35:21 -0700
Subject: [PATCH 06/26] add descriptions to docs cms meta
---
pgml-cms/docs/introduction/apis/client-sdks/collections.md | 4 ++++
pgml-cms/docs/introduction/apis/client-sdks/pipelines.md | 4 ++++
.../client-sdks/tutorials/extractive-question-answering.md | 4 ++++
.../tutorials/semantic-search-using-instructor-model.md | 4 ++++
.../client-sdks/tutorials/summarizing-question-answering.md | 4 ++++
.../docs/introduction/apis/sql-extensions/pgml.deploy.md | 5 +++++
pgml-cms/docs/introduction/apis/sql-extensions/pgml.embed.md | 5 +++++
.../introduction/apis/sql-extensions/pgml.predict/README.md | 5 +++++
.../introduction/apis/sql-extensions/pgml.train/README.md | 4 +---
.../apis/sql-extensions/pgml.transform/README.md | 2 ++
pgml-cms/docs/introduction/apis/sql-extensions/pgml.tune.md | 5 +++++
...gml-quantized-llm-support-for-huggingface-transformers.md | 4 ++++
.../making-postgres-30-percent-faster-in-production.md | 4 ++++
.../docs/resources/benchmarks/million-requests-per-second.md | 4 ++++
pgml-cms/docs/resources/benchmarks/mindsdb-vs-postgresml.md | 4 ++++
...tgresml-is-8-40x-faster-than-python-http-microservices.md | 4 ++++
.../src/components/pages/docs/landing_page/landing_page.scss | 4 +---
.../src/components/pages/docs/landing_page/template.html | 4 ++--
18 files changed, 66 insertions(+), 8 deletions(-)
diff --git a/pgml-cms/docs/introduction/apis/client-sdks/collections.md b/pgml-cms/docs/introduction/apis/client-sdks/collections.md
index 2ebc415d5..c5e4df68d 100644
--- a/pgml-cms/docs/introduction/apis/client-sdks/collections.md
+++ b/pgml-cms/docs/introduction/apis/client-sdks/collections.md
@@ -1,3 +1,7 @@
+---
+description: >-
+ Organizational building blocks of the SDK. Manage all documents and related chunks, embeddings, tsvectors, and pipelines.
+---
# Collections
Collections are the organizational building blocks of the SDK. They manage all documents and related chunks, embeddings, tsvectors, and pipelines.
diff --git a/pgml-cms/docs/introduction/apis/client-sdks/pipelines.md b/pgml-cms/docs/introduction/apis/client-sdks/pipelines.md
index 26305c3c3..1bae53481 100644
--- a/pgml-cms/docs/introduction/apis/client-sdks/pipelines.md
+++ b/pgml-cms/docs/introduction/apis/client-sdks/pipelines.md
@@ -1,3 +1,7 @@
+---
+description: >-
+ Pipelines are composed of a model, splitter, and additional optional arguments.
+---
# Pipelines
Pipelines are composed of a Model, Splitter, and additional optional arguments. Collections can have any number of Pipelines. Each Pipeline is ran everytime documents are upserted.
diff --git a/pgml-cms/docs/introduction/apis/client-sdks/tutorials/extractive-question-answering.md b/pgml-cms/docs/introduction/apis/client-sdks/tutorials/extractive-question-answering.md
index f934f61d1..78abc3a09 100644
--- a/pgml-cms/docs/introduction/apis/client-sdks/tutorials/extractive-question-answering.md
+++ b/pgml-cms/docs/introduction/apis/client-sdks/tutorials/extractive-question-answering.md
@@ -1,3 +1,7 @@
+---
+description: >-
+ JavaScript and Python code snippets for end-to-end question answering.
+---
# Extractive Question Answering
Here is the documentation for the JavaScript and Python code snippets performing end-to-end question answering:
diff --git a/pgml-cms/docs/introduction/apis/client-sdks/tutorials/semantic-search-using-instructor-model.md b/pgml-cms/docs/introduction/apis/client-sdks/tutorials/semantic-search-using-instructor-model.md
index 20d0aa756..697845b55 100644
--- a/pgml-cms/docs/introduction/apis/client-sdks/tutorials/semantic-search-using-instructor-model.md
+++ b/pgml-cms/docs/introduction/apis/client-sdks/tutorials/semantic-search-using-instructor-model.md
@@ -1,3 +1,7 @@
+---
+description: >-
+ JavaScript and Python code snippets for using instructor models in more advanced search use cases.
+---
# Semantic Search using Instructor model
This shows using instructor models in the `pgml` SDK for more advanced use cases.
diff --git a/pgml-cms/docs/introduction/apis/client-sdks/tutorials/summarizing-question-answering.md b/pgml-cms/docs/introduction/apis/client-sdks/tutorials/summarizing-question-answering.md
index 02c9bfaa2..caa7c8a59 100644
--- a/pgml-cms/docs/introduction/apis/client-sdks/tutorials/summarizing-question-answering.md
+++ b/pgml-cms/docs/introduction/apis/client-sdks/tutorials/summarizing-question-answering.md
@@ -1,3 +1,7 @@
+---
+description: >-
+ JavaScript and Python code snippets for text summarization.
+---
# Summarizing Question Answering
Here are the Python and JavaScript examples for text summarization using `pgml` SDK
diff --git a/pgml-cms/docs/introduction/apis/sql-extensions/pgml.deploy.md b/pgml-cms/docs/introduction/apis/sql-extensions/pgml.deploy.md
index e5c52f793..22dd3733c 100644
--- a/pgml-cms/docs/introduction/apis/sql-extensions/pgml.deploy.md
+++ b/pgml-cms/docs/introduction/apis/sql-extensions/pgml.deploy.md
@@ -1,3 +1,8 @@
+---
+description: >-
+ Release trained models when ML quality metrics computed during training improve. Track model deployments over time and rollback if needed.
+---
+
# pgml.deploy()
## Deployments
diff --git a/pgml-cms/docs/introduction/apis/sql-extensions/pgml.embed.md b/pgml-cms/docs/introduction/apis/sql-extensions/pgml.embed.md
index 6b392bc26..61f6a6b0e 100644
--- a/pgml-cms/docs/introduction/apis/sql-extensions/pgml.embed.md
+++ b/pgml-cms/docs/introduction/apis/sql-extensions/pgml.embed.md
@@ -1,3 +1,8 @@
+---
+description: >-
+ Generate high quality embeddings with faster end-to-end vector operations without an additional vector database.
+---
+
# pgml.embed()
Embeddings are a numeric representation of text. They are used to represent words and sentences as vectors, an array of numbers. Embeddings can be used to find similar pieces of text, by comparing the similarity of the numeric vectors using a distance measure, or they can be used as input features for other machine learning models, since most algorithms can't use text directly.
diff --git a/pgml-cms/docs/introduction/apis/sql-extensions/pgml.predict/README.md b/pgml-cms/docs/introduction/apis/sql-extensions/pgml.predict/README.md
index 144839180..6566497e5 100644
--- a/pgml-cms/docs/introduction/apis/sql-extensions/pgml.predict/README.md
+++ b/pgml-cms/docs/introduction/apis/sql-extensions/pgml.predict/README.md
@@ -1,3 +1,8 @@
+---
+description: >-
+ Batch predict from data in a table. Online predict with parameters passed in a query. Automatically reuse pre-processing steps from training.
+---
+
# pgml.predict()
## API
diff --git a/pgml-cms/docs/introduction/apis/sql-extensions/pgml.train/README.md b/pgml-cms/docs/introduction/apis/sql-extensions/pgml.train/README.md
index 6ac7491a9..d00460bfa 100644
--- a/pgml-cms/docs/introduction/apis/sql-extensions/pgml.train/README.md
+++ b/pgml-cms/docs/introduction/apis/sql-extensions/pgml.train/README.md
@@ -1,8 +1,6 @@
---
description: >-
- The training function is at the heart of PostgresML. It's a powerful single
- mechanism that can handle many different training tasks which are configurable
- with the function parameters.
+ Pre-process and pull data to train a model using any of 50 different ML algorithms.
---
# pgml.train()
diff --git a/pgml-cms/docs/introduction/apis/sql-extensions/pgml.transform/README.md b/pgml-cms/docs/introduction/apis/sql-extensions/pgml.transform/README.md
index 4d1c30d12..00093f135 100644
--- a/pgml-cms/docs/introduction/apis/sql-extensions/pgml.transform/README.md
+++ b/pgml-cms/docs/introduction/apis/sql-extensions/pgml.transform/README.md
@@ -1,4 +1,6 @@
---
+description: >-
+ Perform dozens of state-of-the-art natural language processing (NLP) tasks with thousands of models. Serve with the same Postgres infrastructure.
layout:
title:
visible: true
diff --git a/pgml-cms/docs/introduction/apis/sql-extensions/pgml.tune.md b/pgml-cms/docs/introduction/apis/sql-extensions/pgml.tune.md
index 65e0e1c21..524b3adfd 100644
--- a/pgml-cms/docs/introduction/apis/sql-extensions/pgml.tune.md
+++ b/pgml-cms/docs/introduction/apis/sql-extensions/pgml.tune.md
@@ -1,3 +1,8 @@
+---
+description: >-
+ Fine tune open-source models on your own data.
+---
+
# pgml.tune()
## Fine Tuning
diff --git a/pgml-cms/docs/resources/benchmarks/ggml-quantized-llm-support-for-huggingface-transformers.md b/pgml-cms/docs/resources/benchmarks/ggml-quantized-llm-support-for-huggingface-transformers.md
index da53f4702..b6e5c059a 100644
--- a/pgml-cms/docs/resources/benchmarks/ggml-quantized-llm-support-for-huggingface-transformers.md
+++ b/pgml-cms/docs/resources/benchmarks/ggml-quantized-llm-support-for-huggingface-transformers.md
@@ -1,3 +1,7 @@
+---
+description: >-
+ Quantization allows PostgresML to fit larger models in less RAM.
+---
# GGML Quantized LLM support for Huggingface Transformers
diff --git a/pgml-cms/docs/resources/benchmarks/making-postgres-30-percent-faster-in-production.md b/pgml-cms/docs/resources/benchmarks/making-postgres-30-percent-faster-in-production.md
index f999591e1..a0581b8e2 100644
--- a/pgml-cms/docs/resources/benchmarks/making-postgres-30-percent-faster-in-production.md
+++ b/pgml-cms/docs/resources/benchmarks/making-postgres-30-percent-faster-in-production.md
@@ -1,3 +1,7 @@
+---
+description: >-
+ Anyone who runs Postgres at scale knows that performance comes with trade offs.
+---
# Making Postgres 30 Percent Faster in Production
Anyone who runs Postgres at scale knows that performance comes with trade offs. The typical playbook is to place a pooler like PgBouncer in front of your database and turn on transaction mode. This makes multiple clients reuse the same server connection, which allows thousands of clients to connect to your database without causing a fork bomb.
diff --git a/pgml-cms/docs/resources/benchmarks/million-requests-per-second.md b/pgml-cms/docs/resources/benchmarks/million-requests-per-second.md
index 546172c6a..1b7f43985 100644
--- a/pgml-cms/docs/resources/benchmarks/million-requests-per-second.md
+++ b/pgml-cms/docs/resources/benchmarks/million-requests-per-second.md
@@ -1,3 +1,7 @@
+---
+description: >-
+ The question "Does it Scale?" has become somewhat of a meme in software engineering.
+---
# Million Requests per Second
The question "Does it Scale?" has become somewhat of a meme in software engineering. There is a good reason for it though, because most businesses plan for success. If your app, online store, or SaaS becomes popular, you want to be sure that the system powering it can serve all your new customers.
diff --git a/pgml-cms/docs/resources/benchmarks/mindsdb-vs-postgresml.md b/pgml-cms/docs/resources/benchmarks/mindsdb-vs-postgresml.md
index 211d32922..e56d676a8 100644
--- a/pgml-cms/docs/resources/benchmarks/mindsdb-vs-postgresml.md
+++ b/pgml-cms/docs/resources/benchmarks/mindsdb-vs-postgresml.md
@@ -1,3 +1,7 @@
+---
+description: >-
+ Compare two projects that both aim to provide an SQL interface to ML algorithms and the data they require.
+---
# MindsDB vs PostgresML
## Introduction
diff --git a/pgml-cms/docs/resources/benchmarks/postgresml-is-8-40x-faster-than-python-http-microservices.md b/pgml-cms/docs/resources/benchmarks/postgresml-is-8-40x-faster-than-python-http-microservices.md
index fca4dc98d..73bde7c33 100644
--- a/pgml-cms/docs/resources/benchmarks/postgresml-is-8-40x-faster-than-python-http-microservices.md
+++ b/pgml-cms/docs/resources/benchmarks/postgresml-is-8-40x-faster-than-python-http-microservices.md
@@ -1,3 +1,7 @@
+---
+description: >-
+ PostgresML is a simpler alternative to that ever-growing complexity.
+---
# PostgresML is 8-40x faster than Python HTTP microservices
Machine learning architectures can be some of the most complex, expensive and _difficult_ arenas in modern systems. The number of technologies and the amount of required hardware compete for tightening headcount, hosting, and latency budgets. Unfortunately, the trend in the industry is only getting worse along these lines, with increased usage of state-of-the-art architectures that center around data warehouses, microservices and NoSQL databases.
diff --git a/pgml-dashboard/src/components/pages/docs/landing_page/landing_page.scss b/pgml-dashboard/src/components/pages/docs/landing_page/landing_page.scss
index b64b4b9b3..8e0d5a815 100644
--- a/pgml-dashboard/src/components/pages/docs/landing_page/landing_page.scss
+++ b/pgml-dashboard/src/components/pages/docs/landing_page/landing_page.scss
@@ -10,10 +10,8 @@ div[data-controller="pages-docs-landing-page"] {
.card {
border-radius: 20px;
- background: #{$gray-800};
- box-shadow: 16px 20px 24px 0px rgba(6, 20, 37, 0.04);
- backdrop-filter: blur(8px);
padding: 24px;
+ border: 0px;
.alt_title {
color: #{$gray-100};
diff --git a/pgml-dashboard/src/components/pages/docs/landing_page/template.html b/pgml-dashboard/src/components/pages/docs/landing_page/template.html
index a9482fd38..3076c7cec 100644
--- a/pgml-dashboard/src/components/pages/docs/landing_page/template.html
+++ b/pgml-dashboard/src/components/pages/docs/landing_page/template.html
@@ -37,7 +37,7 @@
PostgresMLDocumen
PostgresML is a open-source database extension that turns Postgres into an end-to-end machine learning platform. Build, train, and deploy ML/AI models directly within your Postgres database without moving data between systems.
-
+
<%+ AltDocCard::new().icon("new_releases").title("Create your dtabase").href("/docs/introduction/getting-started/create-your-database") %>
@@ -49,7 +49,7 @@
PostgresMLDocumen
-
+
<%- section_title(
"
SQL Extensions
",
"SQL extensions provide end-to-end ML & AI functionality from inference to deployment. They can be used in any combination to implement bespoke models across use cases.") %>
From f24023052a236606ce3631f2a33a0fa809abafbf Mon Sep 17 00:00:00 2001
From: Dan <39170265+chillenberger@users.noreply.github.com>
Date: Tue, 13 Feb 2024 10:08:23 -0700
Subject: [PATCH 07/26] split doc render out of collection render, add left
nav to doc article view
---
pgml-dashboard/src/api/cms.rs | 81 +++++++++++--------
.../src/components/layouts/docs/mod.rs | 14 +++-
.../src/components/layouts/docs/template.html | 4 +-
.../navigation/left_nav/docs/docs.scss | 2 +-
.../left_nav/docs/docs_controller.js | 4 -
.../pages/docs/article/article.scss | 9 ++-
.../src/components/pages/docs/article/mod.rs | 12 +++
.../pages/docs/article/template.html | 72 ++++++-----------
.../components/pages/docs/landing_page/mod.rs | 2 +-
.../pages/docs/landing_page/template.html | 3 +-
pgml-dashboard/src/templates/mod.rs | 16 ++--
pgml-dashboard/templates/layout/base.html | 2 +-
12 files changed, 122 insertions(+), 99 deletions(-)
diff --git a/pgml-dashboard/src/api/cms.rs b/pgml-dashboard/src/api/cms.rs
index f7a091f16..60f0c41f9 100644
--- a/pgml-dashboard/src/api/cms.rs
+++ b/pgml-dashboard/src/api/cms.rs
@@ -14,11 +14,10 @@ use yaml_rust::YamlLoader;
use crate::{
components::{cms::index_link::IndexLink, layouts::marketing::base::Theme, layouts::marketing::Base},
guards::Cluster,
- responses::{ResponseOk, Template},
+ responses::{ResponseOk, Template, Response},
templates::docs::*,
utils::config,
};
-use sailfish::TemplateOnce;
use serde::{Deserialize, Serialize};
use std::fmt;
@@ -313,12 +312,11 @@ impl Collection {
NamedFile::open(self.asset_dir.join(path)).await.ok()
}
- pub async fn get_content(
+ pub async fn get_content_path(
&self,
mut path: PathBuf,
- cluster: &Cluster,
origen: &Origin<'_>,
- ) -> Result {
+ ) -> (PathBuf, String) {
info!("get_content: {} | {path:?}", self.name);
let mut redirected = false;
@@ -333,7 +331,6 @@ impl Collection {
}
None => {}
};
-
let canonical = format!(
"https://postgresml.org{}/{}",
self.url_root.to_string_lossy(),
@@ -344,7 +341,7 @@ impl Collection {
}
let path = self.root_dir.join(format!("{}.md", path.to_string_lossy()));
- self.render(&path, &canonical, cluster).await
+ (path, canonical)
}
/// Create an index of the Collection based on the SUMMARY.md from Gitbook.
@@ -366,13 +363,13 @@ impl Collection {
{
match node {
Node::List(list) => {
- let mut links: Vec = self
+ let links: Vec = self
.get_sub_links(list)
.unwrap_or_else(|_| panic!("Could not parse list of index links: {summary_path:?}"));
let mut out = match parent_folder.as_ref() {
Some(parent_folder) => {
- let mut parent = IndexLink::new(parent_folder.as_ref());
+ let mut parent = IndexLink::new(parent_folder.as_ref()).href("");
parent.children = links.clone();
Vec::from([parent])
}
@@ -487,7 +484,9 @@ impl Collection {
while children.len() > 0 {
let current = children.pop().unwrap();
- urls.push(current.href.clone());
+ if current.href.len() > 0 {
+ urls.push(current.href.clone());
+ }
for i in (0..current.children.len()).rev() {
children.push(&current.children[i])
@@ -517,11 +516,6 @@ impl Collection {
canonical: &str,
cluster: &Cluster,
) -> Result {
- let user = if cluster.context.user.is_anonymous() {
- None
- } else {
- Some(cluster.context.user.clone())
- };
match Document::from_path(&path).await {
Ok(doc) => {
@@ -534,16 +528,12 @@ impl Collection {
if let Some(description) = &doc.description {
layout.description(description);
}
- if let Some(user) = &user {
- layout.user(user);
- }
let layout = layout
.canonical(canonical)
.nav_title(&self.name)
.nav_links(&index)
- .toc_links(&doc.toc_links)
- .footer(cluster.context.marketing_footer.to_string());
+ .toc_links(&doc.toc_links);
Ok(ResponseOk(
layout.render(crate::templates::Article { content: doc.html() }),
@@ -561,14 +551,9 @@ impl Collection {
"#,
);
- if let Some(user) = &user {
- layout.user(user);
- }
-
layout
.nav_links(&self.index)
- .nav_title(&self.name)
- .footer(cluster.context.marketing_footer.to_string());
+ .nav_title(&self.name);
layout.render(crate::templates::Article { content: doc });
@@ -612,7 +597,8 @@ async fn get_blog(
cluster: &Cluster,
origen: &Origin<'_>,
) -> Result
{
- BLOG.get_content(path, cluster, origen).await
+ let (doc_file_path, canonical) = BLOG.get_content_path(path.clone(), origen).await;
+ BLOG.render(&doc_file_path, &canonical, cluster).await
}
#[get("/careers/", rank = 5)]
@@ -621,7 +607,8 @@ async fn get_careers(
cluster: &Cluster,
origen: &Origin<'_>,
) -> Result {
- CAREERS.get_content(path, cluster, origen).await
+ let (doc_file_path, canonical) = CAREERS.get_content_path(path.clone(), origen).await;
+ CAREERS.render(&doc_file_path, &canonical, cluster).await
}
#[get("/docs/", rank = 5)]
@@ -630,7 +617,37 @@ async fn get_docs(
cluster: &Cluster,
origen: &Origin<'_>,
) -> Result {
- DOCS.get_content(path, cluster, origen).await
+ let (doc_file_path, canonical) = DOCS.get_content_path(path.clone(), origen).await;
+
+
+ match Document::from_path(&doc_file_path).await {
+ Ok(doc) => {
+ let index = DOCS.open_index(&doc.path);
+
+ let layout = crate::components::layouts::Docs::new(&doc.title, Some(cluster))
+ .index(&index)
+ .image(&doc.thumbnail)
+ .canonical(&canonical);
+
+
+ let page = crate::components::pages::docs::Article::new()
+ .toc_links(&doc.toc_links)
+ .content(&doc.html());
+
+ Ok(ResponseOk(
+ layout.render(page),
+ ))
+ }
+ // Return page not found on bad path
+ _ => {
+ let layout = crate::components::layouts::Docs::new("404", Some(cluster))
+ .index(&DOCS.index);
+
+ let page = crate::components::pages::docs::Article::new().document_not_found();
+
+ Err(crate::responses::NotFound(layout.render(page)))
+ }
+ }
}
#[get("/blog")]
@@ -666,10 +683,8 @@ async fn docs_landing_page(cluster: &Cluster) -> Result", rank = 5)]
async fn get_user_guides(
path: PathBuf,
- cluster: &Cluster,
- origen: &Origin<'_>,
-) -> Result {
- DOCS.get_content(path, cluster, origen).await
+) -> Result {
+ Ok(Response::redirect(format!("/docs/{}", path.display().to_string())))
}
pub fn routes() -> Vec {
diff --git a/pgml-dashboard/src/components/layouts/docs/mod.rs b/pgml-dashboard/src/components/layouts/docs/mod.rs
index f968b17ed..a682072ca 100644
--- a/pgml-dashboard/src/components/layouts/docs/mod.rs
+++ b/pgml-dashboard/src/components/layouts/docs/mod.rs
@@ -34,11 +34,23 @@ impl Docs {
}
}
- pub fn index(mut self, index: &Vec) -> Self {
+ pub fn index(mut self, index: &Vec) -> Docs {
self.index = index.clone();
self
}
+ pub fn image(mut self, image: &Option) -> Docs {
+ if let Some(image) = image {
+ self.head = self.head.image(image.as_str());
+ }
+ self
+ }
+
+ pub fn canonical(mut self, canonical: &str) -> Docs {
+ self.head = self.head.canonical(canonical);
+ self
+ }
+
pub fn render(mut self, template: T) -> String
where
T: sailfish::TemplateOnce,
diff --git a/pgml-dashboard/src/components/layouts/docs/template.html b/pgml-dashboard/src/components/layouts/docs/template.html
index 1176cf40d..cc391c6ac 100644
--- a/pgml-dashboard/src/components/layouts/docs/template.html
+++ b/pgml-dashboard/src/components/layouts/docs/template.html
@@ -16,11 +16,11 @@