From 06d6844e5decdc0c1df40a3cadb4347d14c44fe7 Mon Sep 17 00:00:00 2001 From: Montana Low Date: Tue, 19 Mar 2024 14:10:57 -0700 Subject: [PATCH 1/3] singularize titles --- .../src/components/pages/docs/landing_page/mod.rs | 5 ++--- .../src/components/pages/docs/landing_page/template.html | 8 ++++---- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/pgml-dashboard/src/components/pages/docs/landing_page/mod.rs b/pgml-dashboard/src/components/pages/docs/landing_page/mod.rs index 063c5051b..854e8109d 100644 --- a/pgml-dashboard/src/components/pages/docs/landing_page/mod.rs +++ b/pgml-dashboard/src/components/pages/docs/landing_page/mod.rs @@ -84,14 +84,13 @@ impl LandingPage { let mut benchmarks_folder: Vec = Vec::new(); let mut extension_folder: Vec = Vec::new(); let mut client_sdks_folder: Vec = Vec::new(); - while !children.is_empty() { let link = children.pop().unwrap(); match link.title.to_lowercase().as_ref() { "benchmarks" => benchmarks_folder = link.children, - "sql extensions" => extension_folder = link.children, - "client sdks" => client_sdks_folder = link.children, + "sql extension" => extension_folder = link.children, + "client sdk" => client_sdks_folder = link.children, _ => { if !link.children.is_empty() { for item in link.children.clone() { diff --git a/pgml-dashboard/src/components/pages/docs/landing_page/template.html b/pgml-dashboard/src/components/pages/docs/landing_page/template.html index c2cfc935c..db5eb423f 100644 --- a/pgml-dashboard/src/components/pages/docs/landing_page/template.html +++ b/pgml-dashboard/src/components/pages/docs/landing_page/template.html @@ -55,8 +55,8 @@

PostgresML
Documen
<%- section_title( - "

SQL Extensions

", - "SQL extensions provide end-to-end ML & AI functionality from inference to deployment. They can be used in any combination to implement bespoke models across use cases.") %> + "

SQL Extension

", + "The SQL extension provides end-to-end ML & AI functionality from inference to deployment. It can be used in any combination to implement bespoke models across use cases.") %>
@@ -76,13 +76,13 @@

PostgresML
Documen
<%- section_title( r#"
-

Client SDKs

+

Client SDK

"#, - "Client SDKs implement the best practices to streamline development of common ML/AI use cases in JavaScript or Python.")%> + "Our Client SDK implements the best practices to streamline development of common ML/AI use cases in JavaScript or Python.")%>
From 8e38d8d2e6ba1a9b139ac9371f91e7417bf5ef26 Mon Sep 17 00:00:00 2001 From: Montana Low Date: Tue, 19 Mar 2024 14:11:12 -0700 Subject: [PATCH 2/3] remove trailing spaces --- ...from-closed-to-open-source-ai-in-minutes.md | 10 +++++----- ...stgresml-as-a-memory-backend-to-auto-gpt.md | 2 +- pgml-cms/careers/product-manager.md | 2 +- .../docs/api/client-sdk/document-search.md | 2 +- pgml-cms/docs/api/client-sdk/pipelines.md | 8 ++++---- .../client-sdk/tutorials/semantic-search.md | 2 +- .../sql-extension/pgml.transform/fill-mask.md | 2 +- .../pgml.transform/text-to-text-generation.md | 2 +- .../getting-started/connect-your-app.md | 2 +- .../developer-docs/self-hosting/README.md | 2 +- .../developer-docs/self-hosting/replication.md | 4 ++-- pgml-cms/docs/resources/faqs.md | 18 +++++++++--------- pgml-cms/docs/use-cases/chatbots/README.md | 8 ++++---- 13 files changed, 32 insertions(+), 32 deletions(-) diff --git a/pgml-cms/blog/introducing-the-openai-switch-kit-move-from-closed-to-open-source-ai-in-minutes.md b/pgml-cms/blog/introducing-the-openai-switch-kit-move-from-closed-to-open-source-ai-in-minutes.md index c91fa151c..6cce2a3f2 100644 --- a/pgml-cms/blog/introducing-the-openai-switch-kit-move-from-closed-to-open-source-ai-in-minutes.md +++ b/pgml-cms/blog/introducing-the-openai-switch-kit-move-from-closed-to-open-source-ai-in-minutes.md @@ -21,7 +21,7 @@ December 1, 2023 ### Introduction -Last week's whirlwind of events with OpenAI CEO and founder Sam Altman stirred up quite a buzz in the industry. The whole deal left many of us scratching our heads about where OpenAI is headed. Between the corporate drama, valid worries about privacy and transparency, and ongoing issues around model performance, censorship, and the use of marketing scare tactics; it's no wonder there's a growing sense of dissatisfaction and distrust in proprietary models. +Last week's whirlwind of events with OpenAI CEO and founder Sam Altman stirred up quite a buzz in the industry. 
The whole deal left many of us scratching our heads about where OpenAI is headed. Between the corporate drama, valid worries about privacy and transparency, and ongoing issues around model performance, censorship, and the use of marketing scare tactics; it's no wonder there's a growing sense of dissatisfaction and distrust in proprietary models. On the bright side, the open-source realm has emerged as a potent contender, not just in reaction to OpenAI's shortcomings but as a genuine advancement in its own right. We're all about making the benefits of open-source models accessible to as many folks as possible. So, we've made switching from OpenAI to open-source as easy as possible with a drop-in replacement. It lets users specify any model they’d like in just a few lines of code. We call it the OpenAI Switch Kit. Read on to learn more about why we think you’ll like it, or just try it now and see what you think. @@ -29,10 +29,10 @@ On the bright side, the open-source realm has emerged as a potent contender, not We think so. Open-source models have made remarkable strides, not only catching up to proprietary counterparts but also surpassing them across multiple domains. The advantages are clear: -* **Performance & reliability:** Open-source models are increasingly comparable or superior across a wide range of tasks and performance metrics. Mistral and Llama-based models, for example, are easily faster than GPT 4. Reliability is another concern you may reconsider leaving in the hands of OpenAI. OpenAI’s API has suffered from several recent outages, and their rate limits can interrupt your app if there is a surge in usage. Open-source models enable greater control over your model’s latency, scalability and availability. Ultimately, the outcome of greater control is that your organization can produce a more dependable integration and a highly reliable production application. 
-* **Safety & privacy:** Open-source models are the clear winner when it comes to security sensitive AI applications. There are [enormous risks](https://www.infosecurity-magazine.com/news-features/chatgpts-datascraping-scrutiny/) associated with transmitting private data to external entities such as OpenAI. By contrast, open-source models retain sensitive information within an organization's own cloud environments. The data never has to leave your premises, so the risk is bypassed altogether – it’s enterprise security by default. At PostgresML, we offer such private hosting of LLM’s in your own cloud. -* **Model censorship:** A growing number of experts inside and outside of leading AI companies argue that model restrictions have gone too far. The Atlantic recently published an [article on AI’s “Spicy-Mayo Problem'' ](https://www.theatlantic.com/ideas/archive/2023/11/ai-safety-regulations-uncensored-models/676076/) which delves into the issues surrounding AI censorship. The titular example describes a chatbot refusing to return commands asking for a “dangerously spicy” mayo recipe. Censorship can affect baseline performance, and in the case of apps for creative work such as Sudowrite, unrestricted open-source models can actually be a key differentiating value for users. -* **Flexibility & customization:** Closed-source models like GPT3.5 Turbo are fine for generalized tasks, but leave little room for customization. Fine-tuning is highly restricted. Additionally, the headwinds at OpenAI have exposed the [dangerous reality of AI vendor lock-in](https://techcrunch.com/2023/11/21/openai-dangers-vendor-lock-in/). Open-source models such as MPT-7B, Llama V2 and Mistral 7B are designed with extensive flexibility for fine tuning, so organizations can create custom specifications and optimize model performance for their unique needs. This level of customization and flexibility opens the door for advanced techniques like DPO, PPO LoRa and more. 
+* **Performance & reliability:** Open-source models are increasingly comparable or superior across a wide range of tasks and performance metrics. Mistral and Llama-based models, for example, are easily faster than GPT 4. Reliability is another concern you may reconsider leaving in the hands of OpenAI. OpenAI’s API has suffered from several recent outages, and their rate limits can interrupt your app if there is a surge in usage. Open-source models enable greater control over your model’s latency, scalability and availability. Ultimately, the outcome of greater control is that your organization can produce a more dependable integration and a highly reliable production application. +* **Safety & privacy:** Open-source models are the clear winner when it comes to security sensitive AI applications. There are [enormous risks](https://www.infosecurity-magazine.com/news-features/chatgpts-datascraping-scrutiny/) associated with transmitting private data to external entities such as OpenAI. By contrast, open-source models retain sensitive information within an organization's own cloud environments. The data never has to leave your premises, so the risk is bypassed altogether – it’s enterprise security by default. At PostgresML, we offer such private hosting of LLM’s in your own cloud. +* **Model censorship:** A growing number of experts inside and outside of leading AI companies argue that model restrictions have gone too far. The Atlantic recently published an [article on AI’s “Spicy-Mayo Problem'' ](https://www.theatlantic.com/ideas/archive/2023/11/ai-safety-regulations-uncensored-models/676076/) which delves into the issues surrounding AI censorship. The titular example describes a chatbot refusing to return commands asking for a “dangerously spicy” mayo recipe. Censorship can affect baseline performance, and in the case of apps for creative work such as Sudowrite, unrestricted open-source models can actually be a key differentiating value for users. 
+* **Flexibility & customization:** Closed-source models like GPT3.5 Turbo are fine for generalized tasks, but leave little room for customization. Fine-tuning is highly restricted. Additionally, the headwinds at OpenAI have exposed the [dangerous reality of AI vendor lock-in](https://techcrunch.com/2023/11/21/openai-dangers-vendor-lock-in/). Open-source models such as MPT-7B, Llama V2 and Mistral 7B are designed with extensive flexibility for fine tuning, so organizations can create custom specifications and optimize model performance for their unique needs. This level of customization and flexibility opens the door for advanced techniques like DPO, PPO LoRa and more. ### Try it now diff --git a/pgml-cms/blog/postgresml-as-a-memory-backend-to-auto-gpt.md b/pgml-cms/blog/postgresml-as-a-memory-backend-to-auto-gpt.md index bea3cb639..d34f19a13 100644 --- a/pgml-cms/blog/postgresml-as-a-memory-backend-to-auto-gpt.md +++ b/pgml-cms/blog/postgresml-as-a-memory-backend-to-auto-gpt.md @@ -88,7 +88,7 @@ Adding PostgresML as a memory backend to Auto-GPT is a relatively simple process POSTGRESML_TABLENAME =autogpt_text_embeddings ``` - If you are using PostgresML cloud, use the hostname and credentials from the cloud platform. + If you are using PostgresML cloud, use the hostname and credentials from the cloud platform. !!! note diff --git a/pgml-cms/careers/product-manager.md b/pgml-cms/careers/product-manager.md index 182cef437..f855d1ac6 100644 --- a/pgml-cms/careers/product-manager.md +++ b/pgml-cms/careers/product-manager.md @@ -4,7 +4,7 @@ tags: [engineering] --- # Product Manager -PostgresML provides cloud hosted AI application databases, that bring the latest machine learning and vector capabilities to the heart of everyone’s favorite tech stack. We're looking for a Head of Growth, with a Technical Product Manager skill set to help shape the core product, inside and outside the company. 
+PostgresML provides cloud hosted AI application databases, that bring the latest machine learning and vector capabilities to the heart of everyone’s favorite tech stack. We're looking for a Head of Growth, with a Technical Product Manager skill set to help shape the core product, inside and outside the company. Reach out if you want to: diff --git a/pgml-cms/docs/api/client-sdk/document-search.md b/pgml-cms/docs/api/client-sdk/document-search.md index 5c7bd5fe4..dd2ce0e49 100644 --- a/pgml-cms/docs/api/client-sdk/document-search.md +++ b/pgml-cms/docs/api/client-sdk/document-search.md @@ -118,7 +118,7 @@ results = await collection.search( Just like `vector_search`, `search` takes in two arguments. The first is a `JSON` object specifying the `query` and `limit` and the second is the `Pipeline`. The `query` object can have three fields: `full_text_search`, `semantic_search` and `filter`. Both `full_text_search` and `semantic_search` function similarly. They take in the text to compare against, titled`query`, an optional `boost` parameter used to boost the effectiveness of the ranking, and `semantic_search` also takes in an optional `parameters` key which specify parameters to pass to the embedding model when embedding the passed in text. -Lets break this query down a little bit more. We are asking for a maximum of 10 documents ranked by `full_text_search` on the `abstract` and `semantic_search` on the `abstract` and `body`. We are also filtering out all documents that do not have the key `user_id` equal to `1`. The `full_text_search` provides a score for the `abstract`, and `semantic_search` provides scores for the `abstract` and the `body`. The `boost` parameter is a multiplier applied to these scores before they are summed together and sorted by `score` `DESC`. +Lets break this query down a little bit more. We are asking for a maximum of 10 documents ranked by `full_text_search` on the `abstract` and `semantic_search` on the `abstract` and `body`. 
We are also filtering out all documents that do not have the key `user_id` equal to `1`. The `full_text_search` provides a score for the `abstract`, and `semantic_search` provides scores for the `abstract` and the `body`. The `boost` parameter is a multiplier applied to these scores before they are summed together and sorted by `score` `DESC`. The `filter` is structured the same way it is when performing `vector_search` see [filtering with vector\_search](https://postgresml.org/docs/api/client-sdk/search)[ ](https://postgresml.org/docs/api/client-sdk/search#metadata-filtering)for more examples on filtering documents. diff --git a/pgml-cms/docs/api/client-sdk/pipelines.md b/pgml-cms/docs/api/client-sdk/pipelines.md index d4ee50294..2d18d05cd 100644 --- a/pgml-cms/docs/api/client-sdk/pipelines.md +++ b/pgml-cms/docs/api/client-sdk/pipelines.md @@ -1,10 +1,10 @@ # Pipelines -`Pipeline`s define the schema for the transformation of documents. Different `Pipeline`s can be used for different tasks. +`Pipeline`s define the schema for the transformation of documents. Different `Pipeline`s can be used for different tasks. ## Defining Schema -New `Pipeline`s require schema. Here are a few examples of variations of schema along with common use cases. +New `Pipeline`s require schema. Here are a few examples of variations of schema along with common use cases. For the following section we will assume we have documents that have the structure: @@ -100,7 +100,7 @@ pipeline = Pipeline( {% endtab %} {% endtabs %} -This `Pipeline` splits and embeds the `body` text enabling semantic search using vectors. This is a very popular `Pipeline` for RAG. +This `Pipeline` splits and embeds the `body` text enabling semantic search using vectors. This is a very popular `Pipeline` for RAG. We support most every open source model on [Hugging Face](https://huggingface.co/), and OpenAI's embedding models. 
To use a model from OpenAI specify the `source` as `openai`, and make sure and set the environment variable `OPENAI_API_KEY`. @@ -210,7 +210,7 @@ pipeline = Pipeline("test_pipeline") ## Searching with Pipelines -There are two different forms of search that can be done after adding a `Pipeline` to a `Collection` +There are two different forms of search that can be done after adding a `Pipeline` to a `Collection` * [Vector Search](https://postgresml.org/docs/api/client-sdk/search) * [Document Search](https://postgresml.org/docs/api/client-sdk/document-search) diff --git a/pgml-cms/docs/api/client-sdk/tutorials/semantic-search.md b/pgml-cms/docs/api/client-sdk/tutorials/semantic-search.md index 8197c92cb..a82550a92 100644 --- a/pgml-cms/docs/api/client-sdk/tutorials/semantic-search.md +++ b/pgml-cms/docs/api/client-sdk/tutorials/semantic-search.md @@ -4,7 +4,7 @@ description: Example for Semantic Search # Semantic Search -This tutorial demonstrates using the `pgml` SDK to create a collection, add documents, build a pipeline for vector search, make a sample query, and archive the collection when finished. +This tutorial demonstrates using the `pgml` SDK to create a collection, add documents, build a pipeline for vector search, make a sample query, and archive the collection when finished. [Link to full JavaScript implementation](../../../../../pgml-sdks/pgml/javascript/examples/semantic\_search.js) diff --git a/pgml-cms/docs/api/sql-extension/pgml.transform/fill-mask.md b/pgml-cms/docs/api/sql-extension/pgml.transform/fill-mask.md index 42ef2d3e8..7d7ed9948 100644 --- a/pgml-cms/docs/api/sql-extension/pgml.transform/fill-mask.md +++ b/pgml-cms/docs/api/sql-extension/pgml.transform/fill-mask.md @@ -4,7 +4,7 @@ description: Task to fill words in a sentence that are hidden # Fill Mask -Fill-mask refers to a task where certain words in a sentence are hidden or "masked", and the objective is to predict what words should fill in those masked positions. 
Such models are valuable when we want to gain statistical insights about the language used to train the model. +Fill-mask refers to a task where certain words in a sentence are hidden or "masked", and the objective is to predict what words should fill in those masked positions. Such models are valuable when we want to gain statistical insights about the language used to train the model. ```sql SELECT pgml.transform( diff --git a/pgml-cms/docs/api/sql-extension/pgml.transform/text-to-text-generation.md b/pgml-cms/docs/api/sql-extension/pgml.transform/text-to-text-generation.md index 6761ba66e..dc97021c7 100644 --- a/pgml-cms/docs/api/sql-extension/pgml.transform/text-to-text-generation.md +++ b/pgml-cms/docs/api/sql-extension/pgml.transform/text-to-text-generation.md @@ -1,6 +1,6 @@ # Text-to-Text Generation -Text-to-text generation methods, such as T5, are neural network architectures designed to perform various natural language processing tasks, including summarization, translation, and question answering. T5 is a transformer-based architecture pre-trained on a large corpus of text data using denoising autoencoding. This pre-training process enables the model to learn general language patterns and relationships between different tasks, which can be fine-tuned for specific downstream tasks. During fine-tuning, the T5 model is trained on a task-specific dataset to learn how to perform the specific task. +Text-to-text generation methods, such as T5, are neural network architectures designed to perform various natural language processing tasks, including summarization, translation, and question answering. T5 is a transformer-based architecture pre-trained on a large corpus of text data using denoising autoencoding. This pre-training process enables the model to learn general language patterns and relationships between different tasks, which can be fine-tuned for specific downstream tasks. 
During fine-tuning, the T5 model is trained on a task-specific dataset to learn how to perform the specific task. _Translation_ diff --git a/pgml-cms/docs/introduction/getting-started/connect-your-app.md b/pgml-cms/docs/introduction/getting-started/connect-your-app.md index 8dc96edd4..1613f5501 100644 --- a/pgml-cms/docs/introduction/getting-started/connect-your-app.md +++ b/pgml-cms/docs/introduction/getting-started/connect-your-app.md @@ -4,7 +4,7 @@ description: PostgresML is compatible with all standard PostgreSQL clients # Connect your app -You can connect to your database from any Postgres compatible client. PostgresML is intended to serve in the traditional role of an application database, along with it's extended role as an MLOps platform to make it easy to build and maintain AI applications. +You can connect to your database from any Postgres compatible client. PostgresML is intended to serve in the traditional role of an application database, along with it's extended role as an MLOps platform to make it easy to build and maintain AI applications. ## Application SDKs diff --git a/pgml-cms/docs/resources/developer-docs/self-hosting/README.md b/pgml-cms/docs/resources/developer-docs/self-hosting/README.md index e64677886..8a4ca9c6e 100644 --- a/pgml-cms/docs/resources/developer-docs/self-hosting/README.md +++ b/pgml-cms/docs/resources/developer-docs/self-hosting/README.md @@ -104,7 +104,7 @@ Replace `14` in `postgresql-server-dev-14` with your Postgres version. 
#### Install pgvector - You can install `pgvector` directly from GitHub by just running: +You can install `pgvector` directly from GitHub by just running: ``` git clone https://github.com/pgvector/pgvector /tmp/pgvector diff --git a/pgml-cms/docs/resources/developer-docs/self-hosting/replication.md b/pgml-cms/docs/resources/developer-docs/self-hosting/replication.md index 92fa25726..411ed844d 100644 --- a/pgml-cms/docs/resources/developer-docs/self-hosting/replication.md +++ b/pgml-cms/docs/resources/developer-docs/self-hosting/replication.md @@ -88,7 +88,7 @@ By default, S3 buckets are protected against public access, which is important f #### **Configure pgBackRest** -pgBackRest can be configured by editing the `/etc/pgbackrest.conf` file. This file should be readable by the `postgres` user and nobody else, since it'll contain some important information. +pgBackRest can be configured by editing the `/etc/pgbackrest.conf` file. This file should be readable by the `postgres` user and nobody else, since it'll contain some important information. Using the S3 bucket we created above, we can configure pgBackRest to use it for the WAL archive: @@ -138,7 +138,7 @@ Before configuring the replica, we need to make sure it's running the same softw #### Replicating data -A streaming replica is byte-for-byte identical to the primary, so in order to create one, we first need to copy all the database files stored on the primary over to the replica. Postgres provides a very handy command line tool for this called `pg_basebackup`. +A streaming replica is byte-for-byte identical to the primary, so in order to create one, we first need to copy all the database files stored on the primary over to the replica. Postgres provides a very handy command line tool for this called `pg_basebackup`. On Ubuntu 22.04, the PostgreSQL 14 Debian package automatically creates a new Postgres data directory and cluster configuration. 
Since the replica has to have the same data as the primary, first thing we need to do is to delete that automatically created data directory and replace it with the one stored on the primary. diff --git a/pgml-cms/docs/resources/faqs.md b/pgml-cms/docs/resources/faqs.md index 524aab00b..2d8ede8c6 100644 --- a/pgml-cms/docs/resources/faqs.md +++ b/pgml-cms/docs/resources/faqs.md @@ -6,19 +6,19 @@ description: PostgresML Frequently Asked Questions ## What is PostgresML? -PostgresML is an open-source database extension that turns Postgres into an end-to-end machine learning platform. It allows you to build, train, and deploy ML models directly within your Postgres database without moving data between systems. +PostgresML is an open-source database extension that turns Postgres into an end-to-end machine learning platform. It allows you to build, train, and deploy ML models directly within your Postgres database without moving data between systems. -## What is a DB extension? +## What is a DB extension? A database extension is software that extends the capabilities of a database. Postgres allows extensions to add new data types, functions, operators, indexes, etc. PostgresML uses extensions to bring machine learning capabilities natively into Postgres. -## How does it work? +## How does it work? PostgresML installs as extensions in Postgres. It provides SQL API functions for each step of the ML workflow like importing data, transforming features, training models, making predictions, etc. Models are stored back into Postgres tables. This unified approach eliminates complexity. ## What are the benefits? -Benefits include faster development cycles, reduced latency, tighter integration between ML and applications, leveraging Postgres' reliability and ACID transactions, and horizontal scaling. 
+Benefits include faster development cycles, reduced latency, tighter integration between ML and applications, leveraging Postgres' reliability and ACID transactions, and horizontal scaling. ## What are the cons? @@ -31,10 +31,10 @@ Hosted PostgresML is a fully managed cloud service that provides all the capabil With hosted PostgresML, you get: * Flexible compute resources - Choose CPU, RAM or GPU machines tailored to your workload -* Horizontally scalable inference with read-only replicas -* High availability for production applications with multi-region deployments -* Support for multiple users and databases -* Automated backups and point-in-time restore -* Monitoring dashboard with metrics and logs +* Horizontally scalable inference with read-only replicas +* High availability for production applications with multi-region deployments +* Support for multiple users and databases +* Automated backups and point-in-time restore +* Monitoring dashboard with metrics and logs In summary, hosted PostgresML removes the operational burden so you can focus on developing machine learning applications, while still getting the benefits of the unified PostgresML architecture. diff --git a/pgml-cms/docs/use-cases/chatbots/README.md b/pgml-cms/docs/use-cases/chatbots/README.md index e31399a46..869a733e6 100644 --- a/pgml-cms/docs/use-cases/chatbots/README.md +++ b/pgml-cms/docs/use-cases/chatbots/README.md @@ -243,7 +243,7 @@ asyncio.run(main()) You just asked me my name, and I responded that my name is Hermes. Is there anything else you would like to know? ``` -By chaining these special tags we can build a conversation that Hermes has been trained to understand and is a great function approximator for. +By chaining these special tags we can build a conversation that Hermes has been trained to understand and is a great function approximator for. {% hint style="info" %} This example highlights that modern LLM's are stateless function approximators. 
Notice we have included the first question we asked and the models response in our input. Every time we ask it a new question in our conversation, we will have to supply the entire conversation history if we want it to know what we already discussed. LLMs have no built in way to remember past questions and conversations. @@ -294,13 +294,13 @@ Once again we are using `pgml` to abstract away the complicated pieces for our m Our search returned the exact section of the Wikipedia article we wanted! Let's talk a little bit about what is going on here. -First we create a `pipeline`. A pipeline is composed of a `splitter` that splits a document, and a `model` that embeds the document. In this case we are using the default for both. +First we create a `pipeline`. A pipeline is composed of a `splitter` that splits a document, and a `model` that embeds the document. In this case we are using the default for both. Second we create a `collection`. A `collection` is just some number of documents that we can search over. In relation to our hypothetical example and diagram above, you can think of the `collection` as the Store - the storage of chunk's text and embeddings we can search over. -After creating the `collection` we add the `pipeline` to it. This means every time we upsert new documents, the `pipeline` will automatically split and embed those documents. +After creating the `collection` we add the `pipeline` to it. This means every time we upsert new documents, the `pipeline` will automatically split and embed those documents. -We extract the text from the Wikipedia article using the `wikipediaapi` library and upsert it into our collection. +We extract the text from the Wikipedia article using the `wikipediaapi` library and upsert it into our collection. After our collection has split and embedded the Wikipedia document we search over it getting the best matching chunk and print that chunk's text out. 
From 99764c91cb5328c8ed31d930b3c80b24f6d3c7b0 Mon Sep 17 00:00:00 2001 From: Montana Low Date: Tue, 19 Mar 2024 14:12:23 -0700 Subject: [PATCH 3/3] fix italics --- pgml-cms/docs/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pgml-cms/docs/README.md b/pgml-cms/docs/README.md index a698c121a..492186ec3 100644 --- a/pgml-cms/docs/README.md +++ b/pgml-cms/docs/README.md @@ -6,9 +6,9 @@ description: The key concepts that make up PostgresML. PostgresML is a complete MLOps platform built on PostgreSQL. -> _Move the models to the database_, _rather than continuously moving the data to the models._ +> _Move the models to the database, rather than continuously moving the data to the models._ -The data for ML & AI systems is inherently larger and more dynamic than the models. It's more efficient, manageable and reliable to move the models to the database, rather than continuously moving the data to the models\_.\_ PostgresML allows you to take advantage of the fundamental relationship between data and models, by extending the database with the following capabilities and goals: +The data for ML & AI systems is inherently larger and more dynamic than the models. It's more efficient, manageable and reliable to move the models to the database, rather than continuously moving the data to the models. PostgresML allows you to take advantage of the fundamental relationship between data and models, by extending the database with the following capabilities and goals: * **Model Serving** - _**GPU accelerated**_ inference engine for interactive applications, with no additional networking latency or reliability costs. * **Model Store** - Download _**open-source**_ models including state of the art LLMs from HuggingFace, and track changes in performance between versions. pFad - Phonifier reborn
