From 5e916d5b12c49e422daa64258cd8be236cdfaf7f Mon Sep 17 00:00:00 2001 From: Silas Marvin <19626586+SilasMarvin@users.noreply.github.com> Date: Tue, 28 May 2024 12:35:30 -0700 Subject: [PATCH 1/6] rename embedding models --- packages/pgml-rds-proxy/README.md | 2 +- pgml-apps/pgml-chat/pgml_chat/main.py | 5 +- ...s-with-open-source-models-in-postgresml.md | 20 ++-- ...-with-application-data-in-your-database.md | 4 +- ...latency-knowledge-based-chatbots-part-i.md | 6 +- .../speeding-up-vector-recall-5x-with-hnsw.md | 4 +- pgml-cms/blog/the-1.0-sdk-is-here.md | 4 +- ...rating-query-embeddings-in-the-database.md | 12 +-- ...gresml-with-django-and-embedding-search.md | 6 +- pgml-cms/docs/api/client-sdk/README.md | 6 +- .../docs/api/client-sdk/document-search.md | 14 +-- pgml-cms/docs/api/client-sdk/pipelines.md | 24 ++--- pgml-cms/docs/api/client-sdk/search.md | 14 +-- .../client-sdk/tutorials/semantic-search-1.md | 9 +- .../client-sdk/tutorials/semantic-search.md | 4 +- pgml-cms/docs/api/sql-extension/README.md | 4 +- pgml-cms/docs/guides/chatbots/README.md | 2 +- .../embeddings/dimensionality-reduction.md | 94 ++++++++++++------- .../import-your-data/foreign-data-wrappers.md | 2 +- pgml-cms/docs/product/vector-database.md | 6 +- .../partitioning.md | 2 +- .../developer-docs/quick-start-with-docker.md | 4 +- pgml-cms/docs/use-cases/chatbots.md | 4 +- ...-with-application-data-in-your-database.md | 4 +- ...rating-query-embeddings-in-the-database.md | 12 +-- .../pgml_embeddings.py | 2 +- .../pgml_embeddings.sql | 4 +- .../hf_pinecone_vs_postgresml/pgml_query.py | 2 +- .../src/components/pages/demo/template.html | 8 +- pgml-dashboard/src/utils/markdown.rs | 10 +- pgml-extension/examples/transformers.sql | 6 +- pgml-sdks/pgml/javascript/examples/README.md | 4 +- .../examples/extractive_question_answering.js | 2 +- .../javascript/examples/question_answering.js | 2 +- .../examples/question_answering_instructor.js | 5 +- .../javascript/examples/semantic_search.js | 2 +- .../summarizing_question_answering.js | 2 +- pgml-sdks/pgml/python/examples/README.md | 4 +- .../examples/extractive_question_answering.py | 2 +- .../python/examples/question_answering.py | 2 +- .../examples/question_answering_instructor.py | 5 +- .../python/examples/rag_question_answering.py | 2 +- .../pgml/python/examples/semantic_search.py | 2 +- .../summarizing_question_answering.py | 2 +- pgml-sdks/pgml/python/tests/stress_test.py | 5 +- pgml-sdks/pgml/src/model.rs | 2 +- pgml-sdks/pgml/src/sql/remote.sql | 4 +- 47 files changed, 166 insertions(+), 181 deletions(-) diff --git a/packages/pgml-rds-proxy/README.md b/packages/pgml-rds-proxy/README.md index d7ff662de..0301ea584 100644 --- a/packages/pgml-rds-proxy/README.md +++ b/packages/pgml-rds-proxy/README.md @@ -76,7 +76,7 @@ SELECT FROM dblink( 'postgresml', - 'SELECT * FROM pgml.embed(''intfloat/e5-small'', ''embed this text'') AS embedding' + 'SELECT * FROM pgml.embed(''Alibaba-NLP/gte-base-en-v1.5'', ''embed this text'') AS embedding' ) AS t1(embedding real[386]); ``` diff --git a/pgml-apps/pgml-chat/pgml_chat/main.py b/pgml-apps/pgml-chat/pgml_chat/main.py index 3d8b27dda..0d1ae6c28 100644 --- a/pgml-apps/pgml-chat/pgml_chat/main.py +++ b/pgml-apps/pgml-chat/pgml_chat/main.py @@ -195,9 +195,8 @@ def handler(signum, frame): ) splitter = Splitter(splitter_name, splitter_params) -model_name = "hkunlp/instructor-xl" -model_embedding_instruction = "Represent the %s document for retrieval: " % (bot_topic) -model_params = {"instruction": model_embedding_instruction} +model_name = 
"Alibaba-NLP/gte-base-en-v1.5" +model_params = {} model = Model(model_name, "pgml", model_params) pipeline = Pipeline(args.collection_name + "_pipeline", model, splitter) diff --git a/pgml-cms/blog/generating-llm-embeddings-with-open-source-models-in-postgresml.md b/pgml-cms/blog/generating-llm-embeddings-with-open-source-models-in-postgresml.md index b66129614..664569814 100644 --- a/pgml-cms/blog/generating-llm-embeddings-with-open-source-models-in-postgresml.md +++ b/pgml-cms/blog/generating-llm-embeddings-with-open-source-models-in-postgresml.md @@ -122,14 +122,14 @@ LIMIT 5; PostgresML provides a simple interface to generate embeddings from text in your database. You can use the [`pgml.embed`](https://postgresml.org/docs/guides/transformers/embeddings) function to generate embeddings for a column of text. The function takes a transformer name and a text value. The transformer will automatically be downloaded and cached on your connection process for reuse. You can see a list of potential good candidate models to generate embeddings on the [Massive Text Embedding Benchmark leaderboard](https://huggingface.co/spaces/mteb/leaderboard). -Since our corpus of documents (movie reviews) are all relatively short and similar in style, we don't need a large model. [`intfloat/e5-small`](https://huggingface.co/intfloat/e5-small) will be a good first attempt. The great thing about PostgresML is you can always regenerate your embeddings later to experiment with different embedding models. +Since our corpus of documents (movie reviews) are all relatively short and similar in style, we don't need a large model. [`Alibaba-NLP/gte-base-en-v1.5`](https://huggingface.co/Alibaba-NLP/gte-base-en-v1.5) will be a good first attempt. The great thing about PostgresML is you can always regenerate your embeddings later to experiment with different embedding models. -It takes a couple of minutes to download and cache the `intfloat/e5-small` model to generate the first embedding. After that, it's pretty fast. +It takes a couple of minutes to download and cache the `Alibaba-NLP/gte-base-en-v1.5` model to generate the first embedding. After that, it's pretty fast. Note how we prefix the text we want to embed with either `passage:` or `query:` , the e5 model requires us to prefix our data with `passage:` if we're generating embeddings for our corpus and `query:` if we want to find semantically similar content. ```postgresql -SELECT pgml.embed('intfloat/e5-small', 'passage: hi mom'); +SELECT pgml.embed('Alibaba-NLP/gte-base-en-v1.5', 'passage: hi mom'); ``` This is a pretty powerful function, because we can pass any arbitrary text to any open source model, and it will generate an embedding for us. 
We can benchmark how long it takes to generate an embedding for a single review, using client-side timings in Postgres: @@ -147,7 +147,7 @@ Aside from using this function with strings passed from a client, we can use it ```postgresql SELECT review_body, - pgml.embed('intfloat/e5-small', 'passage: ' || review_body) + pgml.embed('Alibaba-NLP/gte-base-en-v1.5', 'passage: ' || review_body) FROM pgml.amazon_us_reviews LIMIT 1; ``` @@ -171,7 +171,7 @@ Time to generate an embedding increases with the length of the input text, and v ```postgresql SELECT review_body, - pgml.embed('intfloat/e5-small', 'passage: ' || review_body) AS embedding + pgml.embed('Alibaba-NLP/gte-base-en-v1.5', 'passage: ' || review_body) AS embedding FROM pgml.amazon_us_reviews LIMIT 1000; ``` @@ -190,7 +190,7 @@ We can also do a quick sanity check to make sure we're really getting value out SELECT reviqew_body, pgml.embed( - 'intfloat/e5-small', + 'Alibaba-NLP/gte-base-en-v1.5', 'passage: ' || review_body, '{"device": "cpu"}' ) AS embedding @@ -224,6 +224,12 @@ You can also find embedding models that outperform OpenAI's `text-embedding-ada- The current leading model is `hkunlp/instructor-xl`. Instructor models take an additional `instruction` parameter which includes context for the embeddings use case, similar to prompts before text generation tasks. +!!! note + + "Alibaba-NLP/gte-base-en-v1.5" surpassed the quality of instructor-xl, and should be used instead, but we've left this documentation available for existing users + +!!! + Instructions can provide a "classification" or "topic" for the text: #### Classification @@ -325,7 +331,7 @@ BEGIN UPDATE pgml.amazon_us_reviews SET review_embedding_e5_large = pgml.embed( - 'intfloat/e5-large', + 'Alibaba-NLP/gte-base-en-v1.5', 'passage: ' || review_body ) WHERE id BETWEEN i AND i + 10 diff --git a/pgml-cms/blog/personalize-embedding-results-with-application-data-in-your-database.md b/pgml-cms/blog/personalize-embedding-results-with-application-data-in-your-database.md index 734371233..b9d4b48e8 100644 --- a/pgml-cms/blog/personalize-embedding-results-with-application-data-in-your-database.md +++ b/pgml-cms/blog/personalize-embedding-results-with-application-data-in-your-database.md @@ -137,7 +137,7 @@ We can find a customer that our embeddings model feels is close to the sentiment ```postgresql WITH request AS ( SELECT pgml.embed( - 'intfloat/e5-large', + 'Alibaba-NLP/gte-base-en-v1.5', 'query: I love all Star Wars, but Empire Strikes Back is particularly amazing' )::vector(1024) AS embedding ) @@ -214,7 +214,7 @@ Now we can write our personalized SQL query. 
It's nearly the same as our query f -- create a request embedding on the fly WITH request AS ( SELECT pgml.embed( - 'intfloat/e5-large', + 'Alibaba-NLP/gte-base-en-v1.5', 'query: Best 1980''s scifi movie' )::vector(1024) AS embedding ), diff --git a/pgml-cms/blog/pgml-chat-a-command-line-tool-for-deploying-low-latency-knowledge-based-chatbots-part-i.md b/pgml-cms/blog/pgml-chat-a-command-line-tool-for-deploying-low-latency-knowledge-based-chatbots-part-i.md index fef4e7b5e..e32515f00 100644 --- a/pgml-cms/blog/pgml-chat-a-command-line-tool-for-deploying-low-latency-knowledge-based-chatbots-part-i.md +++ b/pgml-cms/blog/pgml-chat-a-command-line-tool-for-deploying-low-latency-knowledge-based-chatbots-part-i.md @@ -127,9 +127,7 @@ cp .env.template .env ```bash OPENAI_API_KEY= DATABASE_URL= -MODEL=hkunlp/instructor-xl -MODEL_PARAMS={"instruction": "Represent the document for retrieval: "} -QUERY_PARAMS={"instruction": "Represent the question for retrieving supporting documents: "} +MODEL=Alibaba-NLP/gte-base-en-v1.5 SYSTEM_PROMPT=<> # System prompt used for OpenAI chat completion BASE_PROMPT=<> # Base prompt used for OpenAI chat completion for each turn SLACK_BOT_TOKEN= # Slack bot token to run Slack chat service @@ -332,7 +330,7 @@ Once the discord app is running, you can interact with the chatbot on Discord as ### PostgresML vs. Hugging Face + Pinecone -To evaluate query latency, we performed an experiment with 10,000 Wikipedia documents from the SQuAD dataset. Embeddings were generated using the intfloat/e5-large model. +To evaluate query latency, we performed an experiment with 10,000 Wikipedia documents from the SQuAD dataset. Embeddings were generated using the Alibaba-NLP/gte-base-en-v1.5 model. For PostgresML, we used a GPU-powered serverless database running on NVIDIA A10G GPUs with client in us-west-2 region. For HuggingFace, we used their inference API endpoint running on NVIDIA A10G GPUs in us-east-1 region and a client in the same us-east-1 region. Pinecone was used as the vector search index for HuggingFace embeddings. diff --git a/pgml-cms/blog/speeding-up-vector-recall-5x-with-hnsw.md b/pgml-cms/blog/speeding-up-vector-recall-5x-with-hnsw.md index 621bc99ea..cdd455bf0 100644 --- a/pgml-cms/blog/speeding-up-vector-recall-5x-with-hnsw.md +++ b/pgml-cms/blog/speeding-up-vector-recall-5x-with-hnsw.md @@ -45,7 +45,7 @@ Let's run that query again: ```postgresql WITH request AS ( SELECT pgml.embed( - 'intfloat/e5-large', + 'Alibaba-NLP/gte-base-en-v1.5', 'query: Best 1980''s scifi movie' )::vector(1024) AS embedding ) @@ -100,7 +100,7 @@ Now let's try the query again utilizing the new HNSW index we created. 
```postgresql WITH request AS ( SELECT pgml.embed( - 'intfloat/e5-large', + 'Alibaba-NLP/gte-base-en-v1.5', 'query: Best 1980''s scifi movie' )::vector(1024) AS embedding ) diff --git a/pgml-cms/blog/the-1.0-sdk-is-here.md b/pgml-cms/blog/the-1.0-sdk-is-here.md index 94464d566..9486d77cf 100644 --- a/pgml-cms/blog/the-1.0-sdk-is-here.md +++ b/pgml-cms/blog/the-1.0-sdk-is-here.md @@ -50,7 +50,7 @@ const pipeline = pgml.newPipeline("my_pipeline", { text: { splitter: { model: "recursive_character" }, semantic_search: { - model: "intfloat/e5-small", + model: "Alibaba-NLP/gte-base-en-v1.5", }, }, }); @@ -90,7 +90,7 @@ pipeline = Pipeline( "text": { "splitter": {"model": "recursive_character"}, "semantic_search": { - "model": "intfloat/e5-small", + "model": "Alibaba-NLP/gte-base-en-v1.5", }, }, }, diff --git a/pgml-cms/blog/tuning-vector-recall-while-generating-query-embeddings-in-the-database.md b/pgml-cms/blog/tuning-vector-recall-while-generating-query-embeddings-in-the-database.md index 4724740df..f73c6c617 100644 --- a/pgml-cms/blog/tuning-vector-recall-while-generating-query-embeddings-in-the-database.md +++ b/pgml-cms/blog/tuning-vector-recall-while-generating-query-embeddings-in-the-database.md @@ -124,7 +124,7 @@ We'll start with semantic search. Given a user query, e.g. "Best 1980's scifi mo ```postgresql WITH request AS ( SELECT pgml.embed( - 'intfloat/e5-large', + 'Alibaba-NLP/gte-base-en-v1.5', 'query: Best 1980''s scifi movie' )::vector(1024) AS embedding ) @@ -171,7 +171,7 @@ Generating a query plan more quickly and only computing the values once, may mak There's some good stuff happening in those query results, so let's break it down: * **It's fast** - We're able to generate a request embedding on the fly with a state-of-the-art model, and search 5M reviews in 152ms, including fetching the results back to the client 😍. You can't even generate an embedding from OpenAI's API in that time, much less search 5M reviews in some other database with it. -* **It's good** - The `review_body` results are very similar to the "Best 1980's scifi movie" request text. We're using the `intfloat/e5-large` open source embedding model, which outperforms OpenAI's `text-embedding-ada-002` in most [quality benchmarks](https://huggingface.co/spaces/mteb/leaderboard). +* **It's good** - The `review_body` results are very similar to the "Best 1980's scifi movie" request text. We're using the `Alibaba-NLP/gte-base-en-v1.5` open source embedding model, which outperforms OpenAI's `text-embedding-ada-002` in most [quality benchmarks](https://huggingface.co/spaces/mteb/leaderboard). * Qualitatively: the embeddings understand our request for `scifi` being equivalent to `Sci-Fi`, `sci-fi`, `SciFi`, and `sci fi`, as well as `1980's` matching `80s` and `80's` and is close to `seventies` (last place). We didn't have to configure any of this and the most enthusiastic for "best" is at the top, the least enthusiastic is at the bottom, so the model has appropriately captured "sentiment". * Quantitatively: the `cosine_similarity` of all results are high and tight, 0.90-0.95 on a scale from -1:1. We can be confident we recalled very similar results from our 5M candidates, even though it would take 485 times as long to check all of them directly. * **It's reliable** - The model is stored in the database, so we don't need to worry about managing a separate service. If you repeat this query over and over, the timings will be extremely consistent, because we don't have to deal with things like random network congestion. 
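That consistency is easy to sanity-check from a client by repeating the query and looking at the spread of timings. A rough harness sketch, again assuming `psycopg2` and a `DATABASE_URL` environment variable (the loop count and query text here are arbitrary choices for illustration):

```python
import os
import statistics
import time

import psycopg2

conn = psycopg2.connect(os.environ["DATABASE_URL"])
timings = []

with conn.cursor() as cur:
    for _ in range(20):
        start = time.perf_counter()
        # Embedding generation happens inside the database, next to the data.
        cur.execute(
            "SELECT pgml.embed(%s, %s)",
            ("Alibaba-NLP/gte-base-en-v1.5", "query: Best 1980's scifi movie"),
        )
        cur.fetchone()
        timings.append(time.perf_counter() - start)

# A small standard deviation relative to the mean supports the consistency claim.
print(f"mean: {statistics.mean(timings):.4f}s stdev: {statistics.stdev(timings):.4f}s")
```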
@@ -254,7 +254,7 @@ Now we can quickly search for movies by what people have said about them: ```postgresql WITH request AS ( SELECT pgml.embed( - 'intfloat/e5-large', + 'Alibaba-NLP/gte-base-en-v1.5', 'Best 1980''s scifi movie' )::vector(1024) AS embedding ) @@ -312,7 +312,7 @@ SET ivfflat.probes = 300; ```postgresql WITH request AS ( SELECT pgml.embed( - 'intfloat/e5-large', + 'Alibaba-NLP/gte-base-en-v1.5', 'Best 1980''s scifi movie' )::vector(1024) AS embedding ) @@ -401,7 +401,7 @@ SET ivfflat.probes = 1; ```postgresql WITH request AS ( SELECT pgml.embed( - 'intfloat/e5-large', + 'Alibaba-NLP/gte-base-en-v1.5', 'query: Best 1980''s scifi movie' )::vector(1024) AS embedding ) @@ -457,7 +457,7 @@ SQL is a very expressive language that can handle a lot of complexity. To keep t -- create a request embedding on the fly WITH request AS ( SELECT pgml.embed( - 'intfloat/e5-large', + 'Alibaba-NLP/gte-base-en-v1.5', 'query: Best 1980''s scifi movie' )::vector(1024) AS embedding ), diff --git a/pgml-cms/blog/using-postgresml-with-django-and-embedding-search.md b/pgml-cms/blog/using-postgresml-with-django-and-embedding-search.md index 0edb3dc2c..f54388066 100644 --- a/pgml-cms/blog/using-postgresml-with-django-and-embedding-search.md +++ b/pgml-cms/blog/using-postgresml-with-django-and-embedding-search.md @@ -58,7 +58,7 @@ class EmbedSmallExpression(models.Expression): self.embedding_field = field def as_sql(self, compiler, connection, template=None): - return f"pgml.embed('intfloat/e5-small', {self.embedding_field})", None + return f"pgml.embed('Alibaba-NLP/gte-base-en-v1.5', {self.embedding_field})", None ``` And that's it! In just a few lines of code, we're generating and storing high quality embeddings automatically in our database. No additional setup is required, and all the AI complexity is taken care of by PostgresML. @@ -70,7 +70,7 @@ Djago Rest Framework provides the bulk of the implementation. We just added a `M ```python results = TodoItem.objects.annotate( similarity=RawSQL( - "pgml.embed('intfloat/e5-small', %s)::vector(384) <=> embedding", + "pgml.embed('Alibaba-NLP/gte-base-en-v1.5', %s)::vector(384) <=> embedding", [query], ) ).order_by("similarity") @@ -115,7 +115,7 @@ In return, you'll get your to-do item alongside the embedding of the `descriptio The embedding contains 384 floating point numbers; we removed most of them in this blog post to make sure it fits on the page. -You can try creating multiple to-do items for fun and profit. If the description is changed, so will the embedding, demonstrating how the `intfloat/e5-small` model understands the semantic meaning of your text. +You can try creating multiple to-do items for fun and profit. If the description is changed, so will the embedding, demonstrating how the `Alibaba-NLP/gte-base-en-v1.5` model understands the semantic meaning of your text. 
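One way to see that concretely is to embed a description, a paraphrase of it, and an unrelated sentence, then compare cosine similarities. A small sketch, assuming `psycopg2`, a `DATABASE_URL` environment variable, and some made-up example sentences:

```python
import math
import os

import psycopg2


def embed(cur, text):
    # Generate the embedding in-database, the same call the Django expression makes.
    cur.execute("SELECT pgml.embed('Alibaba-NLP/gte-base-en-v1.5', %s)", (text,))
    return cur.fetchone()[0]


def cosine(a, b):
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(x * x for x in b))
    return dot / (norm_a * norm_b)


conn = psycopg2.connect(os.environ["DATABASE_URL"])
with conn.cursor() as cur:
    base = embed(cur, "Buy groceries for the week")
    paraphrase = embed(cur, "Purchase food for the next seven days")
    unrelated = embed(cur, "File quarterly taxes")

# The paraphrase should score noticeably higher than the unrelated sentence.
print(cosine(base, paraphrase), cosine(base, unrelated))
```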
### Searching diff --git a/pgml-cms/docs/api/client-sdk/README.md b/pgml-cms/docs/api/client-sdk/README.md index 881be3046..866610b92 100644 --- a/pgml-cms/docs/api/client-sdk/README.md +++ b/pgml-cms/docs/api/client-sdk/README.md @@ -80,7 +80,7 @@ const pipeline = pgml.newPipeline("sample_pipeline", { text: { splitter: { model: "recursive_character" }, semantic_search: { - model: "intfloat/e5-small", + model: "Alibaba-NLP/gte-base-en-v1.5", }, }, }); @@ -98,7 +98,7 @@ pipeline = Pipeline( "text": { "splitter": { "model": "recursive_character" }, "semantic_search": { - "model": "intfloat/e5-small", + "model": "Alibaba-NLP/gte-base-en-v1.5", }, }, }, @@ -111,7 +111,7 @@ await collection.add_pipeline(pipeline) The pipeline configuration is a key/value object, where the key is the name of a column in a document, and the value is the action the SDK should perform on that column. -In this example, the documents contain a column called `text` which we are instructing the SDK to chunk the contents of using the recursive character splitter, and to embed those chunks using the Hugging Face `intfloat/e5-small` embeddings model. +In this example, the documents contain a column called `text` which we are instructing the SDK to chunk the contents of using the recursive character splitter, and to embed those chunks using the Hugging Face `Alibaba-NLP/gte-base-en-v1.5` embeddings model. ### Add documents diff --git a/pgml-cms/docs/api/client-sdk/document-search.md b/pgml-cms/docs/api/client-sdk/document-search.md index dd2ce0e49..cf91f95ee 100644 --- a/pgml-cms/docs/api/client-sdk/document-search.md +++ b/pgml-cms/docs/api/client-sdk/document-search.md @@ -10,17 +10,14 @@ This section will assume we have previously ran the following code: const pipeline = pgml.newPipeline("test_pipeline", { abstract: { semantic_search: { - model: "intfloat/e5-small", + model: "Alibaba-NLP/gte-base-en-v1.5", }, full_text_search: { configuration: "english" }, }, body: { splitter: { model: "recursive_character" }, semantic_search: { - model: "hkunlp/instructor-base", - parameters: { - instruction: "Represent the Wikipedia document for retrieval: ", - } + model: "Alibaba-NLP/gte-base-en-v1.5", }, }, }); @@ -36,17 +33,14 @@ pipeline = Pipeline( { "abstract": { "semantic_search": { - "model": "intfloat/e5-small", + "model": "Alibaba-NLP/gte-base-en-v1.5", }, "full_text_search": {"configuration": "english"}, }, "body": { "splitter": {"model": "recursive_character"}, "semantic_search": { - "model": "hkunlp/instructor-base", - "parameters": { - "instruction": "Represent the Wikipedia document for retrieval: ", - }, + "model": "Alibaba-NLP/gte-base-en-v1.5", }, }, }, diff --git a/pgml-cms/docs/api/client-sdk/pipelines.md b/pgml-cms/docs/api/client-sdk/pipelines.md index bc1dde8a9..c51987cad 100644 --- a/pgml-cms/docs/api/client-sdk/pipelines.md +++ b/pgml-cms/docs/api/client-sdk/pipelines.md @@ -32,10 +32,7 @@ const pipeline = pgml.newPipeline("test_pipeline", { body: { splitter: { model: "recursive_character" }, semantic_search: { - model: "hkunlp/instructor-base", - parameters: { - instruction: "Represent the Wikipedia document for retrieval: ", - } + model: "Alibaba-NLP/gte-base-en-v1.5", }, }, }); @@ -53,10 +50,7 @@ pipeline = Pipeline( "body": { "splitter": {"model": "recursive_character"}, "semantic_search": { - "model": "hkunlp/instructor-base", - "parameters": { - "instruction": "Represent the Wikipedia document for retrieval: ", - }, + "model": "Alibaba-NLP/gte-base-en-v1.5", }, }, }, @@ -76,10 +70,7 @@ const pipeline = 
pgml.newPipeline("test_pipeline", { body: { splitter: { model: "recursive_character" }, semantic_search: { - model: "hkunlp/instructor-base", - parameters: { - instruction: "Represent the Wikipedia document for retrieval: ", - } + model: "Alibaba-NLP/gte-base-en-v1.5", }, }, }); @@ -94,10 +85,7 @@ pipeline = Pipeline( "body": { "splitter": {"model": "recursive_character"}, "semantic_search": { - "model": "hkunlp/instructor-base", - "parameters": { - "instruction": "Represent the Wikipedia document for retrieval: ", - }, + "model": "Alibaba-NLP/gte-base-en-v1.5", }, }, }, @@ -151,7 +139,7 @@ const pipeline = pgml.newPipeline("test_pipeline", { body: { splitter: { model: "recursive_character" }, semantic_search: { - model: "intfloat/e5-small", + model: "Alibaba-NLP/gte-base-en-v1.5", hnsw: { m: 100, ef_construction: 200 @@ -170,7 +158,7 @@ pipeline = Pipeline( "body": { "splitter": {"model": "recursive_character"}, "semantic_search": { - "model": "intfloat/e5-small", + "model": "Alibaba-NLP/gte-base-en-v1.5", "hnsw": {"m": 100, "ef_construction": 200}, }, }, diff --git a/pgml-cms/docs/api/client-sdk/search.md b/pgml-cms/docs/api/client-sdk/search.md index 1355368a0..8318a8bee 100644 --- a/pgml-cms/docs/api/client-sdk/search.md +++ b/pgml-cms/docs/api/client-sdk/search.md @@ -10,17 +10,14 @@ This section will assume we have previously ran the following code: const pipeline = pgml.newPipeline("test_pipeline", { abstract: { semantic_search: { - model: "intfloat/e5-small", + model: "Alibaba-NLP/gte-base-en-v1.5", }, full_text_search: { configuration: "english" }, }, body: { splitter: { model: "recursive_character" }, semantic_search: { - model: "hkunlp/instructor-base", - parameters: { - instruction: "Represent the Wikipedia document for retrieval: ", - } + model: "Alibaba-NLP/gte-base-en-v1.5", }, }, }); @@ -36,17 +33,14 @@ pipeline = Pipeline( { "abstract": { "semantic_search": { - "model": "intfloat/e5-small", + "model": "Alibaba-NLP/gte-base-en-v1.5", }, "full_text_search": {"configuration": "english"}, }, "body": { "splitter": {"model": "recursive_character"}, "semantic_search": { - "model": "hkunlp/instructor-base", - "parameters": { - "instruction": "Represent the Wikipedia document for retrieval: ", - }, + "model": "Alibaba-NLP/gte-base-en-v1.5", }, }, }, diff --git a/pgml-cms/docs/api/client-sdk/tutorials/semantic-search-1.md b/pgml-cms/docs/api/client-sdk/tutorials/semantic-search-1.md index f0d54c64a..4c28a9714 100644 --- a/pgml-cms/docs/api/client-sdk/tutorials/semantic-search-1.md +++ b/pgml-cms/docs/api/client-sdk/tutorials/semantic-search-1.md @@ -4,7 +4,7 @@ description: Example for Semantic Search # Semantic Search Using Instructor Model -This tutorial demonstrates using the `pgml` SDK to create a collection, add documents, build a pipeline for vector search, make a sample query, and archive the collection when finished. In this tutorial we use [hkunlp/instructor-base](https://huggingface.co/hkunlp/instructor-base), a more advanced embeddings model that takes parameters when doing embedding and recall. +This tutorial demonstrates using the `pgml` SDK to create a collection, add documents, build a pipeline for vector search, make a sample query, and archive the collection when finished. In this tutorial we use [Alibaba-NLP/gte-base-en-v1.5](https://huggingface.co/Alibaba-NLP/gte-base-en-v1.5). 
[Link to full JavaScript implementation](https://github.com/postgresml/postgresml/blob/master/pgml-sdks/pgml/javascript/examples/question_answering.js) @@ -71,7 +71,7 @@ A pipeline encapsulating a model and splitter is created and added to the collec text: { splitter: { model: "recursive_character" }, semantic_search: { - model: "intfloat/e5-small", + model: "Alibaba-NLP/gte-base-en-v1.5", }, }, }); @@ -88,10 +88,7 @@ A pipeline encapsulating a model and splitter is created and added to the collec "text": { "splitter": {"model": "recursive_character"}, "semantic_search": { - "model": "hkunlp/instructor-base", - "parameters": { - "instruction": "Represent the Wikipedia document for retrieval: " - }, + "model": "Alibaba-NLP/gte-base-en-v1.5", }, } }, diff --git a/pgml-cms/docs/api/client-sdk/tutorials/semantic-search.md b/pgml-cms/docs/api/client-sdk/tutorials/semantic-search.md index e7d401826..a754063ff 100644 --- a/pgml-cms/docs/api/client-sdk/tutorials/semantic-search.md +++ b/pgml-cms/docs/api/client-sdk/tutorials/semantic-search.md @@ -73,7 +73,7 @@ A pipeline encapsulating a model and splitter is created and added to the collec text: { splitter: { model: "recursive_character" }, semantic_search: { - model: "intfloat/e5-small", + model: "Alibaba-NLP/gte-base-en-v1.5", }, }, }); @@ -89,7 +89,7 @@ A pipeline encapsulating a model and splitter is created and added to the collec { "text": { "splitter": {"model": "recursive_character"}, - "semantic_search": {"model": "intfloat/e5-small"}, + "semantic_search": {"model": "Alibaba-NLP/gte-base-en-v1.5"}, } }, ) diff --git a/pgml-cms/docs/api/sql-extension/README.md b/pgml-cms/docs/api/sql-extension/README.md index 0eda2f934..7640943c7 100644 --- a/pgml-cms/docs/api/sql-extension/README.md +++ b/pgml-cms/docs/api/sql-extension/README.md @@ -31,8 +31,8 @@ Using a SQL function for interacting with open-source models makes things really ```postgresql SELECT pgml.embed( - 'intfloat/e5-small', - 'This text will be embedded using the intfloat/e5-small model.' + 'Alibaba-NLP/gte-base-en-v1.5', + 'This text will be embedded using the Alibaba-NLP/gte-base-en-v1.5 model.' ) AS embedding; ``` diff --git a/pgml-cms/docs/guides/chatbots/README.md b/pgml-cms/docs/guides/chatbots/README.md index 333cbfa8f..42a1b2c68 100644 --- a/pgml-cms/docs/guides/chatbots/README.md +++ b/pgml-cms/docs/guides/chatbots/README.md @@ -428,7 +428,7 @@ import asyncio wiki_wiki = wikipediaapi.Wikipedia("Chatbot Tutorial Project", "en") # Use the default model for embedding and default splitter for splitting -model = Model() # The default model is intfloat/e5-small +model = Model() # The default model is Alibaba-NLP/gte-base-en-v1.5 splitter = Splitter() # The default splitter is recursive_character # Construct a pipeline for ingesting documents, splitting them into chunks, and then embedding them diff --git a/pgml-cms/docs/guides/embeddings/dimensionality-reduction.md b/pgml-cms/docs/guides/embeddings/dimensionality-reduction.md index ea829ed0b..dc933b7b5 100644 --- a/pgml-cms/docs/guides/embeddings/dimensionality-reduction.md +++ b/pgml-cms/docs/guides/embeddings/dimensionality-reduction.md @@ -1,67 +1,83 @@ # Dimensionality Reduction -In the case of embedding models trained on large bodies of text, most of the concepts they learn will be unused when dealing with any single piece of text. For collections of documents that deal with specific topics, only a fraction of the language models learned associations will be relevant. 
Dimensionality reduction is an important technique to improve performance _on your documents_, both in terms of quality and latency for embedding recall using nearest neighbor search. +In the case of embedding models trained on large bodies of text, most of the concepts they learn will be unused when +dealing with any single piece of text. For collections of documents that deal with specific topics, only a fraction of +the language models learned associations will be relevant. Dimensionality reduction is an important technique to improve +performance _on your documents_, both in terms of quality and latency for embedding recall using nearest neighbor +search. ## Why Dimensionality Reduction? -- **Improved Performance**: Reducing the number of dimensions can significantly improve the computational efficiency of machine learning algorithms. +- **Improved Performance**: Reducing the number of dimensions can significantly improve the computational efficiency of + machine learning algorithms. - **Reduced Storage**: Lower-dimensional data requires less storage space. - **Enhanced Visualization**: It is easier to visualize data in two or three dimensions. ## What is Matrix Decomposition? -Dimensionality reduction is a key technique in machine learning and data analysis, particularly when dealing with high-dimensional data such as embeddings. A table full of embeddings can be considered a matrix, aka a 2-dimensional array with rows and columns, where the embedding dimensions are the columns. We can use matrix decomposition methods, such as Principal Component Analysis (PCA) and Singular Value Decomposition (SVD), to reduce the dimensionality of embeddings. - -Matrix decomposition involves breaking down a matrix into simpler, constituent matrices. The most common decomposition techniques for this purpose are: -- **Principal Component Analysis (PCA)**: Reduces dimensionality by projecting data onto a lower-dimensional subspace that captures the most variance. -- **Singular Value Decomposition (SVD)**: Factorizes a matrix into three matrices, capturing the essential features in a reduced form. +Dimensionality reduction is a key technique in machine learning and data analysis, particularly when dealing with +high-dimensional data such as embeddings. A table full of embeddings can be considered a matrix, aka a 2-dimensional +array with rows and columns, where the embedding dimensions are the columns. We can use matrix decomposition methods, +such as Principal Component Analysis (PCA) and Singular Value Decomposition (SVD), to reduce the dimensionality of +embeddings. + +Matrix decomposition involves breaking down a matrix into simpler, constituent matrices. The most common decomposition +techniques for this purpose are: + +- **Principal Component Analysis (PCA)**: Reduces dimensionality by projecting data onto a lower-dimensional subspace + that captures the most variance. +- **Singular Value Decomposition (SVD)**: Factorizes a matrix into three matrices, capturing the essential features in a + reduced form. ## Dimensionality Reduction with PostgresML -PostgresML allows in-database execution of matrix decomposition techniques, enabling efficient dimensionality reduction directly within the database environment. + +PostgresML allows in-database execution of matrix decomposition techniques, enabling efficient dimensionality reduction +directly within the database environment. 
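For intuition, both techniques reduce to the same picture. Using notation introduced here purely for illustration: let $X$ be the $n \times d$ matrix of embeddings, one row per document ($d = 384$ in the example below). Truncated SVD keeps only the top $k$ singular directions:

$$
X \approx U_k \Sigma_k V_k^{\top}, \qquad Z = X V_k \in \mathbb{R}^{n \times k}
$$

Each row of $Z$ is the $k$-dimensional stand-in for the corresponding embedding. PCA is the same factorization applied after mean-centering the columns of $X$, which is why the two methods are often interchangeable in practice.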
## Step-by-Step Guide to Using Matrix Decomposition -### Preparing the data +### Preparing the data + We'll create a set of embeddings using modern embedding model with 384 dimensions. ```postgresql -CREATE TABLE documents_with_embeddings ( -id SERIAL PRIMARY KEY, -body TEXT, -embedding FLOAT[] GENERATED ALWAYS AS (pgml.normalize_l2(pgml.embed('intfloat/e5-small-v2', body))) STORED +CREATE TABLE documents_with_embeddings +( + id serial PRIMARY KEY, + body text, + embedding float[] GENERATED ALWAYS AS (pgml.normalize_l2(pgml.embed('Alibaba-NLP/gte-base-en-v1.5', body))) STORED ); ``` - + !!! generic - + !!! code_block time="46.823" - + ```postgresql INSERT INTO documents_with_embeddings (body) VALUES -- embedding vectors are automatically generated - ('Example text data'), - ('Another example document'), - ('Some other thing'), - ('We need a few more documents'), - ('At least as many documents as dimensions in the reduction'), - ('Which normally isn''t a problem'), - ('Unless you''re typing out a bunch of demo data'); + ('Example text data'), + ('Another example document'), + ('Some other thing'), + ('We need a few more documents'), + ('At least as many documents as dimensions in the reduction'), + ('Which normally isn''t a problem'), + ('Unless you''re typing out a bunch of demo data'); ``` - + !!! - + !!! results - + ```postgresql INSERT 0 3 ``` - -!!! - + !!! +!!! -!!! generic +!!! generic !!! code_block time="14.259ms" @@ -85,7 +101,9 @@ FROM documents_with_embeddings; ### Decomposition -Models can be trained using `pgml.train` on unlabeled data to identify important features within the data. To decompose a dataset into it's principal components, we can use the table or a view. Since decomposition is an unsupervised algorithm, we don't need a column that represents a label as one of the inputs to `pgml.train`. +Models can be trained using `pgml.train` on unlabeled data to identify important features within the data. To decompose +a dataset into it's principal components, we can use the table or a view. Since decomposition is an unsupervised +algorithm, we don't need a column that represents a label as one of the inputs to `pgml.train`. Train a simple model to find reduce dimensions for 384, to the 3: @@ -94,7 +112,8 @@ Train a simple model to find reduce dimensions for 384, to the 3: !!! code_block time="48.087 ms" ```postgresql -SELECT * FROM pgml.train('Embedding Components', 'decomposition', 'just_embeddings', hyperparams => '{"n_components": 3}'); +SELECT * +FROM pgml.train('Embedding Components', 'decomposition', 'just_embeddings', hyperparams => '{"n_components": 3}'); ``` !!! @@ -114,7 +133,12 @@ INFO: Deploying model id: 2 !!! -Note that the input vectors have been reduced from 384 dimensions to 3 that explain 69% of the variance across all samples. That's a more than 100x size reduction, while preserving 69% of the information. These 3 dimensions may be plenty for a course grained first pass ranking with a vector database distance function, like cosine similarity. You can then choose to use the full embeddings, or some other reduction, or the raw text with a reranker model to improve final relevance over the baseline with all the extra time you have now that you've reduced the cost of initial nearest neighbor recall 100x. +Note that the input vectors have been reduced from 384 dimensions to 3 that explain 69% of the variance across all +samples. That's a more than 100x size reduction, while preserving 69% of the information. 
These 3 dimensions may be
+plenty for a coarse-grained first pass ranking with a vector database distance function, like cosine similarity. You can
+then choose to use the full embeddings, or some other reduction, or the raw text with a reranker model to improve final
+relevance over the baseline with all the extra time you have now that you've reduced the cost of initial nearest
+neighbor recall 100x.
 
 You can check out the components for any vector in this space using the reduction model:
 
@@ -140,4 +164,6 @@ LIMIT 10;
 
 !!!
 
-Exercise for the reader: Where is the sweet spot for number of dimensions, yet preserving say, 99% of the relevance data? How much of the cumulative explained variance do you need to preserve 100% to return the top N results for the reranker, if you feed the reranker top K using cosine similarity or another vector distance function?
+Exercise for the reader: Where is the sweet spot for number of dimensions, yet preserving, say, 99% of the relevance
+data? How much of the cumulative explained variance do you need to preserve 100% to return the top N results for the
+reranker, if you feed the reranker top K using cosine similarity or another vector distance function?
diff --git a/pgml-cms/docs/introduction/getting-started/import-your-data/foreign-data-wrappers.md b/pgml-cms/docs/introduction/getting-started/import-your-data/foreign-data-wrappers.md
index 27c9d9227..0e3b12333 100644
--- a/pgml-cms/docs/introduction/getting-started/import-your-data/foreign-data-wrappers.md
+++ b/pgml-cms/docs/introduction/getting-started/import-your-data/foreign-data-wrappers.md
@@ -177,7 +177,7 @@ ALTER TABLE bulk_access_users
 ADD COLUMN embedding vector(384);
 
 UPDATE bulk_access_users
-SET embedding = pgml.embed('intfloat/e5-small', email);
+SET embedding = pgml.embed('Alibaba-NLP/gte-base-en-v1.5', email);
 ```
 
 {% endtab %}
diff --git a/pgml-cms/docs/product/vector-database.md b/pgml-cms/docs/product/vector-database.md
index 825b24eaa..a28d88218 100644
--- a/pgml-cms/docs/product/vector-database.md
+++ b/pgml-cms/docs/product/vector-database.md
@@ -52,7 +52,7 @@ At first, the column is empty. 
To generate embeddings, we can use the PostgresML UPDATE usa_house_prices SET embedding = pgml.embed( - 'intfloat/e5-small', + 'Alibaba-NLP/gte-base-en-v1.5', address ); ``` @@ -121,7 +121,7 @@ SELECT FROM usa_house_prices ORDER BY embedding <=> pgml.embed( - 'intfloat/e5-small', + 'Alibaba-NLP/gte-base-en-v1.5', '1 Infinite Loop' )::vector(384) LIMIT 3; @@ -212,7 +212,7 @@ EXPLAIN SELECT FROM usa_house_prices ORDER BY embedding <=> pgml.embed( - 'intfloat/e5-small', + 'Alibaba-NLP/gte-base-en-v1.5', '1 Infinite Loop' )::vector(384) LIMIT 3; diff --git a/pgml-cms/docs/resources/data-storage-and-retrieval/partitioning.md b/pgml-cms/docs/resources/data-storage-and-retrieval/partitioning.md index 0e12409ed..abd391854 100644 --- a/pgml-cms/docs/resources/data-storage-and-retrieval/partitioning.md +++ b/pgml-cms/docs/resources/data-storage-and-retrieval/partitioning.md @@ -272,7 +272,7 @@ To perform an ANN search using the indexes we created, we don't have to do anyth SELECT review_body, review_embedding_e5_large <=> pgml.embed( - 'intfloat/e5-large', + 'Alibaba-NLP/gte-base-en-v1.5', 'this chair was amazing' )::vector(1024) AS cosine_distance FROM amazon_reviews_with_embedding diff --git a/pgml-cms/docs/resources/developer-docs/quick-start-with-docker.md b/pgml-cms/docs/resources/developer-docs/quick-start-with-docker.md index 5007ed8e0..a84c38999 100644 --- a/pgml-cms/docs/resources/developer-docs/quick-start-with-docker.md +++ b/pgml-cms/docs/resources/developer-docs/quick-start-with-docker.md @@ -108,7 +108,7 @@ To generate an embedding, all you have to do is use the `pgml.embed(model_name, ```postgresql SELECT pgml.embed( - 'intfloat/e5-small', + 'Alibaba-NLP/gte-base-en-v1.5', 'passage: PostgresML is so easy!' ); ``` @@ -119,7 +119,7 @@ SELECT pgml.embed( ``` postgres=# SELECT pgml.embed( - 'intfloat/e5-small', + 'Alibaba-NLP/gte-base-en-v1.5', 'passage: PostgresML is so easy!' ); diff --git a/pgml-cms/docs/use-cases/chatbots.md b/pgml-cms/docs/use-cases/chatbots.md index 17668b0e0..d26481cf7 100644 --- a/pgml-cms/docs/use-cases/chatbots.md +++ b/pgml-cms/docs/use-cases/chatbots.md @@ -45,9 +45,7 @@ wget https://raw.githubusercontent.com/postgresml/postgresml/master/pgml-apps/pg ```bash OPENAI_API_KEY= DATABASE_URL= -MODEL=hkunlp/instructor-xl -MODEL_PARAMS={"instruction": "Represent the Wikipedia document for retrieval: "} -QUERY_PARAMS={"instruction": "Represent the Wikipedia question for retrieving supporting documents: "} +MODEL=Alibaba-NLP/gte-base-en-v1.5 SYSTEM_PROMPT="You are an assistant to answer questions about an open source software named PostgresML. Your name is PgBot. You are based out of San Francisco, California." BASE_PROMPT="Given relevant parts of a document and a question, create a final answer.\ Include a SQL query in the answer wherever possible. 
\ diff --git a/pgml-cms/docs/use-cases/embeddings/personalize-embedding-results-with-application-data-in-your-database.md b/pgml-cms/docs/use-cases/embeddings/personalize-embedding-results-with-application-data-in-your-database.md index d6094233b..229d76554 100644 --- a/pgml-cms/docs/use-cases/embeddings/personalize-embedding-results-with-application-data-in-your-database.md +++ b/pgml-cms/docs/use-cases/embeddings/personalize-embedding-results-with-application-data-in-your-database.md @@ -122,7 +122,7 @@ We can find a customer that our embeddings model feels is close to the sentiment ```postgresql WITH request AS ( SELECT pgml.embed( - 'intfloat/e5-large', + 'Alibaba-NLP/gte-base-en-v1.5', 'query: I love all Star Wars, but Empire Strikes Back is particularly amazing' )::vector(1024) AS embedding ) @@ -199,7 +199,7 @@ Now we can write our personalized SQL query. It's nearly the same as our query f -- create a request embedding on the fly WITH request AS ( SELECT pgml.embed( - 'intfloat/e5-large', + 'Alibaba-NLP/gte-base-en-v1.5', 'query: Best 1980''s scifi movie' )::vector(1024) AS embedding ), diff --git a/pgml-cms/docs/use-cases/embeddings/tuning-vector-recall-while-generating-query-embeddings-in-the-database.md b/pgml-cms/docs/use-cases/embeddings/tuning-vector-recall-while-generating-query-embeddings-in-the-database.md index 7e762128b..96c99a15d 100644 --- a/pgml-cms/docs/use-cases/embeddings/tuning-vector-recall-while-generating-query-embeddings-in-the-database.md +++ b/pgml-cms/docs/use-cases/embeddings/tuning-vector-recall-while-generating-query-embeddings-in-the-database.md @@ -110,7 +110,7 @@ We'll start with semantic search. Given a user query, e.g. "Best 1980's scifi mo ```postgresql WITH request AS ( SELECT pgml.embed( - 'intfloat/e5-large', + 'Alibaba-NLP/gte-base-en-v1.5', 'query: Best 1980''s scifi movie' )::vector(1024) AS embedding ) @@ -157,7 +157,7 @@ Generating a query plan more quickly and only computing the values once, may mak There's some good stuff happening in those query results, so let's break it down: * **It's fast** - We're able to generate a request embedding on the fly with a state-of-the-art model, and search 5M reviews in 152ms, including fetching the results back to the client 😍. You can't even generate an embedding from OpenAI's API in that time, much less search 5M reviews in some other database with it. -* **It's good** - The `review_body` results are very similar to the "Best 1980's scifi movie" request text. We're using the `intfloat/e5-large` open source embedding model, which outperforms OpenAI's `text-embedding-ada-002` in most [quality benchmarks](https://huggingface.co/spaces/mteb/leaderboard). +* **It's good** - The `review_body` results are very similar to the "Best 1980's scifi movie" request text. We're using the `Alibaba-NLP/gte-base-en-v1.5` open source embedding model, which outperforms OpenAI's `text-embedding-ada-002` in most [quality benchmarks](https://huggingface.co/spaces/mteb/leaderboard). * Qualitatively: the embeddings understand our request for `scifi` being equivalent to `Sci-Fi`, `sci-fi`, `SciFi`, and `sci fi`, as well as `1980's` matching `80s` and `80's` and is close to `seventies` (last place). We didn't have to configure any of this and the most enthusiastic for "best" is at the top, the least enthusiastic is at the bottom, so the model has appropriately captured "sentiment". * Quantitatively: the `cosine_similarity` of all results are high and tight, 0.90-0.95 on a scale from -1:1. 
We can be confident we recalled very similar results from our 5M candidates, even though it would take 485 times as long to check all of them directly. * **It's reliable** - The model is stored in the database, so we don't need to worry about managing a separate service. If you repeat this query over and over, the timings will be extremely consistent, because we don't have to deal with things like random network congestion. @@ -240,7 +240,7 @@ Now we can quickly search for movies by what people have said about them: ```postgresql WITH request AS ( SELECT pgml.embed( - 'intfloat/e5-large', + 'Alibaba-NLP/gte-base-en-v1.5', 'Best 1980''s scifi movie' )::vector(1024) AS embedding ) @@ -298,7 +298,7 @@ SET ivfflat.probes = 300; ```postgresql WITH request AS ( SELECT pgml.embed( - 'intfloat/e5-large', + 'Alibaba-NLP/gte-base-en-v1.5', 'Best 1980''s scifi movie' )::vector(1024) AS embedding ) @@ -386,7 +386,7 @@ SET ivfflat.probes = 1; ```postgresql WITH request AS ( SELECT pgml.embed( - 'intfloat/e5-large', + 'Alibaba-NLP/gte-base-en-v1.5', 'query: Best 1980''s scifi movie' )::vector(1024) AS embedding ) @@ -442,7 +442,7 @@ SQL is a very expressive language that can handle a lot of complexity. To keep t -- create a request embedding on the fly WITH request AS ( SELECT pgml.embed( - 'intfloat/e5-large', + 'Alibaba-NLP/gte-base-en-v1.5', 'query: Best 1980''s scifi movie' )::vector(1024) AS embedding ), diff --git a/pgml-dashboard/content/blog/benchmarks/hf_pinecone_vs_postgresml/pgml_embeddings.py b/pgml-dashboard/content/blog/benchmarks/hf_pinecone_vs_postgresml/pgml_embeddings.py index 2a1cf5ddd..ac78f5f6c 100644 --- a/pgml-dashboard/content/blog/benchmarks/hf_pinecone_vs_postgresml/pgml_embeddings.py +++ b/pgml-dashboard/content/blog/benchmarks/hf_pinecone_vs_postgresml/pgml_embeddings.py @@ -14,7 +14,7 @@ async def main(): collection_name = "squad_collection_benchmark" collection = await db.create_or_get_collection(collection_name) - model_id = await collection.register_model(model_name="intfloat/e5-large") + model_id = await collection.register_model(model_name="Alibaba-NLP/gte-base-en-v1.5") await collection.generate_embeddings(model_id=model_id) if __name__ == "__main__": diff --git a/pgml-dashboard/content/blog/benchmarks/hf_pinecone_vs_postgresml/pgml_embeddings.sql b/pgml-dashboard/content/blog/benchmarks/hf_pinecone_vs_postgresml/pgml_embeddings.sql index 4bd8f82ad..d1884f6be 100644 --- a/pgml-dashboard/content/blog/benchmarks/hf_pinecone_vs_postgresml/pgml_embeddings.sql +++ b/pgml-dashboard/content/blog/benchmarks/hf_pinecone_vs_postgresml/pgml_embeddings.sql @@ -14,7 +14,7 @@ BEGIN INTO curr_val; -- Use the correct syntax to call pgml.embed and store the result - PERFORM embed FROM pgml.embed('intfloat/e5-large', curr_val); + PERFORM embed FROM pgml.embed('Alibaba-NLP/gte-base-en-v1.5', curr_val); curr_id := curr_id + batch_size; EXIT WHEN curr_id >= total_records; @@ -26,7 +26,7 @@ BEGIN INTO curr_val; -- Use the correct syntax to call pgml.embed and store the result - PERFORM embed FROM pgml.embed('intfloat/e5-large', curr_val); + PERFORM embed FROM pgml.embed('Alibaba-NLP/gte-base-en-v1.5', curr_val); END; $$; diff --git a/pgml-dashboard/content/blog/benchmarks/hf_pinecone_vs_postgresml/pgml_query.py b/pgml-dashboard/content/blog/benchmarks/hf_pinecone_vs_postgresml/pgml_query.py index 9a0d29206..01841755e 100644 --- a/pgml-dashboard/content/blog/benchmarks/hf_pinecone_vs_postgresml/pgml_query.py +++ b/pgml-dashboard/content/blog/benchmarks/hf_pinecone_vs_postgresml/pgml_query.py 
@@ -20,7 +20,7 @@ async def main(): data = load_dataset("squad", split="train") data = data.to_pandas() data = data.drop_duplicates(subset=["context"]) - model_id = await collection.register_model(model_name="intfloat/e5-large") + model_id = await collection.register_model(model_name="Alibaba-NLP/gte-base-en-v1.5") run_times = [] for query in data["context"][0:100]: start = time() diff --git a/pgml-dashboard/src/components/pages/demo/template.html b/pgml-dashboard/src/components/pages/demo/template.html index 4e1ef82de..af9536ddf 100644 --- a/pgml-dashboard/src/components/pages/demo/template.html +++ b/pgml-dashboard/src/components/pages/demo/template.html @@ -14,12 +14,12 @@
<%+ small_table::Table::new(&["Model", "Performance", "Cost"], &[ small_table::Row::new(&[ - "intfloat/e5-small".into(), + "Alibaba-NLP/gte-base-en-v1.5".into(), "5ms/embedding".into(), "$0.0000000001/embedding".into(), ]).into(), small_table::Row::new(&[ - "intfloat/e5-large".into(), + "Alibaba-NLP/gte-base-en-v1.5".into(), "10ms/embedding".into(), "$0.0000000002/embedding".into(), ]).into(), @@ -34,12 +34,12 @@
<%+ large_table::Table::new(&["Model", "Performance", "Cost"], &[ large_table::Row::new(&[ - "intfloat/e5-small".into(), + "Alibaba-NLP/gte-base-en-v1.5".into(), "5ms/embedding".into(), "$0.0000000001/embedding".into(), ]).into(), large_table::Row::new(&[ - "intfloat/e5-large".into(), + "Alibaba-NLP/gte-base-en-v1.5".into(), "10ms/embedding".into(), "$0.0000000002/embedding".into(), ]).into(), diff --git a/pgml-dashboard/src/utils/markdown.rs b/pgml-dashboard/src/utils/markdown.rs index 4cb4b136c..291e5839e 100644 --- a/pgml-dashboard/src/utils/markdown.rs +++ b/pgml-dashboard/src/utils/markdown.rs @@ -1267,10 +1267,7 @@ impl SiteSearch { "configuration": "english" }, "semantic_search": { - "model": "hkunlp/instructor-base", - "parameters": { - "instruction": "Represent the Wikipedia document for retrieval: " - }, + "model": "Alibaba-NLP/gte-base-en-v1.5", } }, "contents": { @@ -1281,10 +1278,7 @@ impl SiteSearch { "configuration": "english" }, "semantic_search": { - "model": "hkunlp/instructor-base", - "parameters": { - "instruction": "Represent the Wikipedia document for retrieval: " - }, + "model": "Alibaba-NLP/gte-base-en-v1.5", } } }) diff --git a/pgml-extension/examples/transformers.sql b/pgml-extension/examples/transformers.sql index 8734cdb45..6c0c647b7 100644 --- a/pgml-extension/examples/transformers.sql +++ b/pgml-extension/examples/transformers.sql @@ -2,9 +2,9 @@ -- \set ON_ERROR_STOP true \timing on -SELECT pgml.embed('intfloat/e5-small', 'hi mom'); -SELECT pgml.embed('intfloat/e5-small', 'hi mom', '{"device": "cuda"}'); -SELECT pgml.embed('intfloat/e5-small', 'hi mom', '{"device": "cpu"}'); +SELECT pgml.embed('Alibaba-NLP/gte-base-en-v1.5', 'hi mom'); +SELECT pgml.embed('Alibaba-NLP/gte-base-en-v1.5', 'hi mom', '{"device": "cuda"}'); +SELECT pgml.embed('Alibaba-NLP/gte-base-en-v1.5', 'hi mom', '{"device": "cpu"}'); SELECT pgml.embed('hkunlp/instructor-xl', 'hi mom', '{"instruction": "Encode it with love"}'); SELECT pgml.embed('mixedbread-ai/mxbai-embed-large-v1', 'test', '{"prompt": "test prompt: "}'); diff --git a/pgml-sdks/pgml/javascript/examples/README.md b/pgml-sdks/pgml/javascript/examples/README.md index 22eb39ddc..bfdad7517 100644 --- a/pgml-sdks/pgml/javascript/examples/README.md +++ b/pgml-sdks/pgml/javascript/examples/README.md @@ -10,13 +10,13 @@ export DATABASE_URL={YOUR DATABASE URL} Optionally, configure a .env file containing a DATABASE_URL variable. ## [Semantic Search](./semantic_search.js) -This is a basic example to perform semantic search on a collection of documents. Embeddings are created using `intfloat/e5-small` model. The results are semantically similar documemts to the query. Finally, the collection is archived. +This is a basic example to perform semantic search on a collection of documents. Embeddings are created using `Alibaba-NLP/gte-base-en-v1.5` model. The results are semantically similar documemts to the query. Finally, the collection is archived. ## [Question Answering](./question_answering.js) This is an example to find documents relevant to a question from the collection of documents. The query is passed to vector search to retrieve documents that match closely in the embeddings space. A score is returned with each of the search result. ## [Question Answering using Instructore Model](./question_answering_instructor.js) -In this example, we will use `hknlp/instructor-base` model to build text embeddings instead of the default `intfloat/e5-small` model. 
+In this example, we will use `hknlp/instructor-base` model to build text embeddings instead of the default `Alibaba-NLP/gte-base-en-v1.5` model. ## [Extractive Question Answering](./extractive_question_answering.js) In this example, we will show how to use `vector_recall` result as a `context` to a HuggingFace question answering model. We will use `Builtins.transform()` to run the model on the database. diff --git a/pgml-sdks/pgml/javascript/examples/extractive_question_answering.js b/pgml-sdks/pgml/javascript/examples/extractive_question_answering.js index 0ab69decb..dbbacfcd9 100644 --- a/pgml-sdks/pgml/javascript/examples/extractive_question_answering.js +++ b/pgml-sdks/pgml/javascript/examples/extractive_question_answering.js @@ -10,7 +10,7 @@ const main = async () => { text: { splitter: { model: "recursive_character" }, semantic_search: { - model: "intfloat/e5-small", + model: "Alibaba-NLP/gte-base-en-v1.5", }, }, }); diff --git a/pgml-sdks/pgml/javascript/examples/question_answering.js b/pgml-sdks/pgml/javascript/examples/question_answering.js index 0d4e08844..e8b9acbfe 100644 --- a/pgml-sdks/pgml/javascript/examples/question_answering.js +++ b/pgml-sdks/pgml/javascript/examples/question_answering.js @@ -10,7 +10,7 @@ const main = async () => { text: { splitter: { model: "recursive_character" }, semantic_search: { - model: "intfloat/e5-small", + model: "Alibaba-NLP/gte-base-en-v1.5", }, }, }); diff --git a/pgml-sdks/pgml/javascript/examples/question_answering_instructor.js b/pgml-sdks/pgml/javascript/examples/question_answering_instructor.js index bb265cc6a..b9922c712 100644 --- a/pgml-sdks/pgml/javascript/examples/question_answering_instructor.js +++ b/pgml-sdks/pgml/javascript/examples/question_answering_instructor.js @@ -10,10 +10,7 @@ const main = async () => { text: { splitter: { model: "recursive_character" }, semantic_search: { - model: "hkunlp/instructor-base", - parameters: { - instruction: "Represent the Wikipedia document for retrieval: " - } + model: "Alibaba-NLP/gte-base-en-v1.5", }, }, }); diff --git a/pgml-sdks/pgml/javascript/examples/semantic_search.js b/pgml-sdks/pgml/javascript/examples/semantic_search.js index a40970768..2ebf69738 100644 --- a/pgml-sdks/pgml/javascript/examples/semantic_search.js +++ b/pgml-sdks/pgml/javascript/examples/semantic_search.js @@ -10,7 +10,7 @@ const main = async () => { text: { splitter: { model: "recursive_character" }, semantic_search: { - model: "intfloat/e5-small", + model: "Alibaba-NLP/gte-base-en-v1.5", }, }, }); diff --git a/pgml-sdks/pgml/javascript/examples/summarizing_question_answering.js b/pgml-sdks/pgml/javascript/examples/summarizing_question_answering.js index 5afeba45c..e505c9590 100644 --- a/pgml-sdks/pgml/javascript/examples/summarizing_question_answering.js +++ b/pgml-sdks/pgml/javascript/examples/summarizing_question_answering.js @@ -10,7 +10,7 @@ const main = async () => { text: { splitter: { model: "recursive_character" }, semantic_search: { - model: "intfloat/e5-small", + model: "Alibaba-NLP/gte-base-en-v1.5", }, }, }); diff --git a/pgml-sdks/pgml/python/examples/README.md b/pgml-sdks/pgml/python/examples/README.md index 3cd4298e6..8c79a053b 100644 --- a/pgml-sdks/pgml/python/examples/README.md +++ b/pgml-sdks/pgml/python/examples/README.md @@ -10,13 +10,13 @@ export DATABASE_URL={YOUR DATABASE URL} Optionally, configure a .env file containing a DATABASE_URL variable. ## [Semantic Search](./semantic_search.py) -This is a basic example to perform semantic search on a collection of documents. 
It loads the Quora dataset, creates a collection in a PostgreSQL database, upserts documents, generates chunks and embeddings, and then performs a vector search on a query. Embeddings are created using `intfloat/e5-small` model. The results are semantically similar documemts to the query. Finally, the collection is archived. +This is a basic example to perform semantic search on a collection of documents. It loads the Quora dataset, creates a collection in a PostgreSQL database, upserts documents, generates chunks and embeddings, and then performs a vector search on a query. Embeddings are created using `Alibaba-NLP/gte-base-en-v1.5` model. The results are semantically similar documemts to the query. Finally, the collection is archived. ## [Question Answering](./question_answering.py) This is an example to find documents relevant to a question from the collection of documents. It loads the Stanford Question Answering Dataset (SQuAD) into the database, generates chunks and embeddings. Query is passed to vector search to retrieve documents that match closely in the embeddings space. A score is returned with each of the search result. ## [Question Answering using Instructor Model](./question_answering_instructor.py) -In this example, we will use `hknlp/instructor-base` model to build text embeddings instead of the default `intfloat/e5-small` model. +In this example, we will use `hknlp/instructor-base` model to build text embeddings instead of the default `Alibaba-NLP/gte-base-en-v1.5` model. ## [Extractive Question Answering](./extractive_question_answering.py) In this example, we will show how to use `vector_recall` result as a `context` to a HuggingFace question answering model. We will use `Builtins.transform()` to run the model on the database. diff --git a/pgml-sdks/pgml/python/examples/extractive_question_answering.py b/pgml-sdks/pgml/python/examples/extractive_question_answering.py index 21a0060f5..cfac5d279 100644 --- a/pgml-sdks/pgml/python/examples/extractive_question_answering.py +++ b/pgml-sdks/pgml/python/examples/extractive_question_answering.py @@ -20,7 +20,7 @@ async def main(): { "text": { "splitter": {"model": "recursive_character"}, - "semantic_search": {"model": "intfloat/e5-small"}, + "semantic_search": {"model": "Alibaba-NLP/gte-base-en-v1.5"}, } }, ) diff --git a/pgml-sdks/pgml/python/examples/question_answering.py b/pgml-sdks/pgml/python/examples/question_answering.py index d4b2cc082..fabe45b3d 100644 --- a/pgml-sdks/pgml/python/examples/question_answering.py +++ b/pgml-sdks/pgml/python/examples/question_answering.py @@ -19,7 +19,7 @@ async def main(): { "text": { "splitter": {"model": "recursive_character"}, - "semantic_search": {"model": "intfloat/e5-small"}, + "semantic_search": {"model": "Alibaba-NLP/gte-base-en-v1.5"}, } }, ) diff --git a/pgml-sdks/pgml/python/examples/question_answering_instructor.py b/pgml-sdks/pgml/python/examples/question_answering_instructor.py index ba0069837..44ae565c8 100644 --- a/pgml-sdks/pgml/python/examples/question_answering_instructor.py +++ b/pgml-sdks/pgml/python/examples/question_answering_instructor.py @@ -20,10 +20,7 @@ async def main(): "text": { "splitter": {"model": "recursive_character"}, "semantic_search": { - "model": "hkunlp/instructor-base", - "parameters": { - "instruction": "Represent the Wikipedia document for retrieval: " - }, + "model": "Alibaba-NLP/gte-base-en-v1.5", }, } }, diff --git a/pgml-sdks/pgml/python/examples/rag_question_answering.py b/pgml-sdks/pgml/python/examples/rag_question_answering.py index 
2558287f6..ed0cad73d 100644 --- a/pgml-sdks/pgml/python/examples/rag_question_answering.py +++ b/pgml-sdks/pgml/python/examples/rag_question_answering.py @@ -23,7 +23,7 @@ async def main(): { "text": { "splitter": {"model": "recursive_character"}, - "semantic_search": {"model": "intfloat/e5-small"}, + "semantic_search": {"model": "Alibaba-NLP/gte-base-en-v1.5"}, } }, ) diff --git a/pgml-sdks/pgml/python/examples/semantic_search.py b/pgml-sdks/pgml/python/examples/semantic_search.py index 9a4e134e5..07b8d8cc6 100644 --- a/pgml-sdks/pgml/python/examples/semantic_search.py +++ b/pgml-sdks/pgml/python/examples/semantic_search.py @@ -19,7 +19,7 @@ async def main(): { "text": { "splitter": {"model": "recursive_character"}, - "semantic_search": {"model": "intfloat/e5-small"}, + "semantic_search": {"model": "Alibaba-NLP/gte-base-en-v1.5"}, } }, ) diff --git a/pgml-sdks/pgml/python/examples/summarizing_question_answering.py b/pgml-sdks/pgml/python/examples/summarizing_question_answering.py index 862830277..f70be2f49 100644 --- a/pgml-sdks/pgml/python/examples/summarizing_question_answering.py +++ b/pgml-sdks/pgml/python/examples/summarizing_question_answering.py @@ -20,7 +20,7 @@ async def main(): { "text": { "splitter": {"model": "recursive_character"}, - "semantic_search": {"model": "intfloat/e5-small"}, + "semantic_search": {"model": "Alibaba-NLP/gte-base-en-v1.5"}, } }, ) diff --git a/pgml-sdks/pgml/python/tests/stress_test.py b/pgml-sdks/pgml/python/tests/stress_test.py index 552193690..9b211b95d 100644 --- a/pgml-sdks/pgml/python/tests/stress_test.py +++ b/pgml-sdks/pgml/python/tests/stress_test.py @@ -22,10 +22,7 @@ "model": "recursive_character", }, "semantic_search": { - "model": "hkunlp/instructor-base", - "parameters": { - "instruction": "Represent the Wikipedia document for retrieval: " - }, + "model": "Alibaba-NLP/gte-base-en-v1.5", }, }, }, diff --git a/pgml-sdks/pgml/src/model.rs b/pgml-sdks/pgml/src/model.rs index 432654298..096465f06 100644 --- a/pgml-sdks/pgml/src/model.rs +++ b/pgml-sdks/pgml/src/model.rs @@ -70,7 +70,7 @@ impl Default for Model { impl Model { /// Creates a new [Model] pub fn new(name: Option, source: Option, parameters: Option) -> Self { - let name = name.unwrap_or("intfloat/e5-small".to_string()); + let name = name.unwrap_or("Alibaba-NLP/gte-base-en-v1.5".to_string()); let parameters = parameters.unwrap_or(Json(serde_json::json!({}))); let source = source.unwrap_or("pgml".to_string()); let runtime: ModelRuntime = source.as_str().into(); diff --git a/pgml-sdks/pgml/src/sql/remote.sql b/pgml-sdks/pgml/src/sql/remote.sql index d44b7b84f..883baa304 100644 --- a/pgml-sdks/pgml/src/sql/remote.sql +++ b/pgml-sdks/pgml/src/sql/remote.sql @@ -20,12 +20,12 @@ SELECT * FROM dblink( '{db_name}', - 'SELECT pgml.embed(''intfloat/e5-small'', ''test postgresml embedding'') AS embedding' + 'SELECT pgml.embed(''Alibaba-NLP/gte-base-en-v1.5'', ''test postgresml embedding'') AS embedding' ) AS t(embedding real[386]); CREATE FUNCTION pgml_embed_e5_small(text) RETURNS real[386] AS $$ SELECT * FROM dblink( '{db_name}', - 'SELECT pgml.embed(''intfloat/e5-small'', ''' || $1 || ''') AS embedding' + 'SELECT pgml.embed(''Alibaba-NLP/gte-base-en-v1.5'', ''' || $1 || ''') AS embedding' ) AS t(embedding real[386]); $$ LANGUAGE SQL; From 55e7faf54ddb6d47e07cdb3bcf6f25f6f8d88ec7 Mon Sep 17 00:00:00 2001 From: Silas Marvin <19626586+SilasMarvin@users.noreply.github.com> Date: Tue, 28 May 2024 12:38:14 -0700 Subject: [PATCH 2/6] rename zephyr --- pgml-apps/pgml-chat/pgml_chat/main.py | 2 +- 
...rom-closed-to-open-source-ai-in-minutes.md | 8 +++---- ...with-postgresml-and-dbt-data-build-tool.md | 4 ++-- pgml-cms/docs/api/sql-extension/pgml.embed.md | 2 +- .../sql-extension/pgml.transform/README.md | 4 ++-- pgml-cms/docs/guides/opensourceai.md | 17 +++++++------- .../getting-started/connect-your-app.md | 4 ++-- ...with-postgresml-and-dbt-data-build-tool.md | 4 ++-- .../src/components/pages/demo/template.html | 22 ++++++++++++++----- .../examples/dbt/embeddings/README.md | 4 ++-- .../examples/dbt/embeddings/dbt_project.yml | 2 +- pgml-extension/examples/transformers.sql | 2 +- .../python/examples/rag_question_answering.py | 2 +- pgml-sdks/pgml/src/open_source_ai.rs | 6 ++--- 14 files changed, 47 insertions(+), 36 deletions(-) diff --git a/pgml-apps/pgml-chat/pgml_chat/main.py b/pgml-apps/pgml-chat/pgml_chat/main.py index 0d1ae6c28..e9ac079ea 100644 --- a/pgml-apps/pgml-chat/pgml_chat/main.py +++ b/pgml-apps/pgml-chat/pgml_chat/main.py @@ -123,7 +123,7 @@ def handler(signum, frame): "--chat_completion_model", dest="chat_completion_model", type=str, - default="HuggingFaceH4/zephyr-7b-beta", + default="meta-llama/Meta-Llama-3-8B-Instruct", ) parser.add_argument( diff --git a/pgml-cms/blog/introducing-the-openai-switch-kit-move-from-closed-to-open-source-ai-in-minutes.md b/pgml-cms/blog/introducing-the-openai-switch-kit-move-from-closed-to-open-source-ai-in-minutes.md index 6cce2a3f2..01e96a9e7 100644 --- a/pgml-cms/blog/introducing-the-openai-switch-kit-move-from-closed-to-open-source-ai-in-minutes.md +++ b/pgml-cms/blog/introducing-the-openai-switch-kit-move-from-closed-to-open-source-ai-in-minutes.md @@ -44,7 +44,7 @@ The Switch Kit is an open-source AI SDK that provides a drop in replacement for const pgml = require("pgml"); const client = pgml.newOpenSourceAI(); const results = client.chat_completions_create( - "HuggingFaceH4/zephyr-7b-beta", + "meta-llama/Meta-Llama-3-8B-Instruct", [ { role: "system", @@ -65,7 +65,7 @@ console.log(results); import pgml client = pgml.OpenSourceAI() results = client.chat_completions_create( - "HuggingFaceH4/zephyr-7b-beta", + "meta-llama/Meta-Llama-3-8B-Instruct", [ { "role": "system", @@ -96,7 +96,7 @@ print(results) ], "created": 1701291672, "id": "abf042d2-9159-49cb-9fd3-eef16feb246c", - "model": "HuggingFaceH4/zephyr-7b-beta", + "model": "meta-llama/Meta-Llama-3-8B-Instruct", "object": "chat.completion", "system_fingerprint": "eecec9d4-c28b-5a27-f90b-66c3fb6cee46", "usage": { @@ -113,7 +113,7 @@ We don't charge per token, so OpenAI “usage” metrics are not particularly re !!! -The above is an example using our open-source AI SDK with zephyr-7b-beta, an incredibly popular and highly efficient 7 billion parameter model. +The above is an example using our open-source AI SDK with Meta-Llama-3-8B-Instruct, an incredibly popular and highly efficient 8 billion parameter model. Notice there is a near one-to-one relation between the parameters and return type of OpenAI’s `chat.completions.create` and our `chat_completion_create`.
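The swap above is the whole migration story from the application's point of view: only the model string passed to `chat_completions_create` changes. A minimal sketch of the updated call from the Python SDK (the message contents here are illustrative placeholders, and it assumes `DATABASE_URL` points at a PostgresML database):

```python
import pgml

# Swapping chat models is a one-line change: only the model name differs
# from the previous zephyr-7b-beta call. Assumes DATABASE_URL is set to a
# PostgresML connection string.
client = pgml.OpenSourceAI()

results = client.chat_completions_create(
    "meta-llama/Meta-Llama-3-8B-Instruct",  # was "HuggingFaceH4/zephyr-7b-beta"
    [
        {"role": "system", "content": "You are a helpful assistant."},  # placeholder
        {"role": "user", "content": "What is PostgresML?"},  # placeholder
    ],
)

# The return value mirrors OpenAI's chat.completions.create response shape.
print(results)
```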
diff --git a/pgml-cms/blog/llm-based-pipelines-with-postgresml-and-dbt-data-build-tool.md b/pgml-cms/blog/llm-based-pipelines-with-postgresml-and-dbt-data-build-tool.md index 83eb7de01..d9777fbd1 100644 --- a/pgml-cms/blog/llm-based-pipelines-with-postgresml-and-dbt-data-build-tool.md +++ b/pgml-cms/blog/llm-based-pipelines-with-postgresml-and-dbt-data-build-tool.md @@ -119,7 +119,7 @@ vars: splitter_name: "recursive_character" splitter_parameters: {"chunk_size": 100, "chunk_overlap": 20} task: "embedding" - model_name: "intfloat/e5-base" + model_name: "intfloat/e5-small-v2" query_string: 'Lorem ipsum 3' limit: 2 ``` @@ -129,7 +129,7 @@ Here's a summary of the key parameters: * `splitter_name`: Specifies the name of the splitter, set as "recursive\_character". * `splitter_parameters`: Defines the parameters for the splitter, such as a chunk size of 100 and a chunk overlap of 20. * `task`: Indicates the task being performed, specified as "embedding". -* `model_name`: Specifies the name of the model to be used, set as "intfloat/e5-base". +* `model_name`: Specifies the name of the model to be used, set as "intfloat/e5-small-v2". * `query_string`: Provides a query string, set as 'Lorem ipsum 3'. * `limit`: Specifies a limit of 2, indicating the maximum number of results to be processed. diff --git a/pgml-cms/docs/api/sql-extension/pgml.embed.md b/pgml-cms/docs/api/sql-extension/pgml.embed.md index 43da6120e..1c57c2ff5 100644 --- a/pgml-cms/docs/api/sql-extension/pgml.embed.md +++ b/pgml-cms/docs/api/sql-extension/pgml.embed.md @@ -20,7 +20,7 @@ pgml.embed( | Argument | Description | Example | |----------|-------------|---------| -| transformer | The name of a Hugging Face embedding model. | `intfloat/e5-large-v2` | +| transformer | The name of a Hugging Face embedding model. | `intfloat/e5-small-v2` | | text | The text to embed. This can be a string or the name of a column from a PostgreSQL table. | `'I am your father, Luke'` | | kwargs | Additional arguments that are passed to the model during inference. 
| | diff --git a/pgml-cms/docs/api/sql-extension/pgml.transform/README.md b/pgml-cms/docs/api/sql-extension/pgml.transform/README.md index 9e13f5c2a..722d49d57 100644 --- a/pgml-cms/docs/api/sql-extension/pgml.transform/README.md +++ b/pgml-cms/docs/api/sql-extension/pgml.transform/README.md @@ -123,7 +123,7 @@ pgml.transform( SELECT pgml.transform( task => '{ "task": "text-generation", - "model": "TheBloke/zephyr-7B-beta-GPTQ", + "model": "meta-llama/Meta-Llama-3-8B-Instruct", "model_type": "mistral", "revision": "main", "device_map": "auto" @@ -148,7 +148,7 @@ def transform(task, call, inputs): transform( { "task": "text-generation", - "model": "TheBloke/zephyr-7B-beta-GPTQ", + "model": "meta-llama/Meta-Llama-3-8B-Instruct", "model_type": "mistral", "revision": "main", }, diff --git a/pgml-cms/docs/guides/opensourceai.md b/pgml-cms/docs/guides/opensourceai.md index 66e7642ef..c42a7f868 100644 --- a/pgml-cms/docs/guides/opensourceai.md +++ b/pgml-cms/docs/guides/opensourceai.md @@ -62,7 +62,7 @@ Here is a simple example using zephyr-7b-beta, one of the best 7 billion paramet const pgml = require("pgml"); const client = pgml.newOpenSourceAI(); const results = client.chat_completions_create( - "HuggingFaceH4/zephyr-7b-beta", + "meta-llama/Meta-Llama-3-8B-Instruct", [ { role: "system", @@ -83,7 +83,7 @@ console.log(results); import pgml client = pgml.OpenSourceAI() results = client.chat_completions_create( - "HuggingFaceH4/zephyr-7b-beta", + "meta-llama/Meta-Llama-3-8B-Instruct", [ { "role": "system", @@ -114,7 +114,7 @@ print(results) ], "created": 1701291672, "id": "abf042d2-9159-49cb-9fd3-eef16feb246c", - "model": "HuggingFaceH4/zephyr-7b-beta", + "model": "meta-llama/Meta-Llama-3-8B-Instruct", "object": "chat.completion", "system_fingerprint": "eecec9d4-c28b-5a27-f90b-66c3fb6cee46", "usage": { @@ -234,7 +234,7 @@ We also have asynchronous versions of the `chat_completions_create` and `chat_co const pgml = require("pgml"); const client = pgml.newOpenSourceAI(); const results = await client.chat_completions_create_async( - "HuggingFaceH4/zephyr-7b-beta", + "meta-llama/Meta-Llama-3-8B-Instruct", [ { role: "system", @@ -255,7 +255,7 @@ console.log(results); import pgml client = pgml.OpenSourceAI() results = await client.chat_completions_create_async( - "HuggingFaceH4/zephyr-7b-beta", + "meta-llama/Meta-Llama-3-8B-Instruct", [ { "role": "system", @@ -284,7 +284,7 @@ results = await client.chat_completions_create_async( ], "created": 1701291672, "id": "abf042d2-9159-49cb-9fd3-eef16feb246c", - "model": "HuggingFaceH4/zephyr-7b-beta", + "model": "meta-llama/Meta-Llama-3-8B-Instruct", "object": "chat.completion", "system_fingerprint": "eecec9d4-c28b-5a27-f90b-66c3fb6cee46", "usage": { @@ -328,7 +328,7 @@ while (!result.done) { import pgml client = pgml.OpenSourceAI() results = await client.chat_completions_create_stream_async( - "HuggingFaceH4/zephyr-7b-beta", + "meta-llama/Meta-Llama-3-8B-Instruct", [ { "role": "system", @@ -389,6 +389,8 @@ We have truncated the output to two items We have tested the following models and verified they work with the OpenSourceAI: +* meta-llama/Meta-Llama-3-8B-Instruct +* meta-llama/Meta-Llama-3-70B-Instruct * Phind/Phind-CodeLlama-34B-v2 * HuggingFaceH4/zephyr-7b-beta * deepseek-ai/deepseek-llm-7b-chat @@ -399,7 +401,6 @@ We have tested the following models and verified they work with the OpenSourceAI * Open-Orca/Mistral-7B-OpenOrca * teknium/OpenHermes-2.5-Mistral-7B * mistralai/Mistral-7B-Instruct-v0.1 -* HuggingFaceH4/zephyr-7b-beta Any model on hugging face 
should work with our OpenSourceAI. Here is an example of using one of the more popular quantized models from [TheBloke](https://huggingface.co/TheBloke). diff --git a/pgml-cms/docs/introduction/getting-started/connect-your-app.md b/pgml-cms/docs/introduction/getting-started/connect-your-app.md index 642b32597..f561fb081 100644 --- a/pgml-cms/docs/introduction/getting-started/connect-your-app.md +++ b/pgml-cms/docs/introduction/getting-started/connect-your-app.md @@ -42,7 +42,7 @@ const pgml = require("pgml"); const main = () => { const client = pgml.newOpenSourceAI(); const results = client.chat_completions_create( - "HuggingFaceH4/zephyr-7b-beta", + "meta-llama/Meta-Llama-3-8B-Instruct", [ { role: "system", @@ -66,7 +66,7 @@ import pgml async def main(): client = pgml.OpenSourceAI() results = client.chat_completions_create( - "HuggingFaceH4/zephyr-7b-beta", + "meta-llama/Meta-Llama-3-8B-Instruct", [ { "role": "system", diff --git a/pgml-cms/docs/resources/data-storage-and-retrieval/llm-based-pipelines-with-postgresml-and-dbt-data-build-tool.md b/pgml-cms/docs/resources/data-storage-and-retrieval/llm-based-pipelines-with-postgresml-and-dbt-data-build-tool.md index 80e9be8a2..e65c3ad5a 100644 --- a/pgml-cms/docs/resources/data-storage-and-retrieval/llm-based-pipelines-with-postgresml-and-dbt-data-build-tool.md +++ b/pgml-cms/docs/resources/data-storage-and-retrieval/llm-based-pipelines-with-postgresml-and-dbt-data-build-tool.md @@ -101,7 +101,7 @@ vars: splitter_name: "recursive_character" splitter_parameters: {"chunk_size": 100, "chunk_overlap": 20} task: "embedding" - model_name: "intfloat/e5-base" + model_name: "intfloat/e5-small-v2" query_string: 'Lorem ipsum 3' limit: 2 ``` @@ -111,7 +111,7 @@ Here's a summary of the key parameters: * `splitter_name`: Specifies the name of the splitter, set as "recursive\_character". * `splitter_parameters`: Defines the parameters for the splitter, such as a chunk size of 100 and a chunk overlap of 20. * `task`: Indicates the task being performed, specified as "embedding". -* `model_name`: Specifies the name of the model to be used, set as "intfloat/e5-base". +* `model_name`: Specifies the name of the model to be used, set as "intfloat/e5-small-v2". * `query_string`: Provides a query string, set as 'Lorem ipsum 3'. * `limit`: Specifies a limit of 2, indicating the maximum number of results to be processed. diff --git a/pgml-dashboard/src/components/pages/demo/template.html b/pgml-dashboard/src/components/pages/demo/template.html index af9536ddf..f6f8fb1f8 100644 --- a/pgml-dashboard/src/components/pages/demo/template.html +++ b/pgml-dashboard/src/components/pages/demo/template.html @@ -14,19 +14,24 @@
<%+ small_table::Table::new(&["Model", "Performance", "Cost"], &[ small_table::Row::new(&[ - "Alibaba-NLP/gte-base-en-v1.5".into(), + "intfloat/e5-small-v2".into(), "5ms/embedding".into(), "$0.0000000001/embedding".into(), ]).into(), small_table::Row::new(&[ "Alibaba-NLP/gte-base-en-v1.5".into(), + "5ms/embedding".into(), + "$0.0000000001/embedding".into(), + ]).into(), + small_table::Row::new(&[ + "Alibaba-NLP/gte-large-en-v1.5".into(), "10ms/embedding".into(), "$0.0000000002/embedding".into(), ]).into(), small_table::Row::new(&[ - "intfloat/e5-large-with-overflow-intfloat/e5-large-with-overflow-intfloat/e5-large-with-overflow".into(), + "mixedbread-ai/mxbai-embed-large-v1".into(), "10ms/embedding".into(), - "$0.0000000002/embedding-intfloat/e5-large-with-overflow-intfloat/e5-large-with-overflow".into(), + "$0.0000000002/embedding".into(), ]).into(), ]) %>
@@ -34,19 +39,24 @@
<%+ large_table::Table::new(&["Model", "Performance", "Cost"], &[ large_table::Row::new(&[ - "Alibaba-NLP/gte-base-en-v1.5".into(), + "intfloat/e5-small-v2".into(), "5ms/embedding".into(), "$0.0000000001/embedding".into(), ]).into(), large_table::Row::new(&[ "Alibaba-NLP/gte-base-en-v1.5".into(), + "5ms/embedding".into(), + "$0.0000000001/embedding".into(), + ]).into(), + large_table::Row::new(&[ + "Alibaba-NLP/gte-large-en-v1.5".into(), "10ms/embedding".into(), "$0.0000000002/embedding".into(), ]).into(), large_table::Row::new(&[ - "intfloat/e5-large-with-overflow-intfloat/e5-large-with-overflow-intfloat/e5-large-with-overflow".into(), + "mixedbread-ai/mxbai-embed-large-v1".into(), "10ms/embedding".into(), - "$0.0000000002/embedding-intfloat/e5-large-with-overflow-intfloat/e5-large-with-overflow".into(), + "$0.0000000002/embedding".into(), ]).into(), ]) %>
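The performance and cost figures in these tables are per-embedding estimates; actual numbers vary with hardware, batching, and whether the model is already cached. A rough sketch for measuring the latency column yourself, assuming `psycopg2` as the driver (any Postgres client works) and `DATABASE_URL` pointing at a PostgresML database — note the first call per model also pays a one-time download cost, so each model is warmed up before timing:

```python
import os
import time

import psycopg2  # assumed driver; any Postgres client works here

# Rough per-embedding latency measurement for the models listed above.
# Numbers depend heavily on hardware and caching; treat them as estimates.
MODELS = [
    "intfloat/e5-small-v2",
    "Alibaba-NLP/gte-base-en-v1.5",
    "Alibaba-NLP/gte-large-en-v1.5",
    "mixedbread-ai/mxbai-embed-large-v1",
]

conn = psycopg2.connect(os.environ["DATABASE_URL"])
with conn.cursor() as cur:
    for model in MODELS:
        cur.execute("SELECT pgml.embed(%s, 'warm up')", (model,))
        cur.fetchone()  # first call downloads and caches the model
        start = time.perf_counter()
        for _ in range(100):
            cur.execute("SELECT pgml.embed(%s, 'benchmark sentence')", (model,))
            cur.fetchone()
        elapsed_ms = (time.perf_counter() - start) * 1000 / 100
        print(f"{model}: {elapsed_ms:.1f}ms/embedding")
conn.close()
```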
diff --git a/pgml-extension/examples/dbt/embeddings/README.md b/pgml-extension/examples/dbt/embeddings/README.md index a46f8636e..55930b0b4 100644 --- a/pgml-extension/examples/dbt/embeddings/README.md +++ b/pgml-extension/examples/dbt/embeddings/README.md @@ -75,7 +75,7 @@ vars: splitter_name: "recursive_character" splitter_parameters: {"chunk_size": 100, "chunk_overlap": 20} task: "embedding" - model_name: "intfloat/e5-base" + model_name: "intfloat/e5-small-v2" query_string: 'Lorem ipsum 3' limit: 2 ``` @@ -84,7 +84,7 @@ Here's a summary of the key parameters: - `splitter_name`: Specifies the name of the splitter, set as "recursive_character". - `splitter_parameters`: Defines the parameters for the splitter, such as a chunk size of 100 and a chunk overlap of 20. - `task`: Indicates the task being performed, specified as "embedding". -- `model_name`: Specifies the name of the model to be used, set as "intfloat/e5-base". +- `model_name`: Specifies the name of the model to be used, set as "intfloat/e5-small-v2". - `query_string`: Provides a query string, set as 'Lorem ipsum 3'. - `limit`: Specifies a limit of 2, indicating the maximum number of results to be processed. diff --git a/pgml-extension/examples/dbt/embeddings/dbt_project.yml b/pgml-extension/examples/dbt/embeddings/dbt_project.yml index 9433d8f41..c9b26cc1d 100644 --- a/pgml-extension/examples/dbt/embeddings/dbt_project.yml +++ b/pgml-extension/examples/dbt/embeddings/dbt_project.yml @@ -10,7 +10,7 @@ vars: splitter_name: "recursive_character" splitter_parameters: {"chunk_size": 100, "chunk_overlap": 20} task: "embedding" - model_name: "intfloat/e5-base" + model_name: "intfloat/e5-small-v2" #embeddings_table_name: "embeddings_intfloat_e5_small" query_string: 'Lorem ipsum 3' limit: 2 diff --git a/pgml-extension/examples/transformers.sql b/pgml-extension/examples/transformers.sql index 6c0c647b7..83975d45a 100644 --- a/pgml-extension/examples/transformers.sql +++ b/pgml-extension/examples/transformers.sql @@ -11,7 +11,7 @@ SELECT pgml.embed('mixedbread-ai/mxbai-embed-large-v1', 'test', '{"prompt": "tes SELECT pgml.transform_stream( task => '{ "task": "text-generation", - "model": "TheBloke/zephyr-7B-beta-GPTQ", + "model": "meta-llama/Meta-Llama-3-8B-Instruct", "model_type": "mistral", "revision": "main", "device_map": "auto" diff --git a/pgml-sdks/pgml/python/examples/rag_question_answering.py b/pgml-sdks/pgml/python/examples/rag_question_answering.py index ed0cad73d..555e50d87 100644 --- a/pgml-sdks/pgml/python/examples/rag_question_answering.py +++ b/pgml-sdks/pgml/python/examples/rag_question_answering.py @@ -80,7 +80,7 @@ async def main(): # Using OpenSource LLMs for Chat Completion client = OpenSourceAI() - chat_completion_model = "HuggingFaceH4/zephyr-7b-beta" + chat_completion_model = "meta-llama/Meta-Llama-3-8B-Instruct" console.print("Generating response using %s LLM..."%chat_completion_model) response = client.chat_completions_create( model=chat_completion_model, diff --git a/pgml-sdks/pgml/src/open_source_ai.rs b/pgml-sdks/pgml/src/open_source_ai.rs index f7348ad11..e86d9f9e3 100644 --- a/pgml-sdks/pgml/src/open_source_ai.rs +++ b/pgml-sdks/pgml/src/open_source_ai.rs @@ -43,11 +43,11 @@ fn try_model_nice_name_to_model_name_and_parameters( .into(), )), - "HuggingFaceH4/zephyr-7b-beta" => Some(( - "HuggingFaceH4/zephyr-7b-beta", + "meta-llama/Meta-Llama-3-8B-Instruct" => Some(( + "meta-llama/Meta-Llama-3-8B-Instruct", serde_json::json!({ "task": "conversational", - "model": "HuggingFaceH4/zephyr-7b-beta", + "model":
"meta-llama/Meta-Llama-3-8B-Instruct", "device_map": "auto", "torch_dtype": "bfloat16" }) From 4ea46b7a8e4833d083da85a75f3321b50e9f31b9 Mon Sep 17 00:00:00 2001 From: Silas Marvin <19626586+SilasMarvin@users.noreply.github.com> Date: Tue, 28 May 2024 12:43:34 -0700 Subject: [PATCH 3/6] Update embedding search reference to use bigger embedding size --- ...sing-postgresml-with-django-and-embedding-search.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pgml-cms/blog/using-postgresml-with-django-and-embedding-search.md b/pgml-cms/blog/using-postgresml-with-django-and-embedding-search.md index f54388066..0ad6d6820 100644 --- a/pgml-cms/blog/using-postgresml-with-django-and-embedding-search.md +++ b/pgml-cms/blog/using-postgresml-with-django-and-embedding-search.md @@ -39,20 +39,20 @@ Our Django application has only one model, the `TodoItem`. It comes with a descr ```python embedding = models.GeneratedField( expression=EmbedSmallExpression("description"), - output_field=VectorField(dimensions=384), + output_field=VectorField(dimensions=768), db_persist=True, ) ``` This little code snippet contains quite a bit of functionality. First, we use a `GeneratedField` which is a database column that's automatically populated with data from the database. The application doesn't need to input anything when a model instance is created. This is a very powerful technique to ensure data durability and accuracy. -Secondly, the generated column is using a `VectorField`. This comes from the `pgvector.django` package and defines a `vector(384)` column: a vector with 384 dimensions. +Secondly, the generated column is using a `VectorField`. This comes from the `pgvector.django` package and defines a `vector(768)` column: a vector with 768 dimensions. Lastly, the `expression` argument tells Django how to generate this field inside the database. Since PostgresML doesn't (yet) come with a Django plugin, we had to write the expression class ourselves. Thankfully, Django makes this very easy: ```python class EmbedSmallExpression(models.Expression): - output_field = VectorField(null=False, blank=False, dimensions=384) + output_field = VectorField(null=False, blank=False, dimensions=768) def __init__(self, field): self.embedding_field = field @@ -70,7 +70,7 @@ Djago Rest Framework provides the bulk of the implementation. We just added a `M ```python results = TodoItem.objects.annotate( similarity=RawSQL( - "pgml.embed('Alibaba-NLP/gte-base-en-v1.5', %s)::vector(384) <=> embedding", + "pgml.embed('Alibaba-NLP/gte-base-en-v1.5', %s)::vector(768) <=> embedding", [query], ) ).order_by("similarity") @@ -113,7 +113,7 @@ In return, you'll get your to-do item alongside the embedding of the `descriptio } ``` -The embedding contains 384 floating point numbers; we removed most of them in this blog post to make sure it fits on the page. +The embedding contains 768 floating point numbers; we removed most of them in this blog post to make sure it fits on the page. You can try creating multiple to-do items for fun and profit. If the description is changed, so will the embedding, demonstrating how the `Alibaba-NLP/gte-base-en-v1.5` model understands the semantic meaning of your text. 
From 3a229b8f78dd49be7301ba6daffb003434c0ec45 Mon Sep 17 00:00:00 2001 From: Silas Marvin <19626586+SilasMarvin@users.noreply.github.com> Date: Tue, 28 May 2024 12:44:59 -0700 Subject: [PATCH 4/6] Revert to intfloat/e5-small-v2 --- pgml-cms/docs/guides/embeddings/dimensionality-reduction.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pgml-cms/docs/guides/embeddings/dimensionality-reduction.md b/pgml-cms/docs/guides/embeddings/dimensionality-reduction.md index dc933b7b5..c923dd488 100644 --- a/pgml-cms/docs/guides/embeddings/dimensionality-reduction.md +++ b/pgml-cms/docs/guides/embeddings/dimensionality-reduction.md @@ -45,7 +45,7 @@ CREATE TABLE documents_with_embeddings ( id serial PRIMARY KEY, body text, - embedding float[] GENERATED ALWAYS AS (pgml.normalize_l2(pgml.embed('Alibaba-NLP/gte-base-en-v1.5', body))) STORED + embedding float[] GENERATED ALWAYS AS (pgml.normalize_l2(pgml.embed('intfloat/e5-small-v2', body))) STORED ); ``` From aa2a6ce2e25c2781be606beb6b89a216e781d1a7 Mon Sep 17 00:00:00 2001 From: Silas Marvin <19626586+SilasMarvin@users.noreply.github.com> Date: Tue, 28 May 2024 12:48:47 -0700 Subject: [PATCH 5/6] Revert docs --- pgml-sdks/pgml/javascript/examples/README.md | 4 ++-- pgml-sdks/pgml/python/examples/README.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pgml-sdks/pgml/javascript/examples/README.md b/pgml-sdks/pgml/javascript/examples/README.md index bfdad7517..55d9acc1c 100644 --- a/pgml-sdks/pgml/javascript/examples/README.md +++ b/pgml-sdks/pgml/javascript/examples/README.md @@ -10,13 +10,13 @@ export DATABASE_URL={YOUR DATABASE URL} Optionally, configure a .env file containing a DATABASE_URL variable. ## [Semantic Search](./semantic_search.js) -This is a basic example to perform semantic search on a collection of documents. Embeddings are created using the `Alibaba-NLP/gte-base-en-v1.5` model. The results are documents semantically similar to the query. Finally, the collection is archived. +This is a basic example to perform semantic search on a collection of documents. Embeddings are created using the `intfloat/e5-small-v2` model. The results are documents semantically similar to the query. Finally, the collection is archived. ## [Question Answering](./question_answering.js) This is an example to find documents relevant to a question from the collection of documents. The query is passed to vector search to retrieve documents that match closely in the embeddings space. A score is returned with each search result. ## [Question Answering using Instructor Model](./question_answering_instructor.js) -In this example, we will use the `hkunlp/instructor-base` model to build text embeddings instead of the default `Alibaba-NLP/gte-base-en-v1.5` model. +In this example, we will use the `hkunlp/instructor-base` model to build text embeddings instead of the default `intfloat/e5-small-v2` model. ## [Extractive Question Answering](./extractive_question_answering.js) In this example, we will show how to use the `vector_recall` result as a `context` to a HuggingFace question answering model. We will use `Builtins.transform()` to run the model on the database. diff --git a/pgml-sdks/pgml/python/examples/README.md b/pgml-sdks/pgml/python/examples/README.md index 8c79a053b..9e2f716a3 100644 --- a/pgml-sdks/pgml/python/examples/README.md +++ b/pgml-sdks/pgml/python/examples/README.md @@ -10,13 +10,13 @@ export DATABASE_URL={YOUR DATABASE URL} Optionally, configure a .env file containing a DATABASE_URL variable.
## [Semantic Search](./semantic_search.py) -This is a basic example to perform semantic search on a collection of documents. It loads the Quora dataset, creates a collection in a PostgreSQL database, upserts documents, generates chunks and embeddings, and then performs a vector search on a query. Embeddings are created using the `Alibaba-NLP/gte-base-en-v1.5` model. The results are documents semantically similar to the query. Finally, the collection is archived. +This is a basic example to perform semantic search on a collection of documents. It loads the Quora dataset, creates a collection in a PostgreSQL database, upserts documents, generates chunks and embeddings, and then performs a vector search on a query. Embeddings are created using the `intfloat/e5-small-v2` model. The results are documents semantically similar to the query. Finally, the collection is archived. ## [Question Answering](./question_answering.py) This is an example to find documents relevant to a question from the collection of documents. It loads the Stanford Question Answering Dataset (SQuAD) into the database and generates chunks and embeddings. The query is passed to vector search to retrieve documents that match closely in the embeddings space. A score is returned with each search result. ## [Question Answering using Instructor Model](./question_answering_instructor.py) -In this example, we will use the `hkunlp/instructor-base` model to build text embeddings instead of the default `Alibaba-NLP/gte-base-en-v1.5` model. +In this example, we will use the `hkunlp/instructor-base` model to build text embeddings instead of the default `intfloat/e5-small-v2` model. ## [Extractive Question Answering](./extractive_question_answering.py) In this example, we will show how to use the `vector_recall` result as a `context` to a HuggingFace question answering model. We will use `Builtins.transform()` to run the model on the database. From 2724e59d570b9aaa6dfe95a6bb49b4d785e7b144 Mon Sep 17 00:00:00 2001 From: Silas Marvin <19626586+SilasMarvin@users.noreply.github.com> Date: Tue, 28 May 2024 13:34:28 -0700 Subject: [PATCH 6/6] Remove duplicate key --- pgml-sdks/pgml/src/open_source_ai.rs | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/pgml-sdks/pgml/src/open_source_ai.rs b/pgml-sdks/pgml/src/open_source_ai.rs index e86d9f9e3..74cb6e82b 100644 --- a/pgml-sdks/pgml/src/open_source_ai.rs +++ b/pgml-sdks/pgml/src/open_source_ai.rs @@ -43,17 +43,6 @@ fn try_model_nice_name_to_model_name_and_parameters( .into(), )), - "meta-llama/Meta-Llama-3-8B-Instruct" => Some(( - "meta-llama/Meta-Llama-3-8B-Instruct", - serde_json::json!({ - "task": "conversational", - "model": "meta-llama/Meta-Llama-3-8B-Instruct", - "device_map": "auto", - "torch_dtype": "bfloat16" - }) - .into(), - )), - "TheBloke/Llama-2-7B-Chat-GPTQ" => Some(( "TheBloke/Llama-2-7B-Chat-GPTQ", serde_json::json!({
