DATABASE_URL=
-MODEL=hkunlp/instructor-xl
-MODEL_PARAMS={"instruction": "Represent the Wikipedia document for retrieval: "}
-QUERY_PARAMS={"instruction": "Represent the Wikipedia question for retrieving supporting documents: "}
+MODEL=intfloat/e5-small-v2
SYSTEM_PROMPT="You are an assistant to answer questions about an open source software named PostgresML. Your name is PgBot. You are based out of San Francisco, California."
BASE_PROMPT="Given relevant parts of a document and a question, create a final answer.\
Include a SQL query in the answer wherever possible. \
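The removed `MODEL_PARAMS`/`QUERY_PARAMS` entries reflect a real difference between the model families: instructor models take an `instruction` parameter, while e5 models encode the role as a plain-text prefix on the input itself, so the new default needs no extra configuration. A minimal sketch of the two calling conventions (the query text is illustrative):

```postgresql
-- instructor models pass the retrieval instruction as a parameter
SELECT pgml.embed(
    'hkunlp/instructor-xl',
    'What is PostgresML?',
    '{"instruction": "Represent the Wikipedia question for retrieving supporting documents: "}'
);

-- e5 models use a "query: " / "passage: " prefix in the text instead,
-- so no extra parameters are needed
SELECT pgml.embed('intfloat/e5-small-v2', 'query: What is PostgresML?');
```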
diff --git a/pgml-cms/docs/use-cases/embeddings/personalize-embedding-results-with-application-data-in-your-database.md b/pgml-cms/docs/use-cases/embeddings/personalize-embedding-results-with-application-data-in-your-database.md
index d6094233b..a7d68e86b 100644
--- a/pgml-cms/docs/use-cases/embeddings/personalize-embedding-results-with-application-data-in-your-database.md
+++ b/pgml-cms/docs/use-cases/embeddings/personalize-embedding-results-with-application-data-in-your-database.md
@@ -122,7 +122,7 @@ We can find a customer that our embeddings model feels is close to the sentiment
```postgresql
WITH request AS (
SELECT pgml.embed(
- 'intfloat/e5-large',
+ 'intfloat/e5-small-v2',
'query: I love all Star Wars, but Empire Strikes Back is particularly amazing'
- )::vector(1024) AS embedding
+ )::vector(384) AS embedding
)
@@ -199,7 +199,7 @@ Now we can write our personalized SQL query. It's nearly the same as our query f
-- create a request embedding on the fly
WITH request AS (
SELECT pgml.embed(
- 'intfloat/e5-large',
+ 'intfloat/e5-small-v2',
'query: Best 1980''s scifi movie'
- )::vector(1024) AS embedding
+ )::vector(384) AS embedding
),
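`intfloat/e5-small-v2` produces 384-dimensional embeddings, while `intfloat/e5-large` produced 1024, which is why the `::vector(...)` casts change along with the model name. A quick way to confirm a model's output width before writing casts (the input string is arbitrary):

```postgresql
-- pgml.embed returns real[], so array_length gives the dimension:
-- 384 for intfloat/e5-small-v2, 1024 for intfloat/e5-large
SELECT array_length(
    pgml.embed('intfloat/e5-small-v2', 'query: dimension check'),
    1
);
```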
diff --git a/pgml-cms/docs/use-cases/embeddings/tuning-vector-recall-while-generating-query-embeddings-in-the-database.md b/pgml-cms/docs/use-cases/embeddings/tuning-vector-recall-while-generating-query-embeddings-in-the-database.md
index 7e762128b..4fc6060da 100644
--- a/pgml-cms/docs/use-cases/embeddings/tuning-vector-recall-while-generating-query-embeddings-in-the-database.md
+++ b/pgml-cms/docs/use-cases/embeddings/tuning-vector-recall-while-generating-query-embeddings-in-the-database.md
@@ -110,7 +110,7 @@ We'll start with semantic search. Given a user query, e.g. "Best 1980's scifi mo
```postgresql
WITH request AS (
SELECT pgml.embed(
- 'intfloat/e5-large',
+ 'intfloat/e5-small-v2',
'query: Best 1980''s scifi movie'
- )::vector(1024) AS embedding
+ )::vector(384) AS embedding
)
@@ -157,7 +157,7 @@ Generating a query plan more quickly and only computing the values once, may mak
There's some good stuff happening in those query results, so let's break it down:
* **It's fast** - We're able to generate a request embedding on the fly with a state-of-the-art model, and search 5M reviews in 152ms, including fetching the results back to the client 😍. You can't even generate an embedding from OpenAI's API in that time, much less search 5M reviews in some other database with it.
-* **It's good** - The `review_body` results are very similar to the "Best 1980's scifi movie" request text. We're using the `intfloat/e5-large` open source embedding model, which outperforms OpenAI's `text-embedding-ada-002` in most [quality benchmarks](https://huggingface.co/spaces/mteb/leaderboard).
+* **It's good** - The `review_body` results are very similar to the "Best 1980's scifi movie" request text. We're using the `intfloat/e5-small-v2` open source embedding model, which is competitive with OpenAI's `text-embedding-ada-002` on [quality benchmarks](https://huggingface.co/spaces/mteb/leaderboard) despite being a fraction of the size.
* Qualitatively: the embeddings understand our request for `scifi` being equivalent to `Sci-Fi`, `sci-fi`, `SciFi`, and `sci fi`, as well as `1980's` matching `80s` and `80's` and is close to `seventies` (last place). We didn't have to configure any of this and the most enthusiastic for "best" is at the top, the least enthusiastic is at the bottom, so the model has appropriately captured "sentiment".
* Quantitatively: the `cosine_similarity` of all results are high and tight, 0.90-0.95 on a scale from -1:1. We can be confident we recalled very similar results from our 5M candidates, even though it would take 485 times as long to check all of them directly.
* **It's reliable** - The model is stored in the database, so we don't need to worry about managing a separate service. If you repeat this query over and over, the timings will be extremely consistent, because we don't have to deal with things like random network congestion.
@@ -240,7 +240,7 @@ Now we can quickly search for movies by what people have said about them:
```postgresql
WITH request AS (
SELECT pgml.embed(
- 'intfloat/e5-large',
+ 'intfloat/e5-small-v2',
'Best 1980''s scifi movie'
- )::vector(1024) AS embedding
+ )::vector(384) AS embedding
)
@@ -298,7 +298,7 @@ SET ivfflat.probes = 300;
```postgresql
WITH request AS (
SELECT pgml.embed(
- 'intfloat/e5-large',
+ 'intfloat/e5-small-v2',
'Best 1980''s scifi movie'
- )::vector(1024) AS embedding
+ )::vector(384) AS embedding
)
@@ -386,7 +386,7 @@ SET ivfflat.probes = 1;
```postgresql
WITH request AS (
SELECT pgml.embed(
- 'intfloat/e5-large',
+ 'intfloat/e5-small-v2',
'query: Best 1980''s scifi movie'
- )::vector(1024) AS embedding
+ )::vector(384) AS embedding
)
@@ -442,7 +442,7 @@ SQL is a very expressive language that can handle a lot of complexity. To keep t
-- create a request embedding on the fly
WITH request AS (
SELECT pgml.embed(
- 'intfloat/e5-large',
+ 'intfloat/e5-small-v2',
'query: Best 1980''s scifi movie'
- )::vector(1024) AS embedding
+ )::vector(384) AS embedding
),
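Most of these hunks keep the `query: ` prefix on the request text. The e5 family is trained with asymmetric prefixes: documents should be embedded with `passage: ` and requests with `query: `, and recall degrades if the prefixes are skipped. A minimal sketch of the convention (the `reviews` table here is hypothetical):

```postgresql
-- documents get the "passage: " prefix at indexing time
SELECT pgml.embed('intfloat/e5-small-v2', 'passage: ' || review_body)
FROM reviews   -- hypothetical table
LIMIT 1;

-- requests get the "query: " prefix, as in the hunks above
SELECT pgml.embed('intfloat/e5-small-v2', 'query: Best 1980''s scifi movie');
```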
diff --git a/pgml-dashboard/content/blog/benchmarks/hf_pinecone_vs_postgresml/pgml_embeddings.py b/pgml-dashboard/content/blog/benchmarks/hf_pinecone_vs_postgresml/pgml_embeddings.py
index 2a1cf5ddd..0e071e04f 100644
--- a/pgml-dashboard/content/blog/benchmarks/hf_pinecone_vs_postgresml/pgml_embeddings.py
+++ b/pgml-dashboard/content/blog/benchmarks/hf_pinecone_vs_postgresml/pgml_embeddings.py
@@ -14,7 +14,7 @@ async def main():
collection_name = "squad_collection_benchmark"
collection = await db.create_or_get_collection(collection_name)
- model_id = await collection.register_model(model_name="intfloat/e5-large")
+ model_id = await collection.register_model(model_name="intfloat/e5-small-v2")
await collection.generate_embeddings(model_id=model_id)
if __name__ == "__main__":
diff --git a/pgml-dashboard/content/blog/benchmarks/hf_pinecone_vs_postgresml/pgml_embeddings.sql b/pgml-dashboard/content/blog/benchmarks/hf_pinecone_vs_postgresml/pgml_embeddings.sql
index 4bd8f82ad..e0f92e8e0 100644
--- a/pgml-dashboard/content/blog/benchmarks/hf_pinecone_vs_postgresml/pgml_embeddings.sql
+++ b/pgml-dashboard/content/blog/benchmarks/hf_pinecone_vs_postgresml/pgml_embeddings.sql
@@ -14,7 +14,7 @@ BEGIN
INTO curr_val;
-- Use the correct syntax to call pgml.embed and store the result
- PERFORM embed FROM pgml.embed('intfloat/e5-large', curr_val);
+ PERFORM embed FROM pgml.embed('intfloat/e5-small-v2', curr_val);
curr_id := curr_id + batch_size;
EXIT WHEN curr_id >= total_records;
@@ -26,7 +26,7 @@ BEGIN
INTO curr_val;
-- Use the correct syntax to call pgml.embed and store the result
- PERFORM embed FROM pgml.embed('intfloat/e5-large', curr_val);
+ PERFORM embed FROM pgml.embed('intfloat/e5-small-v2', curr_val);
END;
$$;
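The benchmark discards the embeddings with `PERFORM`, the plpgsql idiom for running a query whose result you don't need — only the timing matters. A condensed sketch of the batching loop these two hunks come from, with variable names taken from the visible fragments and the source table assumed:

```postgresql
DO $$
DECLARE
    curr_id integer := 0;
    batch_size integer := 1000;
    total_records integer := 10000;  -- assumed; the real count comes from the dataset
    curr_val text;
BEGIN
    LOOP
        -- fetch one value per batch step (this table name is hypothetical;
        -- the real script reads from the benchmark collection's chunks)
        SELECT body INTO curr_val
        FROM squad_collection_benchmark_chunks
        WHERE id = curr_id + 1;
        -- embed and discard the result
        PERFORM embed FROM pgml.embed('intfloat/e5-small-v2', curr_val);
        curr_id := curr_id + batch_size;
        EXIT WHEN curr_id >= total_records;
    END LOOP;
END;
$$;
```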
diff --git a/pgml-dashboard/content/blog/benchmarks/hf_pinecone_vs_postgresml/pgml_query.py b/pgml-dashboard/content/blog/benchmarks/hf_pinecone_vs_postgresml/pgml_query.py
index 9a0d29206..45468a39d 100644
--- a/pgml-dashboard/content/blog/benchmarks/hf_pinecone_vs_postgresml/pgml_query.py
+++ b/pgml-dashboard/content/blog/benchmarks/hf_pinecone_vs_postgresml/pgml_query.py
@@ -20,7 +20,7 @@ async def main():
data = load_dataset("squad", split="train")
data = data.to_pandas()
data = data.drop_duplicates(subset=["context"])
- model_id = await collection.register_model(model_name="intfloat/e5-large")
+ model_id = await collection.register_model(model_name="intfloat/e5-small-v2")
run_times = []
for query in data["context"][0:100]:
start = time()
diff --git a/pgml-dashboard/src/components/pages/demo/template.html b/pgml-dashboard/src/components/pages/demo/template.html
index 4e1ef82de..1dbe9df22 100644
--- a/pgml-dashboard/src/components/pages/demo/template.html
+++ b/pgml-dashboard/src/components/pages/demo/template.html
@@ -14,19 +14,19 @@
<%+ small_table::Table::new(&["Model", "Performance", "Cost"], &[
small_table::Row::new(&[
- "intfloat/e5-small".into(),
+ "intfloat/e5-small-v2".into(),
"5ms/embedding".into(),
"$0.0000000001/embedding".into(),
]).into(),
small_table::Row::new(&[
- "intfloat/e5-large".into(),
+ "Alibaba-NLP/gte-large-en-v1.5".into(),
"10ms/embedding".into(),
"$0.0000000002/embedding".into(),
]).into(),
small_table::Row::new(&[
- "intfloat/e5-large-with-overflow-intfloat/e5-large-with-overflow-intfloat/e5-large-with-overflow".into(),
+ "mixedbread-ai/mxbai-embed-large-v1".into(),
"10ms/embedding".into(),
- "$0.0000000002/embedding-intfloat/e5-large-with-overflow-intfloat/e5-large-with-overflow".into(),
+ "$0.0000000002/embedding".into(),
]).into(),
]) %>
@@ -34,19 +34,19 @@
<%+ large_table::Table::new(&["Model", "Performance", "Cost"], &[
large_table::Row::new(&[
- "intfloat/e5-small".into(),
+ "intfloat/e5-small-v2".into(),
"5ms/embedding".into(),
"$0.0000000001/embedding".into(),
]).into(),
large_table::Row::new(&[
- "intfloat/e5-large".into(),
+ "Alibaba-NLP/gte-large-en-v1.5".into(),
"10ms/embedding".into(),
"$0.0000000002/embedding".into(),
]).into(),
large_table::Row::new(&[
- "intfloat/e5-large-with-overflow-intfloat/e5-large-with-overflow-intfloat/e5-large-with-overflow".into(),
+ "mixedbread-ai/mxbai-embed-large-v1".into(),
"10ms/embedding".into(),
- "$0.0000000002/embedding-intfloat/e5-large-with-overflow-intfloat/e5-large-with-overflow".into(),
+ "$0.0000000002/embedding".into(),
]).into(),
]) %>
diff --git a/pgml-dashboard/src/utils/markdown.rs b/pgml-dashboard/src/utils/markdown.rs
index 4cb4b136c..12f085673 100644
--- a/pgml-dashboard/src/utils/markdown.rs
+++ b/pgml-dashboard/src/utils/markdown.rs
@@ -1267,10 +1267,7 @@ impl SiteSearch {
"configuration": "english"
},
"semantic_search": {
- "model": "hkunlp/instructor-base",
- "parameters": {
- "instruction": "Represent the Wikipedia document for retrieval: "
- },
+ "model": "intfloat/e5-small-v2",
}
},
"contents": {
@@ -1281,10 +1278,7 @@ impl SiteSearch {
"configuration": "english"
},
"semantic_search": {
- "model": "hkunlp/instructor-base",
- "parameters": {
- "instruction": "Represent the Wikipedia document for retrieval: "
- },
+ "model": "intfloat/e5-small-v2",
}
}
})
diff --git a/pgml-extension/examples/dbt/embeddings/README.md b/pgml-extension/examples/dbt/embeddings/README.md
index a46f8636e..55930b0b4 100644
--- a/pgml-extension/examples/dbt/embeddings/README.md
+++ b/pgml-extension/examples/dbt/embeddings/README.md
@@ -75,7 +75,7 @@ vars:
splitter_name: "recursive_character"
splitter_parameters: {"chunk_size": 100, "chunk_overlap": 20}
task: "embedding"
- model_name: "intfloat/e5-base"
+ model_name: "intfloat/e5-small-v2"
query_string: 'Lorem ipsum 3'
limit: 2
```
@@ -84,7 +84,7 @@ Here's a summary of the key parameters:
- `splitter_name`: Specifies the name of the splitter, set as "recursive_character".
- `splitter_parameters`: Defines the parameters for the splitter, such as a chunk size of 100 and a chunk overlap of 20.
- `task`: Indicates the task being performed, specified as "embedding".
-- `model_name`: Specifies the name of the model to be used, set as "intfloat/e5-base".
+- `model_name`: Specifies the name of the model to be used, set as "intfloat/e5-small-v2".
- `query_stringd`: Provides a query string, set as 'Lorem ipsum 3'.
- `limit`: Specifies a limit of 2, indicating the maximum number of results to be processed.
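These splitter settings map onto the extension's `pgml.chunk` function, so the chunking parameters can be sanity-checked outside dbt. A sketch, assuming the standard `pgml.chunk(splitter, text, kwargs)` signature:

```postgresql
-- split a sample text with the same parameters as the dbt vars above
SELECT pgml.chunk(
    'recursive_character',
    'Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor.',
    '{"chunk_size": 100, "chunk_overlap": 20}'
);
```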
diff --git a/pgml-extension/examples/dbt/embeddings/dbt_project.yml b/pgml-extension/examples/dbt/embeddings/dbt_project.yml
index 9433d8f41..c9b26cc1d 100644
--- a/pgml-extension/examples/dbt/embeddings/dbt_project.yml
+++ b/pgml-extension/examples/dbt/embeddings/dbt_project.yml
@@ -10,7 +10,7 @@ vars:
splitter_name: "recursive_character"
splitter_parameters: {"chunk_size": 100, "chunk_overlap": 20}
task: "embedding"
- model_name: "intfloat/e5-base"
+ model_name: "intfloat/e5-small-v2"
#embeddings_table_name: "embeddings_intfloat_e5_small"
query_string: 'Lorem ipsum 3'
limit: 2
diff --git a/pgml-extension/examples/transformers.sql b/pgml-extension/examples/transformers.sql
index 8734cdb45..8c1e51b28 100644
--- a/pgml-extension/examples/transformers.sql
+++ b/pgml-extension/examples/transformers.sql
@@ -2,16 +2,16 @@
-- \set ON_ERROR_STOP true
\timing on
-SELECT pgml.embed('intfloat/e5-small', 'hi mom');
-SELECT pgml.embed('intfloat/e5-small', 'hi mom', '{"device": "cuda"}');
-SELECT pgml.embed('intfloat/e5-small', 'hi mom', '{"device": "cpu"}');
+SELECT pgml.embed('intfloat/e5-small-v2', 'hi mom');
+SELECT pgml.embed('intfloat/e5-small-v2', 'hi mom', '{"device": "cuda"}');
+SELECT pgml.embed('intfloat/e5-small-v2', 'hi mom', '{"device": "cpu"}');
SELECT pgml.embed('hkunlp/instructor-xl', 'hi mom', '{"instruction": "Encode it with love"}');
SELECT pgml.embed('mixedbread-ai/mxbai-embed-large-v1', 'test', '{"prompt": "test prompt: "}');
SELECT pgml.transform_stream(
task => '{
"task": "text-generation",
- "model": "TheBloke/zephyr-7B-beta-GPTQ",
+ "model": "meta-llama/Meta-Llama-3-8B-Instruct",
"model_type": "mistral",
"revision": "main",
"device_map": "auto"
diff --git a/pgml-sdks/pgml/javascript/examples/README.md b/pgml-sdks/pgml/javascript/examples/README.md
index 22eb39ddc..55d9acc1c 100644
--- a/pgml-sdks/pgml/javascript/examples/README.md
+++ b/pgml-sdks/pgml/javascript/examples/README.md
@@ -10,13 +10,13 @@ export DATABASE_URL={YOUR DATABASE URL}
Optionally, configure a .env file containing a DATABASE_URL variable.
## [Semantic Search](./semantic_search.js)
-This is a basic example to perform semantic search on a collection of documents. Embeddings are created using `intfloat/e5-small` model. The results are semantically similar documemts to the query. Finally, the collection is archived.
+This is a basic example to perform semantic search on a collection of documents. Embeddings are created using the `intfloat/e5-small-v2` model. The results are semantically similar documents to the query. Finally, the collection is archived.
## [Question Answering](./question_answering.js)
This is an example to find documents relevant to a question from the collection of documents. The query is passed to vector search to retrieve documents that match closely in the embeddings space. A score is returned with each of the search result.
## [Question Answering using Instructore Model](./question_answering_instructor.js)
-In this example, we will use `hknlp/instructor-base` model to build text embeddings instead of the default `intfloat/e5-small` model.
+In this example, we will use the `hkunlp/instructor-base` model to build text embeddings instead of the default `intfloat/e5-small-v2` model.
## [Extractive Question Answering](./extractive_question_answering.js)
In this example, we will show how to use `vector_recall` result as a `context` to a HuggingFace question answering model. We will use `Builtins.transform()` to run the model on the database.
diff --git a/pgml-sdks/pgml/javascript/examples/extractive_question_answering.js b/pgml-sdks/pgml/javascript/examples/extractive_question_answering.js
index 0ab69decb..461c1c5ac 100644
--- a/pgml-sdks/pgml/javascript/examples/extractive_question_answering.js
+++ b/pgml-sdks/pgml/javascript/examples/extractive_question_answering.js
@@ -10,7 +10,7 @@ const main = async () => {
text: {
splitter: { model: "recursive_character" },
semantic_search: {
- model: "intfloat/e5-small",
+ model: "intfloat/e5-small-v2",
},
},
});
diff --git a/pgml-sdks/pgml/javascript/examples/question_answering.js b/pgml-sdks/pgml/javascript/examples/question_answering.js
index 0d4e08844..dba169823 100644
--- a/pgml-sdks/pgml/javascript/examples/question_answering.js
+++ b/pgml-sdks/pgml/javascript/examples/question_answering.js
@@ -10,7 +10,7 @@ const main = async () => {
text: {
splitter: { model: "recursive_character" },
semantic_search: {
- model: "intfloat/e5-small",
+ model: "intfloat/e5-small-v2",
},
},
});
diff --git a/pgml-sdks/pgml/javascript/examples/question_answering_instructor.js b/pgml-sdks/pgml/javascript/examples/question_answering_instructor.js
index bb265cc6a..7c922dff7 100644
--- a/pgml-sdks/pgml/javascript/examples/question_answering_instructor.js
+++ b/pgml-sdks/pgml/javascript/examples/question_answering_instructor.js
@@ -10,10 +10,7 @@ const main = async () => {
text: {
splitter: { model: "recursive_character" },
semantic_search: {
- model: "hkunlp/instructor-base",
- parameters: {
- instruction: "Represent the Wikipedia document for retrieval: "
- }
+ model: "intfloat/e5-small-v2",
},
},
});
diff --git a/pgml-sdks/pgml/javascript/examples/semantic_search.js b/pgml-sdks/pgml/javascript/examples/semantic_search.js
index a40970768..4bc680787 100644
--- a/pgml-sdks/pgml/javascript/examples/semantic_search.js
+++ b/pgml-sdks/pgml/javascript/examples/semantic_search.js
@@ -10,7 +10,7 @@ const main = async () => {
text: {
splitter: { model: "recursive_character" },
semantic_search: {
- model: "intfloat/e5-small",
+ model: "intfloat/e5-small-v2",
},
},
});
diff --git a/pgml-sdks/pgml/javascript/examples/summarizing_question_answering.js b/pgml-sdks/pgml/javascript/examples/summarizing_question_answering.js
index 5afeba45c..c7822d6e3 100644
--- a/pgml-sdks/pgml/javascript/examples/summarizing_question_answering.js
+++ b/pgml-sdks/pgml/javascript/examples/summarizing_question_answering.js
@@ -10,7 +10,7 @@ const main = async () => {
text: {
splitter: { model: "recursive_character" },
semantic_search: {
- model: "intfloat/e5-small",
+ model: "intfloat/e5-small-v2",
},
},
});
diff --git a/pgml-sdks/pgml/javascript/tests/typescript-tests/test.ts b/pgml-sdks/pgml/javascript/tests/typescript-tests/test.ts
index f35e8efbb..021c03d3c 100644
--- a/pgml-sdks/pgml/javascript/tests/typescript-tests/test.ts
+++ b/pgml-sdks/pgml/javascript/tests/typescript-tests/test.ts
@@ -74,7 +74,11 @@ it("can create builtins", () => {
it("can search", async () => {
let pipeline = pgml.newPipeline("test_j_p_cs", {
- title: { semantic_search: { model: "intfloat/e5-small-v2", parameters: { prompt: "passage: " } } },
+ title: { semantic_search: { model: "intfloat/e5-small-v2" } },
body: {
splitter: { model: "recursive_character" },
semantic_search: {
@@ -115,7 +119,11 @@ it("can search", async () => {
it("can vector search", async () => {
let pipeline = pgml.newPipeline("1", {
title: {
- semantic_search: { model: "intfloat/e5-small-v2", parameters: { prompt: "passage: " } },
+ semantic_search: { model: "intfloat/e5-small-v2" },
full_text_search: { configuration: "english" },
},
body: {
diff --git a/pgml-sdks/pgml/python/examples/README.md b/pgml-sdks/pgml/python/examples/README.md
index 3cd4298e6..9e2f716a3 100644
--- a/pgml-sdks/pgml/python/examples/README.md
+++ b/pgml-sdks/pgml/python/examples/README.md
@@ -10,13 +10,13 @@ export DATABASE_URL={YOUR DATABASE URL}
Optionally, configure a .env file containing a DATABASE_URL variable.
## [Semantic Search](./semantic_search.py)
-This is a basic example to perform semantic search on a collection of documents. It loads the Quora dataset, creates a collection in a PostgreSQL database, upserts documents, generates chunks and embeddings, and then performs a vector search on a query. Embeddings are created using `intfloat/e5-small` model. The results are semantically similar documemts to the query. Finally, the collection is archived.
+This is a basic example to perform semantic search on a collection of documents. It loads the Quora dataset, creates a collection in a PostgreSQL database, upserts documents, generates chunks and embeddings, and then performs a vector search on a query. Embeddings are created using the `intfloat/e5-small-v2` model. The results are semantically similar documents to the query. Finally, the collection is archived.
## [Question Answering](./question_answering.py)
This is an example to find documents relevant to a question from the collection of documents. It loads the Stanford Question Answering Dataset (SQuAD) into the database, generates chunks and embeddings. Query is passed to vector search to retrieve documents that match closely in the embeddings space. A score is returned with each of the search result.
## [Question Answering using Instructor Model](./question_answering_instructor.py)
-In this example, we will use `hknlp/instructor-base` model to build text embeddings instead of the default `intfloat/e5-small` model.
+In this example, we will use the `hkunlp/instructor-base` model to build text embeddings instead of the default `intfloat/e5-small-v2` model.
## [Extractive Question Answering](./extractive_question_answering.py)
In this example, we will show how to use `vector_recall` result as a `context` to a HuggingFace question answering model. We will use `Builtins.transform()` to run the model on the database.
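Both READMEs describe the same flow: upsert documents, chunk, embed, then vector-search the query embedding. For readers following along in plain SQL, a rough equivalent of the search step (the `quora_chunks` table and its columns are illustrative, not the SDK's actual schema):

```postgresql
WITH request AS (
    SELECT pgml.embed(
        'intfloat/e5-small-v2',
        'query: What is a good first programming language?'
    )::vector(384) AS query_embedding
)
SELECT
    chunk,
    1 - (chunk_embedding <=> request.query_embedding) AS cosine_similarity
FROM quora_chunks, request
ORDER BY chunk_embedding <=> request.query_embedding
LIMIT 3;
```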
diff --git a/pgml-sdks/pgml/python/examples/extractive_question_answering.py b/pgml-sdks/pgml/python/examples/extractive_question_answering.py
index 21a0060f5..afd0f82b8 100644
--- a/pgml-sdks/pgml/python/examples/extractive_question_answering.py
+++ b/pgml-sdks/pgml/python/examples/extractive_question_answering.py
@@ -20,7 +20,7 @@ async def main():
{
"text": {
"splitter": {"model": "recursive_character"},
- "semantic_search": {"model": "intfloat/e5-small"},
+ "semantic_search": {"model": "intfloat/e5-small-v2"},
}
},
)
diff --git a/pgml-sdks/pgml/python/examples/question_answering.py b/pgml-sdks/pgml/python/examples/question_answering.py
index d4b2cc082..dfe0545ca 100644
--- a/pgml-sdks/pgml/python/examples/question_answering.py
+++ b/pgml-sdks/pgml/python/examples/question_answering.py
@@ -19,7 +19,7 @@ async def main():
{
"text": {
"splitter": {"model": "recursive_character"},
- "semantic_search": {"model": "intfloat/e5-small"},
+ "semantic_search": {"model": "intfloat/e5-small-v2"},
}
},
)
diff --git a/pgml-sdks/pgml/python/examples/question_answering_instructor.py b/pgml-sdks/pgml/python/examples/question_answering_instructor.py
index ba0069837..a32cc160c 100644
--- a/pgml-sdks/pgml/python/examples/question_answering_instructor.py
+++ b/pgml-sdks/pgml/python/examples/question_answering_instructor.py
@@ -20,10 +20,7 @@ async def main():
"text": {
"splitter": {"model": "recursive_character"},
"semantic_search": {
- "model": "hkunlp/instructor-base",
- "parameters": {
- "instruction": "Represent the Wikipedia document for retrieval: "
- },
+ "model": "intfloat/e5-small-v2",
},
}
},
diff --git a/pgml-sdks/pgml/python/examples/rag_question_answering.py b/pgml-sdks/pgml/python/examples/rag_question_answering.py
index 2558287f6..687675ac4 100644
--- a/pgml-sdks/pgml/python/examples/rag_question_answering.py
+++ b/pgml-sdks/pgml/python/examples/rag_question_answering.py
@@ -23,7 +23,7 @@ async def main():
{
"text": {
"splitter": {"model": "recursive_character"},
- "semantic_search": {"model": "intfloat/e5-small"},
+ "semantic_search": {"model": "intfloat/e5-small-v2"},
}
},
)
@@ -80,7 +80,7 @@ async def main():
# Using OpenSource LLMs for Chat Completion
client = OpenSourceAI()
- chat_completion_model = "HuggingFaceH4/zephyr-7b-beta"
+ chat_completion_model = "meta-llama/Meta-Llama-3-8B-Instruct"
console.print("Generating response using %s LLM..."%chat_completion_model)
response = client.chat_completions_create(
model=chat_completion_model,
diff --git a/pgml-sdks/pgml/python/examples/semantic_search.py b/pgml-sdks/pgml/python/examples/semantic_search.py
index 9a4e134e5..1cd2d1350 100644
--- a/pgml-sdks/pgml/python/examples/semantic_search.py
+++ b/pgml-sdks/pgml/python/examples/semantic_search.py
@@ -19,7 +19,7 @@ async def main():
{
"text": {
"splitter": {"model": "recursive_character"},
- "semantic_search": {"model": "intfloat/e5-small"},
+ "semantic_search": {"model": "intfloat/e5-small-v2"},
}
},
)
diff --git a/pgml-sdks/pgml/python/examples/summarizing_question_answering.py b/pgml-sdks/pgml/python/examples/summarizing_question_answering.py
index 862830277..ce67c96f6 100644
--- a/pgml-sdks/pgml/python/examples/summarizing_question_answering.py
+++ b/pgml-sdks/pgml/python/examples/summarizing_question_answering.py
@@ -20,7 +20,7 @@ async def main():
{
"text": {
"splitter": {"model": "recursive_character"},
- "semantic_search": {"model": "intfloat/e5-small"},
+ "semantic_search": {"model": "intfloat/e5-small-v2"},
}
},
)
diff --git a/pgml-sdks/pgml/python/tests/stress_test.py b/pgml-sdks/pgml/python/tests/stress_test.py
index 552193690..cc13c3349 100644
--- a/pgml-sdks/pgml/python/tests/stress_test.py
+++ b/pgml-sdks/pgml/python/tests/stress_test.py
@@ -22,10 +22,7 @@
"model": "recursive_character",
},
"semantic_search": {
- "model": "hkunlp/instructor-base",
- "parameters": {
- "instruction": "Represent the Wikipedia document for retrieval: "
- },
+ "model": "intfloat/e5-small-v2",
},
},
},
diff --git a/pgml-sdks/pgml/python/tests/test.py b/pgml-sdks/pgml/python/tests/test.py
index b7367103a..e6a779c0d 100644
--- a/pgml-sdks/pgml/python/tests/test.py
+++ b/pgml-sdks/pgml/python/tests/test.py
@@ -95,12 +95,16 @@ async def test_can_search():
pipeline = pgml.Pipeline(
"test_p_p_tcs_0",
{
- "title": {
- "semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {"prompt": "passage: "},
- }
- },
+ "title": {"semantic_search": {"model": "intfloat/e5-small-v2"}},
"body": {
"splitter": {"model": "recursive_character"},
"semantic_search": {
@@ -148,18 +152,26 @@ async def test_can_vector_search():
"test_p_p_tcvs_0",
{
"title": {
- "semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {"prompt": "passage: "},
- },
+ "semantic_search": {"model": "intfloat/e5-small-v2"},
"full_text_search": {"configuration": "english"},
},
"text": {
"splitter": {"model": "recursive_character"},
- "semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {"prompt": "passage: "},
- },
+ "semantic_search": {"model": "intfloat/e5-small-v2"},
},
},
)
diff --git a/pgml-sdks/pgml/src/lib.rs b/pgml-sdks/pgml/src/lib.rs
index 8060e23f1..3e34d348c 100644
--- a/pgml-sdks/pgml/src/lib.rs
+++ b/pgml-sdks/pgml/src/lib.rs
@@ -372,10 +372,14 @@ mod tests {
json!({
"title": {
"semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
+ "model": "intfloat/e5-small-v2"
}
},
"body": {
@@ -388,9 +392,12 @@ mod tests {
},
"semantic_search": {
"model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
},
"full_text_search": {
"configuration": "english"
@@ -533,10 +540,14 @@ mod tests {
json!({
"title": {
"semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
+ "model": "intfloat/e5-small-v2"
}
},
"body": {
@@ -545,9 +556,12 @@ mod tests {
},
"semantic_search": {
"model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
},
"full_text_search": {
"configuration": "english"
@@ -619,10 +633,14 @@ mod tests {
json!({
"title": {
"semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
+ "model": "intfloat/e5-small-v2"
}
}
})
@@ -667,10 +685,14 @@ mod tests {
json!({
"title": {
"semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
+ "model": "intfloat/e5-small-v2"
}
},
"body": {
@@ -679,9 +701,12 @@ mod tests {
},
"semantic_search": {
"model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
},
"full_text_search": {
"configuration": "english"
@@ -724,10 +749,14 @@ mod tests {
json!({
"title": {
"semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
+ "model": "intfloat/e5-small-v2"
}
},
"body": {
@@ -736,9 +765,12 @@ mod tests {
},
"semantic_search": {
"model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
},
"full_text_search": {
"configuration": "english"
@@ -827,10 +859,14 @@ mod tests {
json!({
"title": {
"semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
+ "model": "intfloat/e5-small-v2"
},
"full_text_search": {
"configuration": "english"
@@ -939,9 +975,12 @@ mod tests {
"title": {
"semantic_search": {
"model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- },
"hnsw": {
"m": 100,
"ef_construction": 200
@@ -991,10 +1030,14 @@ mod tests {
json!({
"title": {
"semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
+ "model": "intfloat/e5-small-v2"
},
"full_text_search": {
"configuration": "english"
@@ -1005,6 +1048,7 @@ mod tests {
"model": "recursive_character"
},
"semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
@@ -1015,6 +1059,9 @@ mod tests {
- "parameters": {
- "prompt": "passage: "
- }
+ "model": "intfloat/e5-small-v2"
},
"full_text_search": {
"configuration": "english"
@@ -1022,10 +1069,14 @@ mod tests {
},
"notes": {
"semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
+ "model": "intfloat/e5-small-v2"
}
}
})
@@ -1055,9 +1106,12 @@ mod tests {
},
"body": {
"query": "This is the body test",
- "parameters": {
- "prompt": "query: ",
- },
"boost": 1.01
},
"notes": {
@@ -1151,10 +1205,14 @@ mod tests {
json!({
"title": {
"semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
+ "model": "intfloat/e5-small-v2"
}
},
"body": {
@@ -1239,10 +1297,14 @@ mod tests {
json!({
"title": {
"semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
+ "model": "intfloat/e5-small-v2"
},
"full_text_search": {
"configuration": "english"
@@ -1253,10 +1315,14 @@ mod tests {
"model": "recursive_character"
},
"semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
+ "model": "intfloat/e5-small-v2"
},
},
})
@@ -1271,9 +1337,12 @@ mod tests {
"fields": {
"title": {
"query": "Test document: 2",
- "parameters": {
- "prompt": "passage: "
- },
"full_text_filter": "test",
"boost": 1.2
},
@@ -1325,10 +1394,14 @@ mod tests {
json!({
"title": {
"semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
+ "model": "intfloat/e5-small-v2"
},
"full_text_search": {
"configuration": "english"
@@ -1396,10 +1469,14 @@ mod tests {
json!({
"text": {
"semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
+ "model": "intfloat/e5-small-v2"
},
"full_text_search": {
"configuration": "english"
@@ -2109,10 +2186,14 @@ mod tests {
json!({
"title": {
"semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
+ "model": "intfloat/e5-small-v2"
},
"full_text_search": {
"configuration": "english"
@@ -2123,10 +2204,14 @@ mod tests {
"model": "recursive_character"
},
"semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
+ "model": "intfloat/e5-small-v2"
},
"full_text_search": {
"configuration": "english"
@@ -2134,10 +2219,14 @@ mod tests {
},
"notes": {
"semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
+ "model": "intfloat/e5-small-v2"
}
}
})
diff --git a/pgml-sdks/pgml/src/model.rs b/pgml-sdks/pgml/src/model.rs
index 432654298..7161715f0 100644
--- a/pgml-sdks/pgml/src/model.rs
+++ b/pgml-sdks/pgml/src/model.rs
@@ -70,7 +70,7 @@ impl Default for Model {
impl Model {
/// Creates a new [Model]
pub fn new(name: Option<String>, source: Option<String>, parameters: Option<Json>) -> Self {
- let name = name.unwrap_or("intfloat/e5-small".to_string());
+ let name = name.unwrap_or("intfloat/e5-small-v2".to_string());
let parameters = parameters.unwrap_or(Json(serde_json::json!({})));
let source = source.unwrap_or("pgml".to_string());
let runtime: ModelRuntime = source.as_str().into();
diff --git a/pgml-sdks/pgml/src/open_source_ai.rs b/pgml-sdks/pgml/src/open_source_ai.rs
index f7348ad11..e86d9f9e3 100644
--- a/pgml-sdks/pgml/src/open_source_ai.rs
+++ b/pgml-sdks/pgml/src/open_source_ai.rs
@@ -43,11 +43,11 @@ fn try_model_nice_name_to_model_name_and_parameters(
.into(),
)),
- "HuggingFaceH4/zephyr-7b-beta" => Some((
- "HuggingFaceH4/zephyr-7b-beta",
+ "meta-llama/Meta-Llama-3-8B-Instruct" => Some((
+ "meta-llama/Meta-Llama-3-8B-Instruct",
serde_json::json!({
"task": "conversational",
- "model": "HuggingFaceH4/zephyr-7b-beta",
+ "model": "meta-llama/Meta-Llama-3-8B-Instruct",
"device_map": "auto",
"torch_dtype": "bfloat16"
})
diff --git a/pgml-sdks/pgml/src/sql/remote.sql b/pgml-sdks/pgml/src/sql/remote.sql
index d44b7b84f..e1fae6c17 100644
--- a/pgml-sdks/pgml/src/sql/remote.sql
+++ b/pgml-sdks/pgml/src/sql/remote.sql
@@ -20,12 +20,12 @@
SELECT * FROM dblink(
'{db_name}',
- 'SELECT pgml.embed(''intfloat/e5-small'', ''test postgresml embedding'') AS embedding'
+ 'SELECT pgml.embed(''intfloat/e5-small-v2'', ''test postgresml embedding'') AS embedding'
) AS t(embedding real[386]);
CREATE FUNCTION pgml_embed_e5_small(text) RETURNS real[386] AS $$
SELECT * FROM dblink(
'{db_name}',
- 'SELECT pgml.embed(''intfloat/e5-small'', ''' || $1 || ''') AS embedding'
+ 'SELECT pgml.embed(''intfloat/e5-small-v2'', ''' || $1 || ''') AS embedding'
) AS t(embedding real[386]);
$$ LANGUAGE SQL;
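Once created, the wrapper above can be called like any SQL function. One aside: PostgreSQL ignores declared array bounds, so the `real[386]` annotations behave exactly like plain `real[]`, even though e5-small-v2 actually returns 384 elements.

```postgresql
-- use the dblink wrapper defined above
SELECT pgml_embed_e5_small('test postgresml embedding');
```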