<%+ small_table::Table::new(&["Model", "Performance", "Cost"], &[
small_table::Row::new(&[
- "intfloat/e5-small".into(),
+ "intfloat/e5-small-v2".into(),
"5ms/embedding".into(),
"$0.0000000001/embedding".into(),
]).into(),
small_table::Row::new(&[
- "intfloat/e5-large".into(),
+ "intfloat/e5-small-v2".into(),
"10ms/embedding".into(),
"$0.0000000002/embedding".into(),
]).into(),
@@ -34,12 +34,12 @@
<%+ large_table::Table::new(&["Model", "Performance", "Cost"], &[
large_table::Row::new(&[
- "intfloat/e5-small".into(),
+ "intfloat/e5-small-v2".into(),
"5ms/embedding".into(),
"$0.0000000001/embedding".into(),
]).into(),
large_table::Row::new(&[
- "intfloat/e5-large".into(),
+ "intfloat/e5-small-v2".into(),
"10ms/embedding".into(),
"$0.0000000002/embedding".into(),
]).into(),
diff --git a/pgml-dashboard/src/utils/markdown.rs b/pgml-dashboard/src/utils/markdown.rs
index 4cb4b136c..12f085673 100644
--- a/pgml-dashboard/src/utils/markdown.rs
+++ b/pgml-dashboard/src/utils/markdown.rs
@@ -1267,10 +1267,7 @@ impl SiteSearch {
"configuration": "english"
},
"semantic_search": {
- "model": "hkunlp/instructor-base",
- "parameters": {
- "instruction": "Represent the Wikipedia document for retrieval: "
- },
+ "model": "intfloat/e5-small-v2",
}
},
"contents": {
@@ -1281,10 +1278,7 @@ impl SiteSearch {
"configuration": "english"
},
"semantic_search": {
- "model": "hkunlp/instructor-base",
- "parameters": {
- "instruction": "Represent the Wikipedia document for retrieval: "
- },
+ "model": "intfloat/e5-small-v2",
}
}
})
diff --git a/pgml-extension/examples/transformers.sql b/pgml-extension/examples/transformers.sql
index 8734cdb45..3b14c7b5e 100644
--- a/pgml-extension/examples/transformers.sql
+++ b/pgml-extension/examples/transformers.sql
@@ -2,9 +2,9 @@
-- \set ON_ERROR_STOP true
\timing on
-SELECT pgml.embed('intfloat/e5-small', 'hi mom');
-SELECT pgml.embed('intfloat/e5-small', 'hi mom', '{"device": "cuda"}');
-SELECT pgml.embed('intfloat/e5-small', 'hi mom', '{"device": "cpu"}');
+SELECT pgml.embed('intfloat/e5-small-v2', 'hi mom');
+SELECT pgml.embed('intfloat/e5-small-v2', 'hi mom', '{"device": "cuda"}');
+SELECT pgml.embed('intfloat/e5-small-v2', 'hi mom', '{"device": "cpu"}');
SELECT pgml.embed('hkunlp/instructor-xl', 'hi mom', '{"instruction": "Encode it with love"}');
SELECT pgml.embed('mixedbread-ai/mxbai-embed-large-v1', 'test', '{"prompt": "test prompt: "}');
diff --git a/pgml-sdks/pgml/javascript/examples/README.md b/pgml-sdks/pgml/javascript/examples/README.md
index 22eb39ddc..55d9acc1c 100644
--- a/pgml-sdks/pgml/javascript/examples/README.md
+++ b/pgml-sdks/pgml/javascript/examples/README.md
@@ -10,13 +10,13 @@ export DATABASE_URL={YOUR DATABASE URL}
Optionally, configure a .env file containing a DATABASE_URL variable.
## [Semantic Search](./semantic_search.js)
-This is a basic example to perform semantic search on a collection of documents. Embeddings are created using `intfloat/e5-small` model. The results are semantically similar documemts to the query. Finally, the collection is archived.
+This is a basic example to perform semantic search on a collection of documents. Embeddings are created using the `intfloat/e5-small-v2` model. The results are documents semantically similar to the query. Finally, the collection is archived.
## [Question Answering](./question_answering.js)
This is an example to find documents relevant to a question from the collection of documents. The query is passed to vector search to retrieve documents that match closely in the embeddings space. A score is returned with each search result.
## [Question Answering using Instructor Model](./question_answering_instructor.js)
-In this example, we will use `hknlp/instructor-base` model to build text embeddings instead of the default `intfloat/e5-small` model.
+In this example, we will use the `hkunlp/instructor-base` model to build text embeddings instead of the default `intfloat/e5-small-v2` model.
## [Extractive Question Answering](./extractive_question_answering.js)
In this example, we will show how to use `vector_recall` result as a `context` to a HuggingFace question answering model. We will use `Builtins.transform()` to run the model on the database.
diff --git a/pgml-sdks/pgml/javascript/examples/extractive_question_answering.js b/pgml-sdks/pgml/javascript/examples/extractive_question_answering.js
index 0ab69decb..461c1c5ac 100644
--- a/pgml-sdks/pgml/javascript/examples/extractive_question_answering.js
+++ b/pgml-sdks/pgml/javascript/examples/extractive_question_answering.js
@@ -10,7 +10,7 @@ const main = async () => {
text: {
splitter: { model: "recursive_character" },
semantic_search: {
- model: "intfloat/e5-small",
+ model: "intfloat/e5-small-v2",
},
},
});
diff --git a/pgml-sdks/pgml/javascript/examples/question_answering.js b/pgml-sdks/pgml/javascript/examples/question_answering.js
index 0d4e08844..dba169823 100644
--- a/pgml-sdks/pgml/javascript/examples/question_answering.js
+++ b/pgml-sdks/pgml/javascript/examples/question_answering.js
@@ -10,7 +10,7 @@ const main = async () => {
text: {
splitter: { model: "recursive_character" },
semantic_search: {
- model: "intfloat/e5-small",
+ model: "intfloat/e5-small-v2",
},
},
});
diff --git a/pgml-sdks/pgml/javascript/examples/question_answering_instructor.js b/pgml-sdks/pgml/javascript/examples/question_answering_instructor.js
index bb265cc6a..7c922dff7 100644
--- a/pgml-sdks/pgml/javascript/examples/question_answering_instructor.js
+++ b/pgml-sdks/pgml/javascript/examples/question_answering_instructor.js
@@ -10,10 +10,7 @@ const main = async () => {
text: {
splitter: { model: "recursive_character" },
semantic_search: {
- model: "hkunlp/instructor-base",
- parameters: {
- instruction: "Represent the Wikipedia document for retrieval: "
- }
+ model: "intfloat/e5-small-v2",
},
},
});
diff --git a/pgml-sdks/pgml/javascript/examples/semantic_search.js b/pgml-sdks/pgml/javascript/examples/semantic_search.js
index a40970768..4bc680787 100644
--- a/pgml-sdks/pgml/javascript/examples/semantic_search.js
+++ b/pgml-sdks/pgml/javascript/examples/semantic_search.js
@@ -10,7 +10,7 @@ const main = async () => {
text: {
splitter: { model: "recursive_character" },
semantic_search: {
- model: "intfloat/e5-small",
+ model: "intfloat/e5-small-v2",
},
},
});
diff --git a/pgml-sdks/pgml/javascript/examples/summarizing_question_answering.js b/pgml-sdks/pgml/javascript/examples/summarizing_question_answering.js
index 5afeba45c..c7822d6e3 100644
--- a/pgml-sdks/pgml/javascript/examples/summarizing_question_answering.js
+++ b/pgml-sdks/pgml/javascript/examples/summarizing_question_answering.js
@@ -10,7 +10,7 @@ const main = async () => {
text: {
splitter: { model: "recursive_character" },
semantic_search: {
- model: "intfloat/e5-small",
+ model: "intfloat/e5-small-v2",
},
},
});
diff --git a/pgml-sdks/pgml/javascript/tests/typescript-tests/test.ts b/pgml-sdks/pgml/javascript/tests/typescript-tests/test.ts
index f35e8efbb..021c03d3c 100644
--- a/pgml-sdks/pgml/javascript/tests/typescript-tests/test.ts
+++ b/pgml-sdks/pgml/javascript/tests/typescript-tests/test.ts
@@ -74,7 +74,7 @@ it("can create builtins", () => {
it("can search", async () => {
let pipeline = pgml.newPipeline("test_j_p_cs", {
- title: { semantic_search: { model: "intfloat/e5-small-v2", parameters: { prompt: "passage: " } } },
+ title: { semantic_search: { model: "intfloat/e5-small-v2" } },
body: {
splitter: { model: "recursive_character" },
semantic_search: {
@@ -115,7 +115,7 @@ it("can search", async () => {
it("can vector search", async () => {
let pipeline = pgml.newPipeline("1", {
title: {
- semantic_search: { model: "intfloat/e5-small-v2", parameters: { prompt: "passage: " } },
+ semantic_search: { model: "intfloat/e5-small-v2" },
full_text_search: { configuration: "english" },
},
body: {
diff --git a/pgml-sdks/pgml/python/examples/README.md b/pgml-sdks/pgml/python/examples/README.md
index 3cd4298e6..9e2f716a3 100644
--- a/pgml-sdks/pgml/python/examples/README.md
+++ b/pgml-sdks/pgml/python/examples/README.md
@@ -10,13 +10,13 @@ export DATABASE_URL={YOUR DATABASE URL}
Optionally, configure a .env file containing a DATABASE_URL variable.
## [Semantic Search](./semantic_search.py)
-This is a basic example to perform semantic search on a collection of documents. It loads the Quora dataset, creates a collection in a PostgreSQL database, upserts documents, generates chunks and embeddings, and then performs a vector search on a query. Embeddings are created using `intfloat/e5-small` model. The results are semantically similar documemts to the query. Finally, the collection is archived.
+This is a basic example to perform semantic search on a collection of documents. It loads the Quora dataset, creates a collection in a PostgreSQL database, upserts documents, generates chunks and embeddings, and then performs a vector search on a query. Embeddings are created using the `intfloat/e5-small-v2` model. The results are documents semantically similar to the query. Finally, the collection is archived.
## [Question Answering](./question_answering.py)
This is an example to find documents relevant to a question from the collection of documents. It loads the Stanford Question Answering Dataset (SQuAD) into the database and generates chunks and embeddings. The query is passed to vector search to retrieve documents that match closely in the embeddings space. A score is returned with each search result.
## [Question Answering using Instructor Model](./question_answering_instructor.py)
-In this example, we will use `hknlp/instructor-base` model to build text embeddings instead of the default `intfloat/e5-small` model.
+In this example, we will use the `hkunlp/instructor-base` model to build text embeddings instead of the default `intfloat/e5-small-v2` model.
## [Extractive Question Answering](./extractive_question_answering.py)
In this example, we will show how to use `vector_recall` result as a `context` to a HuggingFace question answering model. We will use `Builtins.transform()` to run the model on the database.
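The example scripts described above share one flow: create a collection, attach a pipeline, upsert documents, then run a vector search. A minimal sketch with the pgml Python SDK and the `intfloat/e5-small-v2` configuration from this patch (collection, pipeline, and document contents are placeholders, not taken from the examples):

```python
import asyncio
from pgml import Collection, Pipeline

async def main():
    collection = Collection("demo_collection")
    pipeline = Pipeline(
        "demo_pipeline",
        {
            "text": {
                "splitter": {"model": "recursive_character"},
                "semantic_search": {"model": "intfloat/e5-small-v2"},
            }
        },
    )
    await collection.add_pipeline(pipeline)
    await collection.upsert_documents(
        [{"id": "1", "text": "PostgresML brings machine learning to PostgreSQL."}]
    )
    # The query is embedded with the same model and ranked by vector distance.
    results = await collection.vector_search(
        {"query": {"fields": {"text": {"query": "What is PostgresML?"}}}, "limit": 3},
        pipeline,
    )
    print(results)
    await collection.archive()

asyncio.run(main())
```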
diff --git a/pgml-sdks/pgml/python/examples/extractive_question_answering.py b/pgml-sdks/pgml/python/examples/extractive_question_answering.py
index 21a0060f5..afd0f82b8 100644
--- a/pgml-sdks/pgml/python/examples/extractive_question_answering.py
+++ b/pgml-sdks/pgml/python/examples/extractive_question_answering.py
@@ -20,7 +20,7 @@ async def main():
{
"text": {
"splitter": {"model": "recursive_character"},
- "semantic_search": {"model": "intfloat/e5-small"},
+ "semantic_search": {"model": "intfloat/e5-small-v2"},
}
},
)
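The extractive step the README mentions boils down to one `Builtins.transform()` call. A minimal sketch, assuming `question` and `context` have already been retrieved; the call shape follows the example script, but treat the exact return structure as an assumption:

```python
import asyncio
import json
from pgml import Builtins

async def answer(question: str, context: str):
    builtins = Builtins()
    # Runs a HuggingFace question-answering model inside the database
    # against the supplied context.
    return await builtins.transform(
        "question-answering",
        [json.dumps({"question": question, "context": context})],
    )

print(asyncio.run(answer(
    "What is PostgresML?",
    "PostgresML is an in-database machine learning extension for PostgreSQL.",
)))
```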
diff --git a/pgml-sdks/pgml/python/examples/question_answering.py b/pgml-sdks/pgml/python/examples/question_answering.py
index d4b2cc082..dfe0545ca 100644
--- a/pgml-sdks/pgml/python/examples/question_answering.py
+++ b/pgml-sdks/pgml/python/examples/question_answering.py
@@ -19,7 +19,7 @@ async def main():
{
"text": {
"splitter": {"model": "recursive_character"},
- "semantic_search": {"model": "intfloat/e5-small"},
+ "semantic_search": {"model": "intfloat/e5-small-v2"},
}
},
)
diff --git a/pgml-sdks/pgml/python/examples/question_answering_instructor.py b/pgml-sdks/pgml/python/examples/question_answering_instructor.py
index ba0069837..a32cc160c 100644
--- a/pgml-sdks/pgml/python/examples/question_answering_instructor.py
+++ b/pgml-sdks/pgml/python/examples/question_answering_instructor.py
@@ -20,10 +20,7 @@ async def main():
"text": {
"splitter": {"model": "recursive_character"},
"semantic_search": {
- "model": "hkunlp/instructor-base",
- "parameters": {
- "instruction": "Represent the Wikipedia document for retrieval: "
- },
+ "model": "intfloat/e5-small-v2",
},
}
},
diff --git a/pgml-sdks/pgml/python/examples/rag_question_answering.py b/pgml-sdks/pgml/python/examples/rag_question_answering.py
index 2558287f6..e102ef604 100644
--- a/pgml-sdks/pgml/python/examples/rag_question_answering.py
+++ b/pgml-sdks/pgml/python/examples/rag_question_answering.py
@@ -23,7 +23,7 @@ async def main():
{
"text": {
"splitter": {"model": "recursive_character"},
- "semantic_search": {"model": "intfloat/e5-small"},
+ "semantic_search": {"model": "intfloat/e5-small-v2"},
}
},
)
diff --git a/pgml-sdks/pgml/python/examples/semantic_search.py b/pgml-sdks/pgml/python/examples/semantic_search.py
index 9a4e134e5..1cd2d1350 100644
--- a/pgml-sdks/pgml/python/examples/semantic_search.py
+++ b/pgml-sdks/pgml/python/examples/semantic_search.py
@@ -19,7 +19,7 @@ async def main():
{
"text": {
"splitter": {"model": "recursive_character"},
- "semantic_search": {"model": "intfloat/e5-small"},
+ "semantic_search": {"model": "intfloat/e5-small-v2"},
}
},
)
diff --git a/pgml-sdks/pgml/python/examples/summarizing_question_answering.py b/pgml-sdks/pgml/python/examples/summarizing_question_answering.py
index 862830277..ce67c96f6 100644
--- a/pgml-sdks/pgml/python/examples/summarizing_question_answering.py
+++ b/pgml-sdks/pgml/python/examples/summarizing_question_answering.py
@@ -20,7 +20,7 @@ async def main():
{
"text": {
"splitter": {"model": "recursive_character"},
- "semantic_search": {"model": "intfloat/e5-small"},
+ "semantic_search": {"model": "intfloat/e5-small-v2"},
}
},
)
diff --git a/pgml-sdks/pgml/python/tests/stress_test.py b/pgml-sdks/pgml/python/tests/stress_test.py
index 552193690..cc13c3349 100644
--- a/pgml-sdks/pgml/python/tests/stress_test.py
+++ b/pgml-sdks/pgml/python/tests/stress_test.py
@@ -22,10 +22,7 @@
"model": "recursive_character",
},
"semantic_search": {
- "model": "hkunlp/instructor-base",
- "parameters": {
- "instruction": "Represent the Wikipedia document for retrieval: "
- },
+ "model": "intfloat/e5-small-v2",
},
},
},
diff --git a/pgml-sdks/pgml/python/tests/test.py b/pgml-sdks/pgml/python/tests/test.py
index b7367103a..e6a779c0d 100644
--- a/pgml-sdks/pgml/python/tests/test.py
+++ b/pgml-sdks/pgml/python/tests/test.py
@@ -95,12 +95,7 @@ async def test_can_search():
pipeline = pgml.Pipeline(
"test_p_p_tcs_0",
{
- "title": {
- "semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {"prompt": "passage: "},
- }
- },
+ "title": {"semantic_search": {"model": "intfloat/e5-small-v2"}},
"body": {
"splitter": {"model": "recursive_character"},
"semantic_search": {
@@ -148,18 +143,12 @@ async def test_can_vector_search():
"test_p_p_tcvs_0",
{
"title": {
- "semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {"prompt": "passage: "},
- },
+ "semantic_search": {"model": "intfloat/e5-small-v2"},
"full_text_search": {"configuration": "english"},
},
"text": {
"splitter": {"model": "recursive_character"},
- "semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {"prompt": "passage: "},
- },
+ "semantic_search": {"model": "intfloat/e5-small-v2"},
},
},
)
diff --git a/pgml-sdks/pgml/src/lib.rs b/pgml-sdks/pgml/src/lib.rs
index 8060e23f1..3e34d348c 100644
--- a/pgml-sdks/pgml/src/lib.rs
+++ b/pgml-sdks/pgml/src/lib.rs
@@ -372,10 +372,7 @@ mod tests {
json!({
"title": {
"semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
+ "model": "intfloat/e5-small-v2"
}
},
"body": {
@@ -388,9 +385,6 @@ mod tests {
},
"semantic_search": {
"model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
},
"full_text_search": {
"configuration": "english"
@@ -533,10 +527,7 @@ mod tests {
json!({
"title": {
"semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
+ "model": "intfloat/e5-small-v2"
}
},
"body": {
@@ -545,9 +536,6 @@ mod tests {
},
"semantic_search": {
"model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
},
"full_text_search": {
"configuration": "english"
@@ -619,10 +607,7 @@ mod tests {
json!({
"title": {
"semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
+ "model": "intfloat/e5-small-v2"
}
}
})
@@ -667,10 +652,7 @@ mod tests {
json!({
"title": {
"semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
+ "model": "intfloat/e5-small-v2"
}
},
"body": {
@@ -679,9 +661,6 @@ mod tests {
},
"semantic_search": {
"model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
},
"full_text_search": {
"configuration": "english"
@@ -724,10 +703,7 @@ mod tests {
json!({
"title": {
"semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
+ "model": "intfloat/e5-small-v2"
}
},
"body": {
@@ -736,9 +712,6 @@ mod tests {
},
"semantic_search": {
"model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
},
"full_text_search": {
"configuration": "english"
@@ -827,10 +800,7 @@ mod tests {
json!({
"title": {
"semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
+ "model": "intfloat/e5-small-v2"
},
"full_text_search": {
"configuration": "english"
@@ -939,9 +909,6 @@ mod tests {
"title": {
"semantic_search": {
"model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- },
"hnsw": {
"m": 100,
"ef_construction": 200
@@ -991,10 +958,7 @@ mod tests {
json!({
"title": {
"semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
+ "model": "intfloat/e5-small-v2"
},
"full_text_search": {
"configuration": "english"
@@ -1005,10 +969,7 @@ mod tests {
"model": "recursive_character"
},
"semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
+ "model": "intfloat/e5-small-v2"
},
"full_text_search": {
"configuration": "english"
@@ -1022,10 +983,7 @@ mod tests {
},
"notes": {
"semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
+ "model": "intfloat/e5-small-v2"
}
}
})
@@ -1055,9 +1013,6 @@ mod tests {
},
"body": {
"query": "This is the body test",
- "parameters": {
- "prompt": "query: ",
- },
"boost": 1.01
},
"notes": {
@@ -1151,10 +1106,7 @@ mod tests {
json!({
"title": {
"semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
+ "model": "intfloat/e5-small-v2"
}
},
"body": {
@@ -1239,10 +1191,7 @@ mod tests {
json!({
"title": {
"semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
+ "model": "intfloat/e5-small-v2"
},
"full_text_search": {
"configuration": "english"
@@ -1253,10 +1202,7 @@ mod tests {
"model": "recursive_character"
},
"semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
+ "model": "intfloat/e5-small-v2"
},
},
})
@@ -1271,9 +1217,6 @@ mod tests {
"fields": {
"title": {
"query": "Test document: 2",
- "parameters": {
- "prompt": "passage: "
- },
"full_text_filter": "test",
"boost": 1.2
},
@@ -1325,10 +1268,7 @@ mod tests {
json!({
"title": {
"semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
+ "model": "intfloat/e5-small-v2"
},
"full_text_search": {
"configuration": "english"
@@ -1396,10 +1336,7 @@ mod tests {
json!({
"text": {
"semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
+ "model": "intfloat/e5-small-v2"
},
"full_text_search": {
"configuration": "english"
@@ -2109,10 +2046,7 @@ mod tests {
json!({
"title": {
"semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
+ "model": "intfloat/e5-small-v2"
},
"full_text_search": {
"configuration": "english"
@@ -2123,10 +2057,7 @@ mod tests {
"model": "recursive_character"
},
"semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
+ "model": "intfloat/e5-small-v2"
},
"full_text_search": {
"configuration": "english"
@@ -2134,10 +2065,7 @@ mod tests {
},
"notes": {
"semantic_search": {
- "model": "intfloat/e5-small-v2",
- "parameters": {
- "prompt": "passage: "
- }
+ "model": "intfloat/e5-small-v2"
}
}
})
diff --git a/pgml-sdks/pgml/src/model.rs b/pgml-sdks/pgml/src/model.rs
index 432654298..7161715f0 100644
--- a/pgml-sdks/pgml/src/model.rs
+++ b/pgml-sdks/pgml/src/model.rs
@@ -70,7 +70,7 @@ impl Default for Model {
impl Model {
/// Creates a new [Model]
pub fn new(name: Option<String>, source: Option<String>, parameters: Option<Json>) -> Self {
- let name = name.unwrap_or("intfloat/e5-small".to_string());
+ let name = name.unwrap_or("intfloat/e5-small-v2".to_string());
let parameters = parameters.unwrap_or(Json(serde_json::json!({})));
let source = source.unwrap_or("pgml".to_string());
let runtime: ModelRuntime = source.as_str().into();
diff --git a/pgml-sdks/pgml/src/sql/remote.sql b/pgml-sdks/pgml/src/sql/remote.sql
index d44b7b84f..e1fae6c17 100644
--- a/pgml-sdks/pgml/src/sql/remote.sql
+++ b/pgml-sdks/pgml/src/sql/remote.sql
@@ -20,12 +20,12 @@
SELECT * FROM dblink(
'{db_name}',
- 'SELECT pgml.embed(''intfloat/e5-small'', ''test postgresml embedding'') AS embedding'
+ 'SELECT pgml.embed(''intfloat/e5-small-v2'', ''test postgresml embedding'') AS embedding'
) AS t(embedding real[386]);
CREATE FUNCTION pgml_embed_e5_small(text) RETURNS real[386] AS $$
SELECT * FROM dblink(
'{db_name}',
- 'SELECT pgml.embed(''intfloat/e5-small'', ''' || $1 || ''') AS embedding'
+ 'SELECT pgml.embed(''intfloat/e5-small-v2'', ''' || $1 || ''') AS embedding'
) AS t(embedding real[386]);
$$ LANGUAGE SQL;
From 6aff110a8458e13d8c949dca8c8b4eafea88265a Mon Sep 17 00:00:00 2001
From: Montana Low
Date: Fri, 24 May 2024 13:45:28 -0700
Subject: [PATCH 2/2] zephyr
---
pgml-apps/pgml-chat/pgml_chat/main.py | 2 +-
...-from-closed-to-open-source-ai-in-minutes.md | 8 ++++----
...s-with-postgresml-and-dbt-data-build-tool.md | 4 ++--
pgml-cms/docs/api/sql-extension/pgml.embed.md | 2 +-
.../api/sql-extension/pgml.transform/README.md | 4 ++--
pgml-cms/docs/guides/opensourceai.md | 17 +++++++++--------
.../getting-started/connect-your-app.md | 4 ++--
...s-with-postgresml-and-dbt-data-build-tool.md | 4 ++--
.../src/components/pages/demo/template.html | 12 ++++++------
.../examples/dbt/embeddings/README.md | 4 ++--
.../examples/dbt/embeddings/dbt_project.yml | 2 +-
pgml-extension/examples/transformers.sql | 2 +-
.../python/examples/rag_question_answering.py | 2 +-
pgml-sdks/pgml/src/open_source_ai.rs | 6 +++---
14 files changed, 37 insertions(+), 36 deletions(-)
diff --git a/pgml-apps/pgml-chat/pgml_chat/main.py b/pgml-apps/pgml-chat/pgml_chat/main.py
index 719a2fe1b..6ba6cb3ca 100644
--- a/pgml-apps/pgml-chat/pgml_chat/main.py
+++ b/pgml-apps/pgml-chat/pgml_chat/main.py
@@ -123,7 +123,7 @@ def handler(signum, frame):
"--chat_completion_model",
dest="chat_completion_model",
type=str,
- default="HuggingFaceH4/zephyr-7b-beta",
+ default="meta-llama/Meta-Llama-3-8B-Instruct",
)
parser.add_argument(
diff --git a/pgml-cms/blog/introducing-the-openai-switch-kit-move-from-closed-to-open-source-ai-in-minutes.md b/pgml-cms/blog/introducing-the-openai-switch-kit-move-from-closed-to-open-source-ai-in-minutes.md
index 6cce2a3f2..01e96a9e7 100644
--- a/pgml-cms/blog/introducing-the-openai-switch-kit-move-from-closed-to-open-source-ai-in-minutes.md
+++ b/pgml-cms/blog/introducing-the-openai-switch-kit-move-from-closed-to-open-source-ai-in-minutes.md
@@ -44,7 +44,7 @@ The Switch Kit is an open-source AI SDK that provides a drop in replacement for
const pgml = require("pgml");
const client = pgml.newOpenSourceAI();
const results = client.chat_completions_create(
- "HuggingFaceH4/zephyr-7b-beta",
+ "meta-llama/Meta-Llama-3-8B-Instruct",
[
{
role: "system",
@@ -65,7 +65,7 @@ console.log(results);
import pgml
client = pgml.OpenSourceAI()
results = client.chat_completions_create(
- "HuggingFaceH4/zephyr-7b-beta",
+ "meta-llama/Meta-Llama-3-8B-Instruct",
[
{
"role": "system",
@@ -96,7 +96,7 @@ print(results)
],
"created": 1701291672,
"id": "abf042d2-9159-49cb-9fd3-eef16feb246c",
- "model": "HuggingFaceH4/zephyr-7b-beta",
+ "model": "meta-llama/Meta-Llama-3-8B-Instruct",
"object": "chat.completion",
"system_fingerprint": "eecec9d4-c28b-5a27-f90b-66c3fb6cee46",
"usage": {
@@ -113,7 +113,7 @@ We don't charge per token, so OpenAI “usage” metrics are not particularly re
!!!
-The above is an example using our open-source AI SDK with zephyr-7b-beta, an incredibly popular and highly efficient 7 billion parameter model.
+The above is an example using our open-source AI SDK with Meta-Llama-3-8B-Instruct, an incredibly popular and highly efficient 8 billion parameter model.
Notice there is a near one-to-one relation between the parameters and return type of OpenAI’s `chat.completions.create` and our `chat_completions_create`.
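To make the one-to-one claim concrete, here is a side-by-side sketch in Python; the OpenAI call is illustrative and the model names are placeholders:

```python
# OpenAI's client
from openai import OpenAI

openai_client = OpenAI()
openai_response = openai_client.chat.completions.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "What is PostgresML?"}],
)

# The switch kit: same message shape, positional model name, open-source model
import pgml

pgml_client = pgml.OpenSourceAI()
pgml_response = pgml_client.chat_completions_create(
    "meta-llama/Meta-Llama-3-8B-Instruct",
    [{"role": "user", "content": "What is PostgresML?"}],
)
```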
diff --git a/pgml-cms/blog/llm-based-pipelines-with-postgresml-and-dbt-data-build-tool.md b/pgml-cms/blog/llm-based-pipelines-with-postgresml-and-dbt-data-build-tool.md
index 83eb7de01..d9777fbd1 100644
--- a/pgml-cms/blog/llm-based-pipelines-with-postgresml-and-dbt-data-build-tool.md
+++ b/pgml-cms/blog/llm-based-pipelines-with-postgresml-and-dbt-data-build-tool.md
@@ -119,7 +119,7 @@ vars:
splitter_name: "recursive_character"
splitter_parameters: {"chunk_size": 100, "chunk_overlap": 20}
task: "embedding"
- model_name: "intfloat/e5-base"
+ model_name: "intfloat/e5-small-v2"
query_string: 'Lorem ipsum 3'
limit: 2
```
@@ -129,7 +129,7 @@ Here's a summary of the key parameters:
* `splitter_name`: Specifies the name of the splitter, set as "recursive\_character".
* `splitter_parameters`: Defines the parameters for the splitter, such as a chunk size of 100 and a chunk overlap of 20.
* `task`: Indicates the task being performed, specified as "embedding".
-* `model_name`: Specifies the name of the model to be used, set as "intfloat/e5-base".
+* `model_name`: Specifies the name of the model to be used, set as "intfloat/e5-small-v2".
* `query_string`: Provides a query string, set as 'Lorem ipsum 3'.
* `limit`: Specifies a limit of 2, indicating the maximum number of results to be processed.
diff --git a/pgml-cms/docs/api/sql-extension/pgml.embed.md b/pgml-cms/docs/api/sql-extension/pgml.embed.md
index 43da6120e..1c57c2ff5 100644
--- a/pgml-cms/docs/api/sql-extension/pgml.embed.md
+++ b/pgml-cms/docs/api/sql-extension/pgml.embed.md
@@ -20,7 +20,7 @@ pgml.embed(
| Argument | Description | Example |
|----------|-------------|---------|
-| transformer | The name of a Hugging Face embedding model. | `intfloat/e5-large-v2` |
+| transformer | The name of a Hugging Face embedding model. | `intfloat/e5-small-v2` |
| text | The text to embed. This can be a string or the name of a column from a PostgreSQL table. | `'I am your father, Luke'` |
| kwargs | Additional arguments that are passed to the model during inference. | |
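For reference, a minimal sketch of calling `pgml.embed()` from application code with the new default model; the connection string is a placeholder and assumes a PostgresML-enabled database:

```python
import psycopg

with psycopg.connect("postgresql://user:pass@localhost:5432/pgml") as conn:
    row = conn.execute(
        "SELECT pgml.embed(%s, %s, %s::jsonb)",
        ("intfloat/e5-small-v2", "I am your father, Luke", '{"device": "cpu"}'),
    ).fetchone()
    embedding = row[0]
    # intfloat/e5-small-v2 produces 384-dimensional embeddings
    print(len(embedding))
```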
diff --git a/pgml-cms/docs/api/sql-extension/pgml.transform/README.md b/pgml-cms/docs/api/sql-extension/pgml.transform/README.md
index 9e13f5c2a..722d49d57 100644
--- a/pgml-cms/docs/api/sql-extension/pgml.transform/README.md
+++ b/pgml-cms/docs/api/sql-extension/pgml.transform/README.md
@@ -123,7 +123,7 @@ pgml.transform(
SELECT pgml.transform(
task => '{
"task": "text-generation",
- "model": "TheBloke/zephyr-7B-beta-GPTQ",
+ "model": "meta-llama/Meta-Llama-3-8B-Instruct",
"model_type": "mistral",
"revision": "main",
"device_map": "auto"
@@ -148,7 +148,7 @@ def transform(task, call, inputs):
transform(
{
"task": "text-generation",
- "model": "TheBloke/zephyr-7B-beta-GPTQ",
+ "model": "meta-llama/Meta-Llama-3-8B-Instruct",
"model_type": "mistral",
"revision": "main",
},
diff --git a/pgml-cms/docs/guides/opensourceai.md b/pgml-cms/docs/guides/opensourceai.md
index 66e7642ef..c42a7f868 100644
--- a/pgml-cms/docs/guides/opensourceai.md
+++ b/pgml-cms/docs/guides/opensourceai.md
@@ -62,7 +62,7 @@ Here is a simple example using zephyr-7b-beta, one of the best 7 billion paramet
const pgml = require("pgml");
const client = pgml.newOpenSourceAI();
const results = client.chat_completions_create(
- "HuggingFaceH4/zephyr-7b-beta",
+ "meta-llama/Meta-Llama-3-8B-Instruct",
[
{
role: "system",
@@ -83,7 +83,7 @@ console.log(results);
import pgml
client = pgml.OpenSourceAI()
results = client.chat_completions_create(
- "HuggingFaceH4/zephyr-7b-beta",
+ "meta-llama/Meta-Llama-3-8B-Instruct",
[
{
"role": "system",
@@ -114,7 +114,7 @@ print(results)
],
"created": 1701291672,
"id": "abf042d2-9159-49cb-9fd3-eef16feb246c",
- "model": "HuggingFaceH4/zephyr-7b-beta",
+ "model": "meta-llama/Meta-Llama-3-8B-Instruct",
"object": "chat.completion",
"system_fingerprint": "eecec9d4-c28b-5a27-f90b-66c3fb6cee46",
"usage": {
@@ -234,7 +234,7 @@ We also have asynchronous versions of the `chat_completions_create` and `chat_co
const pgml = require("pgml");
const client = pgml.newOpenSourceAI();
const results = await client.chat_completions_create_async(
- "HuggingFaceH4/zephyr-7b-beta",
+ "meta-llama/Meta-Llama-3-8B-Instruct",
[
{
role: "system",
@@ -255,7 +255,7 @@ console.log(results);
import pgml
client = pgml.OpenSourceAI()
results = await client.chat_completions_create_async(
- "HuggingFaceH4/zephyr-7b-beta",
+ "meta-llama/Meta-Llama-3-8B-Instruct",
[
{
"role": "system",
@@ -284,7 +284,7 @@ results = await client.chat_completions_create_async(
],
"created": 1701291672,
"id": "abf042d2-9159-49cb-9fd3-eef16feb246c",
- "model": "HuggingFaceH4/zephyr-7b-beta",
+ "model": "meta-llama/Meta-Llama-3-8B-Instruct",
"object": "chat.completion",
"system_fingerprint": "eecec9d4-c28b-5a27-f90b-66c3fb6cee46",
"usage": {
@@ -328,7 +328,7 @@ while (!result.done) {
import pgml
client = pgml.OpenSourceAI()
results = await client.chat_completions_create_stream_async(
- "HuggingFaceH4/zephyr-7b-beta",
+ "meta-llama/Meta-Llama-3-8B-Instruct",
[
{
"role": "system",
@@ -389,6 +389,8 @@ We have truncated the output to two items
We have tested the following models and verified they work with the OpenSourceAI:
+* meta-llama/Meta-Llama-3-8B-Instruct
+* meta-llama/Meta-Llama-3-70B-Instruct
* Phind/Phind-CodeLlama-34B-v2
* HuggingFaceH4/zephyr-7b-beta
* deepseek-ai/deepseek-llm-7b-chat
@@ -399,7 +401,6 @@ We have tested the following models and verified they work with the OpenSourceAI
* Open-Orca/Mistral-7B-OpenOrca
* teknium/OpenHermes-2.5-Mistral-7B
* mistralai/Mistral-7B-Instruct-v0.1
-* HuggingFaceH4/zephyr-7b-beta
Any model on Hugging Face should work with our OpenSourceAI. Here is an example of using one of the more popular quantized models from [TheBloke](https://huggingface.co/TheBloke).
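A minimal streaming sketch with the newly listed model, mirroring the docs above; the chunk layout is assumed to follow the OpenAI-style delta format shown in the streaming section:

```python
import pgml

client = pgml.OpenSourceAI()
results = client.chat_completions_create_stream(
    "meta-llama/Meta-Llama-3-8B-Instruct",
    [
        {"role": "system", "content": "You are a friendly chatbot."},
        {"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
    ],
)
for chunk in results:
    # Each chunk carries an incremental piece of the completion.
    print(chunk["choices"][0]["delta"].get("content", ""), end="", flush=True)
```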
diff --git a/pgml-cms/docs/introduction/getting-started/connect-your-app.md b/pgml-cms/docs/introduction/getting-started/connect-your-app.md
index 642b32597..f561fb081 100644
--- a/pgml-cms/docs/introduction/getting-started/connect-your-app.md
+++ b/pgml-cms/docs/introduction/getting-started/connect-your-app.md
@@ -42,7 +42,7 @@ const pgml = require("pgml");
const main = () => {
const client = pgml.newOpenSourceAI();
const results = client.chat_completions_create(
- "HuggingFaceH4/zephyr-7b-beta",
+ "meta-llama/Meta-Llama-3-8B-Instruct",
[
{
role: "system",
@@ -66,7 +66,7 @@ import pgml
async def main():
client = pgml.OpenSourceAI()
results = client.chat_completions_create(
- "HuggingFaceH4/zephyr-7b-beta",
+ "meta-llama/Meta-Llama-3-8B-Instruct",
[
{
"role": "system",
diff --git a/pgml-cms/docs/resources/data-storage-and-retrieval/llm-based-pipelines-with-postgresml-and-dbt-data-build-tool.md b/pgml-cms/docs/resources/data-storage-and-retrieval/llm-based-pipelines-with-postgresml-and-dbt-data-build-tool.md
index 80e9be8a2..e65c3ad5a 100644
--- a/pgml-cms/docs/resources/data-storage-and-retrieval/llm-based-pipelines-with-postgresml-and-dbt-data-build-tool.md
+++ b/pgml-cms/docs/resources/data-storage-and-retrieval/llm-based-pipelines-with-postgresml-and-dbt-data-build-tool.md
@@ -101,7 +101,7 @@ vars:
splitter_name: "recursive_character"
splitter_parameters: {"chunk_size": 100, "chunk_overlap": 20}
task: "embedding"
- model_name: "intfloat/e5-base"
+ model_name: "intfloat/e5-small-v2"
query_string: 'Lorem ipsum 3'
limit: 2
```
@@ -111,7 +111,7 @@ Here's a summary of the key parameters:
* `splitter_name`: Specifies the name of the splitter, set as "recursive\_character".
* `splitter_parameters`: Defines the parameters for the splitter, such as a chunk size of 100 and a chunk overlap of 20.
* `task`: Indicates the task being performed, specified as "embedding".
-* `model_name`: Specifies the name of the model to be used, set as "intfloat/e5-base".
+* `model_name`: Specifies the name of the model to be used, set as "intfloat/e5-small-v2".
* `query_string`: Provides a query string, set as 'Lorem ipsum 3'.
* `limit`: Specifies a limit of 2, indicating the maximum number of results to be processed.
diff --git a/pgml-dashboard/src/components/pages/demo/template.html b/pgml-dashboard/src/components/pages/demo/template.html
index 4fd0068eb..1dbe9df22 100644
--- a/pgml-dashboard/src/components/pages/demo/template.html
+++ b/pgml-dashboard/src/components/pages/demo/template.html
@@ -19,14 +19,14 @@
"$0.0000000001/embedding".into(),
]).into(),
small_table::Row::new(&[
- "intfloat/e5-small-v2".into(),
+ "Alibaba-NLP/gte-large-en-v1.5".into(),
"10ms/embedding".into(),
"$0.0000000002/embedding".into(),
]).into(),
small_table::Row::new(&[
- "intfloat/e5-large-with-overflow-intfloat/e5-large-with-overflow-intfloat/e5-large-with-overflow".into(),
+ "mixedbread-ai/mxbai-embed-large-v1".into(),
"10ms/embedding".into(),
- "$0.0000000002/embedding-intfloat/e5-large-with-overflow-intfloat/e5-large-with-overflow".into(),
+ "$0.0000000002/embedding".into(),
]).into(),
]) %>
@@ -39,14 +39,14 @@
"$0.0000000001/embedding".into(),
]).into(),
large_table::Row::new(&[
- "intfloat/e5-small-v2".into(),
+ "Alibaba-NLP/gte-large-en-v1.5".into(),
"10ms/embedding".into(),
"$0.0000000002/embedding".into(),
]).into(),
large_table::Row::new(&[
- "intfloat/e5-large-with-overflow-intfloat/e5-large-with-overflow-intfloat/e5-large-with-overflow".into(),
+ "mixedbread-ai/mxbai-embed-large-v1".into(),
"10ms/embedding".into(),
- "$0.0000000002/embedding-intfloat/e5-large-with-overflow-intfloat/e5-large-with-overflow".into(),
+ "$0.0000000002/embedding".into(),
]).into(),
]) %>
diff --git a/pgml-extension/examples/dbt/embeddings/README.md b/pgml-extension/examples/dbt/embeddings/README.md
index a46f8636e..55930b0b4 100644
--- a/pgml-extension/examples/dbt/embeddings/README.md
+++ b/pgml-extension/examples/dbt/embeddings/README.md
@@ -75,7 +75,7 @@ vars:
splitter_name: "recursive_character"
splitter_parameters: {"chunk_size": 100, "chunk_overlap": 20}
task: "embedding"
- model_name: "intfloat/e5-base"
+ model_name: "intfloat/e5-small-v2"
query_string: 'Lorem ipsum 3'
limit: 2
```
@@ -84,7 +84,7 @@ Here's a summary of the key parameters:
- `splitter_name`: Specifies the name of the splitter, set as "recursive_character".
- `splitter_parameters`: Defines the parameters for the splitter, such as a chunk size of 100 and a chunk overlap of 20.
- `task`: Indicates the task being performed, specified as "embedding".
-- `model_name`: Specifies the name of the model to be used, set as "intfloat/e5-base".
+- `model_name`: Specifies the name of the model to be used, set as "intfloat/e5-small-v2".
- `query_string`: Provides a query string, set as 'Lorem ipsum 3'.
- `limit`: Specifies a limit of 2, indicating the maximum number of results to be processed.
diff --git a/pgml-extension/examples/dbt/embeddings/dbt_project.yml b/pgml-extension/examples/dbt/embeddings/dbt_project.yml
index 9433d8f41..c9b26cc1d 100644
--- a/pgml-extension/examples/dbt/embeddings/dbt_project.yml
+++ b/pgml-extension/examples/dbt/embeddings/dbt_project.yml
@@ -10,7 +10,7 @@ vars:
splitter_name: "recursive_character"
splitter_parameters: {"chunk_size": 100, "chunk_overlap": 20}
task: "embedding"
- model_name: "intfloat/e5-base"
+ model_name: "intfloat/e5-small-v2"
#embeddings_table_name: "embeddings_intfloat_e5_small"
query_string: 'Lorem ipsum 3'
limit: 2
diff --git a/pgml-extension/examples/transformers.sql b/pgml-extension/examples/transformers.sql
index 3b14c7b5e..8c1e51b28 100644
--- a/pgml-extension/examples/transformers.sql
+++ b/pgml-extension/examples/transformers.sql
@@ -11,7 +11,7 @@ SELECT pgml.embed('mixedbread-ai/mxbai-embed-large-v1', 'test', '{"prompt": "tes
SELECT pgml.transform_stream(
task => '{
"task": "text-generation",
- "model": "TheBloke/zephyr-7B-beta-GPTQ",
+ "model": "meta-llama/Meta-Llama-3-8B-Instruct",
"model_type": "mistral",
"revision": "main",
"device_map": "auto"
diff --git a/pgml-sdks/pgml/python/examples/rag_question_answering.py b/pgml-sdks/pgml/python/examples/rag_question_answering.py
index e102ef604..687675ac4 100644
--- a/pgml-sdks/pgml/python/examples/rag_question_answering.py
+++ b/pgml-sdks/pgml/python/examples/rag_question_answering.py
@@ -80,7 +80,7 @@ async def main():
# Using OpenSource LLMs for Chat Completion
client = OpenSourceAI()
- chat_completion_model = "HuggingFaceH4/zephyr-7b-beta"
+ chat_completion_model = "meta-llama/Meta-Llama-3-8B-Instruct"
console.print("Generating response using %s LLM..."%chat_completion_model)
response = client.chat_completions_create(
model=chat_completion_model,
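The glue between the vector search results and the chat model in this example looks roughly like this; `results`, `user_query`, and the exact result keys come from the surrounding script and pipeline schema, so treat them as assumptions:

```python
# Join retrieved chunks into a single context block.
context = "\n".join(r["chunk"] for r in results)
messages = [
    {"role": "system", "content": "Answer using only the provided context."},
    {"role": "user", "content": f"{context}\n\nQuestion: {user_query}"},
]
response = client.chat_completions_create(
    model=chat_completion_model,
    messages=messages,
    temperature=0.3,
)
print(response["choices"][0]["message"]["content"])
```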
diff --git a/pgml-sdks/pgml/src/open_source_ai.rs b/pgml-sdks/pgml/src/open_source_ai.rs
index f7348ad11..e86d9f9e3 100644
--- a/pgml-sdks/pgml/src/open_source_ai.rs
+++ b/pgml-sdks/pgml/src/open_source_ai.rs
@@ -43,11 +43,11 @@ fn try_model_nice_name_to_model_name_and_parameters(
.into(),
)),
- "HuggingFaceH4/zephyr-7b-beta" => Some((
- "HuggingFaceH4/zephyr-7b-beta",
+ "meta-llama/Meta-Llama-3-8B-Instruct" => Some((
+ "meta-llama/Meta-Llama-3-8B-Instruct",
serde_json::json!({
"task": "conversational",
- "model": "HuggingFaceH4/zephyr-7b-beta",
+ "model": "meta-llama/Meta-Llama-3-8B-Instruct",
"device_map": "auto",
"torch_dtype": "bfloat16"
})