diff --git a/pgml-sdks/rust/pgml/javascript/examples/README.md b/pgml-sdks/rust/pgml/javascript/examples/README.md index 440058e4f..77e13b638 100644 --- a/pgml-sdks/rust/pgml/javascript/examples/README.md +++ b/pgml-sdks/rust/pgml/javascript/examples/README.md @@ -11,3 +11,6 @@ In this example, we will use `hknlp/instructor-base` model to build text embeddi ### [Extractive Question Answering](./extractive_question_answering.js) In this example, we will show how to use `vector_recall` result as a `context` to a HuggingFace question answering model. We will use `Builtins.transform()` to run the model on the database. + +### [Summarizing Question Answering](./summarizing_question_answering.js) +This is an example to find documents relevant to a question from the collection of documents and then summarize those documents. diff --git a/pgml-sdks/rust/pgml/javascript/examples/summarizing_question_answering.js b/pgml-sdks/rust/pgml/javascript/examples/summarizing_question_answering.js new file mode 100644 index 000000000..a5e5fe19b --- /dev/null +++ b/pgml-sdks/rust/pgml/javascript/examples/summarizing_question_answering.js @@ -0,0 +1,63 @@ +const pgml = require("pgml"); +require("dotenv").config(); + +pgml.js_init_logger(); + +const main = async () => { + // Initialize the collection + const collection = pgml.newCollection("my_javascript_sqa_collection"); + + // Add a pipeline + const model = pgml.newModel(); + const splitter = pgml.newSplitter(); + const pipeline = pgml.newPipeline( + "my_javascript_sqa_pipeline", + model, + splitter, + ); + await collection.add_pipeline(pipeline); + + // Upsert documents, these documents are automatically split into chunks and embedded by our pipeline + const documents = [ + { + id: "Document One", + text: "PostgresML is the best tool for machine learning applications!", + }, + { + id: "Document Two", + text: "PostgresML is open source and available to everyone!", + }, + ]; + await collection.upsert_documents(documents); + + const 
query = "What is the best tool for machine learning?"; + + // Perform vector search + const queryResults = await collection + .query() + .vector_recall(query, pipeline) + .limit(1) + .fetch_all(); + + // Construct context from results + const context = queryResults + .map((result) => { + return result[1]; + }) + .join("\n"); + + // Query for summarization + const builtins = pgml.newBuiltins(); + const answer = await builtins.transform( + { task: "summarization", model: "sshleifer/distilbart-cnn-12-6" }, + [context], + ); + + // Archive the collection + await collection.archive(); + return answer; +}; + +main().then((results) => { + console.log("Question summary: \n", results); +}); diff --git a/pgml-sdks/rust/pgml/python/examples/README.md b/pgml-sdks/rust/pgml/python/examples/README.md index e2e22eb6e..81416c038 100644 --- a/pgml-sdks/rust/pgml/python/examples/README.md +++ b/pgml-sdks/rust/pgml/python/examples/README.md @@ -15,3 +15,6 @@ In this example, we will show how to use `vector_recall` result as a `context` t ### [Table Question Answering](./table_question_answering.py) In this example, we will use [Open Table-and-Text Question Answering (OTT-QA) ](https://github.com/wenhuchen/OTT-QA) dataset to run queries on tables. We will use `deepset/all-mpnet-base-v2-table` model that is trained for embedding tabular data for retrieval tasks. + +### [Summarizing Question Answering](./summarizing_question_answering.py) +This is an example to find documents relevant to a question from the collection of documents and then summarize those documents. 
diff --git a/pgml-sdks/rust/pgml/python/examples/extractive_question_answering.py b/pgml-sdks/rust/pgml/python/examples/extractive_question_answering.py index 21cfc90b5..21b5f2e67 100644 --- a/pgml-sdks/rust/pgml/python/examples/extractive_question_answering.py +++ b/pgml-sdks/rust/pgml/python/examples/extractive_question_answering.py @@ -56,8 +56,7 @@ async def main(): "question-answering", [json.dumps({"question": query, "context": context})] ) end = time() - console.print("Results for query '%s'" % query, style="bold") - console.print(answer) + console.print("Answer '%s'" % answer, style="bold") console.print("Query time = %0.3f" % (end - start)) # Archive collection diff --git a/pgml-sdks/rust/pgml/python/examples/summarizing_question_answering.py b/pgml-sdks/rust/pgml/python/examples/summarizing_question_answering.py new file mode 100644 index 000000000..4c291aac0 --- /dev/null +++ b/pgml-sdks/rust/pgml/python/examples/summarizing_question_answering.py @@ -0,0 +1,71 @@ +from pgml import Collection, Model, Splitter, Pipeline, Builtins, py_init_logger +import json +from datasets import load_dataset +from time import time +from dotenv import load_dotenv +from rich.console import Console +import asyncio + + +py_init_logger() + + +async def main(): + load_dotenv() + console = Console() + + # Initialize collection + collection = Collection("squad_collection") + + # Create a pipeline using the default model and splitter + model = Model() + splitter = Splitter() + pipeline = Pipeline("squadv1", model, splitter) + await collection.add_pipeline(pipeline) + + # Prep documents for upserting + data = load_dataset("squad", split="train") + data = data.to_pandas() + data = data.drop_duplicates(subset=["context"]) + documents = [ + {"id": r["id"], "text": r["context"], "title": r["title"]} + for r in data.to_dict(orient="records") + ] + + # Upsert documents + await collection.upsert_documents(documents[:200]) + + # Query for context + query = "Who won more than 20 grammy 
awards?" + console.print("Querying for context ...") + start = time() + results = ( + await collection.query().vector_recall(query, pipeline).limit(5).fetch_all() + ) + end = time() + console.print("\n Results for '%s' " % (query), style="bold") + console.print(results) + console.print("Query time = %0.3f" % (end - start)) + + # Construct context from results + context = " ".join(results[0][1].strip().split()) + context = context.replace('"', '\\"').replace("'", "''") + + # Query for summary + builtins = Builtins() + console.print("Querying for summary ...") + start = time() + summary = await builtins.transform( + {"task": "summarization", "model": "sshleifer/distilbart-cnn-12-6"}, + [context], + ) + end = time() + console.print("Summary '%s'" % summary, style="bold") + console.print("Query time = %0.3f" % (end - start)) + + # Archive collection + await collection.archive() + + +if __name__ == "__main__": + asyncio.run(main()) pFad - Phonifier reborn