Skip to content

Commit d157f34

Browse files
committed
example for query builder
1 parent 6fc17d0 commit d157f34

File tree

4 files changed

+60
-6
lines changed

4 files changed

+60
-6
lines changed

pgml-sdks/python/pgml/examples/question_answering.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333

3434
start = time()
3535
query = "Who won 20 grammy awards?"
36-
results = collection.vector_search(query, top_k=5, metadata_filter={"title" : "Beyoncé"})
36+
results = collection.vector_search(query, top_k=5, metadata_filter={"title": "Beyoncé"})
3737
_end = time()
3838
console.print("\nResults for '%s'" % (query), style="bold")
3939
console.print(results)
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
from pgml import Database
2+
import os
3+
import json
4+
from datasets import load_dataset
5+
from time import time
6+
from dotenv import load_dotenv
7+
from rich.console import Console
8+
from pypika import Table
9+
10+
load_dotenv()
11+
console = Console()
12+
13+
local_pgml = "postgres://postgres@127.0.0.1:5433/pgml_development"
14+
15+
conninfo = os.environ.get("PGML_CONNECTION", local_pgml)
16+
db = Database(conninfo)
17+
18+
collection_name = "squad_collection"
19+
collection = db.create_or_get_collection(collection_name)
20+
21+
22+
data = load_dataset("squad", split="train")
23+
data = data.to_pandas()
24+
data = data.drop_duplicates(subset=["context"])
25+
26+
documents = [
27+
{"id": r["id"], "text": r["context"], "title": r["title"]}
28+
for r in data.to_dict(orient="records")
29+
]
30+
31+
collection.upsert_documents(documents[:200])
32+
collection.generate_chunks()
33+
collection.generate_embeddings()
34+
35+
start = time()
36+
query = "Who won 20 grammy awards?"
37+
documents_table = Table("documents", schema=collection_name)
38+
sql_query = (
39+
collection.vector_recall(query)
40+
.where(documents_table.metadata.contains({"title": "Beyoncé"}))
41+
.limit(5)
42+
)
43+
results = collection.execute(sql_query)
44+
_end = time()
45+
console.print("\nResults for '%s'" % (query), style="bold")
46+
console.print(results)
47+
console.print("Query time = %0.3f" % (_end - start))
48+
49+
# db.archive_collection(collection_name)

pgml-sdks/python/pgml/pgml/collection.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -876,11 +876,14 @@ def vector_recall(
876876
.from_(embeddings_table)
877877
.select(
878878
"chunk_id",
879-
CosineDistance(
880-
embeddings_table.embedding, Cast(query_cte.embedding, "vector")
879+
(
880+
1.0
881+
- CosineDistance(
882+
embeddings_table.embedding, Cast(query_cte.embedding, "vector")
883+
)
881884
).as_("score"),
882885
)
883-
.inner_join(AliasedQuery("query_cte"))
886+
.join(AliasedQuery("query_cte"))
884887
.cross()
885888
)
886889

pgml-sdks/python/pgml/tests/test_collection.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -174,8 +174,10 @@ def test_vector_recall(self):
174174
query = (
175175
self.collection.vector_recall("product is abc")
176176
.where(documents_table.metadata.contains({"source": "amazon"}))
177-
.where(Cast(documents_table.metadata.get_json_value("reviews"),'INTEGER') < 45)
178-
.limit(10)
177+
.where(
178+
Cast(documents_table.metadata.get_json_value("reviews"), "INTEGER") < 45
179+
)
180+
.limit(2)
179181
)
180182
results = self.collection.execute(query)
181183
assert results[0]["metadata"]["user"] == "John Doe"

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

pFad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy