@@ -683,6 +683,7 @@ def vector_search(
683
683
top_k : int = 5 ,
684
684
model_id : int = 1 ,
685
685
splitter_id : int = 1 ,
686
+ ** kwargs : Any ,
686
687
) -> List [Dict [str , Any ]]:
687
688
"""
688
689
This function performs a vector search on a database using a query and returns the top matching
@@ -702,6 +703,9 @@ def vector_search(
702
703
splitter used to split the documents into chunks. It is used to retrieve the embeddings table
703
704
associated with the specified splitter, defaults to 1
704
705
:type splitter_id: int (optional)
706
+ :param kwargs: Additional metadata filters applied to the search query. Each keyword names a key
707
+ in the documents' metadata, and only results whose metadata value for that key equals the given
708
+ value are returned; multiple filters are combined with AND.
705
709
:return: a list of dictionaries containing search results for a given query. Each dictionary
706
710
contains the following keys: "score", "text", and "metadata". The "score" key contains a float
707
711
value representing the similarity score between the query and the search result. The "text" key
@@ -749,6 +753,13 @@ def vector_search(
749
753
% (model_id , splitter_id , model_id , splitter_id )
750
754
)
751
755
return []
756
+
757
+ if kwargs :
758
+ metadata_filter = [f"documents.metadata->>'{ k } ' = '{ v } '" if isinstance (v , str ) else f"documents.metadata->>'{ k } ' = { v } " for k , v in kwargs .items ()]
759
+ metadata_filter = " AND " .join (metadata_filter )
760
+ metadata_filter = f"AND { metadata_filter } "
761
+ else :
762
+ metadata_filter = ""
752
763
753
764
cte_select_statement = """
754
765
WITH query_cte AS (
@@ -764,7 +775,7 @@ def vector_search(
764
775
SELECT cte.score, chunks.chunk, documents.metadata
765
776
FROM cte
766
777
INNER JOIN {chunks_table} chunks ON chunks.id = cte.chunk_id
767
- INNER JOIN {documents_table} documents ON documents.id = chunks.document_id;
778
+ INNER JOIN {documents_table} documents ON documents.id = chunks.document_id {metadata_filter}
768
779
""" .format (
769
780
model = sql .Literal (model ).as_string (conn ),
770
781
query_text = query ,
@@ -773,6 +784,7 @@ def vector_search(
773
784
top_k = top_k ,
774
785
chunks_table = self .chunks_table ,
775
786
documents_table = self .documents_table ,
787
+ metadata_filter = metadata_filter ,
776
788
)
777
789
778
790
search_results = run_select_statement (
0 commit comments