
Commit 02c5f12

Initial port to pydantic-ai
1 parent 1023c8b

File tree: 4 files changed, +111 -103 lines

Lines changed: 2 additions & 6 deletions

@@ -1,6 +1,2 @@
-Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching database rows.
-You have access to an Azure PostgreSQL database with an items table that has columns for title, description, brand, price, and type.
-Generate a search query based on the conversation and the new question.
-If the question is not in English, translate the question to English before generating the search query.
-If you cannot generate a search query, return the original user question.
-DO NOT return anything besides the query.
+Your job is to find search results based off the user's question and past messages.
+Once you get the search results, you're done.

src/backend/fastapi_app/rag_advanced.py

Lines changed: 104 additions & 89 deletions

@@ -1,9 +1,14 @@
+import os
 from collections.abc import AsyncGenerator
-from typing import Any, Final, Optional, Union
+from typing import Optional, TypedDict, Union

 from openai import AsyncAzureOpenAI, AsyncOpenAI, AsyncStream
-from openai.types.chat import ChatCompletion, ChatCompletionChunk, ChatCompletionMessageParam
-from openai_messages_token_helper import build_messages, get_token_limit
+from openai.types.chat import ChatCompletionChunk, ChatCompletionMessageParam
+from openai_messages_token_helper import get_token_limit
+from pydantic_ai import Agent, RunContext
+from pydantic_ai.models.openai import OpenAIModel
+from pydantic_ai.providers.openai import OpenAIProvider
+from pydantic_ai.settings import ModelSettings

 from fastapi_app.api_models import (
     AIChatRoles,
@@ -15,9 +20,35 @@
 )
 from fastapi_app.postgres_models import Item
 from fastapi_app.postgres_searcher import PostgresSearcher
-from fastapi_app.query_rewriter import build_search_function, extract_search_arguments
 from fastapi_app.rag_base import ChatParams, RAGChatBase

+# Experiment #1: Annotated did not work!
+# Experiment #2: Function-level docstring, Inline docstrings next to attributes
+# Function-level docstring leads to XML like this: <summary>Search ...
+# Experiment #3: Move the docstrings below the attributes in triple-quoted strings - SUCCESS!!!
+
+
+class PriceFilter(TypedDict):
+    column: str = "price"
+    """The column to filter on (always 'price' for this filter)"""
+
+    comparison_operator: str
+    """The operator for price comparison ('>', '<', '>=', '<=', '=')"""
+
+    value: float
+    """The price value to compare against (e.g., 30.00)"""
+
+
+class BrandFilter(TypedDict):
+    column: str = "brand"
+    """The column to filter on (always 'brand' for this filter)"""
+
+    comparison_operator: str
+    """The operator for brand comparison ('=' or '!=')"""
+
+    value: str
+    """The brand name to compare against (e.g., 'AirStrider')"""
+

 class AdvancedRAGChat(RAGChatBase):
     def __init__(
@@ -34,82 +65,64 @@ def __init__(
         self.chat_deployment = chat_deployment
         self.chat_token_limit = get_token_limit(chat_model, default_to_minimum=True)

-    async def generate_search_query(
+    async def search_database(
         self,
-        original_user_query: str,
-        past_messages: list[ChatCompletionMessageParam],
-        query_response_token_limit: int,
-        seed: Optional[int] = None,
-    ) -> tuple[list[ChatCompletionMessageParam], Union[Any, str, None], list]:
-        """Generate an optimized keyword search query based on the chat history and the last question"""
-
-        tools = build_search_function()
-        tool_choice: Final = "auto"
-
-        query_messages: list[ChatCompletionMessageParam] = build_messages(
-            model=self.chat_model,
-            system_prompt=self.query_prompt_template,
-            few_shots=self.query_fewshots,
-            new_user_content=original_user_query,
-            past_messages=past_messages,
-            max_tokens=self.chat_token_limit - query_response_token_limit,
-            tools=tools,
-            tool_choice=tool_choice,
-            fallback_to_default=True,
-        )
-
-        chat_completion: ChatCompletion = await self.openai_chat_client.chat.completions.create(
-            messages=query_messages,
-            # Azure OpenAI takes the deployment name as the model name
-            model=self.chat_deployment if self.chat_deployment else self.chat_model,
-            temperature=0.0,  # Minimize creativity for search query generation
-            max_tokens=query_response_token_limit,  # Setting too low risks malformed JSON, too high risks performance
-            n=1,
-            tools=tools,
-            tool_choice=tool_choice,
-            seed=seed,
-        )
-
-        query_text, filters = extract_search_arguments(original_user_query, chat_completion)
-
-        return query_messages, query_text, filters
-
-    async def prepare_context(
-        self, chat_params: ChatParams
-    ) -> tuple[list[ChatCompletionMessageParam], list[Item], list[ThoughtStep]]:
-        query_messages, query_text, filters = await self.generate_search_query(
-            original_user_query=chat_params.original_user_query,
-            past_messages=chat_params.past_messages,
-            query_response_token_limit=500,
-            seed=chat_params.seed,
-        )
-
-        # Retrieve relevant rows from the database with the GPT optimized query
+        ctx: RunContext[ChatParams],
+        search_query: str,
+        price_filter: Optional[PriceFilter] = None,
+        brand_filter: Optional[BrandFilter] = None,
+    ) -> list[str]:
+        """
+        Search PostgreSQL database for relevant products based on user query
+
+        Args:
+            search_query: Query string to use for full text search, e.g. 'red shoes'
+            price_filter: Filter search results based on price of the product
+            brand_filter: Filter search results based on brand of the product
+
+        Returns:
+            List of formatted items that match the search query and filters
+        """
+        print(search_query, price_filter, brand_filter)
+        # Only send non-None filters
+        filters = []
+        if price_filter:
+            filters.append(price_filter)
+        if brand_filter:
+            filters.append(brand_filter)
         results = await self.searcher.search_and_embed(
-            query_text,
-            top=chat_params.top,
-            enable_vector_search=chat_params.enable_vector_search,
-            enable_text_search=chat_params.enable_text_search,
+            search_query,
+            top=ctx.deps.top,
+            enable_vector_search=ctx.deps.enable_vector_search,
+            enable_text_search=ctx.deps.enable_text_search,
             filters=filters,
         )
+        return [f"[{(item.id)}]:{item.to_str_for_rag()}\n\n" for item in results]

-        sources_content = [f"[{(item.id)}]:{item.to_str_for_rag()}\n\n" for item in results]
-        content = "\n".join(sources_content)
-
-        # Generate a contextual and content specific answer using the search results and chat history
-        contextual_messages: list[ChatCompletionMessageParam] = build_messages(
-            model=self.chat_model,
-            system_prompt=chat_params.prompt_template,
-            new_user_content=chat_params.original_user_query + "\n\nSources:\n" + content,
-            past_messages=chat_params.past_messages,
-            max_tokens=self.chat_token_limit - chat_params.response_token_limit,
-            fallback_to_default=True,
+    async def prepare_context(self, chat_params: ChatParams) -> tuple[str, list[Item], list[ThoughtStep]]:
+        model = OpenAIModel(
+            os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT"], provider=OpenAIProvider(openai_client=self.openai_chat_client)
+        )
+        agent = Agent(
+            model,
+            model_settings=ModelSettings(temperature=0.0, max_tokens=500, seed=chat_params.seed),
+            system_prompt=self.query_prompt_template,
+            tools=[self.search_database],
+            output_type=list[str],
+        )
+        # TODO: Provide few-shot examples
+        results = await agent.run(
+            f"Find search results for user query: {chat_params.original_user_query}",
+            # message_history=chat_params.past_messages,  # TODO
+            deps=chat_params,
         )
+        if not isinstance(results, list):
+            raise ValueError("Search results should be a list of strings")

         thoughts = [
             ThoughtStep(
                 title="Prompt to generate search arguments",
-                description=query_messages,
+                description=chat_params.past_messages,  # TODO: update this
                 props=(
                     {"model": self.chat_model, "deployment": self.chat_deployment}
                     if self.chat_deployment
@@ -118,50 +131,52 @@ async def prepare_context(
             ),
             ThoughtStep(
                 title="Search using generated search arguments",
-                description=query_text,
+                description=chat_params.original_user_query,  # TODO
                 props={
                     "top": chat_params.top,
                     "vector_search": chat_params.enable_vector_search,
                     "text_search": chat_params.enable_text_search,
-                    "filters": filters,
+                    "filters": [],  # TODO
                 },
             ),
             ThoughtStep(
                 title="Search results",
-                description=[result.to_dict() for result in results],
+                description="",  # TODO
             ),
         ]
-        return contextual_messages, results, thoughts
+        return results, thoughts

     async def answer(
         self,
         chat_params: ChatParams,
-        contextual_messages: list[ChatCompletionMessageParam],
-        results: list[Item],
+        results: list[str],
         earlier_thoughts: list[ThoughtStep],
     ) -> RetrievalResponse:
-        chat_completion_response: ChatCompletion = await self.openai_chat_client.chat.completions.create(
-            # Azure OpenAI takes the deployment name as the model name
-            model=self.chat_deployment if self.chat_deployment else self.chat_model,
-            messages=contextual_messages,
-            temperature=chat_params.temperature,
-            max_tokens=chat_params.response_token_limit,
-            n=1,
-            stream=False,
-            seed=chat_params.seed,
+        agent = Agent(
+            OpenAIModel(
+                os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT"],
+                provider=OpenAIProvider(openai_client=self.openai_chat_client),
+            ),
+            system_prompt=self.answer_prompt_template,
+            model_settings=ModelSettings(
+                temperature=chat_params.temperature, max_tokens=chat_params.response_token_limit, seed=chat_params.seed
+            ),
+        )
+
+        response = await agent.run(
+            user_prompt=chat_params.original_user_query + "Sources:\n" + "\n".join(results),
+            message_history=chat_params.past_messages,
         )

         return RetrievalResponse(
-            message=Message(
-                content=str(chat_completion_response.choices[0].message.content), role=AIChatRoles.ASSISTANT
-            ),
+            message=Message(content=str(response.output), role=AIChatRoles.ASSISTANT),
             context=RAGContext(
                 data_points={item.id: item.to_dict() for item in results},
                 thoughts=earlier_thoughts
                 + [
                     ThoughtStep(
                         title="Prompt to generate answer",
-                        description=contextual_messages,
+                        description="",  # TODO: update
                         props=(
                             {"model": self.chat_model, "deployment": self.chat_deployment}
                             if self.chat_deployment
src/backend/fastapi_app/routes/api_routes.py

Lines changed: 4 additions & 8 deletions

@@ -136,10 +136,8 @@ async def chat_handler(

         chat_params = rag_flow.get_params(chat_request.messages, chat_request.context.overrides)

-        contextual_messages, results, thoughts = await rag_flow.prepare_context(chat_params)
-        response = await rag_flow.answer(
-            chat_params=chat_params, contextual_messages=contextual_messages, results=results, earlier_thoughts=thoughts
-        )
+        results, thoughts = await rag_flow.prepare_context(chat_params)
+        response = await rag_flow.answer(chat_params=chat_params, results=results, earlier_thoughts=thoughts)
         return response
     except Exception as e:
         if isinstance(e, APIError) and e.code == "content_filter":
@@ -187,10 +185,8 @@ async def chat_stream_handler(
    # Intentionally do this before we stream down a response, to avoid using database connections during stream
    # See https://github.com/tiangolo/fastapi/discussions/11321
    try:
-        contextual_messages, results, thoughts = await rag_flow.prepare_context(chat_params)
-        result = rag_flow.answer_stream(
-            chat_params=chat_params, contextual_messages=contextual_messages, results=results, earlier_thoughts=thoughts
-        )
+        results, thoughts = await rag_flow.prepare_context(chat_params)
+        result = rag_flow.answer_stream(chat_params=chat_params, results=results, earlier_thoughts=thoughts)
         return StreamingResponse(content=format_as_ndjson(result), media_type="application/x-ndjson")
     except Exception as e:
         if isinstance(e, APIError) and e.code == "content_filter":
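
Both routes now share a slimmed two-step contract: prepare_context returns a (results, thoughts) pair (note that its return annotation in rag_advanced.py above still advertises a three-element tuple, which this route-level unpacking contradicts), and answer/answer_stream no longer take contextual_messages. A minimal sketch of the shared flow — the wrapper name run_advanced_rag is hypothetical, and the import paths are assumed from elsewhere in this commit:

from fastapi_app.api_models import RetrievalResponse
from fastapi_app.rag_advanced import AdvancedRAGChat
from fastapi_app.rag_base import ChatParams


async def run_advanced_rag(rag_flow: AdvancedRAGChat, chat_params: ChatParams) -> RetrievalResponse:
    # Step 1: a pydantic-ai agent calls search_database and returns the
    # formatted source rows plus ThoughtStep telemetry.
    results, thoughts = await rag_flow.prepare_context(chat_params)
    # Step 2: a second agent answers from those sources; no
    # contextual_messages are threaded through anymore.
    return await rag_flow.answer(chat_params=chat_params, results=results, earlier_thoughts=thoughts)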

src/backend/pyproject.toml

Lines changed: 1 addition & 0 deletions

@@ -19,6 +19,7 @@ dependencies = [
     "opentelemetry-instrumentation-sqlalchemy",
     "opentelemetry-instrumentation-aiohttp-client",
     "opentelemetry-instrumentation-openai",
+    "pydantic-ai"
 ]

 [build-system]
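
With pydantic-ai declared in the backend's dependencies, a small smoke test (a sketch; it assumes the package has already been installed into the backend environment) checks that the distribution resolves and that the names rag_advanced.py imports are present:

# Confirm the new dependency resolves and the imports used in
# rag_advanced.py are available.
from importlib.metadata import version

from pydantic_ai import Agent, RunContext
from pydantic_ai.models.openai import OpenAIModel
from pydantic_ai.providers.openai import OpenAIProvider
from pydantic_ai.settings import ModelSettings

print("pydantic-ai", version("pydantic-ai"))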
