From eadb6f152e9f1edd07a13862cf66905cf695a787 Mon Sep 17 00:00:00 2001 From: Pamela Fox Date: Thu, 1 May 2025 18:00:24 +0000 Subject: [PATCH] Port to GitHub Models --- .env.sample | 13 ++++++++++--- evals/evaluate.py | 4 ++++ evals/generate_ground_truth.py | 4 +++- infra/main.bicep | 2 +- src/backend/fastapi_app/dependencies.py | 8 ++++++++ src/backend/fastapi_app/openai_clients.py | 18 ++++++++++++++++++ src/backend/fastapi_app/routes/api_routes.py | 2 +- src/backend/fastapi_app/update_embeddings.py | 2 ++ tests/conftest.py | 2 +- 9 files changed, 48 insertions(+), 7 deletions(-) diff --git a/.env.sample b/.env.sample index 080cf73c..ee58dcf2 100644 --- a/.env.sample +++ b/.env.sample @@ -5,9 +5,9 @@ POSTGRES_PASSWORD=postgres POSTGRES_DATABASE=postgres POSTGRES_SSL=disable -# OPENAI_CHAT_HOST can be either azure, openai, or ollama: +# OPENAI_CHAT_HOST can be either azure, openai, ollama, or github: OPENAI_CHAT_HOST=azure -# OPENAI_EMBED_HOST can be either azure or openai: +# OPENAI_EMBED_HOST can be either azure, openai, ollama, or github: OPENAI_EMBED_HOST=azure # Needed for Azure: # You also need to `azd auth login` if running this locally @@ -28,10 +28,17 @@ AZURE_OPENAI_KEY= OPENAICOM_KEY=YOUR-OPENAI-API-KEY OPENAICOM_CHAT_MODEL=gpt-3.5-turbo OPENAICOM_EMBED_MODEL=text-embedding-3-large -OPENAICOM_EMBED_MODEL_DIMENSIONS=1024 +OPENAICOM_EMBED_DIMENSIONS=1024 OPENAICOM_EMBEDDING_COLUMN=embedding_3l # Needed for Ollama: OLLAMA_ENDPOINT=http://host.docker.internal:11434/v1 OLLAMA_CHAT_MODEL=llama3.1 OLLAMA_EMBED_MODEL=nomic-embed-text OLLAMA_EMBEDDING_COLUMN=embedding_nomic +# Needed for GitHub Models: +GITHUB_TOKEN=YOUR-GITHUB-TOKEN +GITHUB_BASE_URL=https://models.inference.ai.azure.com +GITHUB_MODEL=gpt-4o +GITHUB_EMBED_MODEL=text-embedding-3-large +GITHUB_EMBED_DIMENSIONS=1024 +GITHUB_EMBEDDING_COLUMN=embedding_3l diff --git a/evals/evaluate.py b/evals/evaluate.py index c4075479..efb7f8f2 100644 --- a/evals/evaluate.py +++ b/evals/evaluate.py @@ -66,6 +66,10 
@@ def get_openai_config() -> dict: # azure-ai-evaluate will call DefaultAzureCredential behind the scenes, # so we must be logged in to Azure CLI with the correct tenant openai_config["model"] = os.environ["AZURE_OPENAI_EVAL_MODEL"] + elif os.environ.get("OPENAI_CHAT_HOST") == "ollama": + raise NotImplementedError("Ollama is not supported. Switch to Azure or OpenAI.com") + elif os.environ.get("OPENAI_CHAT_HOST") == "github": + raise NotImplementedError("GitHub Models is not supported. Switch to Azure or OpenAI.com") else: logger.info("Using OpenAI Service with API Key from OPENAICOM_KEY") openai_config = {"api_key": os.environ["OPENAICOM_KEY"], "model": "gpt-4"} diff --git a/evals/generate_ground_truth.py b/evals/generate_ground_truth.py index f5807a7f..44410506 100644 --- a/evals/generate_ground_truth.py +++ b/evals/generate_ground_truth.py @@ -101,7 +101,9 @@ def get_openai_client() -> tuple[Union[AzureOpenAI, OpenAI], str]: ) model = os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT"] elif OPENAI_CHAT_HOST == "ollama": - raise NotImplementedError("Ollama OpenAI Service is not supported. Switch to Azure or OpenAI.com") + raise NotImplementedError("Ollama is not supported. Switch to Azure or OpenAI.com") + elif OPENAI_CHAT_HOST == "github": + raise NotImplementedError("GitHub Models is not supported. Switch to Azure or OpenAI.com") else: logger.info("Using OpenAI Service with API Key from OPENAICOM_KEY") openai_client = OpenAI(api_key=os.environ["OPENAICOM_KEY"]) diff --git a/infra/main.bicep b/infra/main.bicep index a55c5c8b..34b8b6e8 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -302,7 +302,7 @@ var webAppEnv = union(azureOpenAIKeyEnv, openAIComKeyEnv, [ value: openAIEmbedHost } { - name: 'OPENAICOM_EMBED_MODEL_DIMENSIONS' + name: 'OPENAICOM_EMBED_DIMENSIONS' value: openAIEmbedHost == 'openaicom' ? 
'1024' : '' } { diff --git a/src/backend/fastapi_app/dependencies.py b/src/backend/fastapi_app/dependencies.py index de916576..bd7bc4b4 100644 --- a/src/backend/fastapi_app/dependencies.py +++ b/src/backend/fastapi_app/dependencies.py @@ -51,6 +51,11 @@ async def common_parameters(): openai_embed_model = os.getenv("OLLAMA_EMBED_MODEL") or "nomic-embed-text" openai_embed_dimensions = None embedding_column = os.getenv("OLLAMA_EMBEDDING_COLUMN") or "embedding_nomic" + elif OPENAI_EMBED_HOST == "github": + openai_embed_deployment = None + openai_embed_model = os.getenv("GITHUB_EMBED_MODEL") or "text-embedding-3-large" + openai_embed_dimensions = int(os.getenv("GITHUB_EMBED_DIMENSIONS", 1024)) + embedding_column = os.getenv("GITHUB_EMBEDDING_COLUMN") or "embedding_3l" else: openai_embed_deployment = None openai_embed_model = os.getenv("OPENAICOM_EMBED_MODEL") or "text-embedding-3-large" @@ -63,6 +68,9 @@ async def common_parameters(): openai_chat_deployment = None openai_chat_model = os.getenv("OLLAMA_CHAT_MODEL") or "phi3:3.8b" openai_embed_model = os.getenv("OLLAMA_EMBED_MODEL") or "nomic-embed-text" + elif OPENAI_CHAT_HOST == "github": + openai_chat_deployment = None + openai_chat_model = os.getenv("GITHUB_MODEL") or "gpt-4o" else: openai_chat_deployment = None openai_chat_model = os.getenv("OPENAICOM_CHAT_MODEL") or "gpt-3.5-turbo" diff --git a/src/backend/fastapi_app/openai_clients.py b/src/backend/fastapi_app/openai_clients.py index f0c1e6e5..e83e0c41 100644 --- a/src/backend/fastapi_app/openai_clients.py +++ b/src/backend/fastapi_app/openai_clients.py @@ -50,6 +50,15 @@ async def create_openai_chat_client( base_url=os.getenv("OLLAMA_ENDPOINT"), api_key="nokeyneeded", ) + elif OPENAI_CHAT_HOST == "github": + logger.info("Setting up OpenAI client for chat completions using GitHub Models") + github_base_url = os.getenv("GITHUB_BASE_URL", "https://models.inference.ai.azure.com") + github_model = os.getenv("GITHUB_MODEL", "gpt-4o") + logger.info(f"Using GitHub Models 
with base URL: {github_base_url}, model: {github_model}") + openai_chat_client = openai.AsyncOpenAI( + base_url=github_base_url, + api_key=os.getenv("GITHUB_TOKEN"), + ) else: logger.info("Setting up OpenAI client for chat completions using OpenAI.com API key") openai_chat_client = openai.AsyncOpenAI(api_key=os.getenv("OPENAICOM_KEY")) @@ -99,6 +108,15 @@ async def create_openai_embed_client( base_url=os.getenv("OLLAMA_ENDPOINT"), api_key="nokeyneeded", ) + elif OPENAI_EMBED_HOST == "github": + logger.info("Setting up OpenAI client for embeddings using GitHub Models") + github_base_url = os.getenv("GITHUB_BASE_URL", "https://models.inference.ai.azure.com") + github_embed_model = os.getenv("GITHUB_EMBED_MODEL", "text-embedding-3-small") + logger.info(f"Using GitHub Models with base URL: {github_base_url}, embedding model: {github_embed_model}") + openai_embed_client = openai.AsyncOpenAI( + base_url=github_base_url, + api_key=os.getenv("GITHUB_TOKEN"), + ) else: logger.info("Setting up OpenAI client for embeddings using OpenAI.com API key") openai_embed_client = openai.AsyncOpenAI(api_key=os.getenv("OPENAICOM_KEY")) diff --git a/src/backend/fastapi_app/routes/api_routes.py b/src/backend/fastapi_app/routes/api_routes.py index 54e7e3b1..d7486730 100644 --- a/src/backend/fastapi_app/routes/api_routes.py +++ b/src/backend/fastapi_app/routes/api_routes.py @@ -68,7 +68,7 @@ async def similar_handler( f"SELECT *, {context.embedding_column} <=> :embedding as DISTANCE FROM {Item.__tablename__} " "WHERE id <> :item_id ORDER BY distance LIMIT :n" ), - {"embedding": item.embedding_3l, "n": n, "item_id": id}, + {"embedding": getattr(item, context.embedding_column), "n": n, "item_id": id}, ) ).fetchall() diff --git a/src/backend/fastapi_app/update_embeddings.py b/src/backend/fastapi_app/update_embeddings.py index 83744ead..b36113b2 100644 --- a/src/backend/fastapi_app/update_embeddings.py +++ b/src/backend/fastapi_app/update_embeddings.py @@ -29,6 +29,8 @@ async def 
update_embeddings(in_seed_data=False): embedding_column = os.getenv("AZURE_OPENAI_EMBEDDING_COLUMN", "embedding_3l") elif OPENAI_EMBED_HOST == "ollama": embedding_column = os.getenv("OLLAMA_EMBEDDING_COLUMN", "embedding_nomic") + elif OPENAI_EMBED_HOST == "github": + embedding_column = os.getenv("GITHUB_EMBEDDING_COLUMN", "embedding_3l") else: embedding_column = os.getenv("OPENAICOM_EMBEDDING_COLUMN", "embedding_3l") logger.info(f"Updating embeddings in column: {embedding_column}") diff --git a/tests/conftest.py b/tests/conftest.py index 0af0c2c1..5bbff0f6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -95,7 +95,7 @@ def mock_session_env_openai(monkeypatch_session): monkeypatch_session.setenv("OPENAICOM_KEY", "fakekey") monkeypatch_session.setenv("OPENAICOM_CHAT_MODEL", "gpt-3.5-turbo") monkeypatch_session.setenv("OPENAICOM_EMBED_MODEL", "text-embedding-3-large") - monkeypatch_session.setenv("OPENAICOM_EMBED_MODEL_DIMENSIONS", "1024") + monkeypatch_session.setenv("OPENAICOM_EMBED_DIMENSIONS", "1024") monkeypatch_session.setenv("OPENAICOM_EMBEDDING_COLUMN", "embedding_3l") yield