Skip to content

Switch to GitHub models API for embeddings requests #28

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .env-sample
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
# get your pat token from: https://github.com/settings/tokens?type=beta
# if creating a new token, ensure it has `models: read` permissions
GITHUB_TOKEN="github_pat_****"
2 changes: 1 addition & 1 deletion cookbooks/python/llamaindex/rag_getting_started.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
" raise ValueError(\"GITHUB_TOKEN is not set\")\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = os.getenv(\"GITHUB_TOKEN\")\n",
"os.environ[\"OPENAI_BASE_URL\"] = \"https://models.inference.ai.azure.com/\""
"os.environ[\"OPENAI_BASE_URL\"] = \"https://models.github.ai/inference\""
]
},
{
Expand Down
8 changes: 4 additions & 4 deletions samples/js/azure_ai_inference/embeddings.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { isUnexpected } from "@azure-rest/ai-inference";
import { AzureKeyCredential } from "@azure/core-auth";

const token = process.env["GITHUB_TOKEN"];
const endpoint = "https://models.inference.ai.azure.com";
const endpoint = "https://models.github.ai/inference";

/* By using the Azure AI Inference SDK, you can easily experiment with different models
by modifying the value of `modelName` in the code below. For this code sample
Expand Down Expand Up @@ -32,9 +32,9 @@ export async function main() {
for (const item of response.body.data) {
let length = item.embedding.length;
console.log(
`data[${item.index}]: length=${length}, ` +
`[${item.embedding[0]}, ${item.embedding[1]}, ` +
`..., ${item.embedding[length - 2]}, ${item.embedding[length -1]}]`);
`data[${item.index}]: length=${length}, ` +
`[${item.embedding[0]}, ${item.embedding[1]}, ` +
`..., ${item.embedding[length - 2]}, ${item.embedding[length - 1]}]`);
}
console.log(response.body.usage);
}
Expand Down
16 changes: 8 additions & 8 deletions samples/js/openai/embeddings.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import OpenAI from "openai";

const token = process.env["GITHUB_TOKEN"];
const endpoint = "https://models.inference.ai.azure.com";
const endpoint = "https://models.github.ai/inference";

/* Pick one of the OpenAI embeddings models from the GitHub Models service */
const modelName = "text-embedding-3-small";
Expand All @@ -11,16 +11,16 @@ export async function main() {
const client = new OpenAI({ baseURL: endpoint, apiKey: token });

const response = await client.embeddings.create({
input: ["first phrase", "second phrase", "third phrase"],
model: modelName
input: ["first phrase", "second phrase", "third phrase"],
model: modelName
});

for (const item of response.data) {
let length = item.embedding.length;
console.log(
`data[${item.index}]: length=${length}, ` +
`[${item.embedding[0]}, ${item.embedding[1]}, ` +
`..., ${item.embedding[length - 2]}, ${item.embedding[length -1]}]`);
let length = item.embedding.length;
console.log(
`data[${item.index}]: length=${length}, ` +
`[${item.embedding[0]}, ${item.embedding[1]}, ` +
`..., ${item.embedding[length - 2]}, ${item.embedding[length - 1]}]`);
}
console.log(response.usage);
}
Expand Down
4 changes: 2 additions & 2 deletions samples/python/azure_ai_inference/embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@
from azure.core.credentials import AzureKeyCredential

token = os.environ["GITHUB_TOKEN"]
endpoint = "https://models.inference.ai.azure.com"
endpoint = "https://models.github.ai/inference"

# By using the Azure AI Inference SDK, you can easily experiment with different models
# by modifying the value of `modelName` in the code below. For this code sample
# you need an embedding model. The following embedding models are
# available in the GitHub Models service:
#
#
# Cohere: Cohere-embed-v3-english, Cohere-embed-v3-multilingual
# Azure OpenAI: text-embedding-3-small, text-embedding-3-large
model_name = "text-embedding-3-small"
Expand Down
4 changes: 2 additions & 2 deletions samples/python/azure_ai_inference/getting_started.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
" raise ValueError(\"GITHUB_TOKEN is not set\")\n",
"\n",
"github_token = os.environ[\"GITHUB_TOKEN\"]\n",
"endpoint = \"https://models.inference.ai.azure.com\"\n",
"endpoint = \"https://models.github.ai/inference\"\n",
"\n",
"\n",
"# Create a client\n",
Expand Down Expand Up @@ -117,7 +117,7 @@
" # Optional parameters\n",
" temperature=1.,\n",
" max_tokens=1000,\n",
" top_p=1. \n",
" top_p=1.\n",
")\n",
"\n",
"print(response.choices[0].message.content)"
Expand Down
4 changes: 2 additions & 2 deletions samples/python/mistralai/getting_started.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
" raise ValueError(\"GITHUB_TOKEN is not set\")\n",
"\n",
"github_token = os.environ[\"GITHUB_TOKEN\"]\n",
"endpoint = \"https://models.inference.ai.azure.com\"\n",
"endpoint = \"https://models.github.ai/inference\"\n",
"\n",
"# Pick one of the Mistral models from the GitHub Models service\n",
"model_name = \"Mistral-large\"\n",
Expand Down Expand Up @@ -99,7 +99,7 @@
" # Optional parameters\n",
" temperature=1.,\n",
" max_tokens=1000,\n",
" top_p=1. \n",
" top_p=1.\n",
")\n",
"\n",
"print(response.choices[0].message.content)"
Expand Down
2 changes: 1 addition & 1 deletion samples/python/openai/embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from openai import OpenAI

token = os.environ["GITHUB_TOKEN"]
endpoint = "https://models.inference.ai.azure.com"
endpoint = "https://models.github.ai/inference"

# Pick one of the OpenAI embeddings models from the GitHub Models service
model_name = "text-embedding-3-small"
Expand Down
6 changes: 3 additions & 3 deletions samples/python/openai/embeddings_getting_started.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
" raise ValueError(\"GITHUB_TOKEN is not set\")\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = os.getenv(\"GITHUB_TOKEN\")\n",
"os.environ[\"OPENAI_BASE_URL\"] = \"https://models.inference.ai.azure.com/\"\n",
"os.environ[\"OPENAI_BASE_URL\"] = \"https://models.github.ai/inference\"\n",
"\n",
"client = OpenAI()\n"
]
Expand All @@ -77,7 +77,7 @@
"metadata": {},
"outputs": [],
"source": [
"model_name = \"text-embedding-3-small\" \n",
"model_name = \"text-embedding-3-small\"\n",
"\n",
"response = client.embeddings.create(\n",
" model=model_name,\n",
Expand Down Expand Up @@ -105,7 +105,7 @@
"metadata": {},
"outputs": [],
"source": [
"model_name = \"text-embedding-3-small\" \n",
"model_name = \"text-embedding-3-small\"\n",
"inputs = [\"Hello, world!\", \"How are you?\", \"What's the weather like?\"]\n",
"\n",
"response = client.embeddings.create(\n",
Expand Down
5 changes: 4 additions & 1 deletion samples/python/openai/multi_turn.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
client = OpenAI(
base_url=endpoint,
api_key=token,
default_headers={
"x-ms-useragent": "github-models-sample",
}
)

# Call the chat completion API
Expand All @@ -42,4 +45,4 @@
)

# Print the response
print(response.choices[0].message.content)
print(response.choices[0].message.content)
Loading
pFad - Phonifier reborn

pFad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy