|
85 | 85 | }
|
86 | 86 | }
|
87 | 87 | },
|
88 |
| - "/v1/batch-inference/chat-completion": { |
| 88 | + "/v1/inference/batch-chat-completion": { |
89 | 89 | "post": {
|
90 | 90 | "responses": {
|
91 | 91 | "200": {
|
|
112 | 112 | }
|
113 | 113 | },
|
114 | 114 | "tags": [
|
115 |
| - "BatchInference (Coming Soon)" |
| 115 | + "Inference" |
116 | 116 | ],
|
117 | 117 | "description": "",
|
118 | 118 | "parameters": [],
|
|
128 | 128 | }
|
129 | 129 | }
|
130 | 130 | },
|
131 |
| - "/v1/batch-inference/completion": { |
| 131 | + "/v1/inference/batch-completion": { |
132 | 132 | "post": {
|
133 | 133 | "responses": {
|
134 | 134 | "200": {
|
|
155 | 155 | }
|
156 | 156 | },
|
157 | 157 | "tags": [
|
158 |
| - "BatchInference (Coming Soon)" |
| 158 | + "Inference" |
159 | 159 | ],
|
160 | 160 | "description": "",
|
161 | 161 | "parameters": [],
|
|
239 | 239 | }
|
240 | 240 | },
|
241 | 241 | "tags": [
|
242 |
| - "Inference" |
| 242 | + "BatchInference (Coming Soon)" |
243 | 243 | ],
|
244 | 244 | "description": "Generate a chat completion for the given messages using the specified model.",
|
245 | 245 | "parameters": [],
|
|
287 | 287 | }
|
288 | 288 | },
|
289 | 289 | "tags": [
|
290 |
| - "Inference" |
| 290 | + "BatchInference (Coming Soon)" |
291 | 291 | ],
|
292 | 292 | "description": "Generate a completion for the given content using the specified model.",
|
293 | 293 | "parameters": [],
|
|
4366 | 4366 | ],
|
4367 | 4367 | "title": "ToolCall"
|
4368 | 4368 | },
|
| 4369 | + "ToolConfig": { |
| 4370 | + "type": "object", |
| 4371 | + "properties": { |
| 4372 | + "tool_choice": { |
| 4373 | + "oneOf": [ |
| 4374 | + { |
| 4375 | + "type": "string", |
| 4376 | + "enum": [ |
| 4377 | + "auto", |
| 4378 | + "required", |
| 4379 | + "none" |
| 4380 | + ], |
| 4381 | + "title": "ToolChoice", |
| 4382 | + "description": "Whether tool use is required or automatic. This is a hint to the model which may not be followed. It depends on the instruction-following capabilities of the model." |
| 4383 | + }, |
| 4384 | + { |
| 4385 | + "type": "string" |
| 4386 | + } |
| 4387 | + ], |
| 4388 | + "default": "auto", |
| 4389 | + "description": "(Optional) Whether tool use is automatic, required, or none. Can also specify a tool name to use a specific tool. Defaults to ToolChoice.auto." |
| 4390 | + }, |
| 4391 | + "tool_prompt_format": { |
| 4392 | + "type": "string", |
| 4393 | + "enum": [ |
| 4394 | + "json", |
| 4395 | + "function_tag", |
| 4396 | + "python_list" |
| 4397 | + ], |
| 4398 | + "description": "(Optional) Instructs the model how to format tool calls. By default, Llama Stack will attempt to use a format that is best adapted to the model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a <function=function_name> tag. - `ToolPromptFormat.python_list`: The tool calls are output as Python syntax -- a list of function calls." |
| 4399 | + }, |
| 4400 | + "system_message_behavior": { |
| 4401 | + "type": "string", |
| 4402 | + "enum": [ |
| 4403 | + "append", |
| 4404 | + "replace" |
| 4405 | + ], |
| 4406 | + "description": "(Optional) Config for how to override the default system prompt. - `SystemMessageBehavior.append`: Appends the provided system message to the default system prompt. - `SystemMessageBehavior.replace`: Replaces the default system prompt with the provided system message. The system message can include the string '{{function_definitions}}' to indicate where the function definitions should be inserted.", |
| 4407 | + "default": "append" |
| 4408 | + } |
| 4409 | + }, |
| 4410 | + "additionalProperties": false, |
| 4411 | + "title": "ToolConfig", |
| 4412 | + "description": "Configuration for tool use." |
| 4413 | + }, |
4369 | 4414 | "ToolDefinition": {
|
4370 | 4415 | "type": "object",
|
4371 | 4416 | "properties": {
|
|
4554 | 4599 | "BatchChatCompletionRequest": {
|
4555 | 4600 | "type": "object",
|
4556 | 4601 | "properties": {
|
4557 |
| - "model": { |
| 4602 | + "model_id": { |
4558 | 4603 | "type": "string"
|
4559 | 4604 | },
|
4560 | 4605 | "messages_batch": {
|
|
4575 | 4620 | "$ref": "#/components/schemas/ToolDefinition"
|
4576 | 4621 | }
|
4577 | 4622 | },
|
4578 |
| - "tool_choice": { |
4579 |
| - "type": "string", |
4580 |
| - "enum": [ |
4581 |
| - "auto", |
4582 |
| - "required", |
4583 |
| - "none" |
4584 |
| - ], |
4585 |
| - "title": "ToolChoice", |
4586 |
| - "description": "Whether tool use is required or automatic. This is a hint to the model which may not be followed. It depends on the Instruction Following capabilities of the model." |
4587 |
| - }, |
4588 |
| - "tool_prompt_format": { |
4589 |
| - "type": "string", |
4590 |
| - "enum": [ |
4591 |
| - "json", |
4592 |
| - "function_tag", |
4593 |
| - "python_list" |
4594 |
| - ], |
4595 |
| - "title": "ToolPromptFormat", |
4596 |
| - "description": "Prompt format for calling custom / zero shot tools." |
| 4623 | + "tool_config": { |
| 4624 | + "$ref": "#/components/schemas/ToolConfig" |
4597 | 4625 | },
|
4598 | 4626 | "response_format": {
|
4599 | 4627 | "$ref": "#/components/schemas/ResponseFormat"
|
|
4613 | 4641 | },
|
4614 | 4642 | "additionalProperties": false,
|
4615 | 4643 | "required": [
|
4616 |
| - "model", |
| 4644 | + "model_id", |
4617 | 4645 | "messages_batch"
|
4618 | 4646 | ],
|
4619 | 4647 | "title": "BatchChatCompletionRequest"
|
|
4710 | 4738 | "BatchCompletionRequest": {
|
4711 | 4739 | "type": "object",
|
4712 | 4740 | "properties": {
|
4713 |
| - "model": { |
| 4741 | + "model_id": { |
4714 | 4742 | "type": "string"
|
4715 | 4743 | },
|
4716 | 4744 | "content_batch": {
|
|
4740 | 4768 | },
|
4741 | 4769 | "additionalProperties": false,
|
4742 | 4770 | "required": [
|
4743 |
| - "model", |
| 4771 | + "model_id", |
4744 | 4772 | "content_batch"
|
4745 | 4773 | ],
|
4746 | 4774 | "title": "BatchCompletionRequest"
|
|
4812 | 4840 | ],
|
4813 | 4841 | "title": "CancelTrainingJobRequest"
|
4814 | 4842 | },
|
4815 |
| - "ToolConfig": { |
4816 |
| - "type": "object", |
4817 |
| - "properties": { |
4818 |
| - "tool_choice": { |
4819 |
| - "oneOf": [ |
4820 |
| - { |
4821 |
| - "type": "string", |
4822 |
| - "enum": [ |
4823 |
| - "auto", |
4824 |
| - "required", |
4825 |
| - "none" |
4826 |
| - ], |
4827 |
| - "title": "ToolChoice", |
4828 |
| - "description": "Whether tool use is required or automatic. This is a hint to the model which may not be followed. It depends on the Instruction Following capabilities of the model." |
4829 |
| - }, |
4830 |
| - { |
4831 |
| - "type": "string" |
4832 |
| - } |
4833 |
| - ], |
4834 |
| - "default": "auto", |
4835 |
| - "description": "(Optional) Whether tool use is automatic, required, or none. Can also specify a tool name to use a specific tool. Defaults to ToolChoice.auto." |
4836 |
| - }, |
4837 |
| - "tool_prompt_format": { |
4838 |
| - "type": "string", |
4839 |
| - "enum": [ |
4840 |
| - "json", |
4841 |
| - "function_tag", |
4842 |
| - "python_list" |
4843 |
| - ], |
4844 |
| - "description": "(Optional) Instructs the model how to format tool calls. By default, Llama Stack will attempt to use a format that is best adapted to the model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a <function=function_name> tag. - `ToolPromptFormat.python_list`: The tool calls are output as Python syntax -- a list of function calls." |
4845 |
| - }, |
4846 |
| - "system_message_behavior": { |
4847 |
| - "type": "string", |
4848 |
| - "enum": [ |
4849 |
| - "append", |
4850 |
| - "replace" |
4851 |
| - ], |
4852 |
| - "description": "(Optional) Config for how to override the default system prompt. - `SystemMessageBehavior.append`: Appends the provided system message to the default system prompt. - `SystemMessageBehavior.replace`: Replaces the default system prompt with the provided system message. The system message can include the string '{{function_definitions}}' to indicate where the function definitions should be inserted.", |
4853 |
| - "default": "append" |
4854 |
| - } |
4855 |
| - }, |
4856 |
| - "additionalProperties": false, |
4857 |
| - "title": "ToolConfig", |
4858 |
| - "description": "Configuration for tool use." |
4859 |
| - }, |
4860 | 4843 | "ChatCompletionRequest": {
|
4861 | 4844 | "type": "object",
|
4862 | 4845 | "properties": {
|
|
11173 | 11156 | "x-displayName": "Agents API for creating and interacting with agentic systems."
|
11174 | 11157 | },
|
11175 | 11158 | {
|
11176 |
| - "name": "BatchInference (Coming Soon)" |
| 11159 | + "name": "BatchInference (Coming Soon)", |
| 11160 | + "description": "This is an asynchronous API. If the request is successful, the response will be a job which can be polled for completion.\n\nNOTE: This API is not yet implemented and is subject to change in concert with other asynchronous APIs\n(including post-training, evals, etc.).", |
| 11161 | + "x-displayName": "Batch inference API for generating completions and chat completions." |
11177 | 11162 | },
|
11178 | 11163 | {
|
11179 | 11164 | "name": "Benchmarks"
|
|
0 commit comments