
Commit b5a9ef4

fix: Do not send an empty 'tools' list to remote vllm (meta-llama#1957)
Fixes: meta-llama#1955

Since 0.2.0, vLLM receives an empty list (vs ``None`` in 0.1.9 and before) when no tools are configured, which causes the issue described in meta-llama#1955. This patch avoids sending the 'tools' param to vLLM altogether instead of sending an empty list. It also adds a small unit test to avoid regressions.

The OpenAI [specification](https://platform.openai.com/docs/api-reference/chat/create) does not explicitly state that the list cannot be empty, but I found this out through experimentation and it might depend on the actual remote vLLM. In any case, since this parameter is Optional, it is best to skip it altogether when no tools are configured.

Signed-off-by: Daniel Alvarez <dalvarez@redhat.com>
1 parent fb8ff77 commit b5a9ef4
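For context, here is a minimal sketch (not the project's code; build_chat_kwargs is a hypothetical helper) of the pattern the fix applies: include the Optional 'tools' parameter in an OpenAI-compatible request only when at least one tool is configured, since a remote vLLM may reject an empty list.

from typing import Any, Optional


def build_chat_kwargs(
    model: str,
    messages: list[dict[str, Any]],
    tools: Optional[list[dict[str, Any]]] = None,
) -> dict[str, Any]:
    """Build request kwargs, omitting 'tools' when none are configured."""
    kwargs: dict[str, Any] = {"model": model, "messages": messages}
    # An empty list is falsy, so both None and [] skip this branch,
    # mirroring the change from `tools is not None` to a truthiness check.
    if tools:
        kwargs["tools"] = tools
    return kwargs


# 'tools' is omitted entirely when the list is empty.
assert "tools" not in build_chat_kwargs("test_model", [{"role": "user", "content": "hi"}], tools=[])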

File tree

2 files changed: +19 −2 lines changed

llama_stack/providers/remote/inference/vllm/vllm.py

Lines changed: 2 additions & 1 deletion
@@ -374,7 +374,8 @@ async def _get_params(self, request: Union[ChatCompletionRequest, CompletionRequ
             options["max_tokens"] = self.config.max_tokens
 
         input_dict: dict[str, Any] = {}
-        if isinstance(request, ChatCompletionRequest) and request.tools is not None:
+        # Only include the 'tools' param if there is any. It can break things if an empty list is sent to the vLLM.
+        if isinstance(request, ChatCompletionRequest) and request.tools:
             input_dict = {"tools": _convert_to_vllm_tools_in_request(request.tools)}
 
         if isinstance(request, ChatCompletionRequest):

tests/unit/providers/inference/test_remote_vllm.py

Lines changed: 17 additions & 1 deletion
@@ -26,7 +26,12 @@
 )
 from openai.types.model import Model as OpenAIModel
 
-from llama_stack.apis.inference import ToolChoice, ToolConfig
+from llama_stack.apis.inference import (
+    ChatCompletionRequest,
+    ToolChoice,
+    ToolConfig,
+    UserMessage,
+)
 from llama_stack.apis.models import Model
 from llama_stack.models.llama.datatypes import StopReason
 from llama_stack.providers.remote.inference.vllm.config import VLLMInferenceAdapterConfig
@@ -232,3 +237,14 @@ async def do_chat_completion():
     # above.
     asyncio_warnings = [record.message for record in caplog.records if record.name == "asyncio"]
     assert not asyncio_warnings
+
+
+@pytest.mark.asyncio
+async def test_get_params_empty_tools(vllm_inference_adapter):
+    request = ChatCompletionRequest(
+        tools=[],
+        model="test_model",
+        messages=[UserMessage(content="test")],
+    )
+    params = await vllm_inference_adapter._get_params(request)
+    assert "tools" not in params

0 commit comments