
Commit 073b7e4

fix: added missing exit_stack.close() to /v1/chat/completions (abetlen#1796)
* fix: added missing exit_stack.close() to /v1/chat/completions
* fix: added missing exit_stack.close() to /v1/completions
1 parent 77a12a3 commit 073b7e4

File tree

2 files changed: +20 -9 lines changed


llama_cpp/server/app.py

Lines changed: 16 additions & 7 deletions
@@ -314,10 +314,14 @@ async def create_completion(
         else:
             kwargs["logits_processor"].extend(_min_tokens_logits_processor)
 
-    iterator_or_completion: Union[
-        llama_cpp.CreateCompletionResponse,
-        Iterator[llama_cpp.CreateCompletionStreamResponse],
-    ] = await run_in_threadpool(llama, **kwargs)
+    try:
+        iterator_or_completion: Union[
+            llama_cpp.CreateCompletionResponse,
+            Iterator[llama_cpp.CreateCompletionStreamResponse],
+        ] = await run_in_threadpool(llama, **kwargs)
+    except Exception as err:
+        exit_stack.close()
+        raise err
 
     if isinstance(iterator_or_completion, Iterator):
         # EAFP: It's easier to ask for forgiveness than permission
@@ -344,6 +348,7 @@ def iterator() -> Iterator[llama_cpp.CreateCompletionStreamResponse]:
             ping_message_factory=_ping_message_factory,
         )
     else:
+        exit_stack.close()
         return iterator_or_completion
 
 
@@ -508,9 +513,13 @@ async def create_chat_completion(
         else:
             kwargs["logits_processor"].extend(_min_tokens_logits_processor)
 
-    iterator_or_completion: Union[
-        llama_cpp.ChatCompletion, Iterator[llama_cpp.ChatCompletionChunk]
-    ] = await run_in_threadpool(llama.create_chat_completion, **kwargs)
+    try:
+        iterator_or_completion: Union[
+            llama_cpp.ChatCompletion, Iterator[llama_cpp.ChatCompletionChunk]
+        ] = await run_in_threadpool(llama.create_chat_completion, **kwargs)
+    except Exception as err:
+        exit_stack.close()
+        raise err
 
     if isinstance(iterator_or_completion, Iterator):
         # EAFP: It's easier to ask for forgiveness than permission
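
Both handlers now follow the same rule: the ExitStack holding the request's resources must be closed on every path that does not hand ownership to a streaming iterator. Below is a minimal runnable sketch of that pattern; acquire_model and handle_request are hypothetical stand-ins, not the server's real llama proxy or route handlers.

# Sketch of the lifecycle pattern this commit enforces. acquire_model and
# handle_request are hypothetical stand-ins, not llama-cpp-python's API.
from contextlib import ExitStack
from typing import Iterator, Union


def acquire_model(exit_stack: ExitStack) -> str:
    # Stand-in for entering a resource (e.g. the model lock) on the stack.
    exit_stack.callback(lambda: print("resources released"))
    return "model"


def handle_request(stream: bool, fail: bool = False) -> Union[str, Iterator[str]]:
    exit_stack = ExitStack()
    model = acquire_model(exit_stack)
    try:
        if fail:
            raise RuntimeError("inference failed")
        result = f"completion from {model}"
    except Exception as err:
        # The fix: without this close(), a failed call leaks the resources.
        exit_stack.close()
        raise err
    if stream:
        def iterator() -> Iterator[str]:
            try:
                yield result
            finally:
                # The streaming path keeps the stack open and closes it
                # once the response has been fully consumed.
                exit_stack.close()
        return iterator()
    # The fix: the non-streaming path must also release before returning.
    exit_stack.close()
    return result


if __name__ == "__main__":
    print(handle_request(stream=False))        # releases, then prints the text
    for chunk in handle_request(stream=True):  # releases after iteration
        print(chunk)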

llama_cpp/server/errors.py

Lines changed: 4 additions & 2 deletions
@@ -134,8 +134,6 @@ def error_message_wrapper(
     ] = None,
 ) -> Tuple[int, ErrorResponse]:
     """Wraps error message in OpenAI style error response"""
-    print(f"Exception: {str(error)}", file=sys.stderr)
-    traceback.print_exc(file=sys.stderr)
     if body is not None and isinstance(
         body,
         (
@@ -149,6 +147,10 @@ def error_message_wrapper(
         if match is not None:
             return callback(body, match)
 
+    # Only print the trace on unexpected exceptions
+    print(f"Exception: {str(error)}", file=sys.stderr)
+    traceback.print_exc(file=sys.stderr)
+
     # Wrap other errors as internal server error
     return 500, ErrorResponse(
         message=str(error),
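
The errors.py half of the change reorders the logging: errors that match a registered pattern are expected, client-facing failures and are returned without touching stderr, while the traceback is printed only for unexpected exceptions that fall through to the 500 response. A hedged sketch of that control flow follows; the PATTERNS registry and the simplified signature are illustrative, standing in for the module's real pattern/callback table and ErrorResponse model.

# Sketch of the reordered error wrapper. PATTERNS and the simplified
# return type are illustrative, not the module's real definitions.
import re
import sys
import traceback
from typing import Callable, Dict, Tuple

PATTERNS: Dict[str, Callable[[re.Match], Tuple[int, str]]] = {
    r"exceed context window of (\d+)": lambda m: (
        400,
        f"request exceeds context window of {m.group(1)}",
    ),
}


def error_message_wrapper(error: Exception) -> Tuple[int, str]:
    """Wraps error message in OpenAI style error response"""
    for pattern, callback in PATTERNS.items():
        match = re.search(pattern, str(error))
        if match is not None:
            # Expected error: return the mapped response, no stderr noise.
            return callback(match)
    # Only print the trace on unexpected exceptions
    print(f"Exception: {str(error)}", file=sys.stderr)
    traceback.print_exc(file=sys.stderr)
    return 500, str(error)


if __name__ == "__main__":
    for exc in (ValueError("tokens exceed context window of 2048"),
                RuntimeError("out of memory")):
        try:
            raise exc
        except Exception as err:
            print(error_message_wrapper(err))  # only the second prints a trace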
