@@ -38,11 +38,13 @@ class Settings(BaseSettings):
         default=None,
         description="Split layers across multiple GPUs in proportion.",
     )
-    rope_freq_base: float = Field(default=10000, ge=1, description="RoPE base frequency")
-    rope_freq_scale: float = Field(default=1.0, description="RoPE frequency scaling factor")
-    seed: int = Field(
-        default=1337, description="Random seed. -1 for random."
+    rope_freq_base: float = Field(
+        default=10000, ge=1, description="RoPE base frequency"
     )
+    rope_freq_scale: float = Field(
+        default=1.0, description="RoPE frequency scaling factor"
+    )
+    seed: int = Field(default=1337, description="Random seed. -1 for random.")
     n_batch: int = Field(
         default=512, ge=1, description="The batch size to use per eval."
     )
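For reference, the reflowed declarations in the hunk above are ordinary pydantic fields; a minimal sketch of how the same Field(default=..., ge=...) constraints behave, using plain pydantic (the RopeSettings class name here is hypothetical, not from the PR):

from pydantic import BaseModel, Field, ValidationError

class RopeSettings(BaseModel):
    # Same shape as the diff: the default applies when unset, ge=1 bounds the value.
    rope_freq_base: float = Field(default=10000, ge=1, description="RoPE base frequency")
    rope_freq_scale: float = Field(default=1.0, description="RoPE frequency scaling factor")

print(RopeSettings().rope_freq_base)  # 10000.0
try:
    RopeSettings(rope_freq_base=0)  # rejected: violates ge=1
except ValidationError as exc:
    print(exc)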
@@ -186,7 +188,9 @@ def get_settings():
     yield settings
 
 
-model_field = Field(description="The model to use for generating completions.", default=None)
+model_field = Field(
+    description="The model to use for generating completions.", default=None
+)
 
 max_tokens_field = Field(
     default=16, ge=1, le=2048, description="The maximum number of tokens to generate."
@@ -373,9 +377,11 @@ async def create_completion(
     kwargs = body.model_dump(exclude=exclude)
 
     if body.logit_bias is not None:
-        kwargs['logits_processor'] = llama_cpp.LogitsProcessorList([
-            make_logit_bias_processor(llama, body.logit_bias, body.logit_bias_type),
-        ])
+        kwargs["logits_processor"] = llama_cpp.LogitsProcessorList(
+            [
+                make_logit_bias_processor(llama, body.logit_bias, body.logit_bias_type),
+            ]
+        )
 
     if body.stream:
         send_chan, recv_chan = anyio.create_memory_object_stream(10)
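Context for the block reformatted above: make_logit_bias_processor produces a callable that llama_cpp.LogitsProcessorList applies to the raw scores at each sampling step. A rough standalone sketch of that idea, assuming the (input_ids, scores) -> scores signature; this is not the library's actual implementation:

from typing import Dict, List

def biased_processor(logit_bias: Dict[int, float]):
    # Hypothetical stand-in for make_logit_bias_processor: adds a per-token
    # bias to the model's raw logits before sampling.
    def processor(input_ids: List[int], scores: List[float]) -> List[float]:
        new_scores = list(scores)
        for token_id, bias in logit_bias.items():
            new_scores[token_id] += bias
        return new_scores
    return processor

proc = biased_processor({42: 5.0, 7: -5.0})  # boost token 42, suppress token 7
print(proc([1, 2, 3], [0.0] * 100)[42])  # 5.0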
@@ -402,7 +408,7 @@ async def event_publisher(inner_send_chan: MemoryObjectSendStream):
 
         return EventSourceResponse(
             recv_chan, data_sender_callable=partial(event_publisher, send_chan)
-        ) # type: ignore
+        )  # type: ignore
     else:
         completion: llama_cpp.Completion = await run_in_threadpool(llama, **kwargs)  # type: ignore
         return completion
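The streaming branch shown in context above pairs an anyio memory object stream with sse-starlette: the endpoint returns an EventSourceResponse that drains recv_chan while data_sender_callable feeds send_chan. A stripped-down sketch of that pattern (endpoint body only; error handling and the real completion chunks omitted):

from functools import partial

import anyio
from sse_starlette.sse import EventSourceResponse

async def stream_completion():
    send_chan, recv_chan = anyio.create_memory_object_stream(10)

    async def event_publisher(inner_send_chan):
        # Producer side: push chunks into the channel, then close it so the
        # event stream ends cleanly.
        async with inner_send_chan:
            for i in range(3):
                await inner_send_chan.send(dict(data=f"chunk {i}"))

    # sse-starlette iterates recv_chan and schedules the publisher coroutine.
    return EventSourceResponse(
        recv_chan, data_sender_callable=partial(event_publisher, send_chan)
    )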
@@ -512,9 +518,11 @@ async def create_chat_completion(
     kwargs = body.model_dump(exclude=exclude)
 
     if body.logit_bias is not None:
-        kwargs['logits_processor'] = llama_cpp.LogitsProcessorList([
-            make_logit_bias_processor(llama, body.logit_bias, body.logit_bias_type),
-        ])
+        kwargs["logits_processor"] = llama_cpp.LogitsProcessorList(
+            [
+                make_logit_bias_processor(llama, body.logit_bias, body.logit_bias_type),
+            ]
+        )
 
     if body.stream:
         send_chan, recv_chan = anyio.create_memory_object_stream(10)
@@ -542,7 +550,7 @@ async def event_publisher(inner_send_chan: MemoryObjectSendStream):
         return EventSourceResponse(
             recv_chan,
             data_sender_callable=partial(event_publisher, send_chan),
-        ) # type: ignore
+        )  # type: ignore
     else:
         completion: llama_cpp.ChatCompletion = await run_in_threadpool(
             llama.create_chat_completion, **kwargs  # type: ignore