
Commit fea33c9

feat: Update llama.cpp
1 parent 4d574bd · commit fea33c9

File tree

2 files changed: +6 −1 lines changed


llama_cpp/llama_cpp.py

Lines changed: 5 additions & 0 deletions
@@ -264,6 +264,7 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCDa
 # LLAMA_FTYPE_MOSTLY_IQ3_M = 27, // except 1d tensors
 # LLAMA_FTYPE_MOSTLY_IQ2_S = 28, // except 1d tensors
 # LLAMA_FTYPE_MOSTLY_IQ2_M = 29, // except 1d tensors
+# LLAMA_FTYPE_MOSTLY_IQ4_XS = 30, // except 1d tensors

 # LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
 # };
@@ -295,6 +296,7 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCDa
 LLAMA_FTYPE_MOSTLY_IQ3_M = 27
 LLAMA_FTYPE_MOSTLY_IQ2_S = 28
 LLAMA_FTYPE_MOSTLY_IQ2_M = 29
+LLAMA_FTYPE_MOSTLY_IQ4_XS = 30
 LLAMA_FTYPE_GUESSED = 1024

 # enum llama_rope_scaling_type {
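The two hunks above expose llama.cpp's IQ4_XS quantization type to Python, first in the mirrored C enum comment and then as the constant LLAMA_FTYPE_MOSTLY_IQ4_XS = 30. A minimal sketch of how the constant could be used with the low-level quantize bindings already present in llama_cpp.py is below; the file paths are illustrative only, not part of this commit.

import ctypes
import llama_cpp

# Illustrative paths; adjust to local GGUF files.
fname_inp = b"models/model-f16.gguf"
fname_out = b"models/model-iq4_xs.gguf"

# Start from llama.cpp's default quantize params and request the new type.
params = llama_cpp.llama_model_quantize_default_params()
params.ftype = llama_cpp.LLAMA_FTYPE_MOSTLY_IQ4_XS  # == 30 after this change

ret = llama_cpp.llama_model_quantize(fname_inp, fname_out, ctypes.byref(params))
if ret != 0:
    raise RuntimeError(f"llama_model_quantize failed with code {ret}")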
@@ -548,6 +550,7 @@ class llama_model_params(ctypes.Structure):
 # float yarn_beta_fast; // YaRN low correction dim
 # float yarn_beta_slow; // YaRN high correction dim
 # uint32_t yarn_orig_ctx; // YaRN original context size
+# float defrag_thold; // defragment the KV cache if holes/size > thold, < 0 disabled (default)

 # ggml_backend_sched_eval_callback cb_eval;
 # void * cb_eval_user_data;
@@ -580,6 +583,7 @@ class llama_context_params(ctypes.Structure):
 yarn_beta_fast (float): YaRN low correction dim
 yarn_beta_slow (float): YaRN high correction dim
 yarn_orig_ctx (int): YaRN original context size
+defrag_thold (float): defragment the KV cache if holes/size > thold, < 0 disabled (default)
 cb_eval (ggml_backend_sched_eval_callback): callback for scheduling eval
 cb_eval_user_data (ctypes.ctypes.c_void_p): user data for cb_eval
 type_k (int): data type for K cache
@@ -605,6 +609,7 @@ class llama_context_params(ctypes.Structure):
 ("yarn_beta_fast", ctypes.c_float),
 ("yarn_beta_slow", ctypes.c_float),
 ("yarn_orig_ctx", ctypes.c_uint32),
+("defrag_thold", ctypes.c_float),
 ("cb_eval", ggml_backend_sched_eval_callback),
 ("cb_eval_user_data", ctypes.c_void_p),
 ("type_k", ctypes.c_int),

vendor/llama.cpp

Lines changed: 1 addition & 1 deletion (submodule pointer bumped to a newer llama.cpp commit)
