Skip to content

Commit c6b54d8

Browse files
committed
Sync quantize: Handle user-defined quantization levels for additional tensors (#12511)
1 parent 56e7783 commit c6b54d8

File tree

1 file changed: 6 additions (+6) and 0 deletions (-0).

llama_cpp/llama_cpp.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -702,6 +702,7 @@ class llama_model_params(ctypes.Structure):
702702
"""Parameters for llama_model
703703
704704
Attributes:
705+
devices (ctypes.Array[ggml_backend_dev_t]): NULL-terminated list of devices to use for offloading (if NULL, all available devices are used)
705706
tensor_buft_overrides(llama_model_tensor_buft_override): NULL-terminated list of buffer types to use for tensors that match a pattern
706707
n_gpu_layers (int): number of layers to store in VRAM
707708
split_mode (int): how to split the model across multiple GPUs
@@ -716,6 +717,7 @@ class llama_model_params(ctypes.Structure):
716717
check_tensors (bool): validate model tensor data"""
717718

718719
if TYPE_CHECKING:
720+
devices: CtypesArray[ctypes.c_void_p] # NOTE: unused
719721
tensor_buft_overrides: ctypes.POINTER(llama_model_tensor_buft_override)
720722
n_gpu_layers: int
721723
split_mode: int
@@ -915,6 +917,7 @@ class llama_context_params(ctypes.Structure):
915917
# bool keep_split; // quantize to the same number of shards
916918
# void * imatrix; // pointer to importance matrix data
917919
# void * kv_overrides; // pointer to vector containing overrides
920+
# void * tensor_types; // pointer to vector containing tensor types
918921
# } llama_model_quantize_params;
919922
class llama_model_quantize_params(ctypes.Structure):
920923
"""Parameters for llama_model_quantize
@@ -931,6 +934,7 @@ class llama_model_quantize_params(ctypes.Structure):
931934
keep_split (bool): quantize to the same number of shards
932935
imatrix (ctypes.c_void_p): pointer to importance matrix data
933936
kv_overrides (ctypes.c_void_p): pointer to vector containing overrides
937+
tensor_types (ctypes.c_void_p): pointer to vector containing tensor types
934938
"""
935939

936940
if TYPE_CHECKING:
@@ -945,6 +949,7 @@ class llama_model_quantize_params(ctypes.Structure):
945949
keep_split: bool
946950
imatrix: ctypes.c_void_p
947951
kv_overrides: ctypes.c_void_p
952+
tensor_types: ctypes.c_void_p
948953

949954
_fields_ = [
950955
("nthread", ctypes.c_int32),
@@ -958,6 +963,7 @@ class llama_model_quantize_params(ctypes.Structure):
958963
("keep_split", ctypes.c_bool),
959964
("imatrix", ctypes.c_void_p),
960965
("kv_overrides", ctypes.c_void_p),
966+
("tensor_types", ctypes.c_void_p),
961967
]
962968

963969

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy