diff --git a/CHANGELOG.md b/CHANGELOG.md index 605370e7d..affbd5db7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.3.9] + +- feat: Update llama.cpp to ggerganov/llama.cpp@8733e0cf6eefc7c7752297cc22d0836706f4222c + ## [0.3.8] - feat: Update llama.cpp to ggerganov/llama.cpp@7841fc723e059d1fd9640e5c0ef19050fcc7c698 diff --git a/CMakeLists.txt b/CMakeLists.txt index 64a0304a1..b9178e856 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -62,6 +62,9 @@ if (LLAMA_BUILD) # Enable building of the common library set(LLAMA_BUILD_COMMON ON CACHE BOOL "Build llama.cpp common library" FORCE) + # Disable building curl support + set(LLAMA_CURL OFF CACHE BOOL "llama.cpp: enable curl" FORCE) + # Architecture detection and settings for Apple platforms if (APPLE) # Get the target architecture @@ -143,7 +146,7 @@ if (LLAMA_BUILD) endif() # Building llava - add_subdirectory(vendor/llama.cpp/examples/llava) + add_subdirectory(vendor/llama.cpp/tools/mtmd) set_target_properties(llava_shared PROPERTIES OUTPUT_NAME "llava") if (WIN32) diff --git a/llama_cpp/__init__.py b/llama_cpp/__init__.py index b1a8b9baa..2c9c527cd 100644 --- a/llama_cpp/__init__.py +++ b/llama_cpp/__init__.py @@ -1,4 +1,4 @@ from .llama_cpp import * from .llama import * -__version__ = "0.3.8" +__version__ = "0.3.9" diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py index 710bd83c8..63de3a93a 100644 --- a/llama_cpp/llama_cpp.py +++ b/llama_cpp/llama_cpp.py @@ -235,6 +235,8 @@ # LLAMA_VOCAB_PRE_TYPE_SUPERBPE = 30, # LLAMA_VOCAB_PRE_TYPE_TRILLION = 31, # LLAMA_VOCAB_PRE_TYPE_BAILINGMOE = 32, +# LLAMA_VOCAB_PRE_TYPE_LLAMA4 = 33, +# LLAMA_VOCAB_PRE_TYPE_PIXTRAL = 34, # }; LLAMA_VOCAB_PRE_TYPE_DEFAULT = 0 LLAMA_VOCAB_PRE_TYPE_LLAMA3 = 1 @@ -252,7 +254,7 @@ LLAMA_VOCAB_PRE_TYPE_DBRX = 13 LLAMA_VOCAB_PRE_TYPE_SMAUG = 14 LLAMA_VOCAB_PRE_TYPE_PORO = 15 -LLAMA_VOCAV_PRE_TYPE_CHATGLM3 = 16 +LLAMA_VOCAB_PRE_TYPE_CHATGLM3 = 16 LLAMA_VOCAB_PRE_TYPE_CHATGLM4 = 17 LLAMA_VOCAB_PRE_TYPE_VIKING = 18 LLAMA_VOCAB_PRE_TYPE_JAIS = 19 @@ -269,6 +271,8 @@ LLAMA_VOCAB_PRE_TYPE_SUPERBPE = 30 LLAMA_VOCAB_PRE_TYPE_TRILLION = 31 LLAMA_VOCAB_PRE_TYPE_BAILINGMOE = 32 +LLAMA_VOCAB_PRE_TYPE_LLAMA4 = 33 +LLAMA_VOCAB_PRE_TYPE_PIXTRAL = 34 # // note: these values should be synchronized with ggml_rope @@ -891,17 +895,18 @@ class llama_context_params(ctypes.Structure): # // model quantization parameters # typedef struct llama_model_quantize_params { -# int32_t nthread; // number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency() -# enum llama_ftype ftype; // quantize to this llama_ftype -# enum ggml_type output_tensor_type; // output tensor type -# enum ggml_type token_embedding_type; // token embeddings tensor type -# bool allow_requantize; // allow quantizing non-f32/f16 tensors -# bool quantize_output_tensor; // quantize output.weight -# bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored -# bool pure; // quantize all tensors to the default type -# bool keep_split; // quantize to the same number of shards -# void * imatrix; // pointer to importance matrix data -# void * kv_overrides; // pointer to vector containing overrides +# int32_t nthread; // number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency() +# enum llama_ftype ftype; // quantize to this llama_ftype +# enum ggml_type output_tensor_type; // output tensor type +# enum ggml_type token_embedding_type; // token embeddings tensor type +# bool allow_requantize; // allow quantizing non-f32/f16 tensors +# bool quantize_output_tensor; // quantize output.weight +# bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored +# bool pure; // quantize all tensors to the default type +# bool keep_split; // quantize to the same number of shards +# void * imatrix; // pointer to importance matrix data +# void * kv_overrides; // pointer to vector containing overrides +# void * tensor_types; // pointer to vector containing tensor types # } llama_model_quantize_params; class llama_model_quantize_params(ctypes.Structure): """Parameters for llama_model_quantize @@ -918,6 +923,7 @@ class llama_model_quantize_params(ctypes.Structure): keep_split (bool): quantize to the same number of shards imatrix (ctypes.c_void_p): pointer to importance matrix data kv_overrides (ctypes.c_void_p): pointer to vector containing overrides + tensor_types (ctypes.c_void_p): pointer to vector containing tensor types """ if TYPE_CHECKING: @@ -932,6 +938,7 @@ class llama_model_quantize_params(ctypes.Structure): keep_split: bool imatrix: ctypes.c_void_p kv_overrides: ctypes.c_void_p + tensor_types: ctypes.c_void_p _fields_ = [ ("nthread", ctypes.c_int32), @@ -945,6 +952,7 @@ class llama_model_quantize_params(ctypes.Structure): ("keep_split", ctypes.c_bool), ("imatrix", ctypes.c_void_p), ("kv_overrides", ctypes.c_void_p), + ("tensor_types", ctypes.c_void_p), ] @@ -3812,6 +3820,7 @@ def llama_sampler_init_softmax() -> llama_sampler_p: # /// @details Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751 +# /// Setting k <= 0 makes this a noop # LLAMA_API struct llama_sampler * llama_sampler_init_top_k (int32_t k); @ctypes_function("llama_sampler_init_top_k", [ctypes.c_int32], llama_sampler_p_ctypes) def llama_sampler_init_top_k(k: int) -> llama_sampler_p: diff --git a/vendor/llama.cpp b/vendor/llama.cpp index 6bf28f011..8733e0cf6 160000 --- a/vendor/llama.cpp +++ b/vendor/llama.cpp @@ -1 +1 @@ -Subproject commit 6bf28f0111ff9f21b3c1b1eace20c590281e7ba6 +Subproject commit 8733e0cf6eefc7c7752297cc22d0836706f4222c pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy