diff --git a/README.md b/README.md
index e3d76436a..8d0980e0d 100644
--- a/README.md
+++ b/README.md
@@ -577,6 +577,12 @@ python3 -m llama_cpp.server --model models/7B/llama-model.gguf --chat_format chatml
 That will format the prompt according to how model expects it. You can find the prompt format in the model card.
 For possible options, see [llama_cpp/llama_chat_format.py](llama_cpp/llama_chat_format.py) and look for lines starting with "@register_chat_format".
 
+If you have `huggingface-hub` installed, you can also use the `--hf_model_repo_id` flag to load a model from the Hugging Face Hub.
+
+```bash
+python3 -m llama_cpp.server --hf_model_repo_id Qwen/Qwen1.5-0.5B-Chat-GGUF --model '*q8_0.gguf'
+```
+
 ### Web Server Features
 
 - [Local Copilot replacement](https://llama-cpp-python.readthedocs.io/en/latest/server/#code-completion)
diff --git a/llama_cpp/server/model.py b/llama_cpp/server/model.py
index 5308dc2a8..816d089f3 100644
--- a/llama_cpp/server/model.py
+++ b/llama_cpp/server/model.py
@@ -120,9 +120,20 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
                     kv_overrides[key] = float(value)
                 else:
                     raise ValueError(f"Unknown value type {value_type}")
+
+    import functools
 
-    _model = llama_cpp.Llama(
-        model_path=settings.model,
+    kwargs = {}
+
+    if settings.hf_model_repo_id is not None:
+        create_fn = functools.partial(llama_cpp.Llama.from_pretrained, repo_id=settings.hf_model_repo_id, filename=settings.model)
+    else:
+        create_fn = llama_cpp.Llama
+        kwargs["model_path"] = settings.model
+
+
+    _model = create_fn(
+        **kwargs,
         # Model Params
         n_gpu_layers=settings.n_gpu_layers,
         main_gpu=settings.main_gpu,
diff --git a/llama_cpp/server/settings.py b/llama_cpp/server/settings.py
index 8ac8df881..bd806748c 100644
--- a/llama_cpp/server/settings.py
+++ b/llama_cpp/server/settings.py
@@ -143,6 +143,11 @@ class ModelSettings(BaseSettings):
         default=None,
         description="The model name or path to a pretrained HuggingFace tokenizer model. Same as you would pass to AutoTokenizer.from_pretrained().",
     )
+    # Loading from HuggingFace Model Hub
+    hf_model_repo_id: Optional[str] = Field(
+        default=None,
+        description="The model repo id to use for loading the model from the Hugging Face Hub.",
+    )
     # Speculative Decoding
     draft_model: Optional[str] = Field(
         default=None,
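For context, the `model.py` hunk works by picking a constructor at call time: when `hf_model_repo_id` is set, `functools.partial` binds `repo_id` and `filename` onto `llama_cpp.Llama.from_pretrained` (which downloads the matching GGUF file via `huggingface-hub`; glob patterns such as `*q8_0.gguf` are accepted for `filename`), and otherwise the plain `Llama` constructor is used with `model_path`. Either way, a single call site can forward the shared keyword arguments. Below is a minimal, self-contained sketch of the same pattern; the helper name `make_llama` and the example values are illustrative, not part of the PR.

```python
# Minimal sketch of the dispatch pattern from the model.py hunk above.
# Assumes llama-cpp-python is installed (plus huggingface-hub for Hub loads);
# `make_llama` is a hypothetical helper, not part of the PR.
import functools
from typing import Optional

import llama_cpp


def make_llama(
    model: str,
    hf_model_repo_id: Optional[str] = None,
    **common_kwargs,
) -> llama_cpp.Llama:
    kwargs = dict(common_kwargs)
    if hf_model_repo_id is not None:
        # Bind the Hub-specific arguments; Llama.from_pretrained downloads
        # `filename` (glob patterns allowed) from `repo_id` and forwards the
        # remaining keyword arguments to Llama.__init__.
        create_fn = functools.partial(
            llama_cpp.Llama.from_pretrained,
            repo_id=hf_model_repo_id,
            filename=model,
        )
    else:
        # Local file: the plain constructor takes the path directly.
        create_fn = llama_cpp.Llama
        kwargs["model_path"] = model
    # A single call site serves both cases, as in load_llama_from_model_settings.
    return create_fn(**kwargs)


if __name__ == "__main__":
    # Mirrors the README example: fetch a quantized Qwen GGUF from the Hub.
    llm = make_llama(
        "*q8_0.gguf",
        hf_model_repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
        n_ctx=2048,
    )
    print(llm.model_path)
```

Note the design choice: because `from_pretrained` forwards unknown keyword arguments to `Llama.__init__`, the long list of server settings (`n_gpu_layers`, `main_gpu`, and so on) only has to be written once after `**kwargs` at the shared call site.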