diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index 4212669eb..281ee7918 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -410,8 +410,8 @@ def __init__(
         if self.verbose:
             print(f"Model metadata: {self.metadata}", file=sys.stderr)
 
-        eos_token_id = int(self.metadata.get("tokenizer.ggml.eos_token_id", self.token_eos()))
-        bos_token_id = int(self.metadata.get("tokenizer.ggml.bos_token_id", self.token_bos()))
+        eos_token_id = self.token_eos()
+        bos_token_id = self.token_bos()
 
         eos_token = self._model.token_get_text(eos_token_id)
         bos_token = self._model.token_get_text(bos_token_id)
@@ -961,9 +961,9 @@ def _create_completion(
         completion_id: str = f"cmpl-{str(uuid.uuid4())}"
         created: int = int(time.time())
 
-        prefix_token_id: int = int(self.metadata.get("tokenizer.ggml.prefix_token_id", self._model.token_prefix()))
-        middle_token_id: int = int(self.metadata.get("tokenizer.ggml.middle_token_id", self._model.token_middle()))
-        suffix_token_id: int = int(self.metadata.get("tokenizer.ggml.suffix_token_id", self._model.token_suffix()))
+        prefix_token_id: int = self._model.token_prefix()
+        middle_token_id: int = self._model.token_middle()
+        suffix_token_id: int = self._model.token_suffix()
         # If prompt is empty, initialize completion with BOS token to avoid
         # detokenization including a space at the beginning of the completion
         completion_tokens: List[int] = [] if len(prompt) > 0 else [self.token_bos()]
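For reference, a minimal sketch (not part of the PR) of how the special-token ids are resolved after this change: they now come from the llama.cpp vocab API rather than being re-parsed from string-typed GGUF metadata entries. The model path below is hypothetical, and `_model` is an internal attribute of `Llama`, used here only because the diff itself reads the fill-in-the-middle tokens through it.

```python
# Minimal sketch, assuming llama-cpp-python is installed and a local
# GGUF model exists at the (hypothetical) path below.
from llama_cpp import Llama

llm = Llama(model_path="./model.gguf", verbose=False)

# After this change, the BOS/EOS ids come from the llama.cpp vocab API
# instead of int(self.metadata.get("tokenizer.ggml.eos_token_id", ...)).
eos_id = llm.token_eos()
bos_id = llm.token_bos()

# The fill-in-the-middle tokens are read the same way via the internal
# model handle, as the diff does (typically -1 if the vocab lacks them).
prefix_id = llm._model.token_prefix()
middle_id = llm._model.token_middle()
suffix_id = llm._model.token_suffix()

print(eos_id, bos_id, prefix_id, middle_id, suffix_id)
```

A plausible motivation, judging from the removed lines: llama.cpp already resolves these ids when the model loads, so looking them up again in the metadata dict duplicates that work, and the `int(...)` cast could raise on a missing or non-numeric metadata value.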