diff --git a/docker/openblas_simple/Dockerfile b/docker/openblas_simple/Dockerfile
index 020c34df7..d852dc830 100644
--- a/docker/openblas_simple/Dockerfile
+++ b/docker/openblas_simple/Dockerfile
@@ -12,4 +12,4 @@ RUN python -m pip install --upgrade pip pytest cmake scikit-build setuptools fas
RUN CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" pip install llama_cpp_python --verbose
# Run the server
-CMD python3 -m llama_cpp.server
+CMD python3 -m llama_cpp.server --host 0.0.0.0 --port 8000 --n_ctx 4096
\ No newline at end of file
diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index 67ee2dbc6..c5cef5baf 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -887,6 +887,7 @@ def _create_completion(
completion_tokens: List[int] = []
# Add blank space to start of prompt to match OG llama tokenizer
prompt_tokens: List[int] = self.tokenize(prompt.encode("utf-8")) if prompt != "" else [self.token_bos()]
+ print(prompt)
text: bytes = b""
returned_tokens: int = 0
stop = (
@@ -1560,12 +1561,31 @@ def create_chat_completion(
stop = (
stop if isinstance(stop, list) else [stop] if isinstance(stop, str) else []
)
- chat_history = "".join(
- f'### {"Human" if message["role"] == "user" else "Assistant"}:{message["content"]}'
- for message in messages
- )
- PROMPT = chat_history + "### Assistant:"
- PROMPT_STOP = ["### Assistant:", "### Human:"]
+
+ # assistant, user, assistant
+ # system, user, assistant
+ # chat_history = "".join(
+ # f'### {"Human" if message["role"] == "user" else "Assistant"}:{message["content"]}'
+ # for message in messages
+ # )
+ # PROMPT = chat_history + "### Assistant:"
+
+
+            system_prompt_template = "[INST] <<SYS>>\n{}\n<</SYS>>\n\n"
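
Context for the new hunk: the "### Human / ### Assistant" prompt is being replaced with the Llama 2 chat template, which wraps each user turn in [INST] ... [/INST] and folds an optional system message into a <<SYS>> ... <</SYS>> block at the start of the first turn. Below is a minimal, illustrative sketch of how such a prompt is typically assembled from OpenAI-style messages; the function name and constants are assumptions for illustration and are not part of this patch.

# Illustrative sketch only -- not part of the patch above.
from typing import Dict, List

B_INST, E_INST = "[INST]", "[/INST]"
B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"

def build_llama2_prompt(messages: List[Dict[str, str]]) -> str:
    """Fold the system message into the first user turn, then alternate
    [INST] user [/INST] assistant </s><s> turns (Llama 2 chat convention)."""
    system = ""
    prompt = ""
    for message in messages:
        if message["role"] == "system":
            system = B_SYS + message["content"] + E_SYS
        elif message["role"] == "user":
            prompt += f"{B_INST} {system}{message['content']} {E_INST}"
            system = ""  # the system block is only prepended to the first user turn
        else:  # assistant
            prompt += f" {message['content']} </s><s>"
    return prompt

# Example:
#   build_llama2_prompt([
#       {"role": "system", "content": "Be brief."},
#       {"role": "user", "content": "Hi"},
#   ])
#   -> '[INST] <<SYS>>\nBe brief.\n<</SYS>>\n\nHi [/INST]'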