diff --git a/llama_cpp/server/app.py b/llama_cpp/server/app.py
index ec9280986..aa6afc112 100644
--- a/llama_cpp/server/app.py
+++ b/llama_cpp/server/app.py
@@ -41,6 +41,11 @@
     CreateEmbeddingRequest,
     CreateChatCompletionRequest,
     ModelList,
+    TokenizeInputRequest,
+    TokenizeInputResponse,
+    TokenizeInputCountResponse,
+    DetokenizeInputRequest,
+    DetokenizeInputResponse,
 )
 from llama_cpp.server.errors import RouteErrorHandler
 
@@ -196,6 +201,9 @@ async def authenticate(
     )
 
 
+openai_v1_tag = "OpenAI V1"
+
+
 @router.post(
     "/v1/completions",
     summary="Completion",
@@ -227,11 +235,13 @@ async def authenticate(
             },
         }
     },
+    tags=[openai_v1_tag],
 )
 @router.post(
     "/v1/engines/copilot-codex/completions",
     include_in_schema=False,
     dependencies=[Depends(authenticate)],
+    tags=[openai_v1_tag],
 )
 async def create_completion(
     request: Request,
@@ -297,7 +307,10 @@ def iterator() -> Iterator[llama_cpp.CreateCompletionStreamResponse]:
 
 
 @router.post(
-    "/v1/embeddings", summary="Embedding", dependencies=[Depends(authenticate)]
+    "/v1/embeddings",
+    summary="Embedding",
+    dependencies=[Depends(authenticate)],
+    tags=[openai_v1_tag],
 )
 async def create_embedding(
     request: CreateEmbeddingRequest,
@@ -339,6 +352,7 @@ async def create_embedding(
             },
         }
     },
+    tags=[openai_v1_tag],
 )
 async def create_chat_completion(
     request: Request,
@@ -391,7 +405,12 @@ def iterator() -> Iterator[llama_cpp.ChatCompletionChunk]:
     return iterator_or_completion
 
 
-@router.get("/v1/models", summary="Models", dependencies=[Depends(authenticate)])
+@router.get(
+    "/v1/models",
+    summary="Models",
+    dependencies=[Depends(authenticate)],
+    tags=[openai_v1_tag],
+)
 async def get_models(
     llama_proxy: LlamaProxy = Depends(get_llama_proxy),
 ) -> ModelList:
@@ -407,3 +426,51 @@ async def get_models(
             for model_alias in llama_proxy
         ],
     }
+
+
+extras_tag = "Extras"
+
+
+@router.post(
+    "/extras/tokenize",
+    summary="Tokenize",
+    dependencies=[Depends(authenticate)],
+    tags=[extras_tag],
+)
+async def tokenize(
+    body: TokenizeInputRequest,
+    llama_proxy: LlamaProxy = Depends(get_llama_proxy),
+) -> TokenizeInputResponse:
+    tokens = llama_proxy(body.model).tokenize(body.input.encode("utf-8"), special=True)
+
+    return {"tokens": tokens}
+
+
+@router.post(
+    "/extras/tokenize/count",
+    summary="Tokenize Count",
+    dependencies=[Depends(authenticate)],
+    tags=[extras_tag],
+)
+async def count_query_tokens(
+    body: TokenizeInputRequest,
+    llama_proxy: LlamaProxy = Depends(get_llama_proxy),
+) -> TokenizeInputCountResponse:
+    tokens = llama_proxy(body.model).tokenize(body.input.encode("utf-8"), special=True)
+
+    return {"count": len(tokens)}
+
+
+@router.post(
+    "/extras/detokenize",
+    summary="Detokenize",
+    dependencies=[Depends(authenticate)],
+    tags=[extras_tag],
+)
+async def detokenize(
+    body: DetokenizeInputRequest,
+    llama_proxy: LlamaProxy = Depends(get_llama_proxy),
+) -> DetokenizeInputResponse:
+    text = llama_proxy(body.model).detokenize(body.tokens).decode("utf-8")
+
+    return {"text": text}
diff --git a/llama_cpp/server/types.py b/llama_cpp/server/types.py
index 9a4b81e09..c8b2ebc6c 100644
--- a/llama_cpp/server/types.py
+++ b/llama_cpp/server/types.py
@@ -264,3 +264,39 @@ class ModelData(TypedDict):
 class ModelList(TypedDict):
     object: Literal["list"]
     data: List[ModelData]
+
+
+class TokenizeInputRequest(BaseModel):
+    model: Optional[str] = model_field
+    input: Optional[str] = Field(description="The input to tokenize.")
+
+    model_config = {
+        "json_schema_extra": {"examples": [{"input": "How many tokens in this query?"}]}
+    }
+
+
+class TokenizeInputResponse(BaseModel):
+    tokens: List[int] = Field(description="A list of tokens.")
+
+    model_config = {"json_schema_extra": {"example": {"tokens": [123, 321, 222]}}}
+
+
+class TokenizeInputCountResponse(BaseModel):
+    count: int = Field(description="The number of tokens in the input.")
+
+    model_config = {"json_schema_extra": {"example": {"count": 5}}}
+
+
+class DetokenizeInputRequest(BaseModel):
+    model: Optional[str] = model_field
+    tokens: List[int] = Field(description="A list of tokens to detokenize.")
+
+    model_config = {"json_schema_extra": {"example": [{"tokens": [123, 321, 222]}]}}
+
+
+class DetokenizeInputResponse(BaseModel):
+    text: str = Field(description="The detokenized text.")
+
+    model_config = {
+        "json_schema_extra": {"example": {"text": "How many tokens in this query?"}}
+    }
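
For context, a minimal usage sketch of the three new `/extras` endpoints (not part of the diff itself). It assumes a llama-cpp-python server is already running locally on the default port 8000 and that the `requests` package is installed; adjust the base URL and add an API key header if your server requires authentication.

```python
# Usage sketch for the new /extras endpoints (assumed server at localhost:8000).
import requests

BASE_URL = "http://localhost:8000"  # adjust host/port to your deployment
text = "How many tokens in this query?"

# Tokenize the input into model-specific token ids.
tokens = requests.post(
    f"{BASE_URL}/extras/tokenize", json={"input": text}
).json()["tokens"]

# Count tokens without returning the ids themselves.
count = requests.post(
    f"{BASE_URL}/extras/tokenize/count", json={"input": text}
).json()["count"]

# Round-trip the ids back into text.
detok = requests.post(
    f"{BASE_URL}/extras/detokenize", json={"tokens": tokens}
).json()["text"]

print(tokens, count, detok)
```

The request bodies mirror `TokenizeInputRequest` and `DetokenizeInputRequest` above; the optional `model` field selects a model alias and can be omitted to use the server's default model.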