Skip to content

Commit 9af23e5

Browse files
julien-c, mishig25, nsarrazin
authored
[Tiny Agents] Expose an OpenAI-compatible Web server (#1473)
If you think about it, an Agent can easily be wrapped into an OpenAI-compatible Chat Completion endpoint as if it were a "plain" model. 💡

One would simply need to display the tool call info in a specific UI, similar to what we do for reasoning tokens. Hence, I chose to wrap the tool call info into a set of `<tool_call_info>...</tool_call_info>` tags.

### How to run an example

```bash
# Start a web server on port 9999
# cd packages/tiny-agents
pnpm cli:watch serve ./src/agents/julien-c/local-coder/
```

Then run an example to see how it works, calling our standard `chatCompletionStream` method from `@huggingface/inference`:

```bash
# cd packages/tiny-agents
tsx src/example.ts
```

---------

Co-authored-by: Mishig <dmishig@gmail.com>
Co-authored-by: Nathan Sarrazin <sarrazin.nathan@gmail.com>
1 parent fc1731a commit 9af23e5

File tree

6 files changed

+181
-18
lines changed

6 files changed

+181
-18
lines changed

packages/mcp-client/src/Agent.ts

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ const exitLoopTools = [taskCompletionTool, askQuestionTool];
4646

4747
export class Agent extends McpClient {
4848
private readonly servers: (ServerConfig | StdioServerParameters)[];
49+
public readonly prompt: string;
4950
protected messages: ChatCompletionInputMessage[];
5051

5152
constructor({
@@ -73,10 +74,11 @@ export class Agent extends McpClient {
7374
super(provider ? { provider, endpointUrl, model, apiKey } : { provider, endpointUrl, model, apiKey });
7475
/// ^This shenanigan is just here to please an overzealous TS type-checker.
7576
this.servers = servers;
77+
this.prompt = prompt ?? DEFAULT_SYSTEM_PROMPT;
7678
this.messages = [
7779
{
7880
role: "system",
79-
content: prompt ?? DEFAULT_SYSTEM_PROMPT,
81+
content: this.prompt,
8082
},
8183
];
8284
}
@@ -86,19 +88,27 @@ export class Agent extends McpClient {
8688
}
8789

8890
async *run(
89-
input: string,
91+
input: string | ChatCompletionInputMessage[],
9092
opts: { abortSignal?: AbortSignal } = {}
9193
): AsyncGenerator<ChatCompletionStreamOutput | ChatCompletionInputMessageTool> {
92-
this.messages.push({
93-
role: "user",
94-
content: input,
95-
});
94+
let messages: ChatCompletionInputMessage[];
95+
if (typeof input === "string") {
96+
/// Use internal array of messages
97+
this.messages.push({
98+
role: "user",
99+
content: input,
100+
});
101+
messages = this.messages;
102+
} else {
103+
/// Use the passed messages directly
104+
messages = input;
105+
}
96106

97107
let numOfTurns = 0;
98108
let nextTurnShouldCallTools = true;
99109
while (true) {
100110
try {
101-
yield* this.processSingleTurnWithTools(this.messages, {
111+
yield* this.processSingleTurnWithTools(messages, {
102112
exitLoopTools,
103113
exitIfFirstChunkNoTool: numOfTurns > 0 && nextTurnShouldCallTools,
104114
abortSignal: opts.abortSignal,
@@ -111,7 +121,7 @@ export class Agent extends McpClient {
111121
}
112122
numOfTurns++;
113123
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
114-
const currentLast = this.messages.at(-1)!;
124+
const currentLast = messages.at(-1)!;
115125
debug("current role", currentLast.role);
116126
if (
117127
currentLast.role === "tool" &&

packages/tiny-agents/package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@
3434
"prepare": "pnpm run build",
3535
"test": "vitest run",
3636
"check": "tsc",
37-
"cli": "tsx src/cli.ts"
37+
"cli": "tsx src/cli.ts",
38+
"cli:watch": "tsx watch src/cli.ts"
3839
},
3940
"files": [
4041
"src",

packages/tiny-agents/src/cli.ts

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import { version as packageVersion } from "../package.json";
99
import { InputConfigSchema, ServerConfigSchema } from "./lib/types";
1010
import { debug, error, ANSI } from "./lib/utils";
1111
import { mainCliLoop } from "./lib/mainCliLoop";
12+
import { startServer } from "./lib/webServer";
1213
import { loadConfigFrom } from "./lib/loadConfigFrom";
1314

1415
const USAGE_HELP = `
@@ -212,13 +213,13 @@ async function main() {
212213
}
213214
);
214215

215-
if (command === "serve") {
216-
error(`Serve is not implemented yet, coming soon!`);
217-
process.exit(1);
216+
debug(agent);
217+
await agent.loadTools();
218+
219+
if (command === "run") {
220+
mainCliLoop(agent);
218221
} else {
219-
debug(agent);
220-
// main loop from mcp-client/cli.ts
221-
await mainCliLoop(agent);
222+
startServer(agent);
222223
}
223224
}
224225

packages/tiny-agents/src/example.ts

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
import { chatCompletionStream } from "@huggingface/inference";
2+
3+
/**
 * Example client: streams a chat completion from a locally running
 * `tiny-agents serve` instance and prints each content delta as it arrives.
 *
 * The server must already be listening on port 9999 before this is run.
 */
async function main(): Promise<void> {
	const endpointUrl = `http://localhost:9999/v1/chat/completions`;
	// launch "tiny-agents serve" before running this

	for await (const chunk of chatCompletionStream({
		endpointUrl,
		model: "",
		messages: [{ role: "user", content: "What are the top 5 trending models on Hugging Face?" }],
	})) {
		console.log(chunk.choices[0]?.delta.content);
	}
}

if (require.main === module) {
	/// Handle the rejection explicitly instead of leaving a floating promise,
	/// so a connection failure is reported and reflected in the exit code.
	main().catch((err: unknown) => {
		console.error(err);
		process.exitCode = 1;
	});
}

packages/tiny-agents/src/lib/mainCliLoop.ts

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@ import type { ChatCompletionStreamOutput } from "@huggingface/tasks";
55
import type { Agent } from "../index";
66

77
/**
8-
* From mcp-client/cli.ts
8+
* From mcp-client/cli.ts,
9+
* minus the agent.loadTools() done upstream.
910
*/
1011
export async function mainCliLoop(agent: Agent): Promise<void> {
1112
const rl = readline.createInterface({ input: stdin, output: stdout });
@@ -40,8 +41,6 @@ export async function mainCliLoop(agent: Agent): Promise<void> {
4041
throw err;
4142
});
4243

43-
await agent.loadTools();
44-
4544
stdout.write(ANSI.BLUE);
4645
stdout.write(`Agent loaded with ${agent.availableTools.length} tools:\n`);
4746
stdout.write(agent.availableTools.map((t) => `- ${t.function.name}`).join("\n"));
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
import type { IncomingMessage } from "node:http";
2+
import { createServer, ServerResponse } from "node:http";
3+
import type { AddressInfo } from "node:net";
4+
import { z } from "zod";
5+
import type { Agent } from "../index";
6+
import { ANSI } from "./utils";
7+
import { stdout } from "node:process";
8+
import type { ChatCompletionStreamOutput } from "@huggingface/tasks";
9+
10+
const REQUEST_ID_HEADER = "X-Request-Id";
11+
12+
/// A text part of a multi-part message content.
const TextContentPartSchema = z.object({
	type: z.literal("text"),
	text: z.string(),
});

/// An image part of a multi-part message content, referenced by URL.
const ImageUrlContentPartSchema = z.object({
	type: z.literal("image_url"),
	image_url: z.object({
		url: z.string(),
	}),
});

/// Message content is either a plain string or an array of text / image parts.
const MessageContentSchema = z.union([
	z.string(),
	z.array(z.union([TextContentPartSchema, ImageUrlContentPartSchema])),
]);

/**
 * Shape of the request body accepted by the chat completions route:
 * a list of user / assistant messages, with streaming explicitly enabled.
 */
const ChatCompletionInputSchema = z.object({
	messages: z.array(
		z.object({
			role: z.enum(["user", "assistant"]),
			content: MessageContentSchema,
		})
	),
	/// Only allow stream: true
	stream: z.literal(true),
});
38+
/**
 * Read the whole request body and parse it as JSON.
 *
 * Chunks are accumulated as raw bytes and decoded once at the end, so a
 * multi-byte UTF-8 character that happens to be split across two chunks is
 * decoded correctly (string-concatenating each chunk would corrupt it).
 *
 * @param req - Incoming HTTP request whose body stream is consumed.
 * @returns A promise resolving to the parsed (unvalidated) JSON value. It rejects
 *   when the stream emits an error or when the body is not valid JSON.
 */
function getJsonBody(req: IncomingMessage): Promise<unknown> {
	return new Promise<unknown>((resolve, reject) => {
		const chunks: Buffer[] = [];
		req.on("data", (chunk: Buffer | string) => {
			/// Chunks are strings only if an encoding was set on the stream upstream.
			chunks.push(typeof chunk === "string" ? Buffer.from(chunk, "utf8") : chunk);
		});
		req.on("end", () => {
			try {
				resolve(JSON.parse(Buffer.concat(chunks).toString("utf8")));
			} catch (e) {
				reject(e);
			}
		});
		req.on("error", reject);
	});
}
52+
/**
 * `ServerResponse` subclass adding a helper to reply with a JSON error payload.
 */
class ServerResp extends ServerResponse {
	/**
	 * Send `{ "error": reason }` with the given status code and end the response.
	 *
	 * If the headers were already sent (for instance once a streaming response
	 * has started), the status can no longer be changed and `writeHead` would
	 * throw; in that case the response is simply ended if it is still open.
	 *
	 * @param statusCode - HTTP status code to reply with.
	 * @param reason - Human-readable error message placed in the JSON body.
	 */
	error(statusCode: number, reason: string): void {
		if (this.headersSent) {
			if (!this.writableEnded) {
				this.end();
			}
			return;
		}
		this.writeHead(statusCode).end(JSON.stringify({ error: reason }));
	}
}
57+
58+
/**
 * Start an HTTP server exposing the agent through a streaming
 * `POST /v1/chat/completions` route that mimics an OpenAI-compatible endpoint.
 *
 * The request body is validated against `ChatCompletionInputSchema`, the agent's
 * prompt is prepended as a system message, and every chunk yielded by
 * `agent.run()` is forwarded as a Server-Sent Event. Tool results are re-shaped
 * into regular chunks whose delta has `role: "tool"`.
 *
 * The server listens on `process.env.PORT` when set, otherwise on port 9999.
 *
 * @param agent - Agent used to answer requests. Its tools are listed when the
 *   server starts listening (the CLI loads them before calling this function).
 */
export function startServer(agent: Agent): void {
	const server = createServer({ ServerResponse: ServerResp }, async (req, res) => {
		res.setHeader(REQUEST_ID_HEADER, crypto.randomUUID());
		res.setHeader("Content-Type", "application/json");
		if (req.method === "POST" && req.url === "/v1/chat/completions") {
			let body: unknown;
			let requestBody: z.infer<typeof ChatCompletionInputSchema>;
			try {
				body = await getJsonBody(req);
			} catch {
				return res.error(400, "Invalid JSON");
			}
			try {
				requestBody = ChatCompletionInputSchema.parse(body);
			} catch (err) {
				if (err instanceof z.ZodError) {
					return res.error(400, "Invalid ChatCompletionInput body \n" + JSON.stringify(err));
				}
				return res.error(400, "Invalid ChatCompletionInput body");
			}
			/// Ok, from now on we will send a SSE (Server-Sent Events) response.
			res.setHeaders(
				new Headers({
					"Content-Type": "text/event-stream",
					"Cache-Control": "no-cache",
					Connection: "keep-alive",
				})
			);

			/// Prepend the agent's prompt
			const messages = [
				{
					role: "system",
					content: agent.prompt,
				},
				...requestBody.messages,
			];

			/// Stop the agent as soon as the client goes away, instead of letting it
			/// keep calling the model and tools for a response nobody will read.
			/// ("close" also fires after a normal end, where aborting is a no-op.)
			const abortController = new AbortController();
			res.on("close", () => abortController.abort());

			try {
				for await (const chunk of agent.run(messages, { abortSignal: abortController.signal })) {
					if ("choices" in chunk) {
						res.write(`data: ${JSON.stringify(chunk)}\n\n`);
					} else {
						/// Tool call info
						/// /!\ We format it as a regular chunk of role = "tool"
						const chunkToolcallInfo = {
							choices: [
								{
									index: 0,
									delta: {
										role: "tool",
										content: `Tool[${chunk.name}] ${chunk.tool_call_id}\n` + chunk.content,
									},
								},
							],
							created: Math.floor(Date.now() / 1000),
							id: chunk.tool_call_id,
							model: "",
							system_fingerprint: "",
						} satisfies ChatCompletionStreamOutput;

						res.write(`data: ${JSON.stringify(chunkToolcallInfo)}\n\n`);
					}
				}
				/// Conventional end-of-stream marker of OpenAI-style SSE streams.
				res.write("data: [DONE]\n\n");
			} catch (err) {
				/// Headers are already sent, so the status code cannot change anymore:
				/// report the failure in-band, unless the client is already gone.
				if (!abortController.signal.aborted) {
					const reason = err instanceof Error ? err.message : String(err);
					console.error(err);
					res.write(`data: ${JSON.stringify({ error: reason })}\n\n`);
				}
			} finally {
				/// Always close the response so the client never hangs on a failed run.
				res.end();
			}
		} else {
			res.error(404, "Route or method not found, try POST /v1/chat/completions");
		}
	});
	server.listen(process.env.PORT ? Number.parseInt(process.env.PORT, 10) : 9_999, () => {
		stdout.write(ANSI.BLUE);
		stdout.write(`Agent loaded with ${agent.availableTools.length} tools:\n`);
		stdout.write(agent.availableTools.map((t) => `- ${t.function.name}`).join("\n"));
		stdout.write(ANSI.RESET);
		stdout.write("\n");
		console.log(ANSI.GRAY + `listening on http://localhost:${(server.address() as AddressInfo).port}` + ANSI.RESET);
	});
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy