
[Tiny Agents] Expose an OpenAI-compatible Web server (#1473) · huggingface/huggingface.js@9af23e5 · GitHub

Commit 9af23e5

julien-c, mishig25, and nsarrazin authored
[Tiny Agents] Expose an OpenAI-compatible Web server (#1473)
If you think about it, an Agent can easily be wrapped into an OpenAI-compatible Chat Completion endpoint as if it was a "plain" model. 💡

One would simply need to display the tool call info in a specific UI, similar to what we do for reasoning tokens. Hence, I chose to wrap the tool call infos into a set of `<tool_call_info>...</tool_call_info>` tags.

### How to run an example

```bash
# Start a web server on port 9999
# cd packages/tiny-agents
pnpm cli:watch serve ./src/agents/julien-c/local-coder/
```

Then run an example to see how it works, calling our standard `chatCompletionStream` method from `@huggingface/inference`:

```bash
# cd packages/tiny-agents
tsx src/example.ts
```

---------

Co-authored-by: Mishig <dmishig@gmail.com>
Co-authored-by: Nathan Sarrazin <sarrazin.nathan@gmail.com>
1 parent fc1731a commit 9af23e5
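To illustrate the UI idea described in the commit message, here is a minimal client-side sketch (not part of this commit) that surfaces tool-call chunks separately from regular assistant tokens. It assumes a local server started with `tiny-agents serve` is listening on port 9999 and that tool-call info arrives as deltas with `role: "tool"`, as in the `webServer.ts` diff further down; the function name `streamWithToolInfo` is hypothetical.

```ts
import { chatCompletionStream } from "@huggingface/inference";

// Hypothetical sketch: print tool-call info on dedicated lines,
// similar to how a UI might render reasoning tokens separately.
async function streamWithToolInfo(prompt: string): Promise<void> {
	for await (const chunk of chatCompletionStream({
		endpointUrl: "http://localhost:9999/v1/chat/completions", // assumed local tiny-agents server
		model: "",
		messages: [{ role: "user", content: prompt }],
	})) {
		const delta = chunk.choices[0]?.delta;
		if (!delta?.content) continue;
		if (delta.role === "tool") {
			// Tool-call info chunk emitted by the server (see webServer.ts below)
			console.log(`\n[tool] ${delta.content}`);
		} else {
			process.stdout.write(delta.content);
		}
	}
}
```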

File tree: 6 files changed, +181 −18 lines

packages/mcp-client/src/Agent.ts

Lines changed: 18 additions & 8 deletions

```diff
@@ -46,6 +46,7 @@ const exitLoopTools = [taskCompletionTool, askQuestionTool];
 
 export class Agent extends McpClient {
 	private readonly servers: (ServerConfig | StdioServerParameters)[];
+	public readonly prompt: string;
 	protected messages: ChatCompletionInputMessage[];
 
 	constructor({
@@ -73,10 +74,11 @@ export class Agent extends McpClient {
 		super(provider ? { provider, endpointUrl, model, apiKey } : { provider, endpointUrl, model, apiKey });
 		// ^This shenanigan is just here to please an overzealous TS type-checker.
 		this.servers = servers;
+		this.prompt = prompt ?? DEFAULT_SYSTEM_PROMPT;
 		this.messages = [
 			{
 				role: "system",
-				content: prompt ?? DEFAULT_SYSTEM_PROMPT,
+				content: this.prompt,
 			},
 		];
 	}
@@ -86,19 +88,27 @@ export class Agent extends McpClient {
 	}
 
 	async *run(
-		input: string,
+		input: string | ChatCompletionInputMessage[],
 		opts: { abortSignal?: AbortSignal } = {}
 	): AsyncGenerator<ChatCompletionStreamOutput | ChatCompletionInputMessageTool> {
-		this.messages.push({
-			role: "user",
-			content: input,
-		});
+		let messages: ChatCompletionInputMessage[];
+		if (typeof input === "string") {
+			// Use internal array of messages
+			this.messages.push({
+				role: "user",
+				content: input,
+			});
+			messages = this.messages;
+		} else {
+			// Use the passed messages directly
+			messages = input;
+		}
 
 		let numOfTurns = 0;
 		let nextTurnShouldCallTools = true;
 		while (true) {
 			try {
-				yield* this.processSingleTurnWithTools(this.messages, {
+				yield* this.processSingleTurnWithTools(messages, {
 					exitLoopTools,
 					exitIfFirstChunkNoTool: numOfTurns > 0 && nextTurnShouldCallTools,
 					abortSignal: opts.abortSignal,
@@ -111,7 +121,7 @@ export class Agent extends McpClient {
 			}
 			numOfTurns++;
 			// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
-			const currentLast = this.messages.at(-1)!;
+			const currentLast = messages.at(-1)!;
 			debug("current role", currentLast.role);
 			if (
 				currentLast.role === "tool" &&
```
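Since `run()` now also accepts a prebuilt message array, a caller that manages its own conversation history can pass it in directly, which is what the web server below does. A rough sketch under that assumption (the helper name `runWithHistory` is made up; it assumes `Agent` is exported from `@huggingface/mcp-client` and that the agent's tools are already loaded):

```ts
import type { ChatCompletionInputMessage } from "@huggingface/tasks";
import type { Agent } from "@huggingface/mcp-client";

// Hypothetical helper: drive the agent with an externally managed history,
// prepending the now-public `agent.prompt` as the system message.
async function runWithHistory(agent: Agent, history: ChatCompletionInputMessage[]): Promise<void> {
	const messages: ChatCompletionInputMessage[] = [{ role: "system", content: agent.prompt }, ...history];
	for await (const chunk of agent.run(messages)) {
		if ("choices" in chunk) {
			// Regular model output chunk (tool-call info chunks are skipped here)
			process.stdout.write(chunk.choices[0]?.delta.content ?? "");
		}
	}
}
```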

packages/tiny-agents/package.json

Lines changed: 2 additions & 1 deletion

```diff
@@ -34,7 +34,8 @@
 		"prepare": "pnpm run build",
 		"test": "vitest run",
 		"check": "tsc",
-		"cli": "tsx src/cli.ts"
+		"cli": "tsx src/cli.ts",
+		"cli:watch": "tsx watch src/cli.ts"
 	},
 	"files": [
 		"src",
```

packages/tiny-agents/src/cli.ts

Lines changed: 7 additions & 6 deletions

```diff
@@ -9,6 +9,7 @@ import { version as packageVersion } from "../package.json";
 import { InputConfigSchema, ServerConfigSchema } from "./lib/types";
 import { debug, error, ANSI } from "./lib/utils";
 import { mainCliLoop } from "./lib/mainCliLoop";
+import { startServer } from "./lib/webServer";
 import { loadConfigFrom } from "./lib/loadConfigFrom";
 
 const USAGE_HELP = `
@@ -212,13 +213,13 @@ async function main() {
 		}
 	);
 
-	if (command === "serve") {
-		error(`Serve is not implemented yet, coming soon!`);
-		process.exit(1);
+	debug(agent);
+	await agent.loadTools();
+
+	if (command === "run") {
+		mainCliLoop(agent);
 	} else {
-		debug(agent);
-		// main loop from mcp-client/cli.ts
-		await mainCliLoop(agent);
+		startServer(agent);
 	}
 }
 
```
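For reference, the `serve` code path now amounts to building an `Agent`, loading its tools, and handing it to `startServer`. Below is a rough standalone sketch of that flow, not taken from this commit: all config values are placeholders (the real CLI reads them from the agent directory), and the relative import assumes the snippet lives next to `cli.ts` in `packages/tiny-agents/src`.

```ts
import { Agent } from "@huggingface/mcp-client";
import { startServer } from "./lib/webServer";

// Sketch only: mirrors what `tiny-agents serve <path>` does, with placeholder config.
async function serveLikeTheCli(): Promise<void> {
	const agent = new Agent({
		provider: "nebius", // placeholder inference provider
		model: "Qwen/Qwen2.5-72B-Instruct", // placeholder model id
		apiKey: process.env.HF_TOKEN ?? "",
		servers: [], // MCP servers would come from the agent config
	});

	await agent.loadTools();
	startServer(agent); // exposes POST /v1/chat/completions on port 9999 (or $PORT)
}

serveLikeTheCli();
```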

packages/tiny-agents/src/example.ts

Lines changed: 18 additions & 0 deletions (new file)

```ts
import { chatCompletionStream } from "@huggingface/inference";

async function main() {
	const endpointUrl = `http://localhost:9999/v1/chat/completions`;
	// launch "tiny-agents serve" before running this

	for await (const chunk of chatCompletionStream({
		endpointUrl,
		model: "",
		messages: [{ role: "user", content: "What are the top 5 trending models on Hugging Face?" }],
	})) {
		console.log(chunk.choices[0]?.delta.content);
	}
}

if (require.main === module) {
	main();
}
```

packages/tiny-agents/src/lib/mainCliLoop.ts

Lines changed: 2 additions & 3 deletions

```diff
@@ -5,7 +5,8 @@ import type { ChatCompletionStreamOutput } from "@huggingface/tasks";
 import type { Agent } from "../index";
 
 /**
- * From mcp-client/cli.ts
+ * From mcp-client/cli.ts,
+ * minus the agent.loadTools() done upstream.
  */
 export async function mainCliLoop(agent: Agent): Promise<void> {
 	const rl = readline.createInterface({ input: stdin, output: stdout });
@@ -40,8 +41,6 @@ export async function mainCliLoop(agent: Agent): Promise<void> {
 		throw err;
 	});
 
-	await agent.loadTools();
-
 	stdout.write(ANSI.BLUE);
 	stdout.write(`Agent loaded with ${agent.availableTools.length} tools:\n`);
 	stdout.write(agent.availableTools.map((t) => `- ${t.function.name}`).join("\n"));
```
packages/tiny-agents/src/lib/webServer.ts

Lines changed: 134 additions & 0 deletions (new file)

```ts
import type { IncomingMessage } from "node:http";
import { createServer, ServerResponse } from "node:http";
import type { AddressInfo } from "node:net";
import { z } from "zod";
import type { Agent } from "../index";
import { ANSI } from "./utils";
import { stdout } from "node:process";
import type { ChatCompletionStreamOutput } from "@huggingface/tasks";

const REQUEST_ID_HEADER = "X-Request-Id";

const ChatCompletionInputSchema = z.object({
	messages: z.array(
		z.object({
			role: z.enum(["user", "assistant"]),
			content: z.string().or(
				z.array(
					z
						.object({
							type: z.literal("text"),
							text: z.string(),
						})
						.or(
							z.object({
								type: z.literal("image_url"),
								image_url: z.object({
									url: z.string(),
								}),
							})
						)
				)
			),
		})
	),
	// Only allow stream: true
	stream: z.literal(true),
});
function getJsonBody(req: IncomingMessage) {
	return new Promise((resolve, reject) => {
		let data = "";
		req.on("data", (chunk) => (data += chunk));
		req.on("end", () => {
			try {
				resolve(JSON.parse(data));
			} catch (e) {
				reject(e);
			}
		});
		req.on("error", reject);
	});
}
class ServerResp extends ServerResponse {
	error(statusCode: number, reason: string) {
		this.writeHead(statusCode).end(JSON.stringify({ error: reason }));
	}
}

export function startServer(agent: Agent): void {
	const server = createServer({ ServerResponse: ServerResp }, async (req, res) => {
		res.setHeader(REQUEST_ID_HEADER, crypto.randomUUID());
		res.setHeader("Content-Type", "application/json");
		if (req.method === "POST" && req.url === "/v1/chat/completions") {
			let body: unknown;
			let requestBody: z.infer<typeof ChatCompletionInputSchema>;
			try {
				body = await getJsonBody(req);
			} catch {
				return res.error(400, "Invalid JSON");
			}
			try {
				requestBody = ChatCompletionInputSchema.parse(body);
			} catch (err) {
				if (err instanceof z.ZodError) {
					return res.error(400, "Invalid ChatCompletionInput body \n" + JSON.stringify(err));
				}
				return res.error(400, "Invalid ChatCompletionInput body");
			}
			// Ok, from now on we will send a SSE (Server-Sent Events) response.
			res.setHeaders(
				new Headers({
					"Content-Type": "text/event-stream",
					"Cache-Control": "no-cache",
					Connection: "keep-alive",
				})
			);

			// Prepend the agent's prompt
			const messages = [
				{
					role: "system",
					content: agent.prompt,
				},
				...requestBody.messages,
			];

			for await (const chunk of agent.run(messages)) {
				if ("choices" in chunk) {
					res.write(`data: ${JSON.stringify(chunk)}\n\n`);
				} else {
					// Tool call info
					// /!\ We format it as a regular chunk of role = "tool"
					const chunkToolcallInfo = {
						choices: [
							{
								index: 0,
								delta: {
									role: "tool",
									content: `Tool[${chunk.name}] ${chunk.tool_call_id}\n` + chunk.content,
								},
							},
						],
						created: Math.floor(Date.now() / 1000),
						id: chunk.tool_call_id,
						model: "",
						system_fingerprint: "",
					} satisfies ChatCompletionStreamOutput;

					res.write(`data: ${JSON.stringify(chunkToolcallInfo)}\n\n`);
				}
			}
			res.end();
		} else {
			res.error(404, "Route or method not found, try POST /v1/chat/completions");
		}
	});
	server.listen(process.env.PORT ? parseInt(process.env.PORT) : 9_999, () => {
		stdout.write(ANSI.BLUE);
		stdout.write(`Agent loaded with ${agent.availableTools.length} tools:\n`);
		stdout.write(agent.availableTools.map((t) => `- ${t.function.name}`).join("\n"));
		stdout.write(ANSI.RESET);
		stdout.write("\n");
		console.log(ANSI.GRAY + `listening on http://localhost:${(server.address() as AddressInfo).port}` + ANSI.RESET);
	});
}
```
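To make the wire format concrete: each chunk above is written as an SSE `data: <json>` line followed by a blank line. Below is a minimal raw consumer sketched without the `@huggingface/inference` client; it is not part of the commit, assumes the server is running locally on port 9999, and uses naive line parsing (a robust client would buffer partial lines across reads).

```ts
// Hypothetical raw SSE reader for the server above (sketch only).
async function dumpRawStream(): Promise<void> {
	const res = await fetch("http://localhost:9999/v1/chat/completions", {
		method: "POST",
		headers: { "Content-Type": "application/json" },
		// The zod schema above only accepts stream: true
		body: JSON.stringify({ stream: true, messages: [{ role: "user", content: "Hello!" }] }),
	});
	if (!res.ok || !res.body) {
		throw new Error(`Request failed with status ${res.status}`);
	}
	const reader = res.body.getReader();
	const decoder = new TextDecoder();
	while (true) {
		const { done, value } = await reader.read();
		if (done) break;
		// Naive parsing: assumes each read contains whole `data: ...` lines
		for (const line of decoder.decode(value).split("\n")) {
			if (line.startsWith("data: ")) {
				const chunk = JSON.parse(line.slice("data: ".length));
				console.log(chunk.choices?.[0]?.delta);
			}
		}
	}
}

dumpRawStream();
```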
