Content-Length: 655458 | pFad | http://github.com/huggingface/huggingface.js/commit/24f86c78ac6b88ac5db5096705db8b77f8eb79da

[Inference Providers] feat. Refactor the NovitaTextToVideoTask using … · huggingface/huggingface.js@24f86c7 · GitHub
Skip to content

Commit 24f86c7

Browse files
[Inference Providers] feat. Refactor the NovitaTextToVideoTask using the async API. (#1459)
This PR refactors the `NovitaTextToVideoTask` using the async API, aiming to allow inference with models that may take over 2 minutes to generate results.

---------

Co-authored-by: SBrandeis <simon@huggingface.co>
1 parent 50076ec commit 24f86c7

File tree

4 files changed

+85
-25
lines changed

4 files changed

+85
-25
lines changed

packages/inference/README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ Currently, we support the following providers:
5353
- [HF Inference](https://huggingface.co/docs/inference-providers/providers/hf-inference)
5454
- [Hyperbolic](https://hyperbolic.xyz)
5555
- [Nebius](https://studio.nebius.ai)
56-
- [Novita](https://novita.ai/?utm_source=github_huggingface&utm_medium=github_readme&utm_campaign=link)
56+
- [Novita](https://novita.ai)
5757
- [Nscale](https://nscale.com)
5858
- [OVHcloud](https://endpoints.ai.cloud.ovh.net/)
5959
- [Replicate](https://replicate.com)
@@ -96,6 +96,7 @@ Only a subset of models are supported when requesting third-party providers. You
9696
- [Cohere supported models](https://huggingface.co/api/partners/cohere/models)
9797
- [Cerebras supported models](https://huggingface.co/api/partners/cerebras/models)
9898
- [Groq supported models](https://console.groq.com/docs/models)
99+
- [Novita AI supported models](https://huggingface.co/api/partners/novita/models)
99100

100101
**Important note:** To be compatible, the third-party API must adhere to the "standard" shape API we expect on HF model pages for each pipeline task type.
101102
This is not an issue for LLMs as everyone converged on the OpenAI API anyways, but can be more tricky for other tasks like "text-to-image" or "automatic-speech-recognition" where there exists no standard API. Let us know if any help is needed or if we can make things easier for you!

packages/inference/src/lib/getProviderHelper.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ export const PROVIDERS: Record<InferenceProvider, Partial<Record<InferenceTask,
120120
novita: {
121121
conversational: new Novita.NovitaConversationalTask(),
122122
"text-generation": new Novita.NovitaTextGenerationTask(),
123+
"text-to-video": new Novita.NovitaTextToVideoTask(),
123124
},
124125
nscale: {
125126
"text-to-image": new Nscale.NscaleTextToImageTask(),

packages/inference/src/providers/novita.ts

Lines changed: 80 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@
1616
*/
1717
import { InferenceOutputError } from "../lib/InferenceOutputError.js";
1818
import { isUrl } from "../lib/isUrl.js";
19+
import type { TextToVideoArgs } from "../tasks/index.js";
1920
import type { BodyParams, UrlParams } from "../types.js";
21+
import { delay } from "../utils/delay.js";
2022
import { omit } from "../utils/omit.js";
2123
import {
2224
BaseConversationalTask,
@@ -26,11 +28,11 @@ import {
2628
} from "./providerHelper.js";
2729

2830
const NOVITA_API_BASE_URL = "https://api.novita.ai";
29-
export interface NovitaOutput {
30-
video: {
31-
video_url: string;
32-
};
31+
32+
export interface NovitaAsyncAPIOutput {
33+
task_id: string;
3334
}
35+
3436
export class NovitaTextGenerationTask extends BaseTextGenerationTask {
3537
constructor() {
3638
super("novita", NOVITA_API_BASE_URL);
@@ -50,38 +52,94 @@ export class NovitaConversationalTask extends BaseConversationalTask {
5052
return "/v3/openai/chat/completions";
5153
}
5254
}
55+
5356
export class NovitaTextToVideoTask extends TaskProviderHelper implements TextToVideoTaskHelper {
5457
constructor() {
5558
super("novita", NOVITA_API_BASE_URL);
5659
}
5760

58-
makeRoute(params: UrlParams): string {
59-
return `/v3/hf/${params.model}`;
61+
override makeRoute(params: UrlParams): string {
62+
return `/v3/async/${params.model}`;
6063
}
6164

62-
preparePayload(params: BodyParams): Record<string, unknown> {
65+
override preparePayload(params: BodyParams<TextToVideoArgs>): Record<string, unknown> {
66+
const { num_inference_steps, ...restParameters } = params.args.parameters ?? {};
6367
return {
6468
...omit(params.args, ["inputs", "parameters"]),
65-
...(params.args.parameters as Record<string, unknown>),
69+
...restParameters,
70+
steps: num_inference_steps,
6671
prompt: params.args.inputs,
6772
};
6873
}
69-
override async getResponse(response: NovitaOutput): Promise<Blob> {
70-
const isValidOutput =
71-
typeof response === "object" &&
72-
!!response &&
73-
"video" in response &&
74-
typeof response.video === "object" &&
75-
!!response.video &&
76-
"video_url" in response.video &&
77-
typeof response.video.video_url === "string" &&
78-
isUrl(response.video.video_url);
7974

80-
if (!isValidOutput) {
81-
throw new InferenceOutputError("Expected { video: { video_url: string } }");
75+
override async getResponse(
76+
response: NovitaAsyncAPIOutput,
77+
url?: string,
78+
headers?: Record<string, string>
79+
): Promise<Blob> {
80+
if (!url || !headers) {
81+
throw new InferenceOutputError("URL and headers are required for text-to-video task");
8282
}
83+
const taskId = response.task_id;
84+
if (!taskId) {
85+
throw new InferenceOutputError("No task ID found in the response");
86+
}
87+
88+
const parsedUrl = new URL(url);
89+
const baseUrl = `${parsedUrl.protocol}//${parsedUrl.host}${
90+
parsedUrl.host === "router.huggingface.co" ? "/novita" : ""
91+
}`;
92+
const resultUrl = `${baseUrl}/v3/async/task-result?task_id=${taskId}`;
93+
94+
let status = "";
95+
let taskResult: unknown;
8396

84-
const urlResponse = await fetch(response.video.video_url);
85-
return await urlResponse.blob();
97+
while (status !== "TASK_STATUS_SUCCEED" && status !== "TASK_STATUS_FAILED") {
98+
await delay(500);
99+
const resultResponse = await fetch(resultUrl, { headers });
100+
if (!resultResponse.ok) {
101+
throw new InferenceOutputError("Failed to fetch task result");
102+
}
103+
try {
104+
taskResult = await resultResponse.json();
105+
if (
106+
taskResult &&
107+
typeof taskResult === "object" &&
108+
"task" in taskResult &&
109+
taskResult.task &&
110+
typeof taskResult.task === "object" &&
111+
"status" in taskResult.task &&
112+
typeof taskResult.task.status === "string"
113+
) {
114+
status = taskResult.task.status;
115+
} else {
116+
throw new InferenceOutputError("Failed to get task status");
117+
}
118+
} catch (error) {
119+
throw new InferenceOutputError("Failed to parse task result");
120+
}
121+
}
122+
123+
if (status === "TASK_STATUS_FAILED") {
124+
throw new InferenceOutputError("Task failed");
125+
}
126+
127+
if (
128+
typeof taskResult === "object" &&
129+
!!taskResult &&
130+
"videos" in taskResult &&
131+
typeof taskResult.videos === "object" &&
132+
!!taskResult.videos &&
133+
Array.isArray(taskResult.videos) &&
134+
taskResult.videos.length > 0 &&
135+
"video_url" in taskResult.videos[0] &&
136+
typeof taskResult.videos[0].video_url === "string" &&
137+
isUrl(taskResult.videos[0].video_url)
138+
) {
139+
const urlResponse = await fetch(taskResult.videos[0].video_url);
140+
return await urlResponse.blob();
141+
} else {
142+
throw new InferenceOutputError("Expected { videos: [{ video_url: string }] }");
143+
}
86144
}
87145
}

packages/inference/src/tasks/cv/textToVideo.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import { resolveProvider } from "../../lib/getInferenceProviderMapping.js";
33
import { getProviderHelper } from "../../lib/getProviderHelper.js";
44
import { makeRequestOptions } from "../../lib/makeRequestOptions.js";
55
import type { FalAiQueueOutput } from "../../providers/fal-ai.js";
6-
import type { NovitaOutput } from "../../providers/novita.js";
6+
import type { NovitaAsyncAPIOutput } from "../../providers/novita.js";
77
import type { ReplicateOutput } from "../../providers/replicate.js";
88
import type { BaseArgs, Options } from "../../types.js";
99
import { innerRequest } from "../../utils/request.js";
@@ -15,7 +15,7 @@ export type TextToVideoOutput = Blob;
1515
export async function textToVideo(args: TextToVideoArgs, options?: Options): Promise<TextToVideoOutput> {
1616
const provider = await resolveProvider(args.provider, args.model, args.endpointUrl);
1717
const providerHelper = getProviderHelper(provider, "text-to-video");
18-
const { data: response } = await innerRequest<FalAiQueueOutput | ReplicateOutput | NovitaOutput>(
18+
const { data: response } = await innerRequest<FalAiQueueOutput | ReplicateOutput | NovitaAsyncAPIOutput>(
1919
args,
2020
providerHelper,
2121
{

0 commit comments

Comments
 (0)








ApplySandwichStrip

pFad - (p)hone/(F)rame/(a)nonymizer/(d)eclutterfier!      Saves Data!


--- a PPN by Garber Painting Akron. With Image Size Reduction included!

Fetched URL: http://github.com/huggingface/huggingface.js/commit/24f86c78ac6b88ac5db5096705db8b77f8eb79da

Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy