@@ -8,7 +8,7 @@ use sailfish::TemplateOnce;
8
8
// ServerlessModels holds a `style_type` string plus three fixed-size arrays of
// `Component`, one per model category: embedding, instruct and summarization.
// Each array length is part of the field's type, so it has to equal the number
// of `Component::from(Row::new(..))` entries supplied for that field in
// `impl ServerlessModels`.
// NOTE(review): the meaning of `style_type` is not visible in this hunk — confirm
// against the template that consumes it.
pub struct ServerlessModels {
9
9
style_type : String ,
10
10
embedding_models : [ Component ; 4 ] ,
11
// Old declaration (removed): five instruct rows.
- instruct_models : [ Component ; 5 ] ,
11
// New declaration (added): six instruct rows, matching the extra
// `Row` introduced in the `instruct_models` initializer by this change.
+ instruct_models : [ Component ; 6 ] ,
12
12
summarization_models : [ Component ; 1 ] ,
13
13
}
14
14
@@ -48,46 +48,53 @@ impl ServerlessModels {
48
48
] ,
49
49
instruct_models : [
50
50
Component :: from ( Row :: new ( & [
51
- "meta-llama/Meta-Llama-3.1-70B -Instruct" . into ( ) ,
52
- "70,000 " . into ( ) ,
53
- "70,000 " . into ( ) ,
54
- "8,000 " . into ( ) ,
51
+ "meta-llama/Meta-Llama-3.1-405B -Instruct" . into ( ) ,
52
+ "405 " . into ( ) ,
53
+ "405 " . into ( ) ,
54
+ "128k " . into ( ) ,
55
55
"Highest quality" . into ( ) ,
56
56
] ) ) ,
57
+ Component :: from ( Row :: new ( & [
58
+ "meta-llama/Meta-Llama-3.1-70B-Instruct" . into ( ) ,
59
+ "70" . into ( ) ,
60
+ "70" . into ( ) ,
61
+ "128k" . into ( ) ,
62
+ "High quality" . into ( ) ,
63
+ ] ) ) ,
57
64
Component :: from ( Row :: new ( & [
58
65
"meta-llama/Meta-Llama-3.1-8B-Instruct" . into ( ) ,
59
- "8,000 " . into ( ) ,
60
- "8,000 " . into ( ) ,
61
- "8,000 " . into ( ) ,
62
- "High quality, low latency" . into ( ) ,
66
+ "8" . into ( ) ,
67
+ "8" . into ( ) ,
68
+ "128k " . into ( ) ,
69
+ "Low latency" . into ( ) ,
63
70
] ) ) ,
64
71
Component :: from ( Row :: new ( & [
65
72
"microsoft/Phi-3-mini-128k-instruct" . into ( ) ,
66
- "3,820 " . into ( ) ,
67
- "3,820 " . into ( ) ,
68
- "128,000 " . into ( ) ,
73
+ "3.8 " . into ( ) ,
74
+ "3.8 " . into ( ) ,
75
+ "128k " . into ( ) ,
69
76
"Lowest latency" . into ( ) ,
70
77
] ) ) ,
71
78
Component :: from ( Row :: new ( & [
72
79
"mistralai/Mixtral-8x7B-Instruct-v0.1" . into ( ) ,
73
- "56,000 " . into ( ) ,
74
- "12,900 " . into ( ) ,
75
- "32,768 " . into ( ) ,
80
+ "56" . into ( ) ,
81
+ "12.9 " . into ( ) ,
82
+ "32k " . into ( ) ,
76
83
"MOE high quality" . into ( ) ,
77
84
] ) ) ,
78
85
Component :: from ( Row :: new ( & [
79
86
"mistralai/Mistral-7B-Instruct-v0.2" . into ( ) ,
80
- "7,000 " . into ( ) ,
81
- "7,000 " . into ( ) ,
82
- "32,768 " . into ( ) ,
83
- "High quality, low latency" . into ( ) ,
87
+ "7" . into ( ) ,
88
+ "7" . into ( ) ,
89
+ "32k " . into ( ) ,
90
+ "Low latency" . into ( ) ,
84
91
] ) ) ,
85
92
] ,
86
93
summarization_models : [ Component :: from ( Row :: new ( & [
87
94
"google/pegasus-xsum" . into ( ) ,
88
95
"568" . into ( ) ,
89
96
"512" . into ( ) ,
90
- "8,000 " . into ( ) ,
97
+ "8k " . into ( ) ,
91
98
] ) ) ] ,
92
99
}
93
100
}
0 commit comments