forked from mlc-ai/web-llm
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmessage.ts
163 lines (152 loc) · 4.1 KB
/
message.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
import { AppConfig, ChatOptions } from "./config";
import { InitProgressReport, LogLevel } from "./types";
import {
ChatCompletionRequestStreaming,
ChatCompletionRequestNonStreaming,
ChatCompletion,
ChatCompletionChunk,
CompletionCreateParamsNonStreaming,
CompletionCreateParamsStreaming,
Completion,
EmbeddingCreateParams,
CreateEmbeddingResponse,
} from "./openai_api_protocols/index";
/**
 * Message kind used by worker
 *
 * Discriminant for `WorkerRequest.kind`: names the operation the main thread
 * asks the worker to perform. Most kinds have a matching `*Params` interface
 * below describing the `content` payload they carry; control kinds such as
 * "keepAlive" carry no dedicated params interface here.
 */
type RequestKind =
  | "reload"
  | "runtimeStatsText"
  | "interruptGenerate"
  | "unload"
  | "resetChat"
  | "getMaxStorageBufferBindingSize"
  | "getGPUVendor"
  | "forwardTokensAndSample"
  | "chatCompletionNonStreaming"
  | "completionNonStreaming"
  | "embedding"
  | "getMessage"
  | "chatCompletionStreamInit"
  | "completionStreamInit"
  | "completionStreamNextChunk"
  | "customRequest"
  | "keepAlive"
  | "setLogLevel"
  | "setAppConfig";
/**
 * Message kinds a worker may send back to the main thread. Kept in sync with
 * the `kind` discriminants of the `WorkerResponse` union members below:
 * "return" / "throw" (OneTimeWorkerResponse), "initProgressCallback"
 * (InitProgressWorkerResponse), and "heartbeat" (HeartbeatWorkerResponse).
 * Currently unreferenced (hence the eslint suppression); retained as
 * documentation of the response protocol.
 */
// eslint-disable-next-line @typescript-eslint/no-unused-vars
type ResponseKind = "return" | "throw" | "initProgressCallback" | "heartbeat";
/**
 * Payload of a "reload" request: (re)load the specified model(s) into the
 * engine. Both fields are lists because an engine can load multiple models;
 * presumably `chatOpts[i]` pairs with `modelId[i]` — confirm against handler.
 */
export interface ReloadParams {
  modelId: string[];
  chatOpts?: ChatOptions[];
}
/**
 * Payload of a "resetChat" request.
 * `keepStats` — presumably whether runtime statistics survive the reset;
 * confirm in engine implementation.
 * `modelId` — target model; behavior when omitted (default model? all
 * loaded models?) is decided by the handler — not visible here.
 */
export interface ResetChatParams {
  keepStats: boolean;
  modelId?: string;
}
/**
 * Payload of a "getMessage" request. `modelId` optionally selects which
 * loaded model the message is fetched from.
 */
export interface GetMessageParams {
  modelId?: string;
}
/**
 * Payload of a "runtimeStatsText" request. `modelId` optionally selects
 * which loaded model's stats are reported.
 */
export interface RuntimeStatsTextParams {
  modelId?: string;
}
/**
 * Payload of a "forwardTokensAndSample" request: run a forward pass over
 * `inputIds` (token ids) and sample the next token.
 * `isPrefill` — presumably selects prefill vs decode forward; confirm in
 * the engine implementation.
 * `modelId` — optional target model among those loaded.
 */
export interface ForwardTokensAndSampleParams {
  inputIds: Array<number>;
  isPrefill: boolean;
  modelId?: string;
}
// Notes on the following Params with modelId and chatOpts:
// These fields are the model and chatOpts that the frontend engine expects the backend
// to be loaded with. If not loaded due to web/service worker unexpectedly killed,
// handler will call reload(). An engine can load multiple models, hence both are list.
// TODO(webllm-team): should add appConfig here as well if rigorous.
// For more, see https://github.com/mlc-ai/web-llm/pull/471
// Note on the messages with selectedModelId:
// This is the modelId this request uses. It is needed to identify which async generator
// to instantiate / use, since an engine can load multiple models, thus the handler
// needs to maintain multiple generators.
/**
 * Payload of a "chatCompletionNonStreaming" request: the OpenAI-style chat
 * completion request plus the model/chatOpts the backend is expected to have
 * loaded (see notes above).
 */
export interface ChatCompletionNonStreamingParams {
  request: ChatCompletionRequestNonStreaming;
  modelId: string[];
  chatOpts?: ChatOptions[];
}
/**
 * Payload of a "chatCompletionStreamInit" request: starts a streaming chat
 * completion. `selectedModelId` identifies which loaded model (and hence
 * which async generator) serves this stream; `modelId`/`chatOpts` are the
 * expected loaded state (see notes above ChatCompletionNonStreamingParams).
 */
export interface ChatCompletionStreamInitParams {
  request: ChatCompletionRequestStreaming;
  selectedModelId: string;
  modelId: string[];
  chatOpts?: ChatOptions[];
}
/**
 * Payload of a "completionNonStreaming" request: an OpenAI-style text
 * completion plus the expected loaded model/chatOpts (see notes above
 * ChatCompletionNonStreamingParams).
 */
export interface CompletionNonStreamingParams {
  request: CompletionCreateParamsNonStreaming;
  modelId: string[];
  chatOpts?: ChatOptions[];
}
/**
 * Payload of a "completionStreamInit" request: starts a streaming text
 * completion. `selectedModelId` identifies the serving model/generator;
 * `modelId`/`chatOpts` are the expected loaded state (see notes above
 * ChatCompletionNonStreamingParams).
 */
export interface CompletionStreamInitParams {
  request: CompletionCreateParamsStreaming;
  selectedModelId: string;
  modelId: string[];
  chatOpts?: ChatOptions[];
}
/**
 * Payload of an "embedding" request: an OpenAI-style embedding request plus
 * the expected loaded model/chatOpts (see notes above
 * ChatCompletionNonStreamingParams).
 */
export interface EmbeddingParams {
  request: EmbeddingCreateParams;
  modelId: string[];
  chatOpts?: ChatOptions[];
}
/**
 * Payload of a "completionStreamNextChunk" request: pull the next chunk from
 * the stream previously initialized for `selectedModelId` (the handler keeps
 * one async generator per loaded model — see notes above).
 */
export interface CompletionStreamNextChunkParams {
  selectedModelId: string;
}
/**
 * Payload of a "customRequest": an extension point for user-defined
 * worker messages. `requestName` names the custom handler; `requestMessage`
 * is its opaque string payload.
 */
export interface CustomRequestParams {
  requestName: string;
  requestMessage: string;
}
/**
 * Union of every payload type that can travel in `WorkerRequest.content` or
 * `WorkerResponse.content`: the request `*Params` interfaces above, result
 * objects from the OpenAI-protocol APIs, progress reports, config/log-level
 * values, primitives, or nothing at all (`void`/`null`).
 */
export type MessageContent =
  | ReloadParams
  | ResetChatParams
  | GetMessageParams
  | RuntimeStatsTextParams
  | ForwardTokensAndSampleParams
  | ChatCompletionNonStreamingParams
  | ChatCompletionStreamInitParams
  | CompletionNonStreamingParams
  | CompletionStreamInitParams
  | EmbeddingParams
  | CompletionStreamNextChunkParams
  | CustomRequestParams
  | InitProgressReport
  | LogLevel
  | string
  | null
  | number
  | ChatCompletion
  | ChatCompletionChunk
  | CreateEmbeddingResponse
  | Completion
  | AppConfig
  | void;
/**
 * The message used in exchange between worker
 * and the main thread.
 *
 * `kind` selects the operation, `uuid` correlates this request with its
 * eventual response(s), and `content` carries the kind-specific payload
 * (see the `*Params` interfaces above).
 */
export type WorkerRequest = {
  kind: RequestKind;
  uuid: string;
  content: MessageContent;
};
/**
 * Liveness signal from the worker — presumably emitted in response to a
 * "keepAlive" request (see RequestKind); confirm in the handler. Carries no
 * content, only the correlating `uuid`.
 */
type HeartbeatWorkerResponse = {
  kind: "heartbeat";
  uuid: string;
};
/**
 * Terminal reply to a request identified by `uuid`: "return" carries the
 * successful result in `content`; "throw" signals failure — content
 * presumably describes the error; confirm in the handler.
 */
type OneTimeWorkerResponse = {
  kind: "return" | "throw";
  uuid: string;
  content: MessageContent;
};
/**
 * Progress update for the init-progress callback: carries an
 * `InitProgressReport` for the request identified by `uuid`.
 */
type InitProgressWorkerResponse = {
  kind: "initProgressCallback";
  uuid: string;
  content: InitProgressReport;
};
/**
 * Any message a worker can send back to the main thread: a one-time
 * return/throw, a streamed init-progress update, or a heartbeat.
 */
export type WorkerResponse =
  | OneTimeWorkerResponse
  | InitProgressWorkerResponse
  | HeartbeatWorkerResponse;