forked from rai-project/dlframework
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpredictor.proto
408 lines (367 loc) · 14.5 KB
/
predictor.proto
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
syntax = "proto3";
// MLModelScope
//
// MLModelScope is a hardware/software agnostic platform
// to facilitate the evaluation, measurement, and introspection
// of ML models within AI pipelines. MLModelScope aids application
// developers in discovering and experimenting with models, data
// scientists in replicating and evaluating models for publication,
// and system architects in understanding the performance of AI
// workloads.
package mlmodelscope.org.dlframework;
import "features.proto";
import "google/api/annotations.proto";
import "gogoproto/gogo.proto";
import "google/protobuf/duration.proto";
option (gogoproto.goproto_registration) = true;
option (gogoproto.gostring_all) = true;
option (gogoproto.equal_all) = true;
option (gogoproto.verbose_equal_all) = true;
option (gogoproto.goproto_stringer_all) = false;
option (gogoproto.stringer_all) = true;
option (gogoproto.populate_all) = true;
option (gogoproto.testgen_all) = true;
option (gogoproto.benchgen_all) = true;
option (gogoproto.marshaler_all) = true;
option (gogoproto.sizer_all) = true;
option (gogoproto.unmarshaler_all) = true;
option go_package = "dlframework";
// CPUOptions carries options that apply to CPU devices.
// It currently declares no fields.
message CPUOptions {
}
// GPUOptions carries options that apply to GPU devices.
//
// NOTE(review): field numbers 3, 4, 6 and 7 are not declared here. The
// numbering appears to mirror TensorFlow's GPUOptions message; confirm
// before assigning any of the unused numbers.
message GPUOptions {
  // A value between 0 and 1 that indicates what fraction of the
  // available GPU memory to pre-allocate for each process. 1 means
  // to pre-allocate all of the GPU memory, 0.5 means the process
  // allocates ~50% of the available GPU memory.
  double per_process_gpu_memory_fraction = 1 [
    (gogoproto.moretags) = "yaml:\"per_process_gpu_memory_fraction,omitempty\""
  ];

  // The type of GPU allocation strategy to use.
  //
  // Allowed values:
  // "": The empty string (default) uses a system-chosen default
  //     which may change over time.
  //
  // "BFC": A "Best-fit with coalescing" algorithm, simplified from a
  //        version of dlmalloc.
  string allocator_type = 2 [
    (gogoproto.moretags) = "yaml:\"allocator_type,omitempty\""
  ];

  // A comma-separated list of GPU ids that determines the 'visible'
  // to 'virtual' mapping of GPU devices. For example, if TensorFlow
  // can see 8 GPU devices in the process, and one wanted to map
  // visible GPU devices 5 and 3 as "/device:GPU:0", and "/device:GPU:1",
  // then one would specify this field as "5,3". This field is similar in
  // spirit to the CUDA_VISIBLE_DEVICES environment variable, except it
  // applies to the visible GPU devices in the process.
  //
  // NOTE: The GPU driver provides the process with the visible GPUs
  // in an order which is not guaranteed to have any correlation to
  // the *physical* GPU id in the machine. This field is used for
  // remapping "visible" to "virtual", which means this operates only
  // after the process starts. Users are required to use vendor
  // specific mechanisms (e.g., CUDA_VISIBLE_DEVICES) to control the
  // physical to visible device mapping prior to invoking TensorFlow.
  string visible_device_list = 5 [
    (gogoproto.moretags) = "yaml:\"visible_device_list,omitempty\""
  ];

  // Force all tensors to be gpu_compatible. On a GPU-enabled TensorFlow,
  // enabling this option forces all CPU tensors to be allocated with Cuda
  // pinned memory. Normally, TensorFlow will infer which tensors should be
  // allocated as the pinned memory. But in case where the inference is
  // incomplete, this option can significantly speed up the cross-device
  // memory copy performance as long as it fits the memory.
  //
  // Note that this option is not something that should be enabled by
  // default for unknown or very large models: all Cuda pinned memory is
  // unpageable, so having too much pinned memory might negatively impact
  // the overall host system performance.
  bool force_gpu_compatible = 8 [
    (gogoproto.moretags) = "yaml:\"force_gpu_compatible,omitempty\""
  ];
}
// TraceID wraps the string identifier of a trace.
message TraceID {
  // The trace identifier.
  string id = 1 [
    (gogoproto.moretags) = "yaml:\"id,omitempty\""
  ];
}
// ExecutionOptions groups the tracing, timeout and device settings
// attached to a prediction.
message ExecutionOptions {
  // TraceLevel selects how deep into the stack tracing is captured.
  // Levels are listed from the shallowest to the deepest.
  enum TraceLevel {
    // Tracing is disabled. This is the default (zero) value.
    NO_TRACE = 0;
    // Pipelines within mlmodelscope.
    APPLICATION_TRACE = 1;
    // Pipelines within the model.
    MODEL_TRACE = 2;
    // Layers within the framework.
    FRAMEWORK_TRACE = 3;
    // ML libraries (cudnn, ...).
    ML_LIBRARY_TRACE = 4;
    // System libraries (cupti).
    SYSTEM_LIBRARY_TRACE = 5;
    // Hardware counters (perf, papi, ...).
    HARDWARE_TRACE = 6;
    // Includes all of the above.
    FULL_TRACE = 7;
  }

  // Identifier of the trace associated with this execution.
  TraceID trace_id = 1 [
    (gogoproto.moretags) = "yaml:\"trace_id,omitempty\""
  ];

  // Requested tracing depth.
  TraceLevel trace_level = 2 [
    (gogoproto.moretags) = "yaml:\"trace_level,omitempty\""
  ];

  // Time to wait for operation to complete in milliseconds.
  uint64 timeout_in_ms = 3 [
    (gogoproto.moretags) = "yaml:\"timeout_in_ms,omitempty\""
  ];

  // Map from device type name (e.g., "CPU" or "GPU" ) to maximum
  // number of devices of that type to use. If a particular device
  // type is not found in the map, the system picks an appropriate
  // number.
  map<string, int32> device_count = 4 [
    (gogoproto.moretags) = "yaml:\"device_count,omitempty\""
  ];

  // Options that apply to all CPUs.
  CPUOptions cpu_options = 5 [
    (gogoproto.moretags) = "yaml:\"cpu_options,omitempty\""
  ];

  // Options that apply to all GPUs.
  GPUOptions gpu_options = 6 [
    (gogoproto.moretags) = "yaml:\"gpu_options,omitempty\""
  ];
}
// PredictionOptions carries the per-request settings that accompany
// predictor open and inference requests.
message PredictionOptions {
  option (gogoproto.description) = true;

  // Identifier of the request. Generated Go code names this field
  // RequestID and serializes it to JSON as "request_id".
  string request_id = 1 [
    (gogoproto.customname) = "RequestID",
    (gogoproto.jsontag) = "request_id,omitempty",
    (gogoproto.moretags) = "yaml:\"request_id,omitempty\""
  ];

  // NOTE(review): presumably the maximum number of features returned
  // per input -- confirm against the predictor implementation.
  int32 feature_limit = 2 [
    (gogoproto.moretags) = "yaml:\"feature_limit,omitempty\""
  ];

  // NOTE(review): presumably the number of inputs evaluated per
  // batch -- confirm against the predictor implementation.
  int32 batch_size = 3 [
    (gogoproto.moretags) = "yaml:\"batch_size,omitempty\""
  ];

  // Tracing, timeout and device settings for the execution.
  ExecutionOptions execution_options = 4 [
    (gogoproto.moretags) = "yaml:\"execution_options,omitempty\""
  ];

  // NOTE(review): presumably names the agent that should serve the
  // request -- the expected format is not visible in this file.
  string agent = 5 [
    (gogoproto.moretags) = "yaml:\"agent,omitempty\""
  ];

  // NOTE(review): the format and meaning of this string are not
  // visible in this file -- confirm with the consumers of the field.
  string gpu_metrics = 6 [
    (gogoproto.moretags) = "yaml:\"gpu_metrics,omitempty\""
  ];
}
// PredictorOpenRequest is the request message of Predict.Open. It names
// the model and framework for which a predictor should be opened.
message PredictorOpenRequest {
  option (gogoproto.description) = true;

  // Name of the model.
  string model_name = 1 [
    (gogoproto.moretags) = "yaml:\"model_name,omitempty\""
  ];

  // Version of the model.
  string model_version = 2 [
    (gogoproto.moretags) = "yaml:\"model_version,omitempty\""
  ];

  // Name of the framework.
  string framework_name = 3 [
    (gogoproto.moretags) = "yaml:\"framework_name,omitempty\""
  ];

  // Version of the framework.
  string framework_version = 4 [
    (gogoproto.moretags) = "yaml:\"framework_version,omitempty\""
  ];

  // NOTE(review): presumably asks the server to keep the predictor
  // alive across requests -- confirm against the server implementation.
  bool persist = 5 [
    (gogoproto.moretags) = "yaml:\"persist,omitempty\""
  ];

  // Prediction options for the opened predictor.
  PredictionOptions options = 6 [
    (gogoproto.moretags) = "yaml:\"options,omitempty\""
  ];
}
// PredictorCloseRequest is the request message of Predict.Close.
message PredictorCloseRequest {
  option (gogoproto.description) = true;

  // The predictor to close.
  Predictor predictor = 1 [
    (gogoproto.moretags) = "yaml:\"predictor,omitempty\""
  ];

  // NOTE(review): presumably forces the close even when the predictor
  // is still in use -- confirm against the server implementation.
  bool force = 2 [
    (gogoproto.moretags) = "yaml:\"force,omitempty\""
  ];
}
// PredictorCloseResponse is the response message of Predict.Close.
message PredictorCloseResponse {
  // A duration reported with the close response; stdduration makes the
  // generated Go field a time.Duration.
  // NOTE(review): which interval is measured is not visible here.
  google.protobuf.Duration duration = 1 [
    (gogoproto.moretags) = "yaml:\"duration,omitempty\"",
    (gogoproto.stdduration) = true
  ];
}
// Predictor is the handle returned by Predict.Open and referenced by the
// other request messages in this file.
message Predictor {
  option (gogoproto.description) = true;

  // Identifier of the predictor. Generated Go code names this field ID
  // and serializes it to JSON as "id".
  string id = 1 [
    (gogoproto.customname) = "ID",
    (gogoproto.jsontag) = "id,omitempty",
    (gogoproto.moretags) = "yaml:\"id,omitempty\""
  ];

  // A duration associated with the predictor; stdduration makes the
  // generated Go field a time.Duration.
  // NOTE(review): which interval is measured is not visible here.
  google.protobuf.Duration duration = 2 [
    (gogoproto.moretags) = "yaml:\"duration,omitempty\"",
    (gogoproto.stdduration) = true
  ];
}
// URLsRequest is the request message of Predict.URLs: a list of URLs to
// run a predictor on.
message URLsRequest {
  // URL is a single input entry of the request.
  message URL {
    // An id used to identify the output feature: maps to input_id for output
    string id = 1 [
      (gogoproto.customname) = "ID",
      (gogoproto.jsontag) = "id,omitempty",
      (gogoproto.moretags) = "yaml:\"id,omitempty\""
    ];

    // NOTE(review): presumably the URL string itself -- confirm.
    string data = 2 [
      (gogoproto.moretags) = "yaml:\"data,omitempty\""
    ];
  }

  option (gogoproto.description) = true;

  // The predictor to run.
  Predictor predictor = 1 [
    (gogoproto.moretags) = "yaml:\"predictor,omitempty\""
  ];

  // The URL entries to run the predictor on.
  repeated URL urls = 2 [
    (gogoproto.moretags) = "yaml:\"urls,omitempty\""
  ];

  // Prediction options for this request.
  PredictionOptions options = 3 [
    (gogoproto.moretags) = "yaml:\"options,omitempty\""
  ];
}
// ImagesRequest is the request message of Predict.Images: a list of
// images to run a predictor on.
message ImagesRequest {
  option (gogoproto.description) = true;

  // The predictor to run.
  Predictor predictor = 1 [
    (gogoproto.moretags) = "yaml:\"predictor,omitempty\""
  ];

  // A list of Base64 encoded images
  repeated Image images = 2 [
    (gogoproto.moretags) = "yaml:\"images,omitempty\""
  ];

  // Prediction options for this request.
  PredictionOptions options = 3 [
    (gogoproto.moretags) = "yaml:\"options,omitempty\""
  ];
}
// TextRequest carries a list of texts to run a predictor on. The RPCs
// that would consume it are currently commented out of the Predict
// service.
message TextRequest {
  option (gogoproto.description) = true;

  // The predictor to run.
  Predictor predictor = 1 [
    (gogoproto.moretags) = "yaml:\"predictor,omitempty\""
  ];

  // A list of Base64 encoded texts
  repeated Text texts = 2 [
    (gogoproto.moretags) = "yaml:\"texts,omitempty\""
  ];

  // Prediction options for this request.
  PredictionOptions options = 3 [
    (gogoproto.moretags) = "yaml:\"options,omitempty\""
  ];
}
// DatasetRequest is the request message of Predict.Dataset: a single
// dataset to run a predictor on.
message DatasetRequest {
  // Dataset identifies a dataset by category and name.
  message Dataset {
    // Category the dataset belongs to.
    string category = 1 [
      (gogoproto.moretags) = "yaml:\"category,omitempty\""
    ];

    // Name of the dataset.
    string name = 2 [
      (gogoproto.moretags) = "yaml:\"name,omitempty\""
    ];
  }

  option (gogoproto.description) = true;

  // The predictor to run.
  Predictor predictor = 1 [
    (gogoproto.moretags) = "yaml:\"predictor,omitempty\""
  ];

  // The dataset to run the predictor on.
  Dataset dataset = 2 [
    (gogoproto.moretags) = "yaml:\"dataset,omitempty\""
  ];

  // Prediction options for this request.
  PredictionOptions options = 3 [
    (gogoproto.moretags) = "yaml:\"options,omitempty\""
  ];
}
// FeatureResponse holds the features predicted for one input, together
// with the identifiers that tie it back to the request and the input.
message FeatureResponse {
  option (gogoproto.description) = true;

  // Identifier of this response (Go field ID, JSON key "id").
  string id = 1 [
    (gogoproto.customname) = "ID",
    (gogoproto.jsontag) = "id,omitempty",
    (gogoproto.moretags) = "yaml:\"id,omitempty\""
  ];

  // Identifier of the request (Go field RequestID, JSON key
  // "request_id").
  string request_id = 2 [
    (gogoproto.customname) = "RequestID",
    (gogoproto.jsontag) = "request_id,omitempty",
    (gogoproto.moretags) = "yaml:\"request_id,omitempty\""
  ];

  // Identifier of the input (Go field InputID, JSON key "input_id").
  // URLsRequest.URL.id is documented as mapping to this field.
  string input_id = 3 [
    (gogoproto.customname) = "InputID",
    (gogoproto.jsontag) = "input_id,omitempty",
    (gogoproto.moretags) = "yaml:\"input_id,omitempty\""
  ];

  // The predicted features.
  repeated Feature features = 4 [
    (gogoproto.moretags) = "yaml:\"features,omitempty\""
  ];

  // Free-form string key/value metadata attached to the response.
  map<string, string> metadata = 5 [
    (gogoproto.moretags) = "yaml:\"metadata,omitempty\""
  ];
}
// FeaturesResponse is the response message of the URLs, Images and
// Dataset RPCs: a list of per-input feature responses plus timing and
// trace information.
message FeaturesResponse {
  option (gogoproto.description) = true;

  // Identifier of this response (Go field ID, JSON key "id").
  string id = 1 [
    (gogoproto.customname) = "ID",
    (gogoproto.jsontag) = "id,omitempty",
    (gogoproto.moretags) = "yaml:\"id,omitempty\""
  ];

  // Identifier of the trace associated with the prediction.
  TraceID trace_id = 2 [
    (gogoproto.moretags) = "yaml:\"trace_id,omitempty\""
  ];

  // The per-input feature responses.
  repeated FeatureResponse responses = 3 [
    (gogoproto.moretags) = "yaml:\"responses,omitempty\""
  ];

  // Duration of the inference; stdduration makes the generated Go
  // field a time.Duration.
  google.protobuf.Duration duration_for_inference = 4 [
    (gogoproto.moretags) = "yaml:\"duration_for_inference,omitempty\"",
    (gogoproto.stdduration) = true
  ];

  // A second duration, also generated as a Go time.Duration.
  // NOTE(review): presumably the total request time, of which
  // duration_for_inference is one part -- confirm.
  google.protobuf.Duration duration = 5 [
    (gogoproto.moretags) = "yaml:\"duration,omitempty\"",
    (gogoproto.stdduration) = true
  ];
}
// ResetRequest is the request message of Predict.Reset.
//
// The fields are declared with number 2 ahead of number 1; the
// declaration order is kept as-is.
message ResetRequest {
  option (gogoproto.description) = true;

  // Identifier carried with the request (Go field ID, JSON key "id").
  string id = 2 [
    (gogoproto.customname) = "ID",
    (gogoproto.jsontag) = "id,omitempty",
    (gogoproto.moretags) = "yaml:\"id,omitempty\""
  ];

  // The predictor to reset.
  Predictor predictor = 1 [
    (gogoproto.moretags) = "yaml:\"predictor,omitempty\""
  ];
}
// ResetResponse is the response message of Predict.Reset.
message ResetResponse {
  option (gogoproto.description) = true;

  // The predictor the response refers to. The embed option makes the
  // generated Go struct embed this field instead of naming it.
  Predictor predictor = 1 [
    (gogoproto.embed) = true,
    (gogoproto.moretags) = "yaml:\"predictor,omitempty\""
  ];
}
// Predict exposes the predictor lifecycle (Open, Close, Reset) and the
// inference entry points (URLs, Images, Dataset). Every RPC is also bound
// to an HTTP POST route through the google.api.http option, with the whole
// request message taken from the HTTP body.
//
// Disabled RPCs are kept below as detached comments: each one is separated
// by a blank line from the doc comment of the next live RPC so that it is
// not picked up as part of that RPC's documentation.
service Predict {
  // Opens a predictor and returns an id where the predictor
  // is accessible. The id can be used to perform inference
  // requests.
  rpc Open(PredictorOpenRequest) returns (Predictor) {
    option (google.api.http) = {
      post: "/predict/open"
      body: "*"
    };
  }

  // Disabled:
  //
  // rpc Information(Predictor) returns (PredictorInformation) {
  //   option (google.api.http) = {
  //     post : "/predict/information",
  //     body : "*"
  //   };
  // }

  // Closes a predictor and clears its memory.
  rpc Close(PredictorCloseRequest) returns (PredictorCloseResponse) {
    option (google.api.http) = {
      post: "/predict/close"
      body: "*"
    };
  }

  // URLs method receives a list of urls and runs
  // the predictor on all the urls.
  // The result is a list of predicted features for all the urls.
  rpc URLs(URLsRequest) returns (FeaturesResponse) {
    option (google.api.http) = {
      post: "/predict/urls"
      body: "*"
    };
  }

  // Disabled:
  //
  // URLsStream method receives a stream of urls and runs
  // the predictor on all the urls.
  // The result is a prediction feature stream for each url.
  // rpc URLsStream(URLsRequest) returns (stream FeatureResponse) {
  //   option (google.api.http) = {
  //     post : "/predict/stream/urls",
  //     body : "*"
  //   };
  // }

  // Images method receives a list of base64 encoded images and runs
  // the predictor on all the images.
  // The result is a prediction feature list for each image.
  rpc Images(ImagesRequest) returns (FeaturesResponse) {
    option (google.api.http) = {
      post: "/predict/images"
      body: "*"
    };
  }

  // Disabled:
  //
  // ImagesStream method receives a list of base64 encoded images and runs
  // the predictor on all the images.
  // The result is a prediction feature stream for each image.
  // rpc ImagesStream(ImagesRequest) returns (stream FeatureResponse) {
  //   option (google.api.http) = {
  //     post : "/predict/stream/images",
  //     body : "*"
  //   };
  // }

  // Disabled:
  //
  // // Texts method receives a list of base64 encoded texts and runs
  // // the predictor on all the texts.
  // // The result is a prediction feature list for each text.
  // rpc Texts(TextRequest) returns (FeaturesResponse) {
  //   option (google.api.http) = {
  //     post : "/predict/text",
  //     body : "*"
  //   };
  // }

  // Disabled:
  //
  // // TextsStream method receives a list of base64 encoded texts and runs
  // // the predictor on all the texts.
  // // The result is a prediction feature stream for each text.
  // rpc TextsStream(TextRequest) returns (stream FeatureResponse) {
  //   option (google.api.http) = {
  //     post : "/predict/stream/text",
  //     body : "*"
  //   };
  // }

  // Dataset method receives a single dataset and runs
  // the predictor on all elements of the dataset.
  // The result is a prediction feature list.
  rpc Dataset(DatasetRequest) returns (FeaturesResponse) {
    option (google.api.http) = {
      post: "/predict/dataset"
      body: "*"
    };
  }

  // Disabled:
  //
  // DatasetStream method receives a single dataset and runs
  // the predictor on all elements of the dataset.
  // The result is a prediction feature stream.
  // rpc DatasetStream(DatasetRequest) returns (stream FeatureResponse) {
  //   option (google.api.http) = {
  //     post : "/predict/stream/dataset",
  //     body : "*"
  //   };
  // }

  // Reset method clears the internal cache of the predictors
  rpc Reset(ResetRequest) returns (ResetResponse) {
    option (google.api.http) = {
      post: "/predict/reset"
      body: "*"
    };
  }
}