// service.proto
syntax = "proto3";
package bentoml.grpc.v1;
import "bentoml/grpc/v1/struct.proto";
import "google/protobuf/any.proto";
import "google/rpc/error_details.proto";
import "google/rpc/status.proto";
// File-level code generation options.
//
// cc_enable_arenas enables arena allocation for the generated C++ message
// classes, which can reduce allocation overhead. (C++ only)
option cc_enable_arenas = true;
// Generate the abstract generic service classes for C++.
option cc_generic_services = true;
// Go import path of the package generated from this file.
option go_package = "github.com/bentoml/grpc/v1";
// Generate a separate .java file for each top-level message and service
// instead of nesting them all inside the outer class.
option java_multiple_files = true;
// Name of the Java outer (wrapper) class generated for this file.
option java_outer_classname = "ServiceProto";
// Java package for the generated classes.
option java_package = "com.bentoml.grpc.v1";
// Prefix prepended to the Objective-C classes generated from this file.
option objc_class_prefix = "SVC";
// Generate the abstract generic service classes for Python.
option py_generic_services = true;
// BentoService is the gRPC service exposed by a BentoServer.
service BentoService {
  // ServerLive checks whether the server is live.
  rpc ServerLive(ServerLiveRequest) returns (ServerLiveResponse);

  // ServerReady checks whether the server is ready.
  rpc ServerReady(ServerReadyRequest) returns (ServerReadyResponse);

  // Inference handles a unary API call.
  rpc Inference(InferenceRequest) returns (InferenceResponse);
}
// ServerLiveRequest is the request message for the ServerLive rpc.
message ServerLiveRequest {
  // Intentionally empty: ServerLive takes no arguments.
}
// ServerLiveResponse is the response message for the ServerLive rpc.
message ServerLiveResponse {
  // Boolean indicating the server's liveness.
  bool live = 1;
}
// ServerReadyRequest is the request message for the ServerReady rpc.
message ServerReadyRequest {
  // Intentionally empty: ServerReady takes no arguments.
}
// ServerReadyResponse is the response message for the ServerReady rpc.
message ServerReadyResponse {
  // Boolean indicating the server's readiness.
  bool ready = 1;
}
// InferenceRequest is the request message for the Inference rpc.
message InferenceRequest {
  // The API route this rpc request is sent to.
  string api_name = 1;

  // Presumably the version of the API named by api_name.
  // NOTE(review): the expected format and how the server uses this value are
  // not defined in this file -- confirm against the server implementation.
  string api_version = 2;

  // The input data in the repeated, typed representation. ContentsProto is
  // expected to come from bentoml/grpc/v1/struct.proto.
  repeated bentoml.grpc.v1.ContentsProto contents = 3;

  // The data contained in an input can be represented in "raw" bytes form
  // (this field) or in the repeated type that matches the data type
  // (contents).
  // Using the "raw" bytes form will typically allow higher performance due to
  // the way protobuf allocation and reuse interacts with gRPC.
  // For example, see https://github.com/grpc/grpc/issues/23231.
  //
  // To use the raw representation, raw_bytes_contents must be initialized
  // with data for each input tensor. For each tensor, the size of this
  // content must match what is expected by the tensor's shape and data type.
  // The raw data must be the flattened, one-dimensional, row-major order of
  // the tensor elements without any stride or padding between the elements.
  // NOTE(review): earlier wording required the entries to be "in the same
  // order as 'inputs'", but this message has no field named inputs -- confirm
  // which ordering is intended.
  //
  // Note that the FP16 and BF16 data types must be represented as raw content
  // as there is no specific data type for a 16-bit float type.
  //
  // If this field is specified then contents must not be specified for any
  // input tensor.
  repeated bytes raw_bytes_contents = 4;

  // dataframe_columns and dataframe_indices are used in conjunction with
  // contents to represent a dataframe.
  // Recommendation: for better performance, use raw_bytes_contents in
  // conjunction with these two fields.
  repeated string dataframe_columns = 101;

  // Used together with dataframe_columns to represent a dataframe; see the
  // comment on dataframe_columns.
  repeated string dataframe_indices = 102;
}
// InferenceResponse is the response message for the Inference rpc.
message InferenceResponse {
  // Representation of the output value in the repeated, typed form.
  repeated bentoml.grpc.v1.ContentsProto contents = 1;

  // The data contained in an output can be represented in "raw" bytes form
  // (this field) or in the repeated type that matches the data type
  // (contents).
  // Using the "raw" bytes form will typically allow higher performance due to
  // the way protobuf allocation and reuse interacts with gRPC.
  // For example, see https://github.com/grpc/grpc/issues/23231.
  //
  // To use the raw representation, raw_bytes_contents must be initialized
  // with data for each output tensor. For each tensor, the size of this
  // content must match what is expected by the tensor's shape and data type.
  // The raw data must be the flattened, one-dimensional, row-major order of
  // the tensor elements without any stride or padding between the elements.
  // NOTE(review): earlier wording was copied from the request and required
  // the entries to be "in the same order as 'inputs'"; this message has no
  // such field -- confirm which ordering is intended for outputs.
  //
  // Note that the FP16 and BF16 data types must be represented as raw content
  // as there is no specific data type for a 16-bit float type.
  //
  // If this field is specified then contents must not be specified for any
  // output tensor.
  repeated bytes raw_bytes_contents = 2;

  // The rpc status sent back to the client.
  google.rpc.Status status = 3;

  // Error detail accompanying the response. As members of a oneof, at most
  // one of these can be set at a time. The google.rpc.* detail types are
  // expected to come from google/rpc/error_details.proto.
  oneof errors {
    google.rpc.RetryInfo retry_info = 100;
    google.rpc.DebugInfo debug_info = 101;
    google.rpc.QuotaFailure quota_failure = 102;
    google.rpc.ErrorInfo error_info = 103;
    google.rpc.PreconditionFailure precondition_failure = 104;
    google.rpc.BadRequest bad_request = 105;
    google.rpc.RequestInfo request_info = 106;
    google.rpc.LocalizedMessage localized_message = 107;
  }
}