// bentoml/grpc/v1/service.proto

syntax = "proto3";

package bentoml.grpc.v1;

import "bentoml/grpc/v1/struct.proto";
import "google/protobuf/any.proto";
import "google/rpc/error_details.proto";
import "google/rpc/status.proto";

// File-level code generation options.
//
// cc_enable_arenas enables arena allocation for the generated C++ messages,
// which can make message allocation and deallocation faster. (C++ only)
option cc_enable_arenas = true;
// Also emit protobuf's generic (RPC-framework-agnostic) service classes for
// C++.
// NOTE(review): generic services are deprecated upstream in favor of
// plugin-generated stubs such as gRPC's - confirm this is still required.
option cc_generic_services = true;
// Go import path of the generated package.
option go_package = "github.com/bentoml/grpc/v1";
// Java: generate one .java file per top-level message/service rather than
// nesting everything inside the outer class named below.
option java_multiple_files = true;
option java_outer_classname = "ServiceProto";
option java_package = "com.bentoml.grpc.v1";
// Prefix applied to the generated Objective-C classes.
option objc_class_prefix = "SVC";
// Also emit protobuf's generic service classes for Python.
// NOTE(review): same deprecation caveat as cc_generic_services - confirm.
option py_generic_services = true;

// BentoService is the gRPC interface exposed by a BentoServer.
//
// Every method is unary: one request message in, one response message out.
service BentoService {
  // ServerLive checks server liveness.
  rpc ServerLive(ServerLiveRequest) returns (ServerLiveResponse) {}

  // ServerReady checks server readiness.
  rpc ServerReady(ServerReadyRequest) returns (ServerReadyResponse) {}

  // Inference handles a unary call to one of the service's APIs; the target
  // API is selected by InferenceRequest.api_name.
  rpc Inference(InferenceRequest) returns (InferenceResponse) {}
}

// Request message for BentoService.ServerLive. It carries no fields.
message ServerLiveRequest {}

// Response message for BentoService.ServerLive. It carries a single boolean
// that reports the server's liveness.
message ServerLiveResponse {
  // Liveness flag reported by the server.
  bool live = 1;
}

// Request message for BentoService.ServerReady. It carries no fields.
message ServerReadyRequest {}

// Response message for BentoService.ServerReady. It carries a single boolean
// that reports the server's readiness.
message ServerReadyResponse {
  // Readiness flag reported by the server.
  bool ready = 1;
}

// Request message for BentoService.Inference.
//
// The payload is carried either in typed form (`contents`) or as raw bytes
// (`raw_bytes_contents`); see the field comments for how the two relate.
message InferenceRequest {
  // The API route this RPC request is sent to.
  string api_name = 1;
  // Version of the API addressed by api_name.
  // NOTE(review): the expected format, and whether this may be left empty,
  // is not specified in this file - confirm against the server.
  string api_version = 2;

  // Typed representation of the input value(s). ContentsProto is declared
  // outside this file (presumably in bentoml/grpc/v1/struct.proto).
  repeated bentoml.grpc.v1.ContentsProto contents = 3;

  // The data contained in an input can be represented in "raw" bytes form
  // or in the repeated type that matches the data type.
  // Using the "raw" bytes form will typically allow higher performance due
  // to the way protobuf allocation and reuse interacts with gRPC.
  // For example, see https://github.com/grpc/grpc/issues/23231.
  //
  // To use the raw representation, raw_bytes_contents must be initialized
  // with data for each tensor, in the same order as the inputs. For each
  // tensor, the size of this content must match what is expected by the
  // tensor's shape and data type. The raw data must be the flattened,
  // one-dimensional, row-major order of the tensor elements without any
  // stride or padding between the elements.
  //
  // Note that the FP16 and BF16 data types must be represented as raw
  // content, as there is no specific data type for a 16-bit float type.
  //
  // If this field is specified then contents must not be specified for any
  // input tensor.
  repeated bytes raw_bytes_contents = 4;

  // dataframe_columns and dataframe_indices are used in conjunction with
  // contents to represent a dataframe.
  // Recommendation: for better performance, use raw_bytes_contents in
  // conjunction with the fields below.
  repeated string dataframe_columns = 101;
  repeated string dataframe_indices = 102;
}

// Response message for BentoService.Inference.
//
// Carries the output payload (typed `contents` or `raw_bytes_contents`), an
// RPC status, and optionally one structured error detail.
message InferenceResponse {
  // Typed representation of the output value.
  repeated bentoml.grpc.v1.ContentsProto contents = 1;

  // The data contained in an output can be represented in "raw" bytes form
  // or in the repeated type that matches the data type.
  // Using the "raw" bytes form will typically allow higher performance due
  // to the way protobuf allocation and reuse interacts with gRPC.
  // For example, see https://github.com/grpc/grpc/issues/23231.
  //
  // To use the raw representation, raw_bytes_contents must be initialized
  // with data for each tensor, in the same order as the outputs. For each
  // tensor, the size of this content must match what is expected by the
  // tensor's shape and data type. The raw data must be the flattened,
  // one-dimensional, row-major order of the tensor elements without any
  // stride or padding between the elements.
  //
  // Note that the FP16 and BF16 data types must be represented as raw
  // content, as there is no specific data type for a 16-bit float type.
  //
  // If this field is specified then contents must not be specified for any
  // output tensor.
  //
  // NOTE(review): this text previously spoke of 'raw_input_contents',
  // 'inputs' and "input tensor", which do not exist on this message; it has
  // been reworded for outputs - confirm the constraints are as intended.
  repeated bytes raw_bytes_contents = 2;

  // RPC status reported back to the client.
  google.rpc.Status status = 3;

  // Structured error detail accompanying the response. Because this is a
  // oneof, at most one of the members below is set at a time; setting one
  // clears any other.
  oneof errors {
    google.rpc.RetryInfo retry_info = 100;
    google.rpc.DebugInfo debug_info = 101;
    google.rpc.QuotaFailure quota_failure = 102;
    google.rpc.ErrorInfo error_info = 103;
    google.rpc.PreconditionFailure precondition_failure = 104;
    google.rpc.BadRequest bad_request = 105;
    google.rpc.RequestInfo request_info = 106;
    google.rpc.LocalizedMessage localized_message = 107;
  }
}