-
Notifications
You must be signed in to change notification settings - Fork 756
/
file.py
279 lines (215 loc) · 9.49 KB
/
file.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
from __future__ import annotations
import io
import os
import typing as t
import logging
from typing import TYPE_CHECKING
from starlette.requests import Request
from multipart.multipart import parse_options_header
from starlette.responses import Response
from starlette.datastructures import UploadFile
from .base import IODescriptor
from ..types import FileLike
from ..utils.http import set_cookies
from ...exceptions import BadInput
from ...exceptions import InvalidArgument
from ...exceptions import BentoMLException
from ..service.openapi import SUCCESS_DESCRIPTION
from ..service.openapi.specification import Schema
from ..service.openapi.specification import MediaType
# Module-level logger named after this module's import path.
logger = logging.getLogger(__name__)

if TYPE_CHECKING:
    # Imported for static analysis only; with `from __future__ import annotations`
    # these names are never evaluated at runtime.
    from typing_extensions import Self

    from bentoml.grpc.v1alpha1 import service_pb2 as pb

    from .base import OpenAPIResponse
    from ..context import InferenceApiContext as Context

    # Accepted values for the ``kind`` argument of :obj:`File`.
    FileKind: t.TypeAlias = t.Literal["binaryio", "textio"]
else:
    # At runtime the protobuf module is obtained through the helper instead of
    # a direct import; only the first element of the returned pair is used here.
    from bentoml.grpc.utils import import_generated_stubs

    pb, _ = import_generated_stubs()

# Every object type the File descriptor accepts as a file-like payload.
FileType = t.Union[io.IOBase, t.IO[bytes], FileLike[bytes]]
class File(IODescriptor[FileType], descriptor_id="bentoml.io.File"):
    """
    :obj:`File` defines API specification for the inputs/outputs of a Service, where either
    inputs will be converted to or outputs will be converted from file-like objects as
    specified in your API function signature.

    A sample ViT service:

    .. code-block:: python
       :caption: `service.py`

       from __future__ import annotations

       import io
       from typing import TYPE_CHECKING
       from typing import Any

       import bentoml
       from bentoml.io import File
       from bentoml.io import NumpyNdarray

       if TYPE_CHECKING:
           from numpy.typing import NDArray

       runner = bentoml.tensorflow.get('image-classification:latest').to_runner()

       svc = bentoml.Service("vit-pdf-classifier", runners=[runner])

       @svc.api(input=File(), output=NumpyNdarray(dtype="float32"))
       async def predict(input_pdf: io.BytesIO[Any]) -> NDArray[Any]:
           return await runner.async_run(input_pdf)

    Users can then serve this service with :code:`bentoml serve`:

    .. code-block:: bash

        % bentoml serve ./service.py:svc --reload

    Users can then send requests to the newly started services with any client:

    .. tab-set::

        .. tab-item:: Bash

            .. code-block:: bash

                % curl -H "Content-Type: multipart/form-data" \\
                       -F 'fileobj=@test.pdf;type=application/pdf' \\
                       http://0.0.0.0:3000/predict

        .. tab-item:: Python

            .. code-block:: python
               :caption: `request.py`

                import requests

                requests.post(
                    "http://0.0.0.0:3000/predict",
                    files = {"upload_file": open('test.pdf', 'rb')},
                    headers = {"content-type": "multipart/form-data"}
                ).text

    Args:
        kind: The kind of file-like object to be used. Currently, the only accepted value is ``binaryio``.
        mime_type: MIME type of the file. Defaults to ``application/octet-stream``. It is
            used as the media type of the :code:`starlette.response.Response` when the
            descriptor is an output, and as the key of the OpenAPI ``content`` entries.

    Returns:
        :obj:`File`: IO Descriptor that represents file-like objects.

    """

    _proto_fields = ("file",)

    def __new__(cls, kind: FileKind = "binaryio", mime_type: str | None = None) -> File:
        """Create the concrete descriptor matching ``kind``.

        Raises:
            ValueError: if ``kind`` is anything other than ``"binaryio"``.
        """
        mime_type = mime_type if mime_type is not None else "application/octet-stream"
        if kind == "binaryio":
            # Instantiate the subclass directly via ``object.__new__`` so this
            # factory ``__new__`` is not re-entered.
            res = object.__new__(BytesIOFile)
        else:
            raise ValueError(f"invalid File kind '{kind}'")
        res._mime_type = mime_type
        return res

    @classmethod
    def from_sample(cls, sample: FileType | str, kind: FileKind = "binaryio") -> Self:
        """Build a descriptor from an example payload.

        Args:
            sample: a ``FileLike``, an open binary stream, or a path to an existing file.
            kind: forwarded to the constructor.

        Returns:
            A descriptor whose ``sample`` attribute holds the payload wrapped in a
            ``FileLike`` and whose MIME type is guessed from the sample (falling
            back to the constructor default when the guess is ``None``).

        Raises:
            InvalidArgument: if ``sample`` is none of the supported kinds.
        """
        import filetype

        # Validate and normalise first so unsupported samples are reported as
        # InvalidArgument rather than as an error from the MIME sniffing below.
        if isinstance(sample, FileLike):
            normalized = sample
        elif isinstance(sample, (io.IOBase, t.IO)):
            # ``typing.IO`` alone does not match concrete ``io`` streams such as
            # ``io.BytesIO``; ``io.IOBase`` covers them.
            normalized = FileLike[bytes](sample, "<sample>")
        elif isinstance(sample, str) and os.path.exists(sample):
            # Copy the content into memory: the file handle is closed when the
            # ``with`` block exits, so it must not be stored on the descriptor.
            with open(sample, "rb") as f:
                normalized = FileLike[bytes](io.BytesIO(f.read()), "<sample>")
        else:
            raise InvalidArgument(f"Unknown sample type: '{sample}'")

        mime_type: str | None = filetype.guess_mime(sample)
        kls = cls(kind=kind, mime_type=mime_type)
        kls.sample = normalized
        return kls

    @classmethod
    def from_spec(cls, spec: dict[str, t.Any]) -> Self:
        """Rebuild a descriptor from a spec dict, passing ``spec["args"]`` as keyword arguments.

        Raises:
            InvalidArgument: if ``spec`` has no ``"args"`` key.
        """
        if "args" not in spec:
            raise InvalidArgument(f"Missing args key in File spec: {spec}")
        return cls(**spec["args"])

    def input_type(self) -> t.Type[t.Any]:
        """Return the type handed to the API function for this descriptor."""
        return FileLike[bytes]

    def openapi_schema(self) -> Schema:
        """Return the OpenAPI schema of a binary string."""
        return Schema(type="string", format="binary")

    def openapi_components(self) -> dict[str, t.Any] | None:
        """Return ``None``: this descriptor contributes no OpenAPI components."""
        return None

    def openapi_request_body(self) -> dict[str, t.Any]:
        """Return the OpenAPI request body entry, keyed by the configured MIME type."""
        return {
            "content": {self._mime_type: MediaType(schema=self.openapi_schema())},
            "required": True,
            "x-bentoml-io-descriptor": self.to_spec(),
        }

    def openapi_responses(self) -> OpenAPIResponse:
        """Return the OpenAPI response entry, keyed by the configured MIME type."""
        return {
            "description": SUCCESS_DESCRIPTION,
            "content": {self._mime_type: MediaType(schema=self.openapi_schema())},
            "x-bentoml-io-descriptor": self.to_spec(),
        }

    async def to_http_response(self, obj: FileType, ctx: Context | None = None):
        """Serialise ``obj`` into a Starlette ``Response``.

        Args:
            obj: raw ``bytes`` or an object exposing ``read()``.
            ctx: optional inference context; when given, its response metadata,
                status code and cookies are applied to the response.

        Returns:
            A ``Response`` whose body is the file content and whose media type is
            the descriptor's configured MIME type.
        """
        if isinstance(obj, bytes):
            body = obj
        else:
            body = obj.read()

        # Pass the configured MIME type as the response media type, as documented
        # for ``mime_type``. Starlette only derives a Content-Type header from
        # ``media_type`` when ``headers`` does not already contain one, so a
        # Content-Type set through the context metadata still wins.
        if ctx is not None:
            res = Response(
                body,
                headers=ctx.response.metadata,  # type: ignore (bad starlette types)
                status_code=ctx.response.status_code,
                media_type=self._mime_type,
            )
            set_cookies(res, ctx.response.cookies)
        else:
            res = Response(body, media_type=self._mime_type)
        return res

    async def to_proto(self, obj: FileType) -> pb.File:
        """Serialise ``obj`` into a ``pb.File`` message.

        Raises:
            BadInput: if the configured MIME type has no entry in the
                MIME-type-to-file-kind mapping.
        """
        from bentoml.grpc.utils import mimetype_to_filetype_pb_map

        if isinstance(obj, bytes):
            body = obj
        else:
            body = obj.read()

        try:
            kind = mimetype_to_filetype_pb_map()[self._mime_type]
        except KeyError:
            raise BadInput(
                f"{self._mime_type} doesn't have a corresponding File 'kind'"
            ) from None

        return pb.File(kind=kind, content=body)

    # The three methods below are placeholders overridden by the concrete
    # subclass that ``__new__`` instantiates.
    async def from_proto(self, field: pb.File | bytes) -> FileLike[bytes]:
        raise NotImplementedError

    async def from_http_request(self, request: Request) -> FileLike[bytes]:
        raise NotImplementedError

    def to_spec(self) -> dict[str, t.Any]:
        raise NotImplementedError
class BytesIOFile(File, descriptor_id=None):
    """Concrete :obj:`File` descriptor for binary payloads (``kind="binaryio"``)."""

    def to_spec(self) -> dict[str, t.Any]:
        """Return the spec dict: the parent's descriptor id plus the constructor arguments."""
        return {
            # This class is declared with ``descriptor_id=None``, so the id is
            # looked up through ``super()``.
            "id": super().descriptor_id,
            "args": {
                "kind": "binaryio",
                "mime_type": self._mime_type,
            },
        }

    async def from_http_request(self, request: Request) -> FileLike[bytes]:
        """Extract the file payload from an HTTP request.

        Two request shapes are accepted:

        * ``multipart/form-data``: the first uploaded file whose part Content-Type
          equals the configured MIME type is returned.
        * A request whose Content-Type equals the configured MIME type: the whole
          body is returned.

        Raises:
            BentoMLException: if the Content-Type header is missing or unsupported,
                or if a multipart form contains no file with the expected MIME type.
        """
        # A missing header is treated as an empty content type so the caller gets
        # the descriptive BentoMLException below instead of a bare KeyError.
        raw_content_type, _ = parse_options_header(
            request.headers.get("content-type", "")
        )
        content_type = raw_content_type.decode("utf-8")

        if content_type == "multipart/form-data":
            form = await request.form()
            found_mimes: t.List[str] = []
            val: t.Union[str, UploadFile]
            for val in form.values():  # type: ignore
                if not isinstance(val, UploadFile):
                    continue
                found_mimes.append(val.content_type)  # type: ignore (bad starlette types)
                if val.content_type == self._mime_type:  # type: ignore (bad starlette types)
                    return FileLike[bytes](val.file, val.filename)  # type: ignore (bad starlette types)

            # No uploaded part matched the expected MIME type.
            if not found_mimes:
                raise BentoMLException("no File found in multipart form")
            raise BentoMLException(
                f"multipart File should have Content-Type '{self._mime_type}', got files with content types {', '.join(found_mimes)}"
            )

        if content_type == self._mime_type:
            body = await request.body()
            return FileLike[bytes](io.BytesIO(body), "<request body>")

        raise BentoMLException(
            f"File should have Content-Type '{self._mime_type}' or 'multipart/form-data', got '{content_type}' instead"
        )

    async def from_proto(self, field: pb.File | bytes) -> FileLike[bytes]:
        """Extract the file payload from a gRPC field.

        Args:
            field: raw ``bytes``, or a ``pb.File`` message. When the message has a
                truthy ``kind``, it must map to the configured MIME type.

        Raises:
            BadInput: if ``kind`` is unknown, maps to a different MIME type, or the
                message content is empty.
        """
        from bentoml.grpc.utils import filetype_pb_to_mimetype_map

        mapping = filetype_pb_to_mimetype_map()
        # check if the request message has the correct field
        if isinstance(field, bytes):
            content = field
        else:
            assert isinstance(field, pb.File)
            if field.kind:
                # Only the lookup can raise KeyError, so only it is guarded.
                try:
                    mime_type = mapping[field.kind]
                except KeyError:
                    raise BadInput(
                        f"{field.kind} is not a valid File kind. Accepted file kind: {[names for names,_ in pb.File.FileType.items()]}",
                    ) from None
                if mime_type != self._mime_type:
                    raise BadInput(
                        f"Inferred mime_type from 'kind' is '{mime_type}', while '{self!r}' is expecting '{self._mime_type}'",
                    )
            content = field.content
            if not content:
                raise BadInput("Content is empty!") from None

        return FileLike[bytes](io.BytesIO(content), "<content>")