Skip to content

Commit

Permalink
feat: implement new serve API (#3696)
Browse files (browse the repository at this point in the history)
Co-authored-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
Co-authored-by: Sean Sheng <s3sheng@gmail.com>
  • Loading branch information
3 people authored Apr 13, 2023
1 parent b50c97d commit 5139dea
Show file tree
Hide file tree
Showing 9 changed files with 477 additions and 144 deletions.
7 changes: 7 additions & 0 deletions src/bentoml/__init__.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -35,6 +35,10 @@
from .bentos import export_bento
from .bentos import import_bento

# server API
from .server import GrpcServer
from .server import HTTPServer

# BentoML built-in types
from ._internal.tag import Tag
from ._internal.bento import Bento
Expand Down Expand Up @@ -170,6 +174,9 @@
"serve",
"Bento",
"exceptions",
# server APIs
"HTTPServer",
"GrpcServer",
# Framework specific modules
"catboost",
"detectron",
Expand Down
2 changes: 1 addition & 1 deletion src/bentoml/_internal/client/grpc.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -137,7 +137,7 @@ def _create_sync_channel(
def wait_until_server_ready(
host: str,
port: int,
timeout: int = 30,
timeout: float = 30,
check_interval: int = 1,
# set kwargs here to omit gRPC kwargs
**kwargs: t.Any,
Expand Down
4 changes: 2 additions & 2 deletions src/bentoml/_internal/client/http.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -31,7 +31,7 @@ class HTTPClient(Client):
def wait_until_server_ready(
host: str,
port: int,
timeout: int = 30,
timeout: float = 30,
check_interval: int = 1,
# set kwargs here to omit gRPC kwargs
**kwargs: t.Any,
Expand Down Expand Up @@ -74,7 +74,7 @@ def wait_until_server_ready(
ConnectionRefusedError,
TimeoutError,
) as err:
logger.error("Caught exception while connecting to %s:%s:", host, port)
logger.error("Timed out while connecting to %s:%s:", host, port)
logger.error(err)
raise

Expand Down
63 changes: 0 additions & 63 deletions src/bentoml/_internal/server/server.py

This file was deleted.

131 changes: 56 additions & 75 deletions src/bentoml/bentos.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -4,14 +4,13 @@

from __future__ import annotations

import sys
import typing as t
import logging
import subprocess

from simple_di import inject
from simple_di import Provide

from .exceptions import BadInput
from .exceptions import InvalidArgument
from .exceptions import BentoMLException
from ._internal.tag import Tag
Expand All @@ -21,9 +20,10 @@
from ._internal.configuration.containers import BentoMLContainer

if t.TYPE_CHECKING:
from .server import Server
from ._internal.bento import BentoStore
from ._internal.yatai_client import YataiClient
from ._internal.server.server import ServerHandle


logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -441,81 +441,62 @@ def serve(
max_concurrent_streams: int
| None = Provide[BentoMLContainer.grpc.max_concurrent_streams],
grpc_protocol_version: str | None = None,
) -> ServerHandle:
from .serve import construct_ssl_args
from ._internal.server.server import ServerHandle

if isinstance(bento, Bento):
bento = str(bento.tag)
elif isinstance(bento, Tag):
bento = str(bento)

server_type = server_type.lower()
if server_type not in ["http", "grpc"]:
raise ValueError('Server type must either be "http" or "grpc"')

ssl_args: dict[str, t.Any] = {
"ssl_certfile": ssl_certfile,
"ssl_keyfile": ssl_keyfile,
"ssl_ca_certs": ssl_ca_certs,
}
) -> Server:
logger.warning(
"bentoml.serve and bentoml.bentos.serve are deprecated; use bentoml.Server instead."
)

if server_type == "http":
serve_cmd = "serve-http"
from .server import HTTPServer

if host is None:
host = BentoMLContainer.http.host.get()
host = t.cast(str, BentoMLContainer.http.host.get())
if port is None:
port = BentoMLContainer.http.port.get()

ssl_args.update(
ssl_keyfile_password=ssl_keyfile_password,
ssl_version=ssl_version,
ssl_cert_reqs=ssl_cert_reqs,
ssl_ciphers=ssl_ciphers,
port = t.cast(int, BentoMLContainer.http.port.get())

res = HTTPServer(
bento,
reload,
production,
env,
host,
port,
working_dir,
api_workers,
backlog,
ssl_certfile,
ssl_keyfile,
ssl_keyfile_password,
ssl_version,
ssl_cert_reqs,
ssl_ca_certs,
ssl_ciphers,
)
else:
serve_cmd = "serve-grpc"
elif server_type == "grpc":
from .server import GrpcServer

if host is None:
host = BentoMLContainer.grpc.host.get()
host = t.cast(str, BentoMLContainer.grpc.host.get())
if port is None:
port = BentoMLContainer.grpc.port.get()

assert host is not None and port is not None
args: t.List[str] = [
sys.executable,
"-m",
"bentoml",
serve_cmd,
bento,
"--host",
host,
"--port",
str(port),
"--backlog",
str(backlog),
*construct_ssl_args(**ssl_args),
]
if production:
args.append("--production")
if reload:
args.append("--reload")
if env:
args.extend(["--env", env])

if api_workers is not None:
args.extend(["--api-workers", str(api_workers)])
if working_dir is not None:
args.extend(["--working-dir", str(working_dir)])
if enable_reflection:
args.append("--enable-reflection")
if enable_channelz:
args.append("--enable-channelz")
if max_concurrent_streams is not None:
args.extend(["--max-concurrent-streams", str(max_concurrent_streams)])

if grpc_protocol_version is not None:
assert (
server_type == "grpc"
), f"'grpc_protocol_version' should only be passed to gRPC server, got '{server_type}' instead."
args.extend(["--protocol-version", str(grpc_protocol_version)])

return ServerHandle(process=subprocess.Popen(args), host=host, port=port)
port = t.cast(int, BentoMLContainer.grpc.port.get())

res = GrpcServer(
bento,
reload,
production,
env,
host,
port,
working_dir,
api_workers,
backlog,
enable_reflection,
enable_channelz,
max_concurrent_streams,
grpc_protocol_version,
)
else:
raise BadInput(f"Unknown server type: '{server_type}'")

res.start()
return res
Loading

0 comments on commit 5139dea

Please sign in to comment.