litellm/proxy/proxy_cli.py — 24 changes: 24 additions & 0 deletions
@@ -121,6 +121,8 @@ def _get_default_unvicorn_init_args(
port: int,
log_config: Optional[str] = None,
keepalive_timeout: Optional[int] = None,
limit_concurrency: Optional[int] = None,
backlog: Optional[int] = None,
) -> dict:
"""
Get the arguments for `uvicorn` worker
@@ -140,6 +142,10 @@ def _get_default_unvicorn_init_args(
uvicorn_args["log_config"] = None
if keepalive_timeout is not None:
uvicorn_args["timeout_keep_alive"] = keepalive_timeout
if limit_concurrency is not None:
uvicorn_args["limit_concurrency"] = limit_concurrency
if backlog is not None:
uvicorn_args["backlog"] = backlog
return uvicorn_args

@staticmethod
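For context, the keys assembled above map one-to-one onto `uvicorn.Config` keyword arguments. A minimal sketch of how such a dict could be consumed, assuming the proxy ultimately hands it to `uvicorn.run` (the app import string, host, and port below are placeholders, not the proxy's real startup code):

```python
import uvicorn

# Illustrative only: mirrors the shape of the dict returned by
# _get_default_unvicorn_init_args, with the two new keys included.
uvicorn_args = {
    "app": "litellm.proxy.proxy_server:app",  # placeholder import string
    "host": "0.0.0.0",
    "port": 4000,
    "timeout_keep_alive": 5,     # --keepalive_timeout
    "limit_concurrency": 10000,  # --limit_concurrency (new in this PR)
    "backlog": 2048,             # --backlog (new in this PR)
}

if __name__ == "__main__":
    # uvicorn answers with HTTP 503 once limit_concurrency concurrent
    # connections/tasks are reached; backlog bounds the socket listen
    # queue for connections not yet accepted.
    uvicorn.run(**uvicorn_args)
```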
@@ -498,6 +504,20 @@ def _get_loop_type():
help="Restart worker after this many requests (uvicorn: limit_max_requests, gunicorn: max_requests)",
envvar="MAX_REQUESTS_BEFORE_RESTART",
)
@click.option(
"--limit_concurrency",
default=10000,
type=int,
help="Set the maximum number of concurrent requests to the proxy (uvicorn limit_concurrency parameter)",
envvar="LIMIT_CONCURRENCY",
)
@click.option(
"--backlog",
default=2048,
type=int,
help="Set the maximum number of pending connections (uvicorn backlog parameter)",
envvar="BACKLOG",
)
def run_server( # noqa: PLR0915
host,
port,
@@ -537,6 +557,8 @@ def run_server( # noqa: PLR0915
skip_server_startup,
keepalive_timeout,
max_requests_before_restart,
limit_concurrency,
backlog,
):
args = locals()
if local:
@@ -825,6 +847,8 @@ def run_server( # noqa: PLR0915
port=port,
log_config=log_config,
keepalive_timeout=keepalive_timeout,
limit_concurrency=limit_concurrency,
backlog=backlog,
)
# Optional: recycle uvicorn workers after N requests
if max_requests_before_restart is not None:
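Usage note (hedged, based only on the flag and env-var names added in this diff): `--limit_concurrency` caps in-flight requests at the uvicorn layer, which replies with HTTP 503 once the limit is hit, while `--backlog` bounds the OS listen queue for pending connections. A sketch of how the new options could be set, with example values only:

```shell
# via CLI flags (values are examples, not recommendations)
litellm --host 0.0.0.0 --port 4000 --limit_concurrency 5000 --backlog 4096

# or via the environment variables declared on the options
export LIMIT_CONCURRENCY=5000
export BACKLOG=4096
litellm
```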