From b74c45896075ddca656078dd352c08f1a91b0178 Mon Sep 17 00:00:00 2001 From: AlexsanderHamir Date: Tue, 7 Oct 2025 15:41:24 -0700 Subject: [PATCH 1/2] add: network config option --- litellm/proxy/proxy_cli.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py index 21e2c54ec74f..11a43505bf49 100644 --- a/litellm/proxy/proxy_cli.py +++ b/litellm/proxy/proxy_cli.py @@ -121,6 +121,8 @@ def _get_default_unvicorn_init_args( port: int, log_config: Optional[str] = None, keepalive_timeout: Optional[int] = None, + limit_concurrency: Optional[int] = None, + backlog: Optional[int] = None, ) -> dict: """ Get the arguments for `uvicorn` worker @@ -140,6 +142,10 @@ def _get_default_unvicorn_init_args( uvicorn_args["log_config"] = None if keepalive_timeout is not None: uvicorn_args["timeout_keep_alive"] = keepalive_timeout + if limit_concurrency is not None: + uvicorn_args["limit_concurrency"] = limit_concurrency + if backlog is not None: + uvicorn_args["backlog"] = backlog return uvicorn_args @staticmethod @@ -498,6 +504,20 @@ def _get_loop_type(): help="Restart worker after this many requests (uvicorn: limit_max_requests, gunicorn: max_requests)", envvar="MAX_REQUESTS_BEFORE_RESTART", ) +@click.option( + "--limit_concurrency", + default=None, + type=int, + help="Set the maximum number of concurrent requests to the proxy (uvicorn limit_concurrency parameter)", + envvar="LIMIT_CONCURRENCY", +) +@click.option( + "--backlog", + default=None, + type=int, + help="Set the maximum number of pending connections (uvicorn backlog parameter)", + envvar="BACKLOG", +) def run_server( # noqa: PLR0915 host, port, @@ -537,6 +557,8 @@ def run_server( # noqa: PLR0915 skip_server_startup, keepalive_timeout, max_requests_before_restart, + limit_concurrency, + backlog, ): args = locals() if local: @@ -825,6 +847,8 @@ def run_server( # noqa: PLR0915 port=port, log_config=log_config, keepalive_timeout=keepalive_timeout, + limit_concurrency=limit_concurrency, + backlog=backlog, ) # Optional: recycle uvicorn workers after N requests if max_requests_before_restart is not None: From 394bd53da83762da23fbc84dc1ec2a5dafb85ff5 Mon Sep 17 00:00:00 2001 From: AlexsanderHamir Date: Tue, 7 Oct 2025 16:35:39 -0700 Subject: [PATCH 2/2] add: production defaults --- litellm/proxy/proxy_cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py index 11a43505bf49..3fa6a9ffeab0 100644 --- a/litellm/proxy/proxy_cli.py +++ b/litellm/proxy/proxy_cli.py @@ -506,14 +506,14 @@ def _get_loop_type(): ) @click.option( "--limit_concurrency", - default=None, + default=10000, type=int, help="Set the maximum number of concurrent requests to the proxy (uvicorn limit_concurrency parameter)", envvar="LIMIT_CONCURRENCY", ) @click.option( "--backlog", - default=None, + default=2048, type=int, help="Set the maximum number of pending connections (uvicorn backlog parameter)", envvar="BACKLOG",