From b74c45896075ddca656078dd352c08f1a91b0178 Mon Sep 17 00:00:00 2001
From: AlexsanderHamir <alexsanderhamirgomesbaptista@gmail.com>
Date: Tue, 7 Oct 2025 15:41:24 -0700
Subject: [PATCH 1/2] add: limit_concurrency and backlog CLI options

---
 litellm/proxy/proxy_cli.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py
index 21e2c54ec74f..11a43505bf49 100644
--- a/litellm/proxy/proxy_cli.py
+++ b/litellm/proxy/proxy_cli.py
@@ -121,6 +121,8 @@ def _get_default_unvicorn_init_args(
         port: int,
         log_config: Optional[str] = None,
         keepalive_timeout: Optional[int] = None,
+        limit_concurrency: Optional[int] = None,
+        backlog: Optional[int] = None,
     ) -> dict:
         """
         Get the arguments for `uvicorn` worker
@@ -140,6 +142,10 @@ def _get_default_unvicorn_init_args(
             uvicorn_args["log_config"] = None
         if keepalive_timeout is not None:
             uvicorn_args["timeout_keep_alive"] = keepalive_timeout
+        if limit_concurrency is not None:
+            uvicorn_args["limit_concurrency"] = limit_concurrency
+        if backlog is not None:
+            uvicorn_args["backlog"] = backlog
         return uvicorn_args
 
     @staticmethod
@@ -498,6 +504,20 @@ def _get_loop_type():
     help="Restart worker after this many requests (uvicorn: limit_max_requests, gunicorn: max_requests)",
     envvar="MAX_REQUESTS_BEFORE_RESTART",
 )
+@click.option(
+    "--limit_concurrency",
+    default=None,
+    type=int,
+    help="Set the maximum number of concurrent requests to the proxy (uvicorn limit_concurrency parameter)",
+    envvar="LIMIT_CONCURRENCY",
+)
+@click.option(
+    "--backlog",
+    default=None,
+    type=int,
+    help="Set the maximum number of pending connections (uvicorn backlog parameter)",
+    envvar="BACKLOG",
+)
 def run_server(  # noqa: PLR0915
     host,
     port,
@@ -537,6 +557,8 @@ def run_server(  # noqa: PLR0915
     skip_server_startup,
     keepalive_timeout,
     max_requests_before_restart,
+    limit_concurrency,
+    backlog,
 ):
     args = locals()
     if local:
@@ -825,6 +847,8 @@ def run_server(  # noqa: PLR0915
             port=port,
             log_config=log_config,
             keepalive_timeout=keepalive_timeout,
+            limit_concurrency=limit_concurrency,
+            backlog=backlog,
         )
         # Optional: recycle uvicorn workers after N requests
         if max_requests_before_restart is not None:

From 394bd53da83762da23fbc84dc1ec2a5dafb85ff5 Mon Sep 17 00:00:00 2001
From: AlexsanderHamir <alexsanderhamirgomesbaptista@gmail.com>
Date: Tue, 7 Oct 2025 16:35:39 -0700
Subject: [PATCH 2/2] add: production defaults for limit_concurrency, backlog

---
 litellm/proxy/proxy_cli.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py
index 11a43505bf49..3fa6a9ffeab0 100644
--- a/litellm/proxy/proxy_cli.py
+++ b/litellm/proxy/proxy_cli.py
@@ -506,14 +506,14 @@ def _get_loop_type():
 )
 @click.option(
     "--limit_concurrency",
-    default=None,
+    default=10000,
     type=int,
     help="Set the maximum number of concurrent requests to the proxy (uvicorn limit_concurrency parameter)",
     envvar="LIMIT_CONCURRENCY",
 )
 @click.option(
     "--backlog",
-    default=None,
+    default=2048,
     type=int,
     help="Set the maximum number of pending connections (uvicorn backlog parameter)",
     envvar="BACKLOG",