diff --git a/python/sglang/srt/layers/sampler.py b/python/sglang/srt/layers/sampler.py
index b24bfc8dacf..8871eae3cc5 100644
--- a/python/sglang/srt/layers/sampler.py
+++ b/python/sglang/srt/layers/sampler.py
@@ -72,9 +72,11 @@ def forward(
                     # NOTE: the top_p_renorm_prob from flashinfer has numerical problems,
                     # https://github.com/flashinfer-ai/flashinfer/issues/708
                     # so we use the torch implementation.
+
+                    # clamp to avoid -inf.
                     logprobs = torch.log(
                         top_p_normalize_probs_torch(probs, sampling_info.top_ps)
-                    )
+                    ).clamp(min=torch.finfo(probs.dtype).min)
 
                 max_top_k_round, batch_size = 32, probs.shape[0]
                 uniform_samples = torch.rand(
@@ -109,9 +111,10 @@ def forward(
                     sampling_info.need_min_p_sampling,
                 )
                 if return_logprob:
+                    # clamp to avoid -inf.
                     logprobs = torch.log(
                         top_p_normalize_probs_torch(probs, sampling_info.top_ps)
-                    )
+                    ).clamp(min=torch.finfo(probs.dtype).min)
             else:
                 raise ValueError(
                     f"Invalid sampling backend: {global_server_args_dict['sampling_backend']}"
diff --git a/python/sglang/srt/managers/tokenizer_manager.py b/python/sglang/srt/managers/tokenizer_manager.py
index 53e1f4edae0..93202fdc30e 100644
--- a/python/sglang/srt/managers/tokenizer_manager.py
+++ b/python/sglang/srt/managers/tokenizer_manager.py
@@ -701,7 +701,7 @@ def configure_logging(self, obj: ConfigureLoggingReq):
     def create_abort_task(self, obj: GenerateReqInput):
         # Abort the request if the client is disconnected.
         async def abort_request():
-            await asyncio.sleep(1)
+            await asyncio.sleep(0.02)
             if obj.is_single:
                 self.abort_request(obj.rid)
             else:
diff --git a/test/srt/sampling/penaltylib/test_srt_endpoint_with_penalizers.py b/test/srt/sampling/penaltylib/test_srt_endpoint_with_penalizers.py
index 5245905f79b..d9d77a9ae24 100644
--- a/test/srt/sampling/penaltylib/test_srt_endpoint_with_penalizers.py
+++ b/test/srt/sampling/penaltylib/test_srt_endpoint_with_penalizers.py
@@ -58,8 +58,7 @@ def run_decode(
                 "logprob_start_len": 0,
             },
         )
-        print(json.dumps(response.json()))
-        print("=" * 100)
+        assert response.status_code == 200, "Request failed: " + response.text
 
     def test_default_values(self):
         self.run_decode()
@@ -112,4 +111,4 @@ def test_repetition_penalty(self):
 
 
 if __name__ == "__main__":
-    unittest.main()
+    unittest.main(verbosity=3)