Skip to content

Commit 5c6d9c1

Browse files
Removed flashinfer.
1 parent 84d8555 commit 5c6d9c1

File tree

5 files changed

+19
-74
lines changed

5 files changed

+19
-74
lines changed

mason.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -242,8 +242,6 @@ def get_env_vars(
242242
additional_secrets: List[Dict[str, str]],
243243
):
244244
env_vars = []
245-
if "VLLM_ATTENTION_BACKEND" not in additional_env_vars:
246-
env_vars.append(beaker.EnvVar(name="VLLM_ATTENTION_BACKEND", value="FLASHINFER"))
247245
if "RAY_CGRAPH_get_timeout" not in additional_env_vars:
248246
env_vars.append(beaker.EnvVar(name="RAY_CGRAPH_get_timeout", value="300"))
249247
# Add user-specified environment variables first

open_instruct/vllm_utils3.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,7 @@ def __init__(
390390
self.logger.info(f"Contents of /weka/oe-training-default: {contents}")
391391

392392
self.llm_engine = vllm.LLMEngine.from_engine_args(vllm.EngineArgs(*args, **kwargs))
393+
self.logger.info("initialized llmengine")
393394

394395
self.prompt_queue = prompt_queue
395396
self.results_queue = results_queue
@@ -401,6 +402,9 @@ def __init__(
401402
self._should_stop_value = False
402403
self._should_stop_timeout_s = 5
403404

405+
# Logging interval for process_from_queue
406+
self.log_interval = 1000
407+
404408
def _should_stop(self) -> bool:
405409
if (time.perf_counter() - self._last_should_stop_update) > self._should_stop_timeout_s:
406410
should_stop_ref = self.actor_manager.should_stop.remote()
@@ -455,10 +459,24 @@ def _process_request(self, request):
455459

456460
outputs = []
457461
iteration = 0
462+
process_start_time = time.perf_counter()
458463

459464
while True:
460465
iteration += 1
461466

467+
# Periodic logging
468+
if iteration % self.log_interval == 0:
469+
elapsed_time = time.perf_counter() - process_start_time
470+
num_unfinished = self.llm_engine.get_num_unfinished_requests()
471+
pending_tools = len(tracking["pending_tool_futures"]) if tracking else 0
472+
self.logger.info(
473+
f"[LLMRayActor] Status update - Iteration: {iteration}, "
474+
f"Unfinished requests: {num_unfinished}, "
475+
f"Pending tool futures: {pending_tools}, "
476+
f"Outputs collected: {len(outputs)}, "
477+
f"Elapsed time: {elapsed_time:.2f}s"
478+
)
479+
462480
# Poll tool futures first (matching ToolUseLLM order)
463481
if tracking and tracking.get("pending_tool_futures"):
464482
outputs.extend(self._poll_tool_futures(tracking, tokenizer))

pyproject.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ dependencies = [
3131
"pytest-xdist==3.8.0",
3232
"flash-attn>=2.8.0.post2; platform_system != 'Darwin'",
3333
"liger-kernel>=0.5.4; platform_system != 'Darwin'",
34-
"flashinfer-python==0.2.8; platform_system == 'Linux' and platform_machine == 'x86_64'",
3534
]
3635

3736
[build-system]

scripts/launch_benchmark.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ for model_name_or_path in "$@"; do
6363
--max_token_length 10240 \
6464
--max_prompt_token_length 2048 \
6565
--temperature 1.0 \
66+
--verbose True \
6667
--response_length "$response_length" \
6768
--vllm_top_p 0.9 \
6869
--num_unique_prompts_rollout 4 \

uv.lock

Lines changed: 0 additions & 71 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments (0)