Skip to content

Commit 5c6d9c1

Browse files
Removed flashinfer.
1 parent 84d8555 commit 5c6d9c1

File tree

5 files changed

+19
-74
lines changed

5 files changed

+19
-74
lines changed

mason.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -242,8 +242,6 @@ def get_env_vars(
242242
additional_secrets: List[Dict[str, str]],
243243
):
244244
env_vars = []
245-
if "VLLM_ATTENTION_BACKEND" not in additional_env_vars:
246-
env_vars.append(beaker.EnvVar(name="VLLM_ATTENTION_BACKEND", value="FLASHINFER"))
247245
if "RAY_CGRAPH_get_timeout" not in additional_env_vars:
248246
env_vars.append(beaker.EnvVar(name="RAY_CGRAPH_get_timeout", value="300"))
249247
# Add user-specified environment variables first

open_instruct/vllm_utils3.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,7 @@ def __init__(
390390
self.logger.info(f"Contents of /weka/oe-training-default: {contents}")
391391

392392
self.llm_engine = vllm.LLMEngine.from_engine_args(vllm.EngineArgs(*args, **kwargs))
393+
self.logger.info("initialized llmengine")
393394

394395
self.prompt_queue = prompt_queue
395396
self.results_queue = results_queue
@@ -401,6 +402,9 @@ def __init__(
401402
self._should_stop_value = False
402403
self._should_stop_timeout_s = 5
403404

405+
# Logging interval for process_from_queue
406+
self.log_interval = 1000
407+
404408
def _should_stop(self) -> bool:
405409
if (time.perf_counter() - self._last_should_stop_update) > self._should_stop_timeout_s:
406410
should_stop_ref = self.actor_manager.should_stop.remote()
@@ -455,10 +459,24 @@ def _process_request(self, request):
455459

456460
outputs = []
457461
iteration = 0
462+
process_start_time = time.perf_counter()
458463

459464
while True:
460465
iteration += 1
461466

467+
# Periodic logging
468+
if iteration % self.log_interval == 0:
469+
elapsed_time = time.perf_counter() - process_start_time
470+
num_unfinished = self.llm_engine.get_num_unfinished_requests()
471+
pending_tools = len(tracking["pending_tool_futures"]) if tracking else 0
472+
self.logger.info(
473+
f"[LLMRayActor] Status update - Iteration: {iteration}, "
474+
f"Unfinished requests: {num_unfinished}, "
475+
f"Pending tool futures: {pending_tools}, "
476+
f"Outputs collected: {len(outputs)}, "
477+
f"Elapsed time: {elapsed_time:.2f}s"
478+
)
479+
462480
# Poll tool futures first (matching ToolUseLLM order)
463481
if tracking and tracking.get("pending_tool_futures"):
464482
outputs.extend(self._poll_tool_futures(tracking, tokenizer))

pyproject.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ dependencies = [
3131
"pytest-xdist==3.8.0",
3232
"flash-attn>=2.8.0.post2; platform_system != 'Darwin'",
3333
"liger-kernel>=0.5.4; platform_system != 'Darwin'",
34-
"flashinfer-python==0.2.8; platform_system == 'Linux' and platform_machine == 'x86_64'",
3534
]
3635

3736
[build-system]

scripts/launch_benchmark.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ for model_name_or_path in "$@"; do
6363
--max_token_length 10240 \
6464
--max_prompt_token_length 2048 \
6565
--temperature 1.0 \
66+
--verbose True \
6667
--response_length "$response_length" \
6768
--vllm_top_p 0.9 \
6869
--num_unique_prompts_rollout 4 \

uv.lock

Lines changed: 0 additions & 71 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments (0)