diff --git a/.github/workflows/scripts/build.sh b/.github/workflows/scripts/build.sh index 01131807d2021..88a735de40056 100644 --- a/.github/workflows/scripts/build.sh +++ b/.github/workflows/scripts/build.sh @@ -12,7 +12,7 @@ export MAX_JOBS=1 # Make sure release wheels are built for the following architectures export PYTORCH_ROCM_ARCH="gfx90a;gfx942" -rm -f $(which sccache) +rm -f "$(which sccache)" export MAX_JOBS=32 diff --git a/vllm/_custom_ops.py b/vllm/_custom_ops.py index 212177e53e85b..b2320d58f92d2 100644 --- a/vllm/_custom_ops.py +++ b/vllm/_custom_ops.py @@ -1,7 +1,7 @@ import contextlib import functools import importlib -from typing import TYPE_CHECKING, List, Optional, Tuple, Type, Union +from typing import TYPE_CHECKING, List, Optional, Tuple, Union import torch import torch.library @@ -242,8 +242,8 @@ def scaled_rms_norm(out: torch.Tensor, input: torch.Tensor, def scaled_fused_add_rms_norm(out: torch.Tensor, input: torch.Tensor, residual: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor, epsilon: float) -> None: - torch.ops._C.fused_add_rms_norm_static_fp8_quant(out, input, residual, weight, scale, - epsilon) + torch.ops._C.fused_add_rms_norm_static_fp8_quant(out, input, residual, + weight, scale, epsilon) def advance_step_flashattn(num_seqs: int, num_queries: int, block_size: int, diff --git a/vllm/attention/backends/hpu_attn.py b/vllm/attention/backends/hpu_attn.py index a8f4b09b67274..7d7967a1c0329 100644 --- a/vllm/attention/backends/hpu_attn.py +++ b/vllm/attention/backends/hpu_attn.py @@ -141,6 +141,7 @@ def forward( k_scale: float = 1.0, v_scale: float = 1.0, attn_type: AttentionType = AttentionType.DECODER, + fp8_out_scale: Optional[torch.Tensor] = None, ) -> torch.Tensor: """Forward pass with xFormers and PagedAttention. diff --git a/vllm/utils.py b/vllm/utils.py index 0a51b6a2bd78e..211d3e86c8b05 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -158,7 +158,7 @@ class _Sentinel: ALL_PINNED_SENTINEL = _Sentinel() -class rpd_trace(): +class rpd_trace: def __init__(self, filename=None, @@ -244,7 +244,7 @@ def is_hipScopedMarker_available(): return hipScopedMarker is not None -class rpd_mark(): +class rpd_mark: def __init__(self, name=None): self.name = name