Skip to content

Commit

Permalink
review comments
Browse files Browse the repository at this point in the history
Signed-off-by: Lucas Wilkinson <lwilkinson@neuralmagic.com>
  • Loading branch information
LucasWilkinson committed Jan 20, 2025
1 parent 55bf1b8 commit 17990c0
Show file tree
Hide file tree
Showing 12 changed files with 19 additions and 18 deletions.
12 changes: 6 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -223,13 +223,13 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
FetchContent_Declare(
cutlass
GIT_REPOSITORY https://github.com/nvidia/cutlass.git
GIT_TAG ef5620dd1d5e7c0af33083fae17b43c34f1d8950
GIT_TAG v3.7.0
GIT_PROGRESS TRUE

# Speed up the CUTLASS download by retrieving only the specified GIT_TAG instead of the full history.
# Important: If GIT_SHALLOW is enabled, GIT_TAG works only with branch names and tags.
# So if the GIT_TAG above is updated to a commit hash, GIT_SHALLOW must be set to FALSE.
GIT_SHALLOW FALSE # update when we can go back to using a TAG
GIT_SHALLOW TRUE
)
endif()
FetchContent_MakeAvailable(cutlass)
Expand Down Expand Up @@ -279,10 +279,10 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER 12.0 AND SCALED_MM_3X_ARCHS)
set(SRCS
"csrc/quantization/cutlass_w8a8/scaled_mm_c3x.cu"
"csrc/quantization/cutlass_w8a8/c3x/scaled_mm_c3x_sm90_fp8.cu"
"csrc/quantization/cutlass_w8a8/c3x/scaled_mm_c3x_sm90_int8.cu"
"csrc/quantization/cutlass_w8a8/c3x/scaled_mm_azp_c3x_sm90_int8.cu"
"csrc/quantization/cutlass_w8a8/c3x/scaled_mm_blockwise_c3x_sm90_fp8.cu")
"csrc/quantization/cutlass_w8a8/c3x/scaled_mm_sm90_fp8.cu"
"csrc/quantization/cutlass_w8a8/c3x/scaled_mm_sm90_int8.cu"
"csrc/quantization/cutlass_w8a8/c3x/scaled_mm_azp_sm90_int8.cu"
"csrc/quantization/cutlass_w8a8/c3x/scaled_mm_blockwise_sm90_fp8.cu")
set_gencode_flags_for_srcs(
SRCS "${SRCS}"
CUDA_ARCHS "${SCALED_MM_3X_ARCHS}")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// clang-format off
// Adapted (Heavily) from: https://github.com/soundOfDestiny/cutlass/blob/9d997ce0dea4c5fa1a617db6b7ff29aa9235822c/include/cutlass/gemm/collective/sm90_mma_tma_gmma_ss_warpspecialized_fp8_blockwise_scaling.hpp

/***************************************************************************************************
* Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#include "scaled_mm_c3x_kernels.hpp"
#include "scaled_mm_c3x_sm90_int8_dispatch.cuh"
#include "scaled_mm_kernels.hpp"
#include "scaled_mm_sm90_int8_dispatch.cuh"
#include "cutlass_extensions/epilogue/scaled_mm_epilogues_c3x.hpp"

namespace vllm {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@

#include "scaled_mm_c3x_kernels.hpp"
#include "scaled_mm_blockwise_c3x_sm90_fp8_dispatch.cuh"
#include "scaled_mm_kernels.hpp"
#include "scaled_mm_blockwise_sm90_fp8_dispatch.cuh"
#include "cutlass_extensions/epilogue/scaled_mm_epilogues_c3x.hpp"

namespace vllm {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ struct cutlass_3x_gemm_fp8_blockwise {

using ElementC = void;
using StrideC = StrideD;
static constexpr int AlignmentC = 4;
static constexpr int AlignmentC = AlignmentD;

using ElementAccumulator = float;
using ElementBlockScale = float;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#include "scaled_mm_c3x_kernels.hpp"
#include "scaled_mm_c3x_sm90_fp8_dispatch.cuh"
#include "scaled_mm_kernels.hpp"
#include "scaled_mm_sm90_fp8_dispatch.cuh"
#include "cutlass_extensions/epilogue/scaled_mm_epilogues_c3x.hpp"

namespace vllm {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#pragma once

#include "scaled_mm_c3x.cuh"
#include "scaled_mm.cuh"
#include "cutlass_gemm_caller.cuh"

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#include "scaled_mm_c3x_kernels.hpp"
#include "scaled_mm_c3x_sm90_int8_dispatch.cuh"
#include "scaled_mm_kernels.hpp"
#include "scaled_mm_sm90_int8_dispatch.cuh"
#include "cutlass_extensions/epilogue/scaled_mm_epilogues_c3x.hpp"

namespace vllm {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#pragma once

#include "scaled_mm_c3x.cuh"
#include "scaled_mm.cuh"
#include "cutlass_gemm_caller.cuh"

/**
Expand Down
2 changes: 1 addition & 1 deletion csrc/quantization/cutlass_w8a8/scaled_mm_c3x.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#include <cudaTypedefs.h>
#include "c3x/scaled_mm_c3x_kernels.hpp"
#include "c3x/scaled_mm_kernels.hpp"

#include "core/math.hpp"

Expand Down

0 comments on commit 17990c0

Please sign in to comment.