-
Notifications
You must be signed in to change notification settings - Fork 5
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(bench): Add pipeline FlashAttention-2 implementation. #23
Changes from 2 commits
d3fccac
b0bd2da
49e9cf7
e57fa5c
710e5df
55b68a4
f593d47
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
{ | ||
"files.associations": { | ||
"array": "cpp", | ||
"string": "cpp", | ||
"string_view": "cpp", | ||
"span": "cpp", | ||
"bitset": "cpp", | ||
"initializer_list": "cpp", | ||
"utility": "cpp", | ||
"*.tcc": "cpp", | ||
"chrono": "cpp", | ||
"random": "cpp", | ||
"limits": "cpp", | ||
"semaphore": "cpp" | ||
}, | ||
"gotoSymbolStack.currentStackPosition": 0, | ||
"gotoSymbolStack.maxStackPosition": 0, | ||
"gotoSymbolStack.filePositionInfo": [] | ||
} | ||
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# ------------------------------------------------------------------------- | ||
# Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the | ||
# MIT License. | ||
# -------------------------------------------------------------------------- | ||
|
||
cmake_minimum_required(VERSION 3.25 FATAL_ERROR) | ||
project(gemm_bench LANGUAGES C CXX CUDA) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the project name There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oops! I forgot to make the modifications, but they have been made now. |
||
|
||
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} | ||
"${PROJECT_SOURCE_DIR}/../../../cmake") | ||
set(THIRD_PARTY_DIR "${PROJECT_SOURCE_DIR}/../../../3rd-party") | ||
|
||
include(generic) | ||
|
||
include_directories("${PROJECT_SOURCE_DIR}/../../../include") | ||
include_directories("${PROJECT_SOURCE_DIR}/../../utils/cpp") | ||
include_directories("${THIRD_PARTY_DIR}/cutlass/include") | ||
|
||
add_executable(flash_attn main.cu) | ||
target_link_libraries(flash_attn ${CUDA_CUBLAS_LIBRARIES}) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is CuBLAS utilized in this code? It doesn't appear to be. Do we need to link it? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# ------------------------------------------------------------------------- | ||
# Copyright (c) Microsoft Corporation. All rights reserved. | ||
# Licensed under the MIT License. | ||
# -------------------------------------------------------------------------- | ||
|
||
|
||
BUILD_DIR := build | ||
|
||
.PHONY: build clean | ||
|
||
build: | ||
@mkdir -p $(BUILD_DIR) | ||
@cd $(BUILD_DIR) && cmake .. && make -j$(proc) | ||
|
||
clean: | ||
@rm -rf $(BUILD_DIR) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
#pragma once | ||
|
||
#include "cuda_utils.cuh" | ||
|
||
#include <cute/layout.hpp> | ||
#include <cute/tensor.hpp> | ||
#include <cutlass/numeric_conversion.h> | ||
|
||
namespace benchmarks { | ||
namespace cutlass_wrapper { | ||
|
||
using namespace cute; | ||
|
||
template <typename To_type, typename Engine, typename Layout> | ||
CUTE_DEVICE auto convert_type(cute::Tensor<Engine, Layout> const& tensor) { | ||
using From_type = typename Engine::value_type; | ||
constexpr int numel = decltype(size(tensor))::value; | ||
cutlass::NumericArrayConverter<To_type, From_type, numel> convert_op; | ||
auto frag = | ||
convert_op(*reinterpret_cast<const cutlass::Array<From_type, numel>*>( | ||
tensor.data())); | ||
return make_tensor(make_rmem_ptr<To_type>(&frag), tensor.layout()); | ||
} | ||
|
||
template <typename Layout> | ||
DEVICE auto convert_layout_rowcol_Aregs(Layout rowcol_layout) { | ||
using namespace cute; | ||
static_assert(decltype(size<0, 0>(rowcol_layout))::value == 2); | ||
static_assert(decltype(size<1, 0>(rowcol_layout))::value == 2); | ||
auto l = logical_divide(rowcol_layout, | ||
Shape<Underscore, Shape<Underscore, Int<2>>>{}); | ||
|
||
return make_layout(make_layout(get<0>(get<1>(l)), get<0>(get<0>(l)), | ||
get<0>(get<1>(get<1>(l)))), | ||
get<1>(get<0>(l)), get<1>(get<1>(get<1>(l)))); | ||
} | ||
|
||
DEVICE auto convert_layout_C_Aregs() { | ||
using namespace cute; | ||
auto layout_s = Layout<Shape<Shape<_2, _2>, _2, _16>>{}; | ||
auto l = logical_divide(layout_s, Shape<Underscore, Underscore, _2>{}); | ||
|
||
return make_layout( | ||
make_layout(get<0>(get<0>(l)), get<1>(get<0>(l)), get<0>(get<2>(l))), | ||
get<1>(l), get<1>(get<2>(l))); | ||
} | ||
|
||
template <class LayoutType> | ||
DEVICE auto convert_layout_scores(LayoutType layout_s) { | ||
using namespace cute; | ||
static_assert(decltype(size<0>(layout_s))::value == 4); | ||
static_assert(decltype(rank(layout_s))::value == 3); | ||
|
||
auto l = logical_divide(layout_s, Shape<_2>{}); | ||
return make_layout(make_layout(get<1>(get<0>(l)), get<1>(l)), | ||
make_layout(get<0>(get<0>(l)), get<2>(l))); | ||
} | ||
|
||
template <int ATOMNUM, class LayoutType> | ||
DEVICE auto convert_layout_scores_copyview(LayoutType layout_s) { | ||
using namespace cute; | ||
|
||
auto l = logical_divide(layout_s, Shape<Underscore, Int<ATOMNUM>>{}); | ||
return make_layout(get<0>(get<1>(l)), get<0>(l), get<1>(get<1>(l))); | ||
} | ||
|
||
} // namespace cutlass_wrapper | ||
} // namespace benchmarks |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am curious as to why the pre-commit hooks (see: https://github.com/microsoft/TileFusion/blob/master/.pre-commit-config.yaml#L28) do not address these unseen characters, which are often caused by differences in IDEs. I have observed this issue several times. This hook is supposed to fix it automatically before filing a PR.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I just used
pre-commit run --all-files
to automatically fix the issues, but it seems that when I use Git to commit, it doesn't automatically fix all files before the pre-commit hook. I will check the reason for this issue later.