Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/cuda reduce #1064

Draft
wants to merge 34 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
21c7d35
SAVE WORK SQUASH COMMIT
ZelboK Aug 23, 2023
f909b9d
save
ZelboK Aug 25, 2023
17c4592
getting closer
ericniebler Aug 29, 2023
71cd446
save work before merge main squash
ZelboK Sep 1, 2023
201b0ab
Revert "save work before merge main squash"
ZelboK Sep 1, 2023
217e5d1
save before change branch squash later pls
ZelboK Sep 1, 2023
6c10550
Revert "save before change branch squash later pls"
ZelboK Sep 1, 2023
e6a37e3
Merge branch 'main' into feature/cuda_reduce
ZelboK Sep 1, 2023
ae4297a
commit before chage branch squash later
ZelboK Sep 1, 2023
2aa2d9a
push upwards for second opinions
ZelboK Sep 4, 2023
1e517a3
remove noise
ZelboK Sep 4, 2023
b024e09
demonstrate recursive sender transformation
ericniebler Sep 4, 2023
6a6aafa
add test for static_thread_pool bulk concurrency; cleanup
ericniebler Sep 5, 2023
fcd7747
__reconstitute is just make_sender, rename apply_sender
ericniebler Sep 5, 2023
5fb2849
Merge remote-tracking branch 'origin/main' into feature/cuda_reduce
ericniebler Sep 5, 2023
f0dc533
Merge branch 'on-redux' into feature/cuda_reduce
ericniebler Sep 11, 2023
b94bd4d
Merge remote-tracking branch 'origin/main' into feature/cuda_reduce
ericniebler Sep 15, 2023
8722f36
fix cycle in type system in stream scheduler concepts
ericniebler Sep 15, 2023
8350038
Merge pull request #1 from ericniebler/feature/cuda_reduce
ZelboK Sep 15, 2023
76f863d
Merge remote-tracking branch 'origin/main' into tmp_cuda_reduce
ericniebler Sep 23, 2023
b240e36
Merge remote-tracking branch 'origin/main' into HEAD
ericniebler Sep 25, 2023
6dce716
Merge branch 'main' into HEAD
ericniebler Sep 30, 2023
d97b5df
Merge remote-tracking branch 'origin/main' into cuda_reduce
ericniebler Oct 9, 2023
82657c4
Merge remote-tracking branch 'origin/main' into cuda_reduce
ericniebler Oct 18, 2023
6f55541
merge main
ZelboK Oct 22, 2023
b5a09b8
Merge branch 'main' of github.com:NVIDIA/stdexec into feature/cuda_re…
trxcllnt Jan 8, 2024
7931851
update type names
trxcllnt Jan 8, 2024
298cae0
use int instead of float
trxcllnt Jan 8, 2024
bdd9307
clean up include
trxcllnt Jan 9, 2024
a73f622
Merge branch 'main' of github.com:NVIDIA/stdexec into feature/cuda_re…
trxcllnt Jan 9, 2024
8e62305
update aws-actions/configure-aws-credentials version
trxcllnt Jan 9, 2024
ed370d6
update aws-actions/configure-aws-credentials version
trxcllnt Jan 9, 2024
200fa34
change CI CPU workflow trigger
trxcllnt Jan 9, 2024
7ec74dd
Merge branch 'main' into feature/cuda_reduce
trxcllnt Jan 9, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ target_compile_options(stdexec_executable_flags INTERFACE
# Template backtrace limit
target_compile_options(stdexec_executable_flags INTERFACE
$<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>>:
-ferror-limit=0
-ferror-limit=1
-fmacro-backtrace-limit=0
-ftemplate-backtrace-limit=0>
)
Expand Down
33 changes: 28 additions & 5 deletions examples/nvexec/reduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,42 @@
#include <span>

namespace ex = stdexec;
using stdexec::__tag_invoke::tag_invoke;
struct sink_receiver {
using is_receiver = void;
friend void tag_invoke(stdexec::set_value_t, sink_receiver, auto&&...) noexcept {}
friend void tag_invoke(stdexec::set_error_t, sink_receiver, auto&&) noexcept {}
friend void tag_invoke(stdexec::set_stopped_t, sink_receiver) noexcept {}
friend stdexec::empty_env tag_invoke(stdexec::get_env_t, sink_receiver) noexcept { return {}; }
};

// Stateless placeholder type: no data members, no member functions.
struct empty_environment {};

// No-op function template taking only type arguments. Because it is marked
// [[deprecated]], every use draws a compiler diagnostic; presumably that is
// the point (the diagnostic names the template arguments) -- debugging aid.
template <class... Ts>
[[deprecated]] void print() {
}

// unqualified call to tag_invoke:
int main() {
const int n = 2 * 1024;
thrust::device_vector<float> input(n, 1.0f);
float* first = thrust::raw_pointer_cast(input.data());
float* last = thrust::raw_pointer_cast(input.data()) + input.size();

nvexec::stream_context stream_ctx{};
auto sched = stream_ctx.get_scheduler();

auto snd = ex::transfer_just(stream_ctx.get_scheduler(), std::span{first, last})
// auto domain = ex::get_domain(sched);
// print<decltype(domain)>();

auto snd = ex::just(std::span{first, last})
| nvexec::reduce(42.0f);

auto [result] = stdexec::sync_wait(std::move(snd)).value();
// ::print<stdexec::__detail::__name_of<decltype(snd)>>();
// nvexec::stream_scheduler gpu = stream_ctx.get_scheduler();
// using stdexec::__tag_invoke::tag_invoke;
// tag_invoke(stdexec::get_completion_signatures, snd, empty_environment{});

auto [result] =
stdexec::sync_wait(ex::on(sched, std::move(snd))).value();

std::cout << "result: " << result << std::endl;
}
}
40 changes: 38 additions & 2 deletions include/nvexec/stream/algorithm_base.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -91,22 +91,58 @@ namespace nvexec::STDEXEC_STREAM_DETAIL_NS::__algo_range_init_fun {
};
};

template <class SenderId, class InitT, class Fun, class DerivedSender>
template <class Tag, class SenderId, class InitT, class Fun, class DerivedSender>
struct sender_t {

struct __t : stream_sender_base {


using Sender = stdexec::__t<SenderId>;
using __id = sender_t;

using is_sender = void;
template <class Receiver>
using receiver_t = typename DerivedSender::template receiver_t<Receiver>;

template <class Range>
using _set_value_t = typename DerivedSender::template _set_value_t<Range>;

Sender sndr_;
// Why is this called InitT, anyway? If other algorithms are going to reuse this base in the future, I'm not sure InitT is a good name.
STDEXEC_NO_UNIQUE_ADDRESS InitT init_;
STDEXEC_NO_UNIQUE_ADDRESS Fun fun_;

// Selects the N-th (zero-based) type from the pack Ts... by indexing into
// std::tuple<Ts...>.
template <std::size_t N, typename... Ts>
using nth_type_of = typename std::tuple_element<N, std::tuple<Ts...>>::type;

// Debug helper: forwards an expression of type
// stdexec::__detail::__name_of<T> to stdexec::print.
// NOTE(review): std::declval is only meant for unevaluated operands, but the
// call below is an evaluated expression, so instantiating this member should
// be ill-formed. Presumably it is never instantiated -- confirm, or remove
// together with the other debugging aids before merging.
template <typename T>
void print_type_name() const {
stdexec::print(std::declval<stdexec::__detail::__name_of<T>>());
}
// Declared but never defined in the visible source. Presumably the usual
// "incomplete type" debugging trick: using print_the_type<T> where a complete
// type is required makes the compiler report T in its error -- confirm.
template <typename T>
struct print_the_type;

// Aggregate pairing an initial value with a callable; plscompile() builds one
// from a sender's init_ and fun_ members.
// Author's note: this should not live here. algorithm_base should arguably
// require each derived algorithm to provide its own __data struct; this copy
// was added only to get things to compile.
template <class _InitT, class _Fun>
struct __data {
_InitT __initT_;
STDEXEC_NO_UNIQUE_ADDRESS _Fun __fun_;
// Pointers-to-member for both fields, in declaration order, passed as
// template arguments to __mliterals (defined outside this view).
static constexpr auto __mbrs_ = __mliterals<&__data::__initT_, &__data::__fun_>();
};
// Deduction guide: lets `__data(init, fun)` deduce both template arguments
// from the by-value argument types.
template <class _InitT, class _Fun>
__data(_InitT, _Fun) -> __data<_InitT, _Fun>;

// this is basically the apply function that sender_apply is looking for.
template <class S, class Fn>
auto plscompile(S s, Fn f) {
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be renamed to apply. I was conducting an experiment.

auto inside = s.sndr_;
auto data = __data(s.init_, s.fun_);
// stdexec::__detail::__name_of<decltype(inside)> hi;
auto invoked = f(std::declval<Tag>(), data, inside);
return invoked;
}


template <class Self, class Env>
using completion_signatures = //
__try_make_completion_signatures<
Expand Down
5 changes: 4 additions & 1 deletion include/nvexec/stream/common.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@

namespace nvexec {
using stdexec::operator""__csz;

// No-op that accepts any number of arguments of any type. The
// [[gnu::deprecated]] marker makes every call produce a compiler diagnostic;
// presumably a debugging aid for surfacing argument types -- confirm before
// keeping it in a public header.
template <class... Args>
[[gnu::deprecated]]
void print(Args&&...) {
}
enum class stream_priority {
high,
normal,
Expand Down Expand Up @@ -312,6 +313,8 @@ namespace nvexec {
set_stopped_t>>;

inline constexpr get_stream_provider_t get_stream_provider{};
// No-op that accepts any number of arguments of any type. The
// [[gnu::deprecated]] marker makes every call produce a compiler diagnostic;
// presumably a debugging aid for surfacing argument types -- confirm before
// keeping it in a public header.
template <class... Args>
[[gnu::deprecated]]
void prints(Args&&...) {
}

struct get_stream_t {
template <class Env>
Expand Down
Loading