Skip to content

Commit

Permalink
Merge branch 'main' into HEAD
Browse files Browse the repository at this point in the history
  • Loading branch information
ericniebler committed Sep 30, 2023
2 parents b240e36 + 79d726f commit 6dce716
Show file tree
Hide file tree
Showing 13 changed files with 488 additions and 120 deletions.
7 changes: 6 additions & 1 deletion .github/workflows/ci.gpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,19 @@ jobs:
-DCMAKE_CUDA_COMPILER="$cxx" \
-DCMAKE_CUDA_ARCHITECTURES=${{ matrix.sm }};
# Compile
cmake --build build;
cmake --build build -v;
# Print sccache stats
sccache -s
# Tests
ctest --test-dir build --verbose --output-on-failure --timeout 60;
# Examples
./build/examples/nvexec/maxwell_cpu_st --iterations=1000 --N=512 --run-cpp --run-inline-scheduler
./build/examples/nvexec/maxwell_cpu_mt --iterations=1000 --N=512 --run-std --run-stdpar --run-thread-pool-scheduler
./build/examples/nvexec/maxwell_gpu_s --iterations=1000 --N=512 --run-cuda --run-stdpar --run-stream-scheduler
ci-gpu:
runs-on: ubuntu-latest
name: CI (GPU)
Expand Down
11 changes: 11 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,17 @@
"initCommands": ["settings set target.disable-aslr false"],
"args": "${input:CXX_PROGRAM_ARGS}",
},
{
"name": "CUDA Current Target (cuda-gdb)",
"type": "cuda-gdb",
"request": "launch",
"stopAtEntry": false,
"breakOnLaunch": false,
"internalConsoleOptions": "neverOpen",
"program": "${command:cmake.launchTargetPath}",
"cwd": "${command:cmake.launchTargetDirectory}",
"args": "${input:CXX_PROGRAM_ARGS}",
},
],
"inputs": [
{
Expand Down
21 changes: 19 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,12 @@ target_compile_options(stdexec INTERFACE
$<$<COMPILE_LANG_AND_ID:CXX,MSVC>:/Zc:__cplusplus /Zc:preprocessor>
)

# Opt-in switch for stdexec's additional type checks. It defaults to OFF
# because, per its description, the checks are costly at compile time.
option(STDEXEC_ENABLE_EXTRA_TYPE_CHECKING "Enable extra type checking that is costly at compile-time" OFF)

# Hand the matching preprocessor definition to every consumer of `stdexec`,
# but only when the option above evaluates to true.
target_compile_definitions(stdexec INTERFACE
  "$<$<BOOL:${STDEXEC_ENABLE_EXTRA_TYPE_CHECKING}>:STDEXEC_ENABLE_EXTRA_TYPE_CHECKING>"
)

add_library(STDEXEC::stdexec ALIAS stdexec)

# Don't require building everything when installing
Expand Down Expand Up @@ -199,6 +205,17 @@ target_compile_options(stdexec_executable_flags INTERFACE
-include stdexec/__detail/__force_include.hpp>
)

# Everything that links `stdexec_executable_flags` gets the extra type checks
# unconditionally, independent of the STDEXEC_ENABLE_EXTRA_TYPE_CHECKING option.
target_compile_definitions(stdexec_executable_flags
  INTERFACE
    STDEXEC_ENABLE_EXTRA_TYPE_CHECKING
)

# Support target for examples and tests
add_library(nvexec_executable_flags INTERFACE)

# When C++ is built with the NVHPC compiler, pass `-gpu=nomanaged` to both the
# compile step and the link step of every consumer of this target.
target_compile_options(nvexec_executable_flags
  INTERFACE
    "$<$<AND:$<CXX_COMPILER_ID:NVHPC>,$<COMPILE_LANGUAGE:CXX>>:-gpu=nomanaged>"
)
target_link_options(nvexec_executable_flags
  INTERFACE
    "$<$<AND:$<CXX_COMPILER_ID:NVHPC>,$<COMPILE_LANGUAGE:CXX>>:-gpu=nomanaged>"
)

# Set up nvexec library
option(STDEXEC_ENABLE_CUDA "Enable CUDA targets for non-nvc++ compilers" OFF)
if(CMAKE_CXX_COMPILER_ID STREQUAL "NVHPC")
Expand All @@ -220,9 +237,9 @@ if(STDEXEC_ENABLE_CUDA)
target_link_libraries(nvexec INTERFACE STDEXEC::stdexec)

target_compile_options(nvexec INTERFACE
$<$<AND:$<CXX_COMPILER_ID:NVHPC>,$<COMPILE_LANGUAGE:CXX>>:-stdpar -gpu=nomanaged -gpu=cc${CMAKE_CUDA_ARCHITECTURES}>)
$<$<AND:$<CXX_COMPILER_ID:NVHPC>,$<COMPILE_LANGUAGE:CXX>>:-stdpar -gpu=cc${CMAKE_CUDA_ARCHITECTURES}>)
target_link_options(nvexec INTERFACE
$<$<AND:$<CXX_COMPILER_ID:NVHPC>,$<COMPILE_LANGUAGE:CXX>>:-stdpar -gpu=nomanaged -gpu=cc${CMAKE_CUDA_ARCHITECTURES}>)
$<$<AND:$<CXX_COMPILER_ID:NVHPC>,$<COMPILE_LANGUAGE:CXX>>:-stdpar -gpu=cc${CMAKE_CUDA_ARCHITECTURES}>)

if(NOT (CMAKE_CXX_COMPILER_ID STREQUAL "NVHPC"))
include(rapids-cuda)
Expand Down
3 changes: 3 additions & 0 deletions examples/nvexec/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ add_library(nvexec_example INTERFACE)
# Consumers of `nvexec_example` see this directory on their include path ...
target_include_directories(nvexec_example INTERFACE ${CMAKE_CURRENT_LIST_DIR})

# ... and inherit the usage requirements of `nvexec_executable_flags`.
target_link_libraries(nvexec_example INTERFACE nvexec_executable_flags)

add_library(stdpar_multicore INTERFACE)
target_include_directories(stdpar_multicore
Expand Down
49 changes: 1 addition & 48 deletions include/exec/inline_scheduler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,52 +22,5 @@
namespace exec {
// A simple scheduler that executes its continuation inline, on the
// thread of the caller of start().
struct inline_scheduler {
// Operation state produced by connecting `__sender` to a receiver.
// `R_` is the receiver's id type; the receiver type itself is `stdexec::__t<R_>`.
template <class R_>
struct __op {
using R = stdexec::__t<R_>;
// The connected receiver, stored by value.
STDEXEC_NO_UNIQUE_ADDRESS R rec_;

// start(): completes immediately by calling set_value() on the stored
// receiver, moving it out of the operation state.
friend void tag_invoke(stdexec::start_t, __op& op) noexcept {
stdexec::set_value((R&&) op.rec_);
}
};

// The sender returned by schedule(). Its only declared completion
// signature is `set_value_t()`.
struct __sender {
using is_sender = void;
using completion_signatures = stdexec::completion_signatures<stdexec::set_value_t()>;

// connect(): builds an `__op` holding a decayed copy of `rec`. It is
// noexcept exactly when that decay-copy is nothrow-constructible.
template <class R>
friend auto tag_invoke(stdexec::connect_t, __sender, R&& rec) //
noexcept(stdexec::__nothrow_constructible_from<stdexec::__decay_t<R>, R>)
-> __op<stdexec::__x<stdexec::__decay_t<R>>> {
return {(R&&) rec};
}

// Environment of `__sender`: answers the set_value completion-scheduler
// query with a default-constructed `inline_scheduler`.
struct __env {
friend inline_scheduler
tag_invoke(stdexec::get_completion_scheduler_t<stdexec::set_value_t>, const __env&) //
noexcept {
return {};
}
};

// get_env(): returns a default-constructed `__env`.
friend __env tag_invoke(stdexec::get_env_t, const __sender&) noexcept {
return {};
}
};

// schedule(): returns a default-constructed `__sender`.
STDEXEC_DETAIL_CUDACC_HOST_DEVICE //
friend __sender
tag_invoke(stdexec::schedule_t, const inline_scheduler&) noexcept {
return {};
}

// Forward-progress query: always reports `weakly_parallel`.
friend stdexec::forward_progress_guarantee
tag_invoke(stdexec::get_forward_progress_guarantee_t, const inline_scheduler&) noexcept {
return stdexec::forward_progress_guarantee::weakly_parallel;
}

// Defaulted comparison; the type declares no data members.
bool operator==(const inline_scheduler&) const noexcept = default;
};
using inline_scheduler = stdexec::__inln::__scheduler;
}
204 changes: 204 additions & 0 deletions include/exec/sequence/transform_each.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
/*
* Copyright (c) 2023 Maikel Nadolski
* Copyright (c) 2023 NVIDIA Corporation
*
* Licensed under the Apache License Version 2.0 with LLVM Exceptions
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* https://llvm.org/LICENSE.txt
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include "../sequence_senders.hpp"

#include "../__detail/__basic_sequence.hpp"

namespace exec {
namespace __transform_each {
using namespace stdexec;

// State shared between the transform_each operation and the receiver it hands
// to the child sequence: `__receiver` reaches both members through a pointer
// to this base.
template <class _Receiver, class _Adaptor>
struct __operation_base {
// Downstream receiver that items and completion signals are forwarded to.
_Receiver __receiver_;
// Callable applied to each item before it is passed downstream.
_Adaptor __adaptor_;
};

// Receiver that the transform_each operation subscribes to the child sequence
// with. Each item is passed through the adaptor and then on to the downstream
// receiver; completion signals and the environment query are forwarded to the
// downstream receiver unchanged.
// `_ReceiverId` is the id of the downstream receiver type (`stdexec::__t` maps
// it back to the receiver type).
template <class _ReceiverId, class _Adaptor>
struct __receiver {
using _Receiver = stdexec::__t<_ReceiverId>;

struct __t {
using is_receiver = void;
using __id = __receiver;
// Points at the operation state's `__operation_base` sub-object
// (`__operation::__t` passes `this` when it creates this receiver).
__operation_base<_Receiver, _Adaptor>* __op_;

// set_next: applies the stored adaptor to `__item` and forwards the result
// to the downstream receiver's set_next, returning whatever that returns.
// Participates only if the adaptor is callable with `_Item` and the
// downstream receiver accepts the adapted item; noexcept iff both the
// adaptor call and the downstream set_next are nothrow.
template <same_as<set_next_t> _SetNext, same_as<__t> _Self, class _Item>
requires __callable<_Adaptor&, _Item>
&& __callable<exec::set_next_t, _Receiver&, __call_result_t<_Adaptor&, _Item>>
friend auto tag_invoke(_SetNext, _Self& __self, _Item&& __item) noexcept(
__nothrow_callable<_SetNext, _Receiver&, __call_result_t<_Adaptor&, _Item>> //
&& __nothrow_callable<_Adaptor&, _Item>)
-> next_sender_of_t<_Receiver, __call_result_t<_Adaptor&, _Item>> {
return exec::set_next(
__self.__op_->__receiver_, __self.__op_->__adaptor_(static_cast<_Item&&>(__item)));
}

// set_value: completes the downstream receiver with set_value() (no
// values), moving it out of the operation state.
template <same_as<set_value_t> _SetValue, same_as<__t> _Self>
friend void tag_invoke(_SetValue, _Self&& __self) noexcept {
stdexec::set_value(static_cast<_Receiver&&>(__self.__op_->__receiver_));
}

// set_stopped: forwarded to the downstream receiver; only available when
// the downstream receiver accepts set_stopped.
template <same_as<set_stopped_t> _SetStopped, same_as<__t> _Self>
requires __callable<_SetStopped, _Receiver&&>
friend void tag_invoke(_SetStopped, _Self&& __self) noexcept {
stdexec::set_stopped(static_cast<_Receiver&&>(__self.__op_->__receiver_));
}

// set_error: forwards `__error` to the downstream receiver; only available
// when the downstream receiver accepts set_error with `_Error`.
template <same_as<set_error_t> _SetError, same_as<__t> _Self, class _Error>
requires __callable<_SetError, _Receiver&&, _Error>
friend void tag_invoke(_SetError, _Self&& __self, _Error&& __error) noexcept {
stdexec::set_error(
static_cast<_Receiver&&>(__self.__op_->__receiver_), static_cast<_Error&&>(__error));
}

// get_env: exposes the downstream receiver's environment as this
// receiver's own.
template <same_as<get_env_t> _GetEnv, __decays_to<__t> _Self>
friend env_of_t<_Receiver> tag_invoke(_GetEnv, _Self&& __self) noexcept {
return stdexec::get_env(__self.__op_->__receiver_);
}
};
};

template <class _Sender, class _ReceiverId, class _Adaptor>
struct __operation {
using _Receiver = stdexec::__t<_ReceiverId>;

struct __t : __operation_base<_Receiver, _Adaptor> {
using __id = __operation;
subscribe_result_t<_Sender, stdexec::__t<__receiver<_ReceiverId, _Adaptor>>> __op_;

__t(_Sender&& __sndr, _Receiver __rcvr, _Adaptor __adaptor)
: __operation_base<
_Receiver,
_Adaptor>{static_cast<_Receiver&&>(__rcvr), static_cast<_Adaptor&&>(__adaptor)}
, __op_{exec::subscribe(
static_cast<_Sender&&>(__sndr),
stdexec::__t<__receiver<_ReceiverId, _Adaptor>>{this})} {
}

friend void tag_invoke(start_t, __t& __self) noexcept {
stdexec::start(__self.__op_);
}
};
};

template <class _Receiver>
struct __subscribe_fn {
_Receiver& __rcvr_;

template <class _Adaptor, class _Sequence>
auto operator()(__ignore, _Adaptor __adaptor, _Sequence&& __sequence) noexcept(
__nothrow_decay_copyable<_Adaptor> && __nothrow_decay_copyable<_Sequence>
&& __nothrow_decay_copyable<_Receiver>)
-> __t< __operation<_Sequence, __id<_Receiver>, _Adaptor>> {
return {
static_cast<_Sequence&&>(__sequence),
static_cast<_Receiver&&>(__rcvr_),
static_cast<_Adaptor&&>(__adaptor)};
}
};

// Tag types that only appear as arguments of the `__mexception` returned by
// `__try_call` below; their names are meant to be read in a compiler
// diagnostic.
template <class _Adaptor>
struct _NOT_CALLABLE_ADAPTOR_ { };

template <class _Item>
struct _WITH_ITEM_SENDER_ { };

// Fallback overload (declaration only, used in unevaluated context): reports
// that `_Adaptor&` cannot be called with `_Item` by returning an
// `__mexception` that names both the adaptor and the item.
template <class _Adaptor, class _Item>
auto __try_call(_Item*)
-> stdexec::__mexception<_NOT_CALLABLE_ADAPTOR_<_Adaptor&>, _WITH_ITEM_SENDER_<stdexec::__name_of<_Item>>>;

// Constrained overload: chosen when `_Adaptor&` is callable with `_Item`;
// returns `__msuccess`.
template <class _Adaptor, class _Item>
requires stdexec::__callable<_Adaptor&, _Item>
stdexec::__msuccess __try_call(_Item*);

// Runs `__try_call` for every type in an `item_types<...>` list and combines
// the results with `&&`, starting from `__msuccess`.
// NOTE(review): presumably `&&` on these tag types propagates an
// `__mexception` operand -- the operator is defined outside this file; confirm.
template <class _Adaptor, class... _Items>
auto __try_calls(item_types<_Items...>*)
-> decltype((stdexec::__msuccess() && ... && __try_call<_Adaptor>((_Items*) nullptr)));

// Satisfied when `__try_calls` for the decayed adaptor over the item list
// `_Items` yields a type modelling `stdexec::__ok`.
// NOTE(review): the name is misspelled ("callabale"); it is kept as is because
// transform_each_t::subscribe refers to it by this spelling.
template <class _Adaptor, class _Items>
concept __callabale_adaptor_for = requires(_Items* __items) {
{ __try_calls<stdexec::__decay_t<_Adaptor>>(__items) } -> stdexec::__ok;
};

// Function-object type behind `exec::transform_each`. It builds the
// transform_each sequence expression and supplies that expression's
// customizations (completion signatures, item types, subscribe, get_env).
struct transform_each_t {
  // transform_each(sequence, adaptor): creates the sequence expression. The
  // adaptor is passed to make_sequence_expr first and the sequence second;
  // the rest of this type reads them back via `__data_of` and `__child_of`.
  // noexcept iff both arguments can be decay-copied without throwing.
  template <sender _Sequence, __sender_adaptor_closure _Adaptor>
  auto operator()(_Sequence&& __sndr, _Adaptor&& __adaptor) const noexcept(
    __nothrow_decay_copyable<_Sequence> //
    && __nothrow_decay_copyable<_Adaptor>) {
    return make_sequence_expr<transform_each_t>(
      static_cast<_Adaptor&&>(__adaptor), static_cast<_Sequence&&>(__sndr));
  }

  // transform_each(adaptor): binds the adaptor and returns a closure to be
  // applied to a sequence later.
  // The adaptor is moved into the closure, so the call is noexcept only when
  // that move cannot throw. (An unconditional `noexcept` here would turn a
  // throwing move constructor into std::terminate.)
  template <class _Adaptor>
  constexpr auto operator()(_Adaptor __adaptor) const
    noexcept(__nothrow_decay_copyable<_Adaptor>)
      -> __binder_back<transform_each_t, _Adaptor> {
    return {{}, {}, {static_cast<_Adaptor&&>(__adaptor)}};
  }

  // Completion signatures of the expression: derived from the child sequence
  // alone (the adaptor takes no part in this computation).
  template <class _Self, class _Env>
  using __completion_sigs_t = __sequence_completion_signatures_of_t<__child_of<_Self>, _Env>;

  template <sender_expr_for<transform_each_t> _Self, class _Env>
  static __completion_sigs_t<_Self, _Env> get_completion_signatures(_Self&&, _Env&&) noexcept {
    return {};
  }

  // Item types of the expression: the result of calling the adaptor (as an
  // lvalue) with each item type of the child sequence, with duplicates
  // removed, collected in `item_types<...>`.
  template <class _Self, class _Env>
  using __item_types_t = stdexec::__mapply<
    stdexec::__transform<
      stdexec::__mbind_front_q<__call_result_t, __data_of<_Self>&>,
      stdexec::__munique<stdexec::__q<item_types>>>,
    item_types_of_t<__child_of<_Self>, _Env>>;

  template <sender_expr_for<transform_each_t> _Self, class _Env>
  static __item_types_t<_Self, _Env> get_item_types(_Self&&, _Env&&) noexcept {
    return {};
  }

  // Receiver that is subscribed to the child sequence on behalf of `_Receiver`.
  template <class _Self, class _Receiver>
  using __receiver_t = __t<__receiver<__id<_Receiver>, __data_of<_Self>>>;

  // Operation state produced by subscribe().
  template <class _Self, class _Receiver>
  using __operation_t = __t< __operation<__child_of<_Self>, __id<_Receiver>, __data_of<_Self>>>;

  // subscribe(expr, receiver): connects the expression to a sequence receiver.
  // Requirements:
  //  * the adaptor is callable with every item type of the *child* sequence.
  //    These are the adaptor's inputs; checking against `__item_types_t`
  //    would test the adaptor against its own outputs and make the
  //    `_WITH_ITEM_SENDER_` diagnostic name the wrong type;
  //  * `_Receiver` accepts the adapted item types;
  //  * the child sequence can be subscribed with `__receiver_t`.
  // The operation state itself is built by `__subscribe_fn` through
  // apply_sender, which also determines the return type and noexcept-ness.
  template <sender_expr_for<transform_each_t> _Self, receiver _Receiver>
    requires __callabale_adaptor_for<
               __data_of<_Self>,
               item_types_of_t<__child_of<_Self>, env_of_t<_Receiver>>>
          && sequence_receiver_of<_Receiver, __item_types_t<_Self, env_of_t<_Receiver>>>
          && sequence_sender_to<__child_of<_Self>, __receiver_t<_Self, _Receiver>>
  static auto subscribe(_Self&& __self, _Receiver __rcvr) noexcept(
    __nothrow_callable<apply_sender_t, _Self, __subscribe_fn<_Receiver>>)
    -> __call_result_t<apply_sender_t, _Self, __subscribe_fn<_Receiver>> {
    return apply_sender(static_cast<_Self&&>(__self), __subscribe_fn<_Receiver>{__rcvr});
  }

  // get_env(expr): the expression's environment is the child sequence's
  // environment; the tag and the adaptor are ignored.
  template <sender_expr_for<transform_each_t> _Sexpr>
  static env_of_t<__child_of<_Sexpr>> get_env(const _Sexpr& __sexpr) noexcept {
    return apply_sender(__sexpr, []<class _Child>(__ignore, __ignore, const _Child& __child) {
      return stdexec::get_env(__child);
    });
  }
};
}

using __transform_each::transform_each_t;
inline constexpr transform_each_t transform_each{};
}
6 changes: 5 additions & 1 deletion include/exec/sequence_senders.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,11 @@ namespace exec {

template <class _Sequence, class _Env>
using __sequence_completion_signatures_of_t = stdexec::__concat_completion_signatures_t<
stdexec::completion_signatures<stdexec::set_value_t()>,
stdexec::__try_make_completion_signatures<
_Sequence,
_Env,
stdexec::completion_signatures<stdexec::set_value_t()>,
stdexec::__mconst<stdexec::completion_signatures<>>>,
stdexec::__mapply<
stdexec::__q<stdexec::__concat_completion_signatures_t>,
stdexec::__mapply<
Expand Down
23 changes: 20 additions & 3 deletions include/stdexec/__detail/__config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,14 +239,31 @@
#define STDEXEC_FUN_ARGS(...) STDEXEC_CAT(STDEXEC_EAT_THIS_DETAIL_, __VA_ARGS__))
#endif

// Configure extra type checking
#define STDEXEC_TYPE_CHECKING_ZERO() 0
#define STDEXEC_TYPE_CHECKING_ONE() 1
#define STDEXEC_TYPE_CHECKING_TWO() 2

#define STDEXEC_PROBE_TYPE_CHECKING_ STDEXEC_TYPE_CHECKING_ONE
#define STDEXEC_PROBE_TYPE_CHECKING_0 STDEXEC_TYPE_CHECKING_ZERO
#define STDEXEC_PROBE_TYPE_CHECKING_1 STDEXEC_TYPE_CHECKING_ONE
#define STDEXEC_PROBE_TYPE_CHECKING_STDEXEC_ENABLE_EXTRA_TYPE_CHECKING STDEXEC_TYPE_CHECKING_TWO

#define STDEXEC_TYPE_CHECKING_WHICH3(...) STDEXEC_PROBE_TYPE_CHECKING_ ## __VA_ARGS__
#define STDEXEC_TYPE_CHECKING_WHICH2(...) STDEXEC_TYPE_CHECKING_WHICH3(__VA_ARGS__)
#define STDEXEC_TYPE_CHECKING_WHICH STDEXEC_TYPE_CHECKING_WHICH2(STDEXEC_ENABLE_EXTRA_TYPE_CHECKING)

#ifndef STDEXEC_ENABLE_EXTRA_TYPE_CHECKING
// Compile times are bad enough on nvhpc. Disable extra type checking by default.
#if STDEXEC_NVHPC()
#define STDEXEC_ENABLE_EXTRA_TYPE_CHECKING() 0
#elif STDEXEC_TYPE_CHECKING_WHICH() == 2
// do nothing
#elif STDEXEC_TYPE_CHECKING_WHICH() == 0
#undef STDEXEC_ENABLE_EXTRA_TYPE_CHECKING
#define STDEXEC_ENABLE_EXTRA_TYPE_CHECKING() 0
#else
#undef STDEXEC_ENABLE_EXTRA_TYPE_CHECKING
#define STDEXEC_ENABLE_EXTRA_TYPE_CHECKING() 1
#endif
#endif

namespace stdexec {
}
Loading

0 comments on commit 6dce716

Please sign in to comment.