diff --git a/.gitignore b/.gitignore index 4d60836..10c46a6 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ # Lines that start with '#' are comments. # Editor and misc backup files - anywhere +.vscode *~ .*~ .*.swp diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 1a21dd2..fbb4ee2 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -6,10 +6,11 @@ default: tags: - cuda -build-job: +build-job: stage: build script: - echo "Compiling the code..." + - export GPUFOAM_BACKEND_NVIDIA=1 - ./Allwmake -j 8 - echo "Compile complete." after_script: @@ -24,12 +25,12 @@ build-job: when: always expire_in: 8 hours -unit-test-job: - stage: test +unit-test-job: + stage: test before_script: - mkdir -p build/bin build/lib $FOAM_USER_APPBIN $FOAM_USER_LIBBIN - cp -r build/bin/. $FOAM_USER_APPBIN - cp -r build/lib/. $FOAM_USER_LIBBIN script: - - testGpuChemistry + - Test-gpuChemistry [CPU] - echo "Tests done." diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index cad7657..0000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "cmake.configureOnOpen": false -} \ No newline at end of file diff --git a/Allwmake b/Allwmake index d39ff15..5759313 100755 --- a/Allwmake +++ b/Allwmake @@ -2,6 +2,17 @@ cd ${0%/*} || exit 1 # Run from this directory +if [ -n "${GPUFOAM_BACKEND_NVIDIA}" ] +then + echo "Compiling gpuFoam for Nvidia backend" +elif [ -n "${GPUFOAM_BACKEND_AMD}" ] +then + echo "Compiling gpuFoam for AMD backend" +else + echo "You need to set either GPUFOAM_BACKEND_NVIDIA=1 or GPUFOAM_BACKEND_AMD=1 to compile gpu models." + exit 1 +fi + # Compile gpu utilities gpu_utils/Allwmake diff --git a/README.md b/README.md index a843e11..0e476f6 100644 --- a/README.md +++ b/README.md @@ -6,14 +6,55 @@ For more details check the README section in each of the subfolders listed above. 
## Requirements -* C++17 supporting host compiler -* Nvidia [cuda compiler](https://developer.nvidia.com/hpc-sdk) (nvcc) version 10+ +* C++17 supporting host compiler +* Either Nvidia [cuda compiler](https://developer.nvidia.com/hpc-sdk) (nvcc) version 10+ +* Or AMD [hip compiler](https://rocm.docs.amd.com/projects/HIP/en/docs-6.0.0/how_to_guides/install.html) (hipcc) version 5.6+ * Latest [OpenFOAM foundation development release](https://openfoam.org/version/dev/) +## Compilation +Everything in this folder should compile with the Allwmake script. Before running it, ensure that you have a valid OpenFOAM installation and compilers available. + +* #### For Nvidia + + To check that you have valid compilers for the Nvidia backend + ``` + foamRun -help + g++ --version + nvcc --version + ``` + + Then check the name of your graphics card (for example with nvidia-smi) and go to [this page](https://developer.nvidia.com/cuda-gpus) to see which compute capability it has. Modify the file ```./nvcc``` accordingly if necessary. + + + + Finally, run the commands + ``` + export GPUFOAM_BACKEND_NVIDIA=1 + ./Allwmake + ``` + +* #### For AMD + + To check that you have valid compilers for the AMD backend + ``` + foamRun -help + g++ --version + hipcc --version + ``` + Then check the name of your graphics card and go to [this page](https://llvm.org/docs/AMDGPUUsage.html#processors) to see the matching architecture keyword. Modify the file ```./hipcc``` if necessary. + + Finally, run the commands + ``` + export GPUFOAM_BACKEND_AMD=1 + ./Allwmake + ``` + + + ## Credits This project contains source code taken from the following projects -* [mdspan](https://github.com/kokkos/mdspan) -* [variant](https://github.com/bryancatanzaro/variant) +* [mdspan](https://github.com/kokkos/mdspan) +* [variant](https://github.com/bryancatanzaro/variant) The licenses of the external projects can be found from their respective folders. 
\ No newline at end of file diff --git a/gpu_chemistry/Allwmake b/gpu_chemistry/Allwmake index 90cafe6..76fa4a6 100755 --- a/gpu_chemistry/Allwmake +++ b/gpu_chemistry/Allwmake @@ -9,7 +9,11 @@ wmakeLnInclude src wmake src/gpuKernelEvaluator wmake src/gpuChemistryModels wmake catchMain -wmake all unittest -#wmake all benchmark +wmake unittest/testHelpers +wmake unittest/OpenFOAMReferenceKernels +wmake unittest/cpuTestKernels +wmake unittest/gpuTestKernels +wmake unittest/tests +wmake all benchmark # ----------------------------------------------------------------- end-of-file diff --git a/gpu_chemistry/README.md b/gpu_chemistry/README.md index c1f6086..f2d9da1 100644 --- a/gpu_chemistry/README.md +++ b/gpu_chemistry/README.md @@ -1,13 +1,5 @@ # gpu_chemistry -## Compilation -Everything in this folder such compile with the Allwmake script. Before running it, ensure that you have a valid OpenFOAM installation and compilers available by typing the following commands - -``` -foamRun -help -g++ --version -nvcc --version -``` ## Performance diff --git a/gpu_chemistry/benchmark/others/Benchmark-others.cu b/gpu_chemistry/benchmark/Benchmark-gpuChemistry.cu similarity index 91% rename from gpu_chemistry/benchmark/others/Benchmark-others.cu rename to gpu_chemistry/benchmark/Benchmark-gpuChemistry.cu index f3341e9..765706f 100644 --- a/gpu_chemistry/benchmark/others/Benchmark-others.cu +++ b/gpu_chemistry/benchmark/Benchmark-gpuChemistry.cu @@ -8,10 +8,67 @@ #include "ludecompose.H" #include "gpuODESystem.H" #include "makeGpuOdeSolver.H" - +#include "gpuMemoryResource.H" static constexpr TestData::Mechanism mech = TestData::GRI; +using memoryResource_t = FoamGpu::gpuMemoryResource; + + +template +__global__ void on_device(T t, R* r) +{ + *r = t(); +} + + + +#ifdef __NVIDIA_BACKEND__ + + template + static inline gScalar eval(T t) + { + + gScalar *d_result; + gpuErrorCheck(cudaMalloc(&d_result, sizeof(gScalar))); + on_device<<<1,1>>>(t, d_result); + 
gpuErrorCheck(cudaGetLastError()); + gpuErrorCheck(cudaDeviceSynchronize()); + gScalar h_result; + gpuErrorCheck(cudaMemcpy(&h_result, d_result, sizeof(gScalar), cudaMemcpyDeviceToHost)); + gpuErrorCheck(cudaDeviceSynchronize()); + gpuErrorCheck(cudaFree(d_result)); + gpuErrorCheck(cudaDeviceSynchronize()); + return h_result; + + } + + //AMD-backend + #else + + template + static inline gScalar eval(T t) + { + + gScalar *d_result; + gpuErrorCheck(hipMalloc(&d_result, sizeof(gScalar))); + hipLaunchKernelGGL + ( + on_device, dim3(1), dim3(1), 0, 0, t, d_result + ); + gpuErrorCheck(hipGetLastError()); + gpuErrorCheck(hipDeviceSynchronize()); + gScalar h_result; + gpuErrorCheck(hipMemcpy(&h_result, d_result, sizeof(gScalar), hipMemcpyDeviceToHost)); + gpuErrorCheck(hipDeviceSynchronize()); + gpuErrorCheck(hipFree(d_result)); + gpuErrorCheck(hipDeviceSynchronize()); + return h_result; + + } + +#endif + TEST_CASE("LU") { @@ -29,9 +86,9 @@ TEST_CASE("LU") device_vector matrix(vals.begin(), vals.end()); - device_vector pivot(nEqns, 0); - device_vector v(nEqns, 0); - device_vector source(nEqns, 0); + device_vector pivot(nEqns); + device_vector v(nEqns); + device_vector source(nEqns); auto m_span = make_mdspan(matrix, extents<2>{nEqns, nEqns}); auto p_span = make_mdspan(pivot, extents<1>{nEqns}); @@ -129,6 +186,7 @@ TEST_CASE("gpuODESystem"){ } +/* TEST_CASE("gpuReactionRate"){ using namespace FoamGpu; @@ -138,8 +196,10 @@ TEST_CASE("gpuReactionRate"){ const gScalar T = 900.0; - device_vector c(nSpecie, 0.43); - device_vector ddc(nSpecie, 0.43); + device_vector c = toDeviceVector(TestData::get_concentration_vector(mech)); + device_vector ddc = c; + + //SECTION("gpuArrheniusReactionRate") { @@ -386,8 +446,8 @@ TEST_CASE("gpuReactionRate"){ for (int i = 0; i < nReactions; ++i){ auto pair = reactions[i].k_.everything(p, T, c, ddc); - ret += std::get<0>(pair); - ret += std::get<1>(pair); + ret += pair[0]; + ret += pair[1]; ret += ddc[0] + ddc[5] + ddc[7]; } @@ -412,10 +472,12 @@ 
TEST_CASE("gpuReactionRate"){ return eval(op4); }; + } } +*/ TEST_CASE("gpuReaction"){ @@ -429,17 +491,12 @@ TEST_CASE("gpuReaction"){ const gScalar p = 1E5; const gScalar T = 900.0; - device_vector c = [=](){ - std::vector vals(nSpecie); - assign_test_concentration(vals, mech); - //fill_random(vals); - return device_vector(vals.begin(), vals.end()); - }(); + device_vector c = toDeviceVector(TestData::get_concentration_vector(mech)); + + + + device_vector dndt(nEqns); - device_vector dndt = [=](){ - std::vector vals(nEqns); - return device_vector(vals.begin(), vals.end()); - }(); BENCHMARK("dNdtByV"){ diff --git a/gpu_chemistry/benchmark/Make/files b/gpu_chemistry/benchmark/Make/files new file mode 100644 index 0000000..ebad78c --- /dev/null +++ b/gpu_chemistry/benchmark/Make/files @@ -0,0 +1,5 @@ +Benchmark-gpuChemistry.cu + +EXE = $(FOAM_USER_APPBIN)/Benchmark-gpuChemistry + + diff --git a/gpu_chemistry/benchmark/others/Make/options b/gpu_chemistry/benchmark/Make/options similarity index 55% rename from gpu_chemistry/benchmark/others/Make/options rename to gpu_chemistry/benchmark/Make/options index a801a28..6c20210 100644 --- a/gpu_chemistry/benchmark/others/Make/options +++ b/gpu_chemistry/benchmark/Make/options @@ -1,12 +1,10 @@ EXE_INC = \ - -I../../catchMain/lnInclude \ - -I../../src/lnInclude \ - -I../../src/gpuChemistryModels/lnInclude \ - -I../../src/gpuKernelEvaluator/lnInclude \ - -I../../../gpu_utils/common/lnInclude \ - -I../../unittest/testHelpers/ \ - -I../ - -std=c++17 + -I../catchMain/lnInclude \ + -I../src/lnInclude \ + -I../src/gpuChemistryModels/lnInclude \ + -I../src/gpuKernelEvaluator/lnInclude \ + -I../../gpu_utils/common/lnInclude \ + -I../unittest/testHelpers/ EXE_LIBS = \ -L$(FOAM_USER_LIBBIN) \ @@ -23,5 +21,8 @@ EXE_LIBS = \ -lfiniteVolume \ -lchemistryModel - -include ../../nvcc \ No newline at end of file +ifeq ($(GPUFOAM_BACKEND_NVIDIA),1) + include ../../nvcc +else + include ../../hipcc +endif \ No newline at end of file diff --git 
a/gpu_chemistry/benchmark/benchmark_utilities.H b/gpu_chemistry/benchmark/benchmark_utilities.H deleted file mode 100644 index f466248..0000000 --- a/gpu_chemistry/benchmark/benchmark_utilities.H +++ /dev/null @@ -1,74 +0,0 @@ -#pragma once - -#include -#include "gpu_constants.H" -#include "create_gpu_inputs.H" - -struct BenchmarkParams{ - - static constexpr gScalar deltaT = 1E-6; - static constexpr gScalar deltaTChemMax = 1E8; - static constexpr gScalar chemdeltaTMin = 1E-7; - static constexpr gScalar chemDeltaTmax = 1E-6; - static constexpr gLabel nCells = 1000; - - static constexpr gScalar absTol = 1E-1; - static constexpr gScalar relTol = 1E-12; - -}; - -static inline -std::vector make_random_y0s(gLabel nCells, gLabel nEqns) -{ - using namespace FoamGpu; - - gLabel nSpecie = nEqns - 2; - std::vector ret(nCells * nEqns); - auto yvf = make_mdspan(ret, extents<2>{nCells, nEqns}); - for (gLabel i = 0; i < nCells; ++i){ - for (gLabel j = 0; j < nSpecie; ++j){ - yvf(i, j) = random_number(0.01, 0.435); - } - yvf(i, nSpecie) = random_number(500.0, 1000.0); - yvf(i, nSpecie+1) = random_number(1E5, 1.2E5); - } - return ret; -} -static inline -std::vector make_tutorial_y0s(gLabel nCells, TestData::Mechanism mech) -{ - using namespace FoamGpu; - const gLabel nEqns = TestData::equationCount(mech); - std::vector ret(nCells * nEqns); - auto y0 = std::vector(nEqns); - assign_test_condition(y0, mech); - auto yvf = make_mdspan(ret, extents<2>{nCells, nEqns}); - for (gLabel i = 0; i < nCells; ++i){ - - for (gLabel j = 0; j < nEqns; ++j){ - yvf(i, j) = y0[j]; - } - - } - return ret; -} - - -static inline -std::vector make_random_rhos(gLabel nCells) -{ - using namespace FoamGpu; - std::vector ret(nCells, 1.0); - fill_random(ret, 0.9, 1.3); - return ret; -} -static inline -std::vector make_random_deltaTChem(gLabel nCells) -{ - using namespace FoamGpu; - std::vector ret(nCells, 1E-7); - fill_random(ret, BenchmarkParams::chemdeltaTMin, BenchmarkParams::chemDeltaTmax); - return ret; -} 
- - diff --git a/gpu_chemistry/benchmark/evaluator/Benchmark-evaluator.C b/gpu_chemistry/benchmark/evaluator/Benchmark-evaluator.C deleted file mode 100644 index 1e58485..0000000 --- a/gpu_chemistry/benchmark/evaluator/Benchmark-evaluator.C +++ /dev/null @@ -1,128 +0,0 @@ -#define CATCH_CONFIG_ENABLE_BENCHMARKING -//#define CATCH_CONFIG_MAIN -#include "catch.H" - -#include "gpuKernelEvaluator.H" -#include "readGpuOdeInputs.H" -#include "test_utilities.H" -#include "create_gpu_inputs.H" -#include "benchmark_utilities.H" - - - - -auto callGpuSolve -( - gScalar deltaT, - gScalar deltaTChemMax, - const std::vector& rho, - const std::vector& deltaTChem, - const std::vector& Yvf, - FoamGpu::GpuKernelEvaluator& eval -) -{ - using namespace FoamGpu; - return eval.computeRR(deltaT, deltaTChemMax, rho, deltaTChem, Yvf); -} - - -static inline Foam::dictionary make_dict(std::string solverName) -{ - Foam::dictionary dict; - dict.add("solver", solverName); - dict.add("absTol", BenchmarkParams::absTol); - dict.add("relTol", BenchmarkParams::relTol); - return dict; -} - -//std::vector make_random_ - -FoamGpu::GpuKernelEvaluator make_evaluator -( - gLabel nCells, const Foam::dictionary& odeDict, TestData::Mechanism m) -{ - using namespace FoamGpu; - auto thermos = TestData::makeGpuThermos(m); - auto reactions = TestData::makeGpuReactions(m); - gLabel nSpecie = TestData::speciesCount(m); - gLabel nEqns = TestData::equationCount(m); - - auto inputs = read_gpuODESolverInputs(odeDict); - - - return GpuKernelEvaluator - ( - nCells, - nEqns, - nSpecie, - thermos, - reactions, - inputs - ); - -} - -static inline void warmup() -{ - auto mech = TestData::H2; - const gScalar deltaT = BenchmarkParams::deltaT; - const gScalar deltaTChemMax = BenchmarkParams::deltaTChemMax; - const gLabel nCells = BenchmarkParams::nCells; - - const auto rho = make_random_rhos(nCells); - const auto deltaTChem = make_random_deltaTChem(nCells); - const auto Yvf = make_tutorial_y0s(nCells, mech); - //const auto Yvf 
= make_random_y0s(nCells, TestData::equationCount(mech)); - - //No idea why this is necessary, but without it, the first benchmark results are way off - BENCHMARK_ADVANCED("WARMUP")(Catch::Benchmark::Chronometer meter) { - auto dict = make_dict("Rosenbrock23"); - auto eval = make_evaluator(nCells, dict, mech); - meter.measure([&] { return callGpuSolve(deltaT, deltaTChemMax, rho, deltaTChem, Yvf, eval);}); - }; -} - -static inline -void runBenchmarks(TestData::Mechanism mech) -{ - const gScalar deltaT = BenchmarkParams::deltaT; - const gScalar deltaTChemMax = BenchmarkParams::deltaTChemMax; - const gLabel nCells = 1E4; - - const auto rho = make_random_rhos(nCells); - const auto deltaTChem = make_random_deltaTChem(nCells); - const auto Yvf = make_tutorial_y0s(nCells, mech); - - BENCHMARK_ADVANCED("Rosenbrock12")(Catch::Benchmark::Chronometer meter) { - auto dict = make_dict("Rosenbrock12"); - auto eval = make_evaluator(nCells, dict, mech); - meter.measure([&] { return callGpuSolve(deltaT, deltaTChemMax, rho, deltaTChem, Yvf, eval);}); - }; - - BENCHMARK_ADVANCED("Rosenbrock23")(Catch::Benchmark::Chronometer meter) { - auto dict = make_dict("Rosenbrock23"); - auto eval = make_evaluator(nCells, dict, mech); - meter.measure([&] { return callGpuSolve(deltaT, deltaTChemMax, rho, deltaTChem, Yvf, eval);}); - }; - - - BENCHMARK_ADVANCED("Rosenbrock34")(Catch::Benchmark::Chronometer meter) { - auto dict = make_dict("Rosenbrock34"); - auto eval = make_evaluator(nCells, dict, mech); - meter.measure([&] { return callGpuSolve(deltaT, deltaTChemMax, rho, deltaTChem, Yvf, eval);}); - }; -} - -TEST_CASE("Benchmark warmup") -{ - warmup(); -} -TEST_CASE("Benchmark GpuKernelEvaluator (H2)") -{ - runBenchmarks(TestData::H2); -} - -TEST_CASE("Benchmark GpuKernelEvaluator (GRI)") -{ - runBenchmarks(TestData::GRI); -} diff --git a/gpu_chemistry/benchmark/evaluator/Make/files b/gpu_chemistry/benchmark/evaluator/Make/files deleted file mode 100644 index b65f470..0000000 --- 
a/gpu_chemistry/benchmark/evaluator/Make/files +++ /dev/null @@ -1,5 +0,0 @@ -Benchmark-evaluator.C - -EXE = $(FOAM_USER_APPBIN)/gpuChemistrybenchmarkEvaluator - - diff --git a/gpu_chemistry/benchmark/others/Make/files b/gpu_chemistry/benchmark/others/Make/files deleted file mode 100644 index 5bae57e..0000000 --- a/gpu_chemistry/benchmark/others/Make/files +++ /dev/null @@ -1,5 +0,0 @@ -Benchmark-others.cu - -EXE = $(FOAM_USER_APPBIN)/gpuChemistrybenchmarkOthers - - diff --git a/gpu_chemistry/hipcc b/gpu_chemistry/hipcc deleted file mode 100644 index 069b6bd..0000000 --- a/gpu_chemistry/hipcc +++ /dev/null @@ -1,37 +0,0 @@ -SUFFIXES += .cu - -c++WARN = - - -NVCC_FLAGS = --std=c++17 -NVCC_FLAGS += --expt-relaxed-constexpr -NVCC_FLAGS += --expt-extended-lambda -#NVCC_FLAGS += --use_fast_math -#NVCC_FLASG += --generate-line-info -#NVCC_FLAGS += -lineinfo -#NVCC_FLAGS += -fmad=false -#NVCC_FLAGS += --gpu-architecture=compute_80 --gpu-code=sm_80 -#NVCC_FLAGS += --gpu-architecture=compute_86 --gpu-code=sm_86 - - -CC = hipcc $(NVCC_FLAGS) - -ptFLAGS = -DNoRepository - -c++FLAGS = $(GFLAGS) $(c++WARN) $(c++DBUG) $(ptFLAGS) \ - $(LIB_HEADER_DIRS) -fPIC - - -Ctoo = $(WM_SCHEDULER) $(CC) $(c++FLAGS) -c $< -o $@ -cxxtoo = $(Ctoo) -cctoo = $(Ctoo) -cpptoo = $(Ctoo) -cutoo = $(Ctoo) - -LINK_LIBS = $(c++DBUG) - -LINKLIBSO = $(CC) $(c++FLAGS) -shared \ - -Xlinker --add-needed -Xlinker --no-as-needed - -LINKEXE = $(CC) $(c++FLAGS) \ - -Xlinker --add-needed -Xlinker --no-as-needed diff --git a/gpu_chemistry/src/gpuChemistryModels/gpuChemistryModel/gpuChemistryModel.C b/gpu_chemistry/src/gpuChemistryModels/gpuChemistryModel/gpuChemistryModel.C index cba36b2..3745746 100644 --- a/gpu_chemistry/src/gpuChemistryModels/gpuChemistryModel/gpuChemistryModel.C +++ b/gpu_chemistry/src/gpuChemistryModels/gpuChemistryModel/gpuChemistryModel.C @@ -243,8 +243,16 @@ scalar gpuChemistryModel::computeRRAndChemDeltaT(const scalar& de if (!this->chemistry_) { return great; } + /* auto [RR, deltaTChem, 
minDeltaT] = evaluator_.computeRR( deltaT, deltaTChemMax_, getRho0(), getDeltaTChem(), getY0()); + */ + auto tpl = evaluator_.computeRR( + deltaT, deltaTChemMax_, getRho0(), getDeltaTChem(), getY0()); + + auto RR = std::get<0>(tpl); + auto deltaTChem = std::get<1>(tpl); + auto minDeltaT = std::get<2>(tpl); auto RRs = make_mdspan(RR, extents<2>{nCells(), nSpecie()}); diff --git a/gpu_chemistry/src/gpuChemistryModels/gpuChemistryModel/gpuChemistryModel.H b/gpu_chemistry/src/gpuChemistryModels/gpuChemistryModel/gpuChemistryModel.H index d80f5a1..150ccf4 100644 --- a/gpu_chemistry/src/gpuChemistryModels/gpuChemistryModel/gpuChemistryModel.H +++ b/gpu_chemistry/src/gpuChemistryModels/gpuChemistryModel/gpuChemistryModel.H @@ -4,10 +4,7 @@ #include "basicChemistryModel.H" -#include "gpuODESystem.H" -#include "gpuReaction.H" -#include "gpuRosenbrock34.H" -#include "gpuThermo.H" + #include "makeGpuReactions.H" #include "makeGpuThermo.H" #include "readGpuOdeInputs.H" @@ -15,8 +12,8 @@ #include "DynamicField.H" #include "ODESystem.H" #include "ReactionList.H" + #include "chemistryReductionMethod.H" -#include "chemistryTabulationMethod.H" #include "gpuKernelEvaluator.H" #include "multicomponentMixture.H" diff --git a/gpu_chemistry/src/gpuEquationOfState/gpuPerfectGas.H b/gpu_chemistry/src/gpuEquationOfState/gpuPerfectGas.H index 24d5878..84126d3 100644 --- a/gpu_chemistry/src/gpuEquationOfState/gpuPerfectGas.H +++ b/gpu_chemistry/src/gpuEquationOfState/gpuPerfectGas.H @@ -4,7 +4,6 @@ #include "gpu_constants.H" #include - namespace FoamGpu { class gpuPerfectGas { @@ -16,21 +15,22 @@ public: //- Molecular weight of specie [kg/kmol] gScalar molWeight_; - /// @brief Unary += operator. Increments the mass fraction directly and - /// scales the molecular weight appropriately (?). + /// @brief Unary += operator. Increments the mass fraction + /// directly and scales the molecular weight appropriately (?). 
/// @param st RHS gpuPerfectGas object inline void operator+=(const gpuPerfectGas& st) { const gScalar sumY = Y_ + st.Y_; if (std::abs(sumY) > gpuSmall) { - molWeight_ = sumY / (Y_ / molWeight_ + st.Y_ / st.molWeight_); + molWeight_ = + sumY / (Y_ / molWeight_ + st.Y_ / st.molWeight_); } Y_ = sumY; } - /// @brief Unary *= operator. Scales the mass fraction with the input - /// scalar. + /// @brief Unary *= operator. Scales the mass fraction with the + /// input scalar. /// @param s The scalar used to scale the mass fraction. inline void operator*=(gScalar s) { Y_ *= s; } @@ -49,16 +49,19 @@ public: /// @return The molecular weight of this specie. inline CUDA_HOSTDEV gScalar W() const { return molWeight_; } - /// @brief Get the mass fraction of this specie in the the mixture. + /// @brief Get the mass fraction of this specie in the the + /// mixture. /// @return The mass fraction of this specie. inline CUDA_HOSTDEV gScalar Y() const { return Y_; } /// @brief Get the gas constant [J/kg/K]. /// @return The gas constant of this specie. - inline CUDA_HOSTDEV gScalar R() const { return gpuRR / molWeight_; } + inline CUDA_HOSTDEV gScalar R() const { + return gpuRR / molWeight_; + } - /// @brief Computes the density [kg/m^3] of this specie based on the input - /// pressure and temperature. + /// @brief Computes the density [kg/m^3] of this specie based on + /// the input pressure and temperature. /// @param p Pressure. /// @param T Temperature. /// @return The density of this specie. @@ -66,19 +69,22 @@ public: return p / (this->R() * T); } - /// @brief Computes the enthalpy [J/kg] contribution of this specie in the - /// mixture based on the inpute pressure and temperature. + /// @brief Computes the enthalpy [J/kg] contribution of this + /// specie in the mixture based on the inpute pressure and + /// temperature. /// @param p Pressure. /// @param T Temperature. /// @return The enthalpy contribution of this specie. 
- inline CUDA_HOSTDEV gScalar H(const gScalar p, const gScalar T) const { + inline CUDA_HOSTDEV gScalar H(const gScalar p, + const gScalar T) const { (void)p; (void)T; return 0.0; } - /// @brief Computes the heat capacity [J/(kg K)] contribution of this specie - /// in the mixture in based on the input pressure and temperature. + /// @brief Computes the heat capacity [J/(kg K)] contribution of + /// this specie in the mixture in based on the input pressure and + /// temperature. /// @param p Pressure. /// @param T Temperature. /// @return The heat capacity contribution of this specie. @@ -88,21 +94,22 @@ public: return 0.0; } - /// @brief Computes the internal energy [J/kg] contribution of this specie - /// in the mixture based on the input pressure and temperature. Assumes - /// contant pressure. + /// @brief Computes the internal energy [J/kg] contribution of + /// this specie in the mixture based on the input pressure and + /// temperature. Assumes contant pressure. /// @param p Pressure. /// @param T Temperature. /// @return The heat internal energy contribution of this specie. - inline CUDA_HOSTDEV gScalar E(const gScalar p, const gScalar T) const { + inline CUDA_HOSTDEV gScalar E(const gScalar p, + const gScalar T) const { (void)p; (void)T; return 0.0; } - /// @brief Computes the heat capacity [J/(kg K)] contribution of this specie - /// in the mixture based on the input pressure and temperature. Assumes - /// constant volume. + /// @brief Computes the heat capacity [J/(kg K)] contribution of + /// this specie in the mixture based on the input pressure and + /// temperature. Assumes constant volume. /// @param p Pressure. /// @param T Temperature. /// @return The heat capacity contribution of this specie. @@ -112,30 +119,34 @@ public: return 0.0; } - /// @brief Computes the entropy [J/kg/K] contribution to the integral of - /// Cp/T in the mixture based on the input pressure and temperature. 
+ /// @brief Computes the entropy [J/kg/K] contribution to the + /// integral of Cp/T in the mixture based on the input pressure + /// and temperature. /// @param p Pressure. /// @param T Temperature. /// @return The entropy contribution to the integral of Cp/T. - inline CUDA_HOSTDEV gScalar Sp(const gScalar p, const gScalar T) const { + inline CUDA_HOSTDEV gScalar Sp(const gScalar p, + const gScalar T) const { (void)T; return -this->R() * std::log(p / gpuPstd); } - /// @brief Computes the entropy [J/kg/K] contribution to the integral of - /// Cv/T in the mixture based on the input pressure and temperature. + /// @brief Computes the entropy [J/kg/K] contribution to the + /// integral of Cv/T in the mixture based on the input pressure + /// and temperature. /// @param p Pressure. /// @param T Temperature. /// @return The entropy contribution to the integral of Cv/T. - inline CUDA_HOSTDEV gScalar Sv(const gScalar p, const gScalar T) const { + inline CUDA_HOSTDEV gScalar Sv(const gScalar p, + const gScalar T) const { // throw std::logic_error("Not implemented"); (void)p; (void)T; return 0.0; } - /// @brief Computes the compressibility [s^2/m^2] of this specie in the - /// mixture based on the input pressure and temperature. + /// @brief Computes the compressibility [s^2/m^2] of this specie + /// in the mixture based on the input pressure and temperature. /// @param p Pressure. /// @param T Temperature. /// @return The compressibility of this specie. @@ -144,8 +155,8 @@ public: return 1.0 / (this->R() * T); } - /// @brief Computes the unitless compression factor of this specie based on - /// the input pressure and temperature. + /// @brief Computes the unitless compression factor of this specie + /// based on the input pressure and temperature. /// @param p Pressure. /// @param T Temperature. /// @return The compression factor of this specie. 
@@ -155,8 +166,8 @@ public: return 1.0; } - /// @brief Computes the difference (Cp - Cv) [J/(kg K)] based on the input - /// pressure and temperature. + /// @brief Computes the difference (Cp - Cv) [J/(kg K)] based on + /// the input pressure and temperature. /// @param p Pressure. /// @param T Temperature. /// @return The difference Cp - Cv. @@ -166,20 +177,21 @@ public: return this->R(); } - /// @brief Computes the volumetric coefficient of thermal expansion [1/T] - /// based on the input pressure and temperature. + /// @brief Computes the volumetric coefficient of thermal + /// expansion [1/T] based on the input pressure and temperature. /// @param p Pressure. /// @param T Temperature. /// @return The volumetric coefficient of thhermal expansion. - inline CUDA_HOSTDEV gScalar alphav(const gScalar p, const gScalar T) const { + inline CUDA_HOSTDEV gScalar alphav(const gScalar p, + const gScalar T) const { (void)p; return 1.0 / T; } }; -/// @brief Binary + operator for two gpuPerfectGas objects by summing the mass -/// fractions and molecular weights. Ensures that no zero division occurs. Note! -/// Host only. +/// @brief Binary + operator for two gpuPerfectGas objects by summing +/// the mass fractions and molecular weights. Ensures that no zero +/// division occurs. Note! Host only. /// @param st1 LHS gpuPerfectGas. /// @param st2 RHS gpuPerfectGas. /// @return A new gpuPerfectGas object with summed properties. @@ -188,37 +200,41 @@ inline gpuPerfectGas operator+(const gpuPerfectGas& st1, const gScalar sumY = std::max(st1.Y_ + st2.Y_, gpuSmall); if (std::abs(sumY) > gpuSmall) { - return gpuPerfectGas( - sumY, sumY / (st1.Y_ / st1.molWeight_ + st2.Y_ / st2.molWeight_)); + return gpuPerfectGas(sumY, + sumY / (st1.Y_ / st1.molWeight_ + + st2.Y_ / st2.molWeight_)); } return st1; } /// @brief Binary * operator for scalar and gpuPerfectGas object. The -/// multiplication is carried out only for the mass fraction. Note! Host only. 
+/// multiplication is carried out only for the mass fraction. Note! +/// Host only. /// @param s LHS scalar. /// @param st RHS gpuPerfectGas. -/// @return A new gpuPerfectGas object where the mass fraction has been scaled -/// by the input scalar. +/// @return A new gpuPerfectGas object where the mass fraction has +/// been scaled by the input scalar. inline gpuPerfectGas operator*(gScalar s, const gpuPerfectGas& st) { return gpuPerfectGas(s * st.Y_, st.molWeight_); } -/// @brief Binary == operator for two gpuPerfectGas objects. The mass fraction -/// and molecular weight of the retuned new object has the difference of the -/// mass fractions and molecular weights. +/// @brief Binary == operator for two gpuPerfectGas objects. The mass +/// fraction and molecular weight of the retuned new object has the +/// difference of the mass fractions and molecular weights. /// @param st1 LHS gpuPerfectGas. /// @param st2 RHS gpuPerfectGas. -/// @return A new gpuPerfectGas object with (st2.Y - st1.Y) as a mass fraction -/// and scaled difference of the molecular weights as the molecular weight. +/// @return A new gpuPerfectGas object with (st2.Y - st1.Y) as a mass +/// fraction and scaled difference of the molecular weights as the +/// molecular weight. 
inline gpuPerfectGas operator==(const gpuPerfectGas& st1, const gpuPerfectGas& st2) { gScalar diffY = st2.Y_ - st1.Y_; if (std::abs(diffY) < gpuSmall) { diffY = gpuSmall; } - const gScalar diffRW = st2.Y_ / st2.molWeight_ - st1.Y_ / st1.molWeight_; + const gScalar diffRW = + st2.Y_ / st2.molWeight_ - st1.Y_ / st1.molWeight_; gScalar molWeight = gpuGreat; if (std::abs(diffRW) > gpuSmall) { molWeight = diffY / diffRW; } diff --git a/gpu_chemistry/src/gpuKernelEvaluator/Make/options b/gpu_chemistry/src/gpuKernelEvaluator/Make/options index 76fa81e..74929b5 100644 --- a/gpu_chemistry/src/gpuKernelEvaluator/Make/options +++ b/gpu_chemistry/src/gpuKernelEvaluator/Make/options @@ -7,5 +7,9 @@ EXE_INC = \ #LIB_LIBS = -lcudart_static -lcudart LIB_LIBS += -L$(CUDA_LIBS) -lcudart -#include ../../hipcc -include ../../nvcc \ No newline at end of file +ifeq ($(GPUFOAM_BACKEND_NVIDIA),1) + include ../../../nvcc +else + include ../../../hipcc +endif + diff --git a/gpu_chemistry/src/gpuKernelEvaluator/gpuKernelEvaluator/gpuKernelEvaluator.H b/gpu_chemistry/src/gpuKernelEvaluator/gpuKernelEvaluator/gpuKernelEvaluator.H index aedf6aa..a03a2e2 100644 --- a/gpu_chemistry/src/gpuKernelEvaluator/gpuKernelEvaluator/gpuKernelEvaluator.H +++ b/gpu_chemistry/src/gpuKernelEvaluator/gpuKernelEvaluator/gpuKernelEvaluator.H @@ -8,34 +8,16 @@ #include "error_handling.H" #include "gpuMemoryResource.H" +#include "gpuODESolver.H" #include "gpuODESolverInputs.H" #include "gpuODESystem.H" -#include "gpuReaction.H" -#include "gpuReactionRate.H" -#include "host_device_vectors.H" -#include "makeGpuOdeSolver.H" #include "thermosAndReactions.H" -#include "singleCellSolver.H" - namespace FoamGpu { struct GpuKernelEvaluator { private: - /// @brief Given an input solution ode vectors Y and time steps, compute the - /// new solution vectors and time step field by calling the an ode. - /// @param deltaT Current flow time step. - /// @param deltaTChemMax Maximum chemistry time step.. 
- /// @param deltaTChem Current cell-specific chemistry time steps. - /// @param Y The current cell-specific solution vectors [Y, T, p]. - /// @return New solution vectors and chemistry time steps. - std::pair, std::vector> - computeYNew(gScalar deltaT, - gScalar deltaTChemMax, - const std::vector& deltaTChem, - const std::vector& Y); - public: GpuKernelEvaluator() = default; @@ -50,14 +32,31 @@ public: ~GpuKernelEvaluator() = default; - /// @brief Computes the reaction rate based on the input flow properties. + /// @brief Given an input solution ode vectors Y and time steps, + /// compute the new solution vectors and time step field by + /// calling the an ode. + /// @param deltaT Current flow time step. + /// @param deltaTChemMax Maximum chemistry time step.. + /// @param deltaTChem Current cell-specific chemistry time steps. + /// @param Y The current cell-specific solution vectors [Y, T, p]. + /// @return New solution vectors and chemistry time steps. + std::pair, std::vector> + computeYNew(gScalar deltaT, + gScalar deltaTChemMax, + const std::vector& deltaTChem, + const std::vector& Y); + + /// @brief Computes the reaction rate based on the input flow + /// properties. /// @param deltaT Current flow time step. /// @param deltaTChemMax Maximum chemistry time step. /// @param rho The density field. /// @param deltaTChem Current cell-specific chemistry time steps. - /// @param Yvf The current cell-specific solution vectors [Y, T, p]. - /// @return Vector of new reaction rates of size nCells x nSpecie, new - /// chemistry time steps and the minimum new chemistry time step. + /// @param Yvf The current cell-specific solution vectors [Y, T, + /// p]. + /// @return Vector of new reaction rates of size nCells x nSpecie, + /// new chemistry time steps and the minimum new chemistry time + /// step. 
std::tuple, std::vector, gScalar> computeRR(gScalar deltaT, gScalar deltaTChemMax, @@ -71,7 +70,7 @@ private: gpuODESystem system_; gpuODESolver solver_; gpuODESolverInputs inputs_; - gpuMemoryResource memory_; + gpuMemoryResource memory_; }; } // namespace FoamGpu \ No newline at end of file diff --git a/gpu_chemistry/src/gpuKernelEvaluator/gpuKernelEvaluator/gpuKernelEvaluator.cu b/gpu_chemistry/src/gpuKernelEvaluator/gpuKernelEvaluator/gpuKernelEvaluator.cu index 2eb9267..83712c7 100644 --- a/gpu_chemistry/src/gpuKernelEvaluator/gpuKernelEvaluator/gpuKernelEvaluator.cu +++ b/gpu_chemistry/src/gpuKernelEvaluator/gpuKernelEvaluator/gpuKernelEvaluator.cu @@ -6,9 +6,8 @@ #include "for_each_index.H" #include "host_device_vectors.H" -#include -#include //min_element -#include +#include "makeGpuOdeSolver.H" +#include "singleCellSolver.H" namespace FoamGpu { @@ -29,41 +28,7 @@ GpuKernelEvaluator::GpuKernelEvaluator( thermosReactions_.reactions()) , solver_(make_gpuODESolver(system_, odeInputs)) , inputs_(odeInputs) - , memory_(nCells, nSpecie) { - -} - - -/* -static inline auto parseTimes(const char* label, - const std::vector& b) { - auto mmin = - std::min_element(b.begin(), b.end(), [=](auto lhs, auto rhs) { - return lhs.get_time(label) < rhs.get_time(label); - })->get_time(label); - - auto mmax = - std::max_element(b.begin(), b.end(), [=](auto lhs, auto rhs) { - return lhs.get_time(label) < rhs.get_time(label); - })->get_time(label); - - auto sum = std::accumulate( - b.begin(), b.end(), double(0), [=](auto lhs, auto rhs) { - return lhs + rhs.get_time(label); - }); - - - std::cout - << label - << " min: " << mmin - << " max: " << mmax - << " sum: " << sum - << std::endl; - - - return std::make_tuple(mmin, mmax, sum); -} -*/ + , memory_(nCells, nSpecie) {} std::pair, std::vector> GpuKernelEvaluator::computeYNew( @@ -91,16 +56,6 @@ GpuKernelEvaluator::computeYNew( for_each_index(op, nCells); - - /* - gLabel NTHREADS = 32; - gLabel NBLOCKS = (nCells + NTHREADS - 1) / 
NTHREADS; - cuda_kernel<<>>(nCells, op); - - CHECK_LAST_CUDA_ERROR(); - gpuErrorCheck(cudaDeviceSynchronize()); - */ - return std::make_pair(toStdVector(dYvf_arr), toStdVector(ddeltaTChem_arr)); } diff --git a/gpu_chemistry/src/gpuKernelEvaluator/gpuKernelEvaluator/gpuMemoryResource.H b/gpu_chemistry/src/gpuKernelEvaluator/gpuKernelEvaluator/gpuMemoryResource.H index 5777dab..bc997bf 100644 --- a/gpu_chemistry/src/gpuKernelEvaluator/gpuKernelEvaluator/gpuMemoryResource.H +++ b/gpu_chemistry/src/gpuKernelEvaluator/gpuKernelEvaluator/gpuMemoryResource.H @@ -18,15 +18,15 @@ struct gpuMemoryResource : public memoryResource { ~gpuMemoryResource(); - std::array& getLabelData() override { + device_array& getLabelData() override { return labelData_; } - std::array& getScalarData() override { + device_array& getScalarData() override { return scalarData_; } - std::array& + device_array& getTwodScalarData() override { return twodScalarData_; } @@ -38,9 +38,9 @@ private: void allocate(); void deallocate(); - std::array labelData_{}; - std::array scalarData_{}; - std::array twodScalarData_{}; + device_array labelData_{}; + device_array scalarData_{}; + device_array twodScalarData_{}; }; } // namespace FoamGpu \ No newline at end of file diff --git a/gpu_chemistry/src/gpuKernelEvaluator/gpuKernelEvaluator/gpuMemoryResource.cu b/gpu_chemistry/src/gpuKernelEvaluator/gpuKernelEvaluator/gpuMemoryResource.cu index f1f0b94..159b19e 100644 --- a/gpu_chemistry/src/gpuKernelEvaluator/gpuKernelEvaluator/gpuMemoryResource.cu +++ b/gpu_chemistry/src/gpuKernelEvaluator/gpuKernelEvaluator/gpuMemoryResource.cu @@ -1,6 +1,6 @@ -#include "gpuMemoryResource.H" #include "device_allocate.H" #include "device_free.H" +#include "gpuMemoryResource.H" namespace FoamGpu { @@ -17,10 +17,12 @@ void gpuMemoryResource::allocate() { labelData_[i] = device_allocate(labelArrayLength()); } for (gLabel i = 0; i < N_SCALAR_ARRAYS; ++i) { - scalarData_[i] = device_allocate(scalarArrayLength()); + scalarData_[i] = + 
device_allocate(scalarArrayLength()); } for (gLabel i = 0; i < N_TWOD_SCALAR_ARRAYS; ++i) { - twodScalarData_[i] = device_allocate(twodScalarArrayLength()); + twodScalarData_[i] = + device_allocate(twodScalarArrayLength()); } } diff --git a/gpu_chemistry/src/gpuKernelEvaluator/gpuKernelEvaluator/singleCellSolver.H b/gpu_chemistry/src/gpuKernelEvaluator/gpuKernelEvaluator/singleCellSolver.H index 9a45f11..696e556 100644 --- a/gpu_chemistry/src/gpuKernelEvaluator/gpuKernelEvaluator/singleCellSolver.H +++ b/gpu_chemistry/src/gpuKernelEvaluator/gpuKernelEvaluator/singleCellSolver.H @@ -1,8 +1,8 @@ #pragma once #include "gpuBuffer.H" -#include "gpu_constants.H" #include "gpuODESolver.H" +#include "gpu_constants.H" #include "mdspan.H" namespace FoamGpu { @@ -23,19 +23,21 @@ struct singleCellSolver { , ode_(ode) {} CUDA_HOSTDEV void operator()(gLabel celli) const { - auto Y = mdspan(&Yvf_(celli, 0), extents<1>{nSpecie_ + 2}); + auto Y = mdspan(&Yvf_(celli, 0), + extents<1>{nSpecie_ + 2}); // Initialise time progress gScalar timeLeft = deltaT_; - // Calculate the chemical source terms while (timeLeft > gpuSmall) { gScalar dt = timeLeft; ode_.solve(0, dt, Y, deltaTChem_[celli], buffer_[celli]); - for (int i = 0; i < nSpecie_; i++) { Y[i] = fmax(0.0, Y[i]); } + for (int i = 0; i < nSpecie_; i++) { + Y[i] = fmax(0.0, Y[i]); + } timeLeft -= dt; } diff --git a/gpu_chemistry/src/gpuKernelEvaluator/gpuKernelEvaluator/thermosAndReactions.H b/gpu_chemistry/src/gpuKernelEvaluator/gpuKernelEvaluator/thermosAndReactions.H index 8c361bc..f9c4881 100644 --- a/gpu_chemistry/src/gpuKernelEvaluator/gpuKernelEvaluator/thermosAndReactions.H +++ b/gpu_chemistry/src/gpuKernelEvaluator/gpuKernelEvaluator/thermosAndReactions.H @@ -5,36 +5,27 @@ #include "gpuReaction.H" #include "gpuThermo.H" +namespace FoamGpu { -namespace FoamGpu{ - -struct thermosAndReactions{ +struct thermosAndReactions { thermosAndReactions() = default; - thermosAndReactions - ( - const std::vector thermos, - const 
std::vector& reactions - ); + thermosAndReactions(const std::vector thermos, + const std::vector& reactions); ~thermosAndReactions(); - - gpuThermo* thermos() {return thermos_;} - gpuReaction* reactions() {return reactions_;} - + gpuThermo* thermos() { return thermos_; } + gpuReaction* reactions() { return reactions_; } private: - gLabel nThermos_; - gLabel nReactions_; - gpuThermo* thermos_; + gLabel nThermos_; + gLabel nReactions_; + gpuThermo* thermos_; gpuReaction* reactions_; - void deallocate(); - - }; -} \ No newline at end of file +} // namespace FoamGpu \ No newline at end of file diff --git a/gpu_chemistry/src/gpuKernelEvaluator/gpuKernelEvaluator/thermosAndReactions.cu b/gpu_chemistry/src/gpuKernelEvaluator/gpuKernelEvaluator/thermosAndReactions.cu index 1a47a79..b6b81bb 100644 --- a/gpu_chemistry/src/gpuKernelEvaluator/gpuKernelEvaluator/thermosAndReactions.cu +++ b/gpu_chemistry/src/gpuKernelEvaluator/gpuKernelEvaluator/thermosAndReactions.cu @@ -1,51 +1,33 @@ #include "thermosAndReactions.H" -#include "error_handling.H" #include "device_allocate.H" #include "device_free.H" +#include "error_handling.H" #include "host_device_transfers.H" namespace FoamGpu { template static inline T* allocateAndTransfer(const std::vector& t) { - - T* ptr = device_allocate(t.size()); - const auto bytesize = t.size() * sizeof(T); - gpuErrorCheck( - cudaMemcpy(ptr, t.data(), bytesize, cudaMemcpyHostToDevice)); - - return ptr; - /* - T* ptr = device_allocate(t.size()); + T* ptr = device_allocate(t.size()); host_to_device(t.begin(), t.end(), ptr); return ptr; - */ } -thermosAndReactions::thermosAndReactions -( +thermosAndReactions::thermosAndReactions( const std::vector thermos, - const std::vector& reactions -) + const std::vector& reactions) : nThermos_(gLabel(thermos.size())) , nReactions_(gLabel(reactions.size())) , thermos_(allocateAndTransfer(thermos)) , reactions_(allocateAndTransfer(reactions)) {} +thermosAndReactions::~thermosAndReactions() { this->deallocate(); } 
-thermosAndReactions::~thermosAndReactions() -{ - this->deallocate(); -} - - -void thermosAndReactions::deallocate() -{ +void thermosAndReactions::deallocate() { device_free(thermos_); device_free(reactions_); } - } // namespace FoamGpu \ No newline at end of file diff --git a/gpu_chemistry/src/gpuOde/gpuODESystem.H b/gpu_chemistry/src/gpuOde/gpuODESystem.H index 6e9ceaf..713718e 100644 --- a/gpu_chemistry/src/gpuOde/gpuODESystem.H +++ b/gpu_chemistry/src/gpuOde/gpuODESystem.H @@ -176,8 +176,10 @@ public: auto Ha = buffer.tempField1(); auto c = buffer.c(); - const auto [rhoM, CpM] = - mixture.computeDerivativeRequired(y, Ha, c); + const auto pair = mixture.computeDerivativeRequired(y, Ha, c); + const auto rhoM = pair[0]; + const auto CpM = pair[1]; + for (gLabel ri = 0; ri < nReactions(); ++ri) { reactions()[ri].dNdtByV(p, T, c, dydx); @@ -216,8 +218,12 @@ public: auto Ha = buffer.tempField2(); auto c = buffer.c(); - const auto [rhoM, CpM, dCpMdT, alphavM] = - mixture.computeJacobianRequired(y, Cp, Ha, c); + const auto arr = mixture.computeJacobianRequired(y, Cp, Ha, c); + const auto rhoM = arr[0]; + const auto CpM = arr[1]; + const auto dCpMdT = arr[2]; + const auto alphavM = arr[3]; + auto tempField1 = buffer.k4(); diff --git a/gpu_chemistry/src/gpuReaction/gpuArrheniusReactionRate.H b/gpu_chemistry/src/gpuReaction/gpuArrheniusReactionRate.H index b584f5d..eab5f07 100644 --- a/gpu_chemistry/src/gpuReaction/gpuArrheniusReactionRate.H +++ b/gpu_chemistry/src/gpuReaction/gpuArrheniusReactionRate.H @@ -1,5 +1,6 @@ #pragma once +#include "device_array.H" #include "gpu_constants.H" #include "mdspan.H" @@ -99,11 +100,11 @@ public: ///@param c Concentration. ///@param ddc A span of at least size nSpcecie where the /// derivative is put. - ///@return std::pair A pair of scalars where the + ///@return device_array A pair of scalars where the /// first value is the reaction rate and second value is the /// derivative w.r.t temperature. 
/// - inline CUDA_HOSTDEV std::pair + inline CUDA_HOSTDEV device_array everything(const gScalar p, const gScalar T, const mdspan c, @@ -112,7 +113,7 @@ public: const auto Ak = computeAk(p, T); const auto dT = Ak * (beta_ + Ta_ / T) / T; - return std::make_pair(Ak, dT); + return device_array({Ak, dT}); } }; diff --git a/gpu_chemistry/src/gpuReaction/gpuFallOffReactionRate.H b/gpu_chemistry/src/gpuReaction/gpuFallOffReactionRate.H index a6b8ccf..399c68c 100644 --- a/gpu_chemistry/src/gpuReaction/gpuFallOffReactionRate.H +++ b/gpu_chemistry/src/gpuReaction/gpuFallOffReactionRate.H @@ -119,11 +119,11 @@ public: ///@param c Concentration. ///@param ddc A span of at least size nSpcecie where the /// derivative is put. - ///@return std::pair A pair of scalars where the + ///@return device_array A pair of scalars where the /// first value is the reaction rate and second value is the /// derivative w.r.t temperature. /// - inline CUDA_HOSTDEV std::pair + inline CUDA_HOSTDEV device_array everything(const gScalar p, const gScalar T, const mdspan c, @@ -152,7 +152,9 @@ public: } } - return std::make_pair(k, ddT); + return {k, ddT}; + + // return device_array({k, ddT}); } }; diff --git a/gpu_chemistry/src/gpuReaction/gpuLindemannFallOffFunction.H b/gpu_chemistry/src/gpuReaction/gpuLindemannFallOffFunction.H index 368261d..1a9056f 100644 --- a/gpu_chemistry/src/gpuReaction/gpuLindemannFallOffFunction.H +++ b/gpu_chemistry/src/gpuReaction/gpuLindemannFallOffFunction.H @@ -7,10 +7,8 @@ namespace FoamGpu { class gpuLindemannFallOffFunction { public: - gpuLindemannFallOffFunction() = default; - inline CUDA_HOSTDEV gScalar operator()(const gScalar T, const gScalar Pr) const { return 1.0; diff --git a/gpu_chemistry/src/gpuReaction/gpuReaction.H b/gpu_chemistry/src/gpuReaction/gpuReaction.H index 026e016..2014597 100644 --- a/gpu_chemistry/src/gpuReaction/gpuReaction.H +++ b/gpu_chemistry/src/gpuReaction/gpuReaction.H @@ -15,11 +15,11 @@ namespace FoamGpu { struct reactionParams { - 
gScalar omega = 0.0; - gScalar dwdT = 0.0; - std::array dCfdjs{}; - std::array dCrdjs{}; - mdspan ddc; + gScalar omega = 0.0; + gScalar dwdT = 0.0; + device_array dCfdjs{}; + device_array dCrdjs{}; + mdspan ddc; }; struct gpuSpecieCoeffs { @@ -44,7 +44,7 @@ class gpuReaction : public gpuThermo { public: using specieCoeffArray = - std::array; + device_array; //- Default temperature limits of applicability of gpuReaction // rates @@ -74,7 +74,7 @@ public: //- Temperature limits of applicability of gpuReaction rates gScalar Tlow_, Thigh_; - gScalar RSMALL{sqrt(gpuSmall)}; + // gScalar RSMALL{sqrt(gpuSmall)}; public: gpuReaction() = default; @@ -99,14 +99,16 @@ public: /// @brief Returns a span to the LHS specie coeffcients. /// @return A span to the LHS specie coefficients. - inline CUDA_HOSTDEV auto lhs() const { - return make_mdspan(lhs_, extents<1>{lhs_size_}); + inline CUDA_HOSTDEV mdspan lhs() const { + return mdspan( + lhs_.data(), extents<1>{lhs_size_}); } /// @brief Returns a span to the LHS specie coeffcients. /// @return A span to the LHS specie coefficients. - inline CUDA_HOSTDEV auto rhs() const { - return make_mdspan(rhs_, extents<1>{rhs_size_}); + inline CUDA_HOSTDEV mdspan rhs() const { + return mdspan( + rhs_.data(), extents<1>{rhs_size_}); } /// @brief Checks if the reaction rate of this reaction is @@ -250,14 +252,14 @@ public: /// specie concentrations. /// @param c A span of concentrations in a cell. /// @return An array of powers of the LHS species. - inline CUDA_HOSTDEV std::array + inline CUDA_HOSTDEV device_array lhsPowers(const mdspan c) const; /// @brief Computes the powers speciePow(c_i, e_i) of the RHS /// specie concentrations. /// @param c A span of concentrations in a cell. /// @return An array of powers of the RHS species. 
- inline CUDA_HOSTDEV std::array + inline CUDA_HOSTDEV device_array rhsPowers(const mdspan c) const; /// @brief Given an array of LHS concentration powers, computes @@ -265,14 +267,14 @@ public: /// @param lhsPow The input LHS powers to compute the product for. /// @return The product of the input powers. inline CUDA_HOSTDEV gScalar calcCf( - const std::array lhsPow) const; + const device_array lhsPow) const; /// @brief Given an array of RHS concentration powers, computes /// the product of them all. /// @param lhsPow The input RHS powers to compute the product for. /// @return The product of the input powers. inline CUDA_HOSTDEV gScalar calcCr( - const std::array rhsPow) const; + const device_array rhsPow) const; /// @brief Given precomputed LHS powers, computes the derivatives /// of the product of the powers w.r.t all concentrations on the @@ -282,10 +284,10 @@ public: /// @param c A span of concentrations in a cell. /// @return The derivative of the product of concentration powers /// w.r.t to all concentrations on the LHS. - inline CUDA_HOSTDEV std::array - calcdCfdcj(const std::array lhsPow, - const gScalar Cf, - const mdspan c) const; + inline CUDA_HOSTDEV device_array + calcdCfdcj(const device_array lhsPow, + const gScalar Cf, + const mdspan c) const; /// @brief Given precomputed RHS powers, computes the derivatives /// of the product of the powers w.r.t all concentrations on the @@ -295,10 +297,10 @@ public: /// @param c A span of concentrations in a cell. /// @return The derivative of the product of concentration powers /// w.r.t to all concentrations on the RHS. 
- inline CUDA_HOSTDEV std::array - calcdCrdcj(const std::array rhsPow, - const gScalar Cr, - const mdspan c) const; + inline CUDA_HOSTDEV device_array + calcdCrdcj(const device_array rhsPow, + const gScalar Cr, + const mdspan c) const; inline CUDA_HOSTDEV void jac_dCfdcj_contribution(const reactionParams& r, @@ -343,11 +345,11 @@ inline CUDA_HOSTDEV bool gpuReaction::rhsSmallConcentration( return false; } -inline CUDA_HOSTDEV std::array +inline CUDA_HOSTDEV device_array gpuReaction::lhsPowers(const mdspan c) const { - const auto llhs = lhs(); - std::array ret{}; + const auto llhs = lhs(); + device_array ret{}; for (size_t i = 0; i < llhs.size(); ++i) { const auto& el = llhs[i].exponent; const auto& si = llhs[i].index; @@ -356,11 +358,11 @@ gpuReaction::lhsPowers(const mdspan c) const { return ret; } -inline CUDA_HOSTDEV std::array +inline CUDA_HOSTDEV device_array gpuReaction::rhsPowers(const mdspan c) const { - const auto rrhs = rhs(); - std::array ret{}; + const auto rrhs = rhs(); + device_array ret{}; for (size_t i = 0; i < rrhs.size(); ++i) { const auto& er = rrhs[i].exponent; const auto& si = rrhs[i].index; @@ -370,26 +372,26 @@ gpuReaction::rhsPowers(const mdspan c) const { } inline CUDA_HOSTDEV gScalar gpuReaction::calcCf( - const std::array lhsPow) const { + const device_array lhsPow) const { gScalar Cf = 1.0; for (gLabel i = 0; i < lhs_size_; ++i) { Cf *= lhsPow[i]; } return Cf; } inline CUDA_HOSTDEV gScalar gpuReaction::calcCr( - const std::array rhsPow) const { + const device_array rhsPow) const { gScalar Cr = 1.0; for (gLabel i = 0; i < rhs_size_; ++i) { Cr *= rhsPow[i]; } return Cr; } -inline CUDA_HOSTDEV std::array +inline CUDA_HOSTDEV device_array gpuReaction::calcdCfdcj( - const std::array lhsPow, - const gScalar Cf, - const mdspan c) const { + const device_array lhsPow, + const gScalar Cf, + const mdspan c) const { - std::array ret{}; + device_array ret{}; /* //Should be just this but overflows @@ -426,12 +428,12 @@ inline CUDA_HOSTDEV std::array 
return ret; } -inline CUDA_HOSTDEV std::array +inline CUDA_HOSTDEV device_array gpuReaction::calcdCrdcj( - const std::array rhsPow, - const gScalar Cr, - const mdspan c) const { - std::array ret{}; + const device_array rhsPow, + const gScalar Cr, + const mdspan c) const { + device_array ret{}; /* //Should be just this but overflows @@ -472,9 +474,10 @@ gpuReaction::omega(const gScalar p, const gScalar T, const mdspan c) const { - const gScalar clippedT = fmin(fmax(T, this->Tlow()), this->Thigh()); + const gScalar clippedT = + fmin(fmax(T, this->Tlow()), this->Thigh()); - const gScalar Kc = fmax(RSMALL, this->Kc(p, T)); + const gScalar Kc = fmax(sqrt(gpuSmall), this->Kc(p, T)); // Rate constants const gScalar kf = this->kf(p, clippedT, c); const gScalar kr = this->kr(kf, p, clippedT, Kc, c); @@ -539,7 +542,7 @@ gpuReaction::kr(const gScalar p, const gScalar T, const mdspan c) const { if (isIrreversible()) { return 0.0; } - const gScalar Kc = fmax(this->Kc(p, T), RSMALL); + const gScalar Kc = fmax(this->Kc(p, T), sqrt(gpuSmall)); return kr(kf(p, T, c), p, T, Kc, c); } @@ -654,7 +657,9 @@ computeReactionParameters(const gpuReaction& r, mdspan ddc) { reactionParams ret; - const auto [kf, ddT] = r.k_.everything(p, T, c, ddc); + const auto pair = r.k_.everything(p, T, c, ddc); + const auto kf = pair[0]; + const auto ddT = pair[1]; const auto lhsPow = r.lhsPowers(c); const auto Cf = r.calcCf(lhsPow); @@ -668,15 +673,17 @@ computeReactionParameters(const gpuReaction& r, gScalar Kc = 1.0; if (!r.isIrreversible()) { - const auto [Kc_t, dKcdTbyV] = r.KcAnddKcTbyKc(p, T); - Kc = fmax(Kc_t, r.RSMALL); - const auto kr = kf / Kc; + const auto pair2 = r.KcAnddKcTbyKc(p, T); + const auto Kc_t = pair2[0]; + const auto dKcdTbyV = pair2[1]; + Kc = fmax(Kc_t, sqrt(gpuSmall)); + const auto kr = kf / Kc; const auto rhsPow = r.rhsPowers(c); Cr = r.calcCr(rhsPow); const auto dkrdT = - ddT / Kc - (Kc > r.RSMALL ? kr * dKcdTbyV : 0.0); + ddT / Kc - (Kc > sqrt(gpuSmall) ? 
kr * dKcdTbyV : 0.0); ret.dwdT -= Cr * dkrdT; ret.omega -= Cr * kr; diff --git a/gpu_chemistry/src/gpuReaction/gpuReactionRate.H b/gpu_chemistry/src/gpuReaction/gpuReactionRate.H index 75018ab..11b931c 100644 --- a/gpu_chemistry/src/gpuReaction/gpuReactionRate.H +++ b/gpu_chemistry/src/gpuReaction/gpuReactionRate.H @@ -126,7 +126,7 @@ struct everythingVisitor { mdspan dkfdc; template - CUDA_HOSTDEV std::pair + CUDA_HOSTDEV device_array operator()(const T& i) const { return i.everything(p, Temperature, c, dkfdc); } @@ -145,7 +145,7 @@ struct gpuReactionRate { /// ///@brief Determines if the underlying reaction is irreversible or - ///not. + /// not. /// ///@return bool True if is irreversible, false otherwise. /// @@ -176,7 +176,7 @@ struct gpuReactionRate { ///@brief Determines if the rate is a function of concentration. /// ///@return True if is a function of concentration, false - ///otherwise. + /// otherwise. /// inline CUDA_HOSTDEV bool hasDdc() const { @@ -230,11 +230,11 @@ struct gpuReactionRate { ///@param c Concentration. ///@param dkfdc A span of at least size nSpcecie where the /// derivative is put. - ///@return std::pair A pair of scalars where the + ///@return device_array A pair of scalars where the /// first value is the reaction rate and second value is the /// derivative w.r.t temperature. 
/// - inline CUDA_HOSTDEV std::pair + inline CUDA_HOSTDEV device_array everything(const gScalar p, const gScalar T, const mdspan c, diff --git a/gpu_chemistry/src/gpuReaction/gpuSpecieExponent.H b/gpu_chemistry/src/gpuReaction/gpuSpecieExponent.H index 228142a..45ee4ef 100644 --- a/gpu_chemistry/src/gpuReaction/gpuSpecieExponent.H +++ b/gpu_chemistry/src/gpuReaction/gpuSpecieExponent.H @@ -36,7 +36,7 @@ inline CUDA_HOSTDEV gScalar integerPow(const gScalar x, } struct gpuSpecieExponent { - + gLabel noIntegerExponent_ = gpuLabelMax; gLabel integerExponent_; @@ -45,8 +45,7 @@ struct gpuSpecieExponent { inline CUDA_HOSTDEV gpuSpecieExponent() : integerExponent_(gpuLabelMax) - , scalarExponent_( - std::numeric_limits::signaling_NaN()) {} + , scalarExponent_(gpuScalarNaN) {} inline CUDA_HOSTDEV gpuSpecieExponent(const gLabel integerExponent) @@ -67,12 +66,12 @@ struct gpuSpecieExponent { } inline CUDA_HOSTDEV gpuSpecieExponent& - operator=(const gLabel integerExponent) { + operator=(const gLabel integerExponent) { return *this = gpuSpecieExponent(integerExponent); } inline CUDA_HOSTDEV gpuSpecieExponent& - operator=(const gScalar scalarExponent) { + operator=(const gScalar scalarExponent) { return *this = gpuSpecieExponent(scalarExponent); } diff --git a/gpu_chemistry/src/gpuReaction/gpuThirdBodyArrheniusReactionRate.H b/gpu_chemistry/src/gpuReaction/gpuThirdBodyArrheniusReactionRate.H index cdd4e79..6892814 100644 --- a/gpu_chemistry/src/gpuReaction/gpuThirdBodyArrheniusReactionRate.H +++ b/gpu_chemistry/src/gpuReaction/gpuThirdBodyArrheniusReactionRate.H @@ -98,26 +98,26 @@ public: ///@param c Concentration. ///@param ddc A span of at least size nSpcecie where the /// derivative is put. - ///@return std::pair A pair of scalars where the + ///@return device_array A pair of scalars where the /// first value is the reaction rate and second value is the /// derivative w.r.t temperature. 
/// - inline CUDA_HOSTDEV std::pair + inline CUDA_HOSTDEV device_array everything(const gScalar p, const gScalar T, const mdspan c, mdspan ddc) const { auto pair = k_.everything(p, T, c, ddc); - const gScalar k = std::get<0>(pair); - const gScalar ddT = std::get<1>(pair); + const gScalar k = pair[0]; + const gScalar ddT = pair[1]; for (gLabel i = 0; i < gLabel(ddc.size()); ++i) { ddc[i] = thirdBodyEfficiencies_.dMdc(c)[i] * k; } const gScalar M = thirdBodyEfficiencies_.M(c); - return std::make_pair(M * k, M * ddT); + return device_array({M * k, M * ddT}); } }; diff --git a/gpu_chemistry/src/gpuReaction/gpuThirdBodyEfficiencies.H b/gpu_chemistry/src/gpuReaction/gpuThirdBodyEfficiencies.H index 70d1064..21344e2 100644 --- a/gpu_chemistry/src/gpuReaction/gpuThirdBodyEfficiencies.H +++ b/gpu_chemistry/src/gpuReaction/gpuThirdBodyEfficiencies.H @@ -1,13 +1,13 @@ #pragma once +#include "device_array.H" #include "gpu_constants.H" -#include namespace FoamGpu { class gpuThirdBodyEfficiencies { public: - using effArray = std::array; + using effArray = device_array; gLabel nSpecie_; effArray efficiencies_{}; @@ -25,9 +25,12 @@ public: ///@param c Concentration. ///@return gScalar The concentration of the third-bodies. /// - inline CUDA_HOSTDEV gScalar M(const mdspan c) const { + inline CUDA_HOSTDEV gScalar + M(const mdspan c) const { gScalar M = 0; - for (gLabel i = 0; i < nSpecie_; ++i) { M += efficiencies_[i] * c[i]; } + for (gLabel i = 0; i < nSpecie_; ++i) { + M += efficiencies_[i] * c[i]; + } return M; } @@ -35,7 +38,8 @@ public: ///@brief Calculates the derivative of M w.r.t concentrations. /// ///@param c Concentration. - ///@return std::array The derivative of M w.r.t concentrations. + ///@return device_array The derivative of M w.r.t + ///concentrations. 
/// inline CUDA_HOSTDEV const effArray& dMdc(const mdspan c) const { diff --git a/gpu_chemistry/src/gpuReaction/gpuTroeFallOffFunction.H b/gpu_chemistry/src/gpuReaction/gpuTroeFallOffFunction.H index 719658e..1aba255 100644 --- a/gpu_chemistry/src/gpuReaction/gpuTroeFallOffFunction.H +++ b/gpu_chemistry/src/gpuReaction/gpuTroeFallOffFunction.H @@ -2,7 +2,8 @@ #include "gpu_constants.H" -// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * // +// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * +// * * * // namespace FoamGpu { @@ -12,7 +13,6 @@ class gpuTroeFallOffFunction { gScalar Tsss_, Ts_, Tss_; public: - gpuTroeFallOffFunction() = default; inline CUDA_HOSTDEV gpuTroeFallOffFunction(const gScalar alpha, @@ -24,7 +24,6 @@ public: , Ts_(Ts) , Tss_(Tss) {} - inline CUDA_HOSTDEV gScalar operator()(const gScalar T, const gScalar Pr) const { @@ -55,9 +54,10 @@ public: const gScalar Fcent = (1.0 - alpha_) * exp(-T / Tsss_) + alpha_ * exp(-T / Ts_) + exp(-Tss_ / T); - const gScalar dFcentdT = -(1.0 - alpha_) / Tsss_ * exp(-T / Tsss_) - - alpha_ / Ts_ * exp(-T / Ts_) + - Tss_ / (T * T) * exp(-Tss_ / T); + const gScalar dFcentdT = + -(1.0 - alpha_) / Tsss_ * exp(-T / Tsss_) - + alpha_ / Ts_ * exp(-T / Ts_) + + Tss_ / (T * T) * exp(-Tss_ / T); const gScalar logFcent = log10(fmax(Fcent, gpuSmall)); const gScalar dlogFcentdT = @@ -87,7 +87,8 @@ public: const gScalar logPr = log10(fmax(Pr, gpuSmall)); - const gScalar dlogPrdPr = Pr >= gpuSmall ? 1 / (gpuLogTen * Pr) : 0.0; + const gScalar dlogPrdPr = + Pr >= gpuSmall ? 
1 / (gpuLogTen * Pr) : 0.0; const gScalar Fcent = (1.0 - alpha_) * exp(-T / Tsss_) + alpha_ * exp(-T / Ts_) + exp(-Tss_ / T); diff --git a/gpu_chemistry/src/gpuReaction/makeGpuReactions.H b/gpu_chemistry/src/gpuReaction/makeGpuReactions.H index f4f39cf..bc01cb8 100644 --- a/gpu_chemistry/src/gpuReaction/makeGpuReactions.H +++ b/gpu_chemistry/src/gpuReaction/makeGpuReactions.H @@ -3,12 +3,10 @@ #include #include -#include "host_device_vectors.H" #include "gpuReaction.H" #include "gpuThermo.H" #include "makeGpuReactionRate.H" - #include "ReactionList.H" #include "dictionary.H" @@ -23,31 +21,36 @@ set_reaction_thermo(gpuReaction& reaction, const std::vector& species) { // typename MulticomponentThermo::thermoType rhsThermo - gpuThermo rhsThermo(reaction.rhs()[0].stoichCoeff * - (thermoDatabase[species[reaction.rhs()[0].index]]).W() * - (thermoDatabase[species[reaction.rhs()[0].index]])); + gpuThermo rhsThermo( + reaction.rhs()[0].stoichCoeff * + (thermoDatabase[species[reaction.rhs()[0].index]]).W() * + (thermoDatabase[species[reaction.rhs()[0].index]])); for (gLabel i = 1; i < gLabel(reaction.rhs().size()); ++i) { - rhsThermo += reaction.rhs()[i].stoichCoeff * - (thermoDatabase[species[reaction.rhs()[i].index]]).W() * - (thermoDatabase[species[reaction.rhs()[i].index]]); + rhsThermo += + reaction.rhs()[i].stoichCoeff * + (thermoDatabase[species[reaction.rhs()[i].index]]).W() * + (thermoDatabase[species[reaction.rhs()[i].index]]); } - gpuThermo lhsThermo(reaction.lhs()[0].stoichCoeff * - (thermoDatabase[species[reaction.lhs()[0].index]]).W() * - (thermoDatabase[species[reaction.lhs()[0].index]])); + gpuThermo lhsThermo( + reaction.lhs()[0].stoichCoeff * + (thermoDatabase[species[reaction.lhs()[0].index]]).W() * + (thermoDatabase[species[reaction.lhs()[0].index]])); for (gLabel i = 1; i < gLabel(reaction.lhs().size()); ++i) { - lhsThermo += reaction.lhs()[i].stoichCoeff * - (thermoDatabase[species[reaction.lhs()[i].index]]).W() * - 
(thermoDatabase[species[reaction.lhs()[i].index]]); + lhsThermo += + reaction.lhs()[i].stoichCoeff * + (thermoDatabase[species[reaction.lhs()[i].index]]).W() * + (thermoDatabase[species[reaction.lhs()[i].index]]); } if (std::abs(lhsThermo.Y() - rhsThermo.Y()) > 0.1) { - FatalErrorInFunction << "Mass imbalance for reaction " - << ": " << std::abs(lhsThermo.Y() - rhsThermo.Y()) - << " kg/kmol" << exit(Foam::FatalError); + FatalErrorInFunction + << "Mass imbalance for reaction " << ": " + << std::abs(lhsThermo.Y() - rhsThermo.Y()) << " kg/kmol" + << exit(Foam::FatalError); } reaction.get_thermo() = (lhsThermo == rhsThermo); @@ -55,7 +58,8 @@ set_reaction_thermo(gpuReaction& reaction, // gpuThermo::operator=(lhsThermo == rhsThermo); } -static inline auto make_specie_coeffs(const Foam::List& l) { +static inline auto +make_specie_coeffs(const Foam::List& l) { using return_type = typename gpuReaction::specieCoeffArray; @@ -71,25 +75,24 @@ static inline auto make_specie_coeffs(const Foam::List& l) { gScalar exp = l[i].exponent; - if (isInteger(exp)){ - ret[i].exponent = gpuSpecieExponent(gLabel(exp)); - } - else { + if (isInteger(exp)) { + ret[i].exponent = gpuSpecieExponent(gLabel(exp)); + } else { ret[i].exponent = gpuSpecieExponent(gScalar(exp)); } - //ret[i].exponent = gpuSpecieExponent(gScalar(exp)); + // ret[i].exponent = gpuSpecieExponent(gScalar(exp)); - //ret[i].exponent = l[i].exponent; + // ret[i].exponent = l[i].exponent; } return ret; } template -static std::vector -makeGpuReactions(const Foam::speciesTable& species, - const Foam::dictionary& dict, - const std::vector& gpu_thermos, - const Foam::ReactionList& cpu_reactions) { +static std::vector makeGpuReactions( + const Foam::speciesTable& species, + const Foam::dictionary& dict, + const std::vector& gpu_thermos, + const Foam::ReactionList& cpu_reactions) { const Foam::dictionary& reactions(dict.subDict("reactions")); @@ -98,7 +101,8 @@ makeGpuReactions(const Foam::speciesTable& species, 
thermoDatabase[species[i]] = gpu_thermos[i]; } - std::vector specieNames(species.begin(), species.end()); + std::vector specieNames(species.begin(), + species.end()); std::vector ret; gLabel i = 0; @@ -137,11 +141,4 @@ makeGpuReactions(const Foam::speciesTable& species, return ret; } - - - - - - - } // namespace FoamGpu \ No newline at end of file diff --git a/gpu_chemistry/src/gpuThermo/gpuMixture.H b/gpu_chemistry/src/gpuThermo/gpuMixture.H index cac5833..1bc660e 100644 --- a/gpu_chemistry/src/gpuThermo/gpuMixture.H +++ b/gpu_chemistry/src/gpuThermo/gpuMixture.H @@ -26,15 +26,15 @@ struct gpuMixture { /// @param Ha [OUT] A span containing the species absolute /// enthalpies. /// @param c [OUT] A span containing the species concetrations. - /// @return std::tuple a tuple - /// tuple of mixture density, heat capacity, derivative of heat + /// @return device_array an array + /// of mixture density, heat capacity, derivative of heat /// capacity w.r.t temperature and volumetric coefficient of /// thermal expansion, respectively. - CUDA_HOSTDEV std::tuple - computeJacobianRequired(const mdspan y, - mdspan Cp, - mdspan Ha, - mdspan c) { + CUDA_HOSTDEV device_array + computeJacobianRequired(const mdspan y, + mdspan Cp, + mdspan Ha, + mdspan c) { const gLabel nSpecie = y.size() - 2; const gScalar T = y[nSpecie]; @@ -71,7 +71,7 @@ struct gpuMixture { for (gLabel i = 0; i < nSpecie; ++i) { c[i] *= rhoM; } - return std::make_tuple(rhoM, CpM, dCpMdT, alphav); + return {rhoM, CpM, dCpMdT, alphav}; // alphav_ *= rhoM_; } @@ -83,12 +83,12 @@ struct gpuMixture { /// @param Ha [OUT] A span containing the species absolute /// enthalpies. /// @param c [OUT] A span containing the species concetrations. - /// @return std::pair a pair containing mixture + /// @return device_array a pair containing mixture /// density and specific heat, respectively. 
- CUDA_HOSTDEV std::pair - computeDerivativeRequired(const mdspan y, - mdspan Ha, - mdspan c) { + CUDA_HOSTDEV device_array + computeDerivativeRequired(const mdspan y, + mdspan Ha, + mdspan c) { const gLabel nSpecie = y.size() - 2; const gScalar T = y[nSpecie]; @@ -114,8 +114,8 @@ struct gpuMixture { rhoM = 1.0 / rhoM; for (gLabel i = 0; i < nSpecie; ++i) { c[i] *= rhoM; } - - return std::make_pair(rhoM, CpM); + return {rhoM, CpM}; + // return device_array({rhoM, CpM}); // alphav_ *= rhoM_; } diff --git a/gpu_chemistry/src/gpuThermo/gpuThermo.H b/gpu_chemistry/src/gpuThermo/gpuThermo.H index b835871..73a749d 100644 --- a/gpu_chemistry/src/gpuThermo/gpuThermo.H +++ b/gpu_chemistry/src/gpuThermo/gpuThermo.H @@ -1,8 +1,8 @@ #pragma once #include "cuda_host_dev.H" +#include "device_array.H" #include "gpuPerfectGas.H" #include "gpu_constants.H" -#include //std::array namespace FoamGpu { @@ -12,7 +12,7 @@ public: static constexpr size_t nCoeffs_ = 7; - using coeffArray = std::array; + using coeffArray = device_array; // Temperature limits of applicability of functions gScalar Tlow_, Thigh_, Tcommon_; @@ -136,12 +136,12 @@ public: /// @param p Pressure. /// @param T Temperature. /// @return A pair of scalars [Kc, dKcTbyKc]. - inline CUDA_HOSTDEV std::pair + inline CUDA_HOSTDEV device_array KcAnddKcTbyKc(const gScalar p, const gScalar T) const { // const gScalar nm = this->Y() / this->W(); const gScalar nm = this->Y() / this->W(); - const gScalar logT = log(T); + const gScalar logT = log(T); const gScalar Gstd = this->Gstd(T, logT); const bool nm_is_small = fabs(nm - gpuSmall) < gpuVSmall; const gScalar gpuRR_times_T = gpuRR * T; @@ -165,7 +165,7 @@ public: return ret - nm / T; }(); - return std::make_pair(Kc, dKcdTbyKc); + return device_array({Kc, dKcdTbyKc}); } /// @brief Computes the heat capacity [J/kg/K] at constant @@ -279,13 +279,13 @@ public: return S(p, T, log(T)); } - /// @brief Computes the Gibbs free energy [J/kg] of the mixture in /// the standard state. 
/// @param T Temperature. /// @param logT Precomputed log(T) /// @return The Gibbs free energy. - inline CUDA_HOSTDEV gScalar Gstd(const gScalar T, const gScalar logT) const { + inline CUDA_HOSTDEV gScalar Gstd(const gScalar T, + const gScalar logT) const { const auto& a = coeffs(T); return ( diff --git a/gpu_chemistry/src/gpuThermo/makeGpuThermo.H b/gpu_chemistry/src/gpuThermo/makeGpuThermo.H index 09c1894..ca07d3b 100644 --- a/gpu_chemistry/src/gpuThermo/makeGpuThermo.H +++ b/gpu_chemistry/src/gpuThermo/makeGpuThermo.H @@ -14,18 +14,19 @@ namespace FoamGpu { template -static inline gpuThermo makeGpuThermo(const ThermoType& cputhermo, - const Foam::dictionary& thermoDict) { +static inline gpuThermo +makeGpuThermo(const ThermoType& cputhermo, + const Foam::dictionary& thermoDict) { gScalar W = cputhermo.W(); gScalar Y = cputhermo.Y(); - gScalar Tlow( - thermoDict.subDict("thermodynamics").lookup("Tlow")); - gScalar Thigh( - thermoDict.subDict("thermodynamics").lookup("Thigh")); - gScalar Tcommon( - thermoDict.subDict("thermodynamics").lookup("Tcommon")); + gScalar Tlow(thermoDict.subDict("thermodynamics") + .lookup("Tlow")); + gScalar Thigh(thermoDict.subDict("thermodynamics") + .lookup("Thigh")); + gScalar Tcommon(thermoDict.subDict("thermodynamics") + .lookup("Tcommon")); Foam::FixedList cHigh = thermoDict.subDict("thermodynamics").lookup("highCpCoeffs"); @@ -42,7 +43,8 @@ static inline gpuThermo makeGpuThermo(const ThermoType& cputhermo, lowCpCoeffs[i] = cputhermo.R() * cLow[i]; } - return gpuThermo(Y, W, Tlow, Thigh, Tcommon, highCpCoeffs, lowCpCoeffs); + return gpuThermo( + Y, W, Tlow, Thigh, Tcommon, highCpCoeffs, lowCpCoeffs); } template @@ -53,9 +55,10 @@ makeGpuThermos(const Foam::PtrList& cpuThermos, for (const auto& t : cpuThermos) { const auto& specieName = t.name(); - const auto subDict = physicalProperties.subDict(specieName); - gpuThermo gpu = makeGpuThermo(t, subDict); - // gpuThermo gpu(t.Y(), t.W(), physicalProperties.subDict(specieName)); + const 
auto subDict = physicalProperties.subDict(specieName); + gpuThermo gpu = makeGpuThermo(t, subDict); + // gpuThermo gpu(t.Y(), t.W(), + // physicalProperties.subDict(specieName)); gpuThermos.push_back(gpu); } diff --git a/gpu_chemistry/src/memoryResource/arrays.H b/gpu_chemistry/src/memoryResource/arrays.H index 1f99c59..6de039a 100644 --- a/gpu_chemistry/src/memoryResource/arrays.H +++ b/gpu_chemistry/src/memoryResource/arrays.H @@ -23,9 +23,6 @@ enum scalarArrays { N_SCALAR_ARRAYS }; -enum twoDScalarArrays { - J, - N_TWOD_SCALAR_ARRAYS -}; +enum twoDScalarArrays { J, N_TWOD_SCALAR_ARRAYS }; } // namespace FoamGpu diff --git a/gpu_chemistry/src/memoryResource/cpuMemoryResource.H b/gpu_chemistry/src/memoryResource/cpuMemoryResource.H index 849d8a6..78d0d4b 100644 --- a/gpu_chemistry/src/memoryResource/cpuMemoryResource.H +++ b/gpu_chemistry/src/memoryResource/cpuMemoryResource.H @@ -4,7 +4,6 @@ #include "gpu_constants.H" #include "memoryResource.H" #include "pointer_casts.hpp" -#include #include namespace FoamGpu { @@ -23,15 +22,17 @@ struct cpuMemoryResource : public memoryResource { ~cpuMemoryResource() { deallocate(); } - std::array& getLabelData() override { + device_array& getLabelData() override { return labelData_; } - std::array& getScalarData() override { + device_array& + getScalarData() override { return scalarData_; } - std::array& getTwodScalarData() override { + device_array& + getTwodScalarData() override { return twodScalarData_; } @@ -58,7 +59,8 @@ private: scalarData_[i] = sAllocator.allocate(scalarArrayLength()); } for (gLabel i = 0; i < N_TWOD_SCALAR_ARRAYS; ++i) { - twodScalarData_[i] = sAllocator.allocate(twodScalarArrayLength()); + twodScalarData_[i] = + sAllocator.allocate(twodScalarArrayLength()); } } @@ -66,7 +68,6 @@ private: labelAllocator lAllocator; scalarAllocator sAllocator; - for (gLabel i = 0; i < N_LABEL_ARRAYS; ++i) { auto ptr = labelData_[i]; lAllocator.deallocate(ptr, labelArrayLength()); @@ -81,9 +82,9 @@ private: } } - 
std::array labelData_{}; - std::array scalarData_{}; - std::array twodScalarData_{}; + device_array labelData_{}; + device_array scalarData_{}; + device_array twodScalarData_{}; }; } // namespace FoamGpu \ No newline at end of file diff --git a/gpu_chemistry/src/memoryResource/gpuBuffer.H b/gpu_chemistry/src/memoryResource/gpuBuffer.H index 41fa022..93c1f44 100644 --- a/gpu_chemistry/src/memoryResource/gpuBuffer.H +++ b/gpu_chemistry/src/memoryResource/gpuBuffer.H @@ -2,9 +2,10 @@ #include "arrays.H" #include "check_ptr.H" +#include "device_array.H" #include "gpu_constants.H" #include "mdspan.H" -//#include "timer.H" +// #include "timer.H" namespace FoamGpu { @@ -14,10 +15,10 @@ struct gpuBuffer { gpuBuffer() = default; CUDA_HOSTDEV - gpuBuffer(std::array lData, - std::array sData, - std::array twodSData, - gLabel nSpecie) + gpuBuffer(device_array lData, + device_array sData, + device_array twodSData, + gLabel nSpecie) : lData_(lData) , sData_(sData) , twodSData_(twodSData) @@ -114,11 +115,11 @@ struct gpuBuffer { } private: - std::array lData_; - std::array sData_; - std::array twodSData_; - gLabel nSpecie_; - gLabel nEqns_; + device_array lData_; + device_array sData_; + device_array twodSData_; + gLabel nSpecie_; + gLabel nEqns_; }; } // namespace FoamGpu diff --git a/gpu_chemistry/src/memoryResource/memoryResource.H b/gpu_chemistry/src/memoryResource/memoryResource.H index b1e700f..6e03e41 100644 --- a/gpu_chemistry/src/memoryResource/memoryResource.H +++ b/gpu_chemistry/src/memoryResource/memoryResource.H @@ -1,10 +1,10 @@ #pragma once +#include "device_array.H" #include "gpuBuffer.H" #include "gpu_constants.H" #include "mdspan.H" #include "pointer_casts.hpp" -#include #include namespace FoamGpu { @@ -24,11 +24,13 @@ struct memoryResource { gLabel nEqns() const { return nEqns_; } gLabel nSpecie() const { return nSpecie_; } - virtual std::array& getLabelData() = 0; + virtual device_array& getLabelData() = 0; - virtual std::array& getScalarData() = 0; + virtual 
device_array& + getScalarData() = 0; - virtual std::array& getTwodScalarData() = 0; + virtual device_array& + getTwodScalarData() = 0; virtual void resize(gLabel nCells, gLabel nSpecie) = 0; @@ -43,20 +45,21 @@ protected: bool shouldReallocate(gLabel nCells, gLabel nSpecie) const { - if ((nCells != this->nCells()) || (nSpecie != this->nSpecie()) || + if ((nCells != this->nCells()) || + (nSpecie != this->nSpecie()) || (nCells + 2 != this->nEqns())) { return true; } return false; } - gLabel nCells_; gLabel nSpecie_; gLabel nEqns_; }; -static inline std::vector splitToBuffers(memoryResource& mr) { +static inline std::vector +splitToBuffers(memoryResource& mr) { std::vector ret(mr.nCells()); @@ -65,22 +68,24 @@ static inline std::vector splitToBuffers(memoryResource& mr) { auto& twod_sd = mr.getTwodScalarData(); for (gLabel j = 0; j < mr.nCells(); ++j) { - std::array labelData{}; + device_array labelData{}; for (gLabel i = 0; i < N_LABEL_ARRAYS; ++i) { labelData[i] = ld[i] + mr.nEqns() * j; } - std::array scalarData{}; + device_array scalarData{}; for (gLabel i = 0; i < N_SCALAR_ARRAYS; ++i) { scalarData[i] = sd[i] + mr.nEqns() * j; } - std::array twodScalarData{}; + device_array twodScalarData{}; for (gLabel i = 0; i < N_TWOD_SCALAR_ARRAYS; ++i) { - twodScalarData[i] = twod_sd[i] + mr.nEqns() * mr.nEqns() * j; + twodScalarData[i] = + twod_sd[i] + mr.nEqns() * mr.nEqns() * j; } - ret[j] = gpuBuffer(labelData, scalarData, twodScalarData, mr.nSpecie()); + ret[j] = gpuBuffer( + labelData, scalarData, twodScalarData, mr.nSpecie()); } return ret; diff --git a/gpu_chemistry/unittest/OpenFOAMReferenceKernels/Make/files b/gpu_chemistry/unittest/OpenFOAMReferenceKernels/Make/files new file mode 100644 index 0000000..f3d9541 --- /dev/null +++ b/gpu_chemistry/unittest/OpenFOAMReferenceKernels/Make/files @@ -0,0 +1,5 @@ +openfoam_reference_kernels.C + + +LIB = $(FOAM_USER_LIBBIN)/libGpuFoamOpenFOAMReferenceKernels + diff --git a/gpu_chemistry/benchmark/evaluator/Make/options 
b/gpu_chemistry/unittest/OpenFOAMReferenceKernels/Make/options similarity index 96% rename from gpu_chemistry/benchmark/evaluator/Make/options rename to gpu_chemistry/unittest/OpenFOAMReferenceKernels/Make/options index d389a52..610b100 100644 --- a/gpu_chemistry/benchmark/evaluator/Make/options +++ b/gpu_chemistry/unittest/OpenFOAMReferenceKernels/Make/options @@ -1,11 +1,10 @@ EXE_INC = \ + -I../testHelpers/lnInclude \ -I../../catchMain/lnInclude \ -I../../src/lnInclude \ -I../../src/gpuChemistryModels/lnInclude \ -I../../src/gpuKernelEvaluator/lnInclude \ -I../../../gpu_utils/common/lnInclude \ - -I../../unittest/testHelpers/ \ - -I../ \ -I$(LIB_SRC)/physicalProperties/lnInclude \ -I$(LIB_SRC)/thermophysicalModels/multicomponentThermo/lnInclude \ -I$(LIB_SRC)/thermophysicalModels/basic/lnInclude \ @@ -18,10 +17,11 @@ EXE_INC = \ -I$(LIB_SRC)/thermophysicalModels/chemistryModel/lnInclude \ -std=c++17 + EXE_LIBS = \ -L$(FOAM_USER_LIBBIN) \ - -lGpuKernelEvaluator \ -lGpuChemistryTestHelpers \ + -lGpuKernelEvaluator \ -lCatchMain \ -lmeshTools \ -lthermophysicalProperties \ @@ -34,4 +34,3 @@ EXE_LIBS = \ -lfiniteVolume \ -lchemistryModel \ -lblockMesh - diff --git a/gpu_chemistry/unittest/testHelpers/mock_of_odesystem.H b/gpu_chemistry/unittest/OpenFOAMReferenceKernels/mock_of_odesystem.H similarity index 98% rename from gpu_chemistry/unittest/testHelpers/mock_of_odesystem.H rename to gpu_chemistry/unittest/OpenFOAMReferenceKernels/mock_of_odesystem.H index c470890..701a76b 100644 --- a/gpu_chemistry/unittest/testHelpers/mock_of_odesystem.H +++ b/gpu_chemistry/unittest/OpenFOAMReferenceKernels/mock_of_odesystem.H @@ -26,6 +26,13 @@ class MockOFSystem : public ODESystem { const jacobianType jacobianType_ = jacobianType::fast; public: + + const ReactionList& getReactions() const + { + return reactions_; + } + + MockOFSystem(TestData::Mechanism m) : specieThermos_(TestData::makeCpuThermos(m)) , reactions_(TestData::makeSpeciesTable(m), diff --git 
a/gpu_chemistry/unittest/OpenFOAMReferenceKernels/openfoam_reference_kernels.C b/gpu_chemistry/unittest/OpenFOAMReferenceKernels/openfoam_reference_kernels.C new file mode 100644 index 0000000..1f9ef0c --- /dev/null +++ b/gpu_chemistry/unittest/OpenFOAMReferenceKernels/openfoam_reference_kernels.C @@ -0,0 +1,320 @@ +#include "openfoam_reference_kernels.H" +#include "volFields.H" +#include "thermodynamicConstants.H" +#include "fundamentalConstants.H" +#include "physicoChemicalConstants.H" +#include "scalarMatrices.H" +#include "ODESolver.H" + +#include "mock_of_odesystem.H" + + +namespace OFReferenceKernels{ + +TestData::constantResults constants() +{ + TestData::constantResults ret; + + ret.RR = Foam::constant::thermodynamic::RR; + ret.Pstd = Foam::constant::thermodynamic::Pstd; + ret.Tstd = Foam::constant::thermodynamic::Tstd; + ret.NA = Foam::constant::physicoChemical::NA.value(); + ret.k = Foam::constant::physicoChemical::k.value(); + ret.vGreat = Foam::vGreat; + ret.vSmall = Foam::vSmall; + ret.small = Foam::small; + ret.great = Foam::great; + return ret; +} + + +TestData::perfectGasResult perfect_gas(gScalar p, gScalar T, gScalar Y, gScalar molWeight) +{ + + const Foam::perfectGas eos + ( + Foam::specie("temp", Y, molWeight) + ); + + TestData::perfectGasResult ret; + + ret.R = eos.R(); + ret.rho = eos.rho(p, T); + ret.h = eos.h(p, T); + ret.Cp = eos.Cp(p, T); + ret.e = eos.e(p, T); + ret.Cv = eos.Cv(p, T); + ret.sp = eos.sp(p, T); + //ret.sv = eos.sv(p, T); + ret.psi = eos.psi(p, T); + ret.Z = eos.Z(p, T); + ret.CpMCv = eos.CpMCv(p, T); + ret.alphav = eos.alphav(p, T); + return ret; + + +} + + + +TestData::thermoResults thermo(TestData::Mechanism mech) +{ + const Foam::scalar p = TestData::pInf(mech); + const Foam::scalar T = TestData::TInf(mech); + + auto thermos = TestData::makeCpuThermos(mech); + + const gLabel nThermo = thermos.size(); + + TestData::thermoResults ret(nThermo); + + for (gLabel i = 0; i < thermos.size(); ++i) + { + ret.W[i] = thermos[i].W(); 
+ ret.Y[i] = thermos[i].Y(); + ret.R[i] = thermos[i].R(); + ret.Cp[i] = thermos[i].Cp(p, T); + ret.ha[i] = thermos[i].ha(p, T); + ret.hs[i] = thermos[i].hs(p, T); + ret.hf[i] = thermos[i].hf( ); + ret.s[i] = thermos[i].s(p, T); + ret.gStd[i] = thermos[i].gStd(T); + ret.dCpdT[i] = thermos[i].dCpdT(p, T); + ret.Cv[i] = thermos[i].Cv(p, T); + ret.es[i] = thermos[i].es(p, T); + ret.ea[i] = thermos[i].ea(p, T); + ret.K[i] = thermos[i].K(p, T); + ret.Kp[i] = thermos[i].Kp(p, T); + ret.Kc[i] = thermos[i].Kc(p, T); + ret.dKcdTbyKc[i] = thermos[i].dKcdTbyKc(p, T); + } + + return ret; + +} + + + +TestData::reactionResults reaction(TestData::Mechanism mech) +{ + const Foam::ReactionList reactions( + TestData::makeSpeciesTable(mech), + TestData::makeCpuThermos(mech), + TestData::makeReactionDict(mech) + ); + + + const gLabel nSpecie = TestData::speciesCount(mech); + const gLabel nEqns = TestData::equationCount(mech); + const gLabel nReactions = reactions.size(); + + const Foam::scalarField c = [&](){ + Foam::scalarField ret(nSpecie); + //assign_test_concentration(ret, mech); + fill_linear(ret); + return ret; + + }(); + Foam::scalar p = TestData::pInf(mech); + Foam::scalar T = TestData::TInf(mech); + Foam::label li = 0; + + + Foam::List c2s; + gLabel csi0 = 0; + gLabel Tsi = nSpecie; + Foam::scalarField cTpWork0(nSpecie, 0); + Foam::scalarField cTpWork1(nSpecie, 0); + + + + std::vector Thigh(nReactions); + std::vector Tlow(nReactions); + std::vector Kc(nReactions); + std::vector kf(nReactions); + std::vector kr(nReactions); + std::vector omega(nReactions); + + std::vector> dNdtByV(nReactions); + std::vector> ddNdtByVdcTp(nReactions); + + for (Foam::label i = 0; i < reactions.size(); ++i) + { + Thigh[i] = reactions[i].Thigh(); + Tlow[i] = reactions[i].Tlow(); + Kc[i] = reactions[i].Kc(p, T); + kf[i] = reactions[i].kf(p, T, c, li); + kr[i] = reactions[i].kr(p, T, c, li); + + //arbitrary + Foam::scalar omegaf = 0.3; + Foam::scalar omegar = 0.4; + omega[i] = reactions[i].omega(p, 
T, c, li, omegaf, omegar); + + + Foam::scalarField dNdtByV_f(c.size(), 0); + reactions[i].dNdtByV(p, T, c, li, dNdtByV_f, false, Foam::List{}, 0); + dNdtByV[i] = std::vector(dNdtByV_f.begin(), dNdtByV_f.end()); + + + Foam::scalarSquareMatrix ddNdtByVdcTp_f(nEqns, 0); + + dNdtByV_f = 0; //probably not necessary + reactions[i].ddNdtByVdcTp + ( + p, + T, + c, + li, + dNdtByV_f, + ddNdtByVdcTp_f, + false, + c2s, + csi0, + Tsi, + cTpWork0, + cTpWork1 + ); + + + ddNdtByVdcTp[i] = std::vector + ( + ddNdtByVdcTp_f.v(), + ddNdtByVdcTp_f.v() + ddNdtByVdcTp_f.size() + ); + + + } + TestData::reactionResults ret; + ret.Thigh = Thigh; + ret.Tlow = Tlow; + ret.Kc = Kc; + ret.kf = kf; + ret.kr = kr; + ret.omega = omega; + ret.dNdtByV = dNdtByV; + ret.ddNdtByVdcTp = ddNdtByVdcTp; + return ret; + + +} + + +std::tuple, std::vector, std::vector> +lu(const std::vector& m_vals, const std::vector& s_vals) +{ + gLabel size = std::sqrt(m_vals.size()); + + Foam::scalarSquareMatrix matrix(size, 0); + std::copy(m_vals.begin(), m_vals.end(), matrix.v()); + Foam::List pivot(size, 0); + Foam::scalarField source(size); + std::copy(s_vals.begin(), s_vals.end(), source.begin()); + + gLabel sign; + Foam::LUDecompose(matrix, pivot, sign); + Foam::LUBacksubstitute(matrix, pivot, source); + + + auto m_ret = std::vector(matrix.v(), matrix.v() + size*size); + auto p_ret = std::vector(pivot.begin(), pivot.end()); + auto s_ret = std::vector(source.begin(), source.end()); + + return std::make_tuple(m_ret, p_ret, s_ret); + +} + + +TestData::odeSystemResults odesystem(TestData::Mechanism mech) +{ + const gLabel nSpecie = TestData::speciesCount(mech); + const gLabel nEqns = TestData::equationCount(mech); + Foam::MockOFSystem system(mech); + + TestData::odeSystemResults ret; + + const Foam::scalarField y0 = [=](){ + gLabel nEqns = TestData::equationCount(mech); + Foam::scalarField y0_t(nEqns); + fill_linear(y0_t); + y0_t[nSpecie] = TestData::TInf(mech); + y0_t[nSpecie + 1] = TestData::pInf(mech); + + 
//std::vector y0_vec = TestData::get_solution_vector(mech); + //std::copy(y0_vec.begin(), y0_vec.end(), y0_t.begin()); + return y0_t; + }(); + + const Foam::scalar time = 0.32423; + + const gLabel li = 0; + + { + Foam::scalarField dy(nEqns, 0.31); + system.derivatives(0.0, y0, li, dy); + ret.derivative = std::vector(dy.begin(), dy.end()); + + } + + { + Foam::scalarField dy(nEqns, 0.31); + Foam::scalarSquareMatrix J(nEqns, 0.1); + system.jacobian(time, y0, li, dy, J); + ret.jacobian = std::vector(J.v(), J.v()+J.size()); + } + + + + return ret; + +} + + +std::vector ode_solve(TestData::Mechanism mech, std::string solver_name, gScalar xStart, gScalar xEnd, gScalar dxTry) +{ + Foam::dictionary dict; + dict.add("solver", solver_name); + Foam::MockOFSystem system(mech); + + + auto ode = Foam::ODESolver::New(system, dict); + + + Foam::scalarField y = [&](){ + + std::vector v = TestData::get_solution_vector(mech); + Foam::scalarField ret(v.size()); + std::copy(v.begin(), v.end(), ret.begin()); + return ret; + + }(); + + /* + Foam::scalarField y = [=](){ + const gLabel nSpecie = TestData::speciesCount(mech); + const gLabel nEqns = TestData::equationCount(mech); + Foam::scalarField y0_t(nEqns); + fill_linear(y0_t); + y0_t[nSpecie] = TestData::TInf(mech); + y0_t[nSpecie + 1] = TestData::pInf(mech); + return y0_t; + }(); + */ + const Foam::label li = 0; + + + Foam::scalar dxTry_temp = dxTry; + ode->solve(xStart, xEnd, y, li, dxTry_temp); + + + return std::vector(y.begin(), y.end()); + + +} + + + +} + + + diff --git a/gpu_chemistry/unittest/OpenFOAMReferenceKernels/openfoam_reference_kernels.H b/gpu_chemistry/unittest/OpenFOAMReferenceKernels/openfoam_reference_kernels.H new file mode 100644 index 0000000..23fcf3d --- /dev/null +++ b/gpu_chemistry/unittest/OpenFOAMReferenceKernels/openfoam_reference_kernels.H @@ -0,0 +1,34 @@ +#pragma once + +#include "mechanisms.H" +#include "results.H" + +#include +#include +#include + +namespace OFReferenceKernels { + 
+TestData::constantResults constants(); + +TestData::perfectGasResult +perfect_gas(gScalar p, gScalar T, gScalar Y, gScalar molWeight); + +TestData::thermoResults thermo(TestData::Mechanism mech); +TestData::reactionResults reaction(TestData::Mechanism mech); + +std::tuple, + std::vector, + std::vector> +lu(const std::vector& m_vals, + const std::vector& s_vals); + +TestData::odeSystemResults odesystem(TestData::Mechanism mech); + +std::vector ode_solve(TestData::Mechanism mech, + std::string solver_name, + gScalar xStart, + gScalar xEnd, + gScalar dxTry); + +} // namespace OFReferenceKernels diff --git a/gpu_chemistry/unittest/cpuTestKernels/Make/files b/gpu_chemistry/unittest/cpuTestKernels/Make/files new file mode 100644 index 0000000..110e02a --- /dev/null +++ b/gpu_chemistry/unittest/cpuTestKernels/Make/files @@ -0,0 +1,5 @@ +cpu_test_kernels.C + + +LIB = $(FOAM_USER_LIBBIN)/libGpuFoamCpuTestKernels + diff --git a/gpu_chemistry/unittest/cpuTestKernels/Make/options b/gpu_chemistry/unittest/cpuTestKernels/Make/options new file mode 100644 index 0000000..610b100 --- /dev/null +++ b/gpu_chemistry/unittest/cpuTestKernels/Make/options @@ -0,0 +1,36 @@ +EXE_INC = \ + -I../testHelpers/lnInclude \ + -I../../catchMain/lnInclude \ + -I../../src/lnInclude \ + -I../../src/gpuChemistryModels/lnInclude \ + -I../../src/gpuKernelEvaluator/lnInclude \ + -I../../../gpu_utils/common/lnInclude \ + -I$(LIB_SRC)/physicalProperties/lnInclude \ + -I$(LIB_SRC)/thermophysicalModels/multicomponentThermo/lnInclude \ + -I$(LIB_SRC)/thermophysicalModels/basic/lnInclude \ + -I$(LIB_SRC)/thermophysicalModels/specie/lnInclude \ + -I$(LIB_SRC)/thermophysicalModels/functions/Polynomial \ + -I$(LIB_SRC)/thermophysicalModels/chemistryModel/lnInclude \ + -I$(LIB_SRC)/ODE/lnInclude \ + -I$(LIB_SRC)/finiteVolume/lnInclude \ + -I$(LIB_SRC)/meshTools/lnInclude \ + -I$(LIB_SRC)/thermophysicalModels/chemistryModel/lnInclude \ + -std=c++17 + + +EXE_LIBS = \ + -L$(FOAM_USER_LIBBIN) \ + 
-lGpuChemistryTestHelpers \ + -lGpuKernelEvaluator \ + -lCatchMain \ + -lmeshTools \ + -lthermophysicalProperties \ + -lODE \ + -lOpenFOAM \ + -lspecie \ + -lfluidThermoThermophysicalTransportModels \ + -lfluidThermophysicalModels \ + -lmulticomponentThermophysicalModels \ + -lfiniteVolume \ + -lchemistryModel \ + -lblockMesh diff --git a/gpu_chemistry/unittest/cpuTestKernels/cpu_test_kernels.C b/gpu_chemistry/unittest/cpuTestKernels/cpu_test_kernels.C new file mode 100644 index 0000000..1506d51 --- /dev/null +++ b/gpu_chemistry/unittest/cpuTestKernels/cpu_test_kernels.C @@ -0,0 +1,422 @@ +#include "cpu_test_kernels.H" +#include "test_utilities.H" +#include "gpuReaction.H" +#include "gpuODESystem.H" +#include "gpuKernelEvaluator.H" +#include "cpuMemoryResource.H" +#include "host_device_vectors.H" +#include "ludecompose.H" +#include "create_gpu_inputs.H" +#include "makeGpuOdeSolver.H" + + + +namespace CpuTestKernels{ + + +using memoryResource_t = FoamGpu::cpuMemoryResource; + + +template +static inline gScalar eval(T t) +{ + return t(); +} + + +TestData::constantResults constants(){ + + using namespace FoamGpu; + + TestData::constantResults ret; + ret.RR = eval([] DEVICE (){return gpuRR;}); + ret.Pstd = eval([] DEVICE (){return gpuPstd;}); + ret.Tstd = eval([] DEVICE (){return gpuTstd;}); + ret.NA = eval([] DEVICE (){return gpuNA;}); + ret.k = eval([] DEVICE (){return gpuk;}); + ret.vGreat = eval([] DEVICE (){return gpuVGreat;}); + ret.vSmall = eval([] DEVICE (){return gpuVSmall;}); + ret.small = eval([] DEVICE (){return gpuSmall;}); + ret.great = eval([] DEVICE (){return gpuGreat;}); + return ret; + +} + + +TestData::perfectGasResult perfect_gas(gScalar p, gScalar T, gScalar Y, gScalar molWeight) +{ + using namespace FoamGpu; + + const gpuPerfectGas eos + ( + Y, molWeight + ); + + TestData::perfectGasResult ret; + + + ret.R = eval([=] DEVICE (){return eos.R();}); + ret.rho = eval([=] DEVICE (){return eos.rho(p, T);}); + ret.h = eval([=] DEVICE (){return eos.H(p, 
T);}); + ret.Cp = eval([=] DEVICE (){return eos.Cp(p, T);}); + ret.e = eval([=] DEVICE (){return eos.E(p, T);}); + ret.Cv = eval([=] DEVICE (){return eos.Cv(p, T);}); + ret.sp = eval([=] DEVICE (){return eos.Sp(p, T);}); + //ret.sv = eval([=] DEVICE (){return eos.Sv(p, T);}); + ret.psi = eval([=] DEVICE (){return eos.psi(p, T);}); + ret.Z = eval([=] DEVICE (){return eos.Z(p, T);}); + ret.CpMCv = eval([=] DEVICE (){return eos.CpMCv(p, T);}); + ret.alphav = eval([=] DEVICE (){return eos.alphav(p, T);}); + + + return ret; +} + + +TestData::thermoResults thermo(TestData::Mechanism mech) +{ + using namespace FoamGpu; + + const auto p = TestData::pInf(mech); + const auto T = TestData::TInf(mech); + + auto thermos_temp = TestData::makeGpuThermos(mech); + auto thermos = toDeviceVector(thermos_temp); + + + const gLabel nThermo = thermos.size(); + + TestData::thermoResults ret(nThermo); + + for (size_t i = 0; i < thermos.size(); ++i) + { + gpuThermo* thermo = make_raw_pointer(thermos.data()) + i; + + ret.W[i] = eval([=] DEVICE (){return thermo->W();}); + ret.Y[i] = eval([=] DEVICE (){return thermo->Y();}); + ret.R[i] = eval([=] DEVICE (){return thermo->R();}); + ret.Cp[i] = eval([=] DEVICE (){return thermo->Cp(p, T);}); + ret.ha[i] = eval([=] DEVICE (){return thermo->Ha(p, T);}); + ret.hs[i] = eval([=] DEVICE (){return thermo->Hs(p, T);}); + ret.hf[i] = eval([=] DEVICE (){return thermo->Hf( );}); + ret.s[i] = eval([=] DEVICE (){return thermo->S(p, T);}); + ret.gStd[i] = eval([=] DEVICE (){return thermo->Gstd(T);}); + ret.dCpdT[i] = eval([=] DEVICE (){return thermo->dCpdT(p, T);}); + ret.Cv[i] = eval([=] DEVICE (){return thermo->Cv(p, T);}); + ret.es[i] = eval([=] DEVICE (){return thermo->Es(p, T);}); + ret.ea[i] = eval([=] DEVICE (){return thermo->Ea(p, T);}); + ret.K[i] = eval([=] DEVICE (){return thermo->K(p, T);}); + ret.Kp[i] = eval([=] DEVICE (){return thermo->Kp(p, T);}); + ret.Kc[i] = eval([=] DEVICE (){return thermo->Kc(p, T);}); + ret.dKcdTbyKc[i] = eval([=] DEVICE 
(){return thermo->dKcdTbyKc(p, T);}); + + } + + + return ret; + +} + + + +TestData::reactionResults reaction(TestData::Mechanism mech) +{ + + using namespace FoamGpu; + + auto reactions_temp = makeGpuReactions(mech); + auto reactions = toDeviceVector(reactions_temp); + + const gLabel nSpecie = TestData::speciesCount(mech); + const gLabel nEqns = TestData::equationCount(mech); + const size_t nReactions = reactions.size(); + const gScalar p = TestData::pInf(mech); + const gScalar T = TestData::TInf(mech); + + device_vector cc = + [&](){ + std::vector t(nSpecie); + fill_linear(t); + return toDeviceVector(t); + }(); + + std::vector Thigh(nReactions); + std::vector Tlow(nReactions); + std::vector Kc(nReactions); + std::vector kf(nReactions); + std::vector kr(nReactions); + std::vector omega(nReactions); + std::vector> dNdtByV(nReactions); + std::vector> ddNdtByVdcTp(nReactions); + + for (size_t i = 0; i < nReactions; ++i){ + + gpuReaction* reaction = make_raw_pointer(reactions.data()) + i; + + Thigh[i] = eval([=] DEVICE (){return reaction->Thigh();}); + Tlow[i] = eval([=] DEVICE (){return reaction->Tlow();}); + + auto c = make_mdspan(cc, extents<1>{nSpecie}); + + Kc[i] = eval([=] DEVICE (){return reaction->Kc(p, T);}); + kf[i] = eval([=] DEVICE (){return reaction->kf(p, T, c);}); + kr[i] = eval([=] DEVICE (){return reaction->kr(p, T, c);}); + omega[i] = eval([=] DEVICE (){return reaction->omega(p, T, c);}); + + + + ///==================dNdtByV================== + device_vector dNdtByV_i = toDeviceVector(std::vector(nSpecie, 0)); + auto f = + [ + =, + res = make_mdspan(dNdtByV_i, extents<1>{nSpecie}) + ] DEVICE + (){ + reaction->dNdtByV(p, T, c, res); + return 0; + }; + eval(f); + dNdtByV[i] = toStdVector(dNdtByV_i); + + + + ///==================ddNdtByVdcTp================== + device_vector ddNdtByVdcTp_i = toDeviceVector(std::vector(nEqns*nEqns, 0)); + device_vector cTpWork0_i(nSpecie); + auto f2 = + [ + =, + res = make_mdspan(ddNdtByVdcTp_i, extents<2>{nEqns, nEqns}), 
+ cTpWork0 = make_mdspan(cTpWork0_i, extents<1>{nSpecie}) + ] DEVICE + () + { + auto params = computeReactionParameters(*reaction, c, p, T, cTpWork0); + + reaction->ddNdtByVdcTp + ( + p, + T, + c, + res, + params + + ); + + return 0; + }; + + eval(f2); + ddNdtByVdcTp[i] = toStdVector(ddNdtByVdcTp_i); + + + } + + TestData::reactionResults ret; + ret.Thigh = Thigh; + ret.Tlow = Tlow; + ret.Kc = Kc; + ret.kf = kf; + ret.kr = kr; + ret.omega = omega; + ret.dNdtByV = dNdtByV; + ret.ddNdtByVdcTp = ddNdtByVdcTp; + return ret; + +} + + +std::tuple, std::vector, std::vector> +lu(const std::vector& m_vals, const std::vector& s_vals) +{ + + gLabel size = std::sqrt(m_vals.size()); + + device_vector matrix(m_vals.begin(), m_vals.end()); + device_vector pivot = toDeviceVector(std::vector(size, 0)); + //device_vector pivot(size, 0); + //device_vector v(size, 0); + device_vector v = toDeviceVector(std::vector(size, 0)); + device_vector source(s_vals.begin(), s_vals.end()); + + auto m_span = make_mdspan(matrix, extents<2>{size, size}); + auto p_span = make_mdspan(pivot, extents<1>{size}); + auto v_span = make_mdspan(v, extents<1>{size}); + + eval + ( + [=] DEVICE (){FoamGpu::LUDecompose(m_span, p_span, v_span); return 0;} + ); + + auto s_span = make_mdspan(source, extents<1>{size}); + + eval + ( + [=] DEVICE (){FoamGpu::LUBacksubstitute(m_span, p_span, s_span); return 0;} + ); + + + auto m_ret = toStdVector(matrix); + auto p_ret = toStdVector(pivot); + auto s_ret = toStdVector(source); + + return std::make_tuple(m_ret, p_ret, s_ret); + +} + + +TestData::odeSystemResults odesystem(TestData::Mechanism mech) +{ + using namespace FoamGpu; + + auto gpu_thermos = toDeviceVector(makeGpuThermos(mech)); + auto gpu_reactions = toDeviceVector(makeGpuReactions(mech)); + const auto nEqns = TestData::equationCount(mech); + const auto nSpecie = TestData::speciesCount(mech); + + gpuODESystem gpu + ( + nEqns, + gLabel(gpu_reactions.size()), + make_raw_pointer(gpu_thermos.data()), + 
make_raw_pointer(gpu_reactions.data()) + ); + + + const device_vector y0_v + = [&](){ + + std::vector t(nEqns); + fill_linear(t); + t[nSpecie] = TestData::TInf(mech); + t[nSpecie + 1] = TestData::pInf(mech); + return toDeviceVector(t); + }(); + + + + TestData::odeSystemResults ret; + + { + memoryResource_t memory(1, nSpecie); + auto buffers = toDeviceVector(splitToBuffers(memory)); + + device_vector dy_v(nEqns); + auto f = + [ + =, + buffers = make_mdspan(buffers, extents<1>{1}), + y = make_mdspan(y0_v, extents<1>{nEqns}), + dy = make_mdspan(dy_v, extents<1>{nEqns}) + ] DEVICE + () + { + gpu.derivatives(y, dy, buffers[0]); + return 0; + }; + eval(f); + ret.derivative = toStdVector(dy_v); + + } + + { + + memoryResource_t memory(1, nSpecie); + auto buffers = toDeviceVector(splitToBuffers(memory)); + device_vector dy_v(nEqns); + device_vector J_v(nEqns*nEqns); + + auto f = + [ + =, + buffers = make_mdspan(buffers, extents<1>{1}), + y = make_mdspan(y0_v, extents<1>{nEqns}), + dy = make_mdspan(dy_v, extents<1>{nEqns}), + J = make_mdspan(J_v, extents<2>{nEqns, nEqns}) + ] DEVICE + () + { + gpu.jacobian(y, dy, J, buffers[0]); + return 0; + }; + eval(f); + + ret.jacobian = toStdVector(J_v); + + } + + + + return ret; + +} + + +std::vector ode_solve(TestData::Mechanism mech, std::string solver_name, gScalar xStart, gScalar xEnd, gScalar dxTry) +{ + using namespace FoamGpu; + + auto thermos = toDeviceVector(makeGpuThermos(mech)); + auto reactions = toDeviceVector(makeGpuReactions(mech)); + + const gLabel nEqns = TestData::equationCount(mech); + const gLabel nSpecie = TestData::speciesCount(mech); + + gpuODESystem system + ( + nEqns, + gLabel(reactions.size()), + make_raw_pointer(thermos.data()), + make_raw_pointer(reactions.data()) + ); + + gpuODESolverInputs i = TestData::makeGpuODEInputs(solver_name, mech); + + + auto solver = make_gpuODESolver(system, i); + + /* + device_vector y0_v + = [&](){ + + std::vector t(nEqns); + fill_linear(t); + t[nSpecie] = TestData::TInf(mech); 
+ t[nSpecie + 1] = TestData::pInf(mech); + return toDeviceVector(t); + }(); + */ + + + device_vector y0_v = toDeviceVector(TestData::get_solution_vector(mech)); + + memoryResource_t memory(1, nSpecie); + auto buffers = toDeviceVector(splitToBuffers(memory)); + + auto f = [ + ode = solver, + xStart = xStart, + xEnd = xEnd, + y = make_mdspan(y0_v, extents<1>{nEqns}), + dxTry = dxTry, + buffers = make_mdspan(buffers, extents<1>{1}) + ] DEVICE () + { + gScalar dxTry_temp = dxTry; + ode.solve(xStart, xEnd, y, dxTry_temp, buffers[0]); + return dxTry_temp; + }; + + eval(f); + + return toStdVector(y0_v); + +} + + + +} + + + diff --git a/gpu_chemistry/unittest/cpuTestKernels/cpu_test_kernels.H b/gpu_chemistry/unittest/cpuTestKernels/cpu_test_kernels.H new file mode 100644 index 0000000..e9afa3c --- /dev/null +++ b/gpu_chemistry/unittest/cpuTestKernels/cpu_test_kernels.H @@ -0,0 +1,31 @@ +#pragma once + +#include "gpuThermo.H" +#include "mechanisms.H" +#include "results.H" + +namespace CpuTestKernels { + +TestData::constantResults constants(); + +TestData::perfectGasResult +perfect_gas(gScalar p, gScalar T, gScalar Y, gScalar molWeight); + +TestData::thermoResults thermo(TestData::Mechanism mech); +TestData::reactionResults reaction(TestData::Mechanism mech); + +std::tuple, + std::vector, + std::vector> +lu(const std::vector& m_vals, + const std::vector& s_vals); + +TestData::odeSystemResults odesystem(TestData::Mechanism mech); + +std::vector ode_solve(TestData::Mechanism mech, + std::string solver_name, + gScalar xStart, + gScalar xEnd, + gScalar dxTry); + +} // namespace CpuTestKernels \ No newline at end of file diff --git a/gpu_chemistry/unittest/gpuTestKernels/Make/files b/gpu_chemistry/unittest/gpuTestKernels/Make/files new file mode 100644 index 0000000..72a06d4 --- /dev/null +++ b/gpu_chemistry/unittest/gpuTestKernels/Make/files @@ -0,0 +1,5 @@ +gpu_test_kernels.cu + + +LIB = $(FOAM_USER_LIBBIN)/libGpuFoamGpuTestKernels + diff --git 
a/gpu_chemistry/unittest/gpuTestKernels/Make/options b/gpu_chemistry/unittest/gpuTestKernels/Make/options new file mode 100644 index 0000000..0a50ce9 --- /dev/null +++ b/gpu_chemistry/unittest/gpuTestKernels/Make/options @@ -0,0 +1,42 @@ +EXE_INC = \ + -I../testHelpers/lnInclude \ + -I../../catchMain/lnInclude \ + -I../../src/lnInclude \ + -I../../src/gpuChemistryModels/lnInclude \ + -I../../src/gpuKernelEvaluator/lnInclude \ + -I../../../gpu_utils/common/lnInclude \ + -I$(LIB_SRC)/physicalProperties/lnInclude \ + -I$(LIB_SRC)/thermophysicalModels/multicomponentThermo/lnInclude \ + -I$(LIB_SRC)/thermophysicalModels/basic/lnInclude \ + -I$(LIB_SRC)/thermophysicalModels/specie/lnInclude \ + -I$(LIB_SRC)/thermophysicalModels/functions/Polynomial \ + -I$(LIB_SRC)/thermophysicalModels/chemistryModel/lnInclude \ + -I$(LIB_SRC)/ODE/lnInclude \ + -I$(LIB_SRC)/finiteVolume/lnInclude \ + -I$(LIB_SRC)/meshTools/lnInclude \ + -I$(LIB_SRC)/thermophysicalModels/chemistryModel/lnInclude \ + -std=c++17 + + +EXE_LIBS = \ + -L$(FOAM_USER_LIBBIN) \ + -lGpuChemistryTestHelpers \ + -lGpuKernelEvaluator \ + -lCatchMain \ + -lmeshTools \ + -lthermophysicalProperties \ + -lODE \ + -lOpenFOAM \ + -lspecie \ + -lfluidThermoThermophysicalTransportModels \ + -lfluidThermophysicalModels \ + -lmulticomponentThermophysicalModels \ + -lfiniteVolume \ + -lchemistryModel \ + -lblockMesh + +ifeq ($(GPUFOAM_BACKEND_NVIDIA),1) + include ../../../nvcc +else + include ../../../hipcc +endif diff --git a/gpu_chemistry/unittest/gpuTestKernels/gpu_test_kernels.H b/gpu_chemistry/unittest/gpuTestKernels/gpu_test_kernels.H new file mode 100644 index 0000000..97b31e8 --- /dev/null +++ b/gpu_chemistry/unittest/gpuTestKernels/gpu_test_kernels.H @@ -0,0 +1,34 @@ +#pragma once + +#include "gpuThermo.H" +#include "mechanisms.H" +#include "results.H" + +namespace GpuTestKernels { + +TestData::constantResults constants(); + +TestData::perfectGasResult +perfect_gas(gScalar p, gScalar T, gScalar Y, gScalar molWeight); + 
+TestData::thermoResults thermo(TestData::Mechanism mech); +TestData::reactionResults reaction(TestData::Mechanism mech); + +std::tuple, + std::vector, + std::vector> +lu(const std::vector& m_vals, + const std::vector& s_vals); + +TestData::odeSystemResults odesystem(TestData::Mechanism mech); + +std::vector ode_solve(TestData::Mechanism mech, + std::string solver_name, + gScalar xStart, + gScalar xEnd, + gScalar dxTry); + +bool test_for_each_index(); +bool test_evaluator(gLabel nCells); + +} // namespace GpuTestKernels \ No newline at end of file diff --git a/gpu_chemistry/unittest/gpuTestKernels/gpu_test_kernels.cu b/gpu_chemistry/unittest/gpuTestKernels/gpu_test_kernels.cu new file mode 100644 index 0000000..c6a18aa --- /dev/null +++ b/gpu_chemistry/unittest/gpuTestKernels/gpu_test_kernels.cu @@ -0,0 +1,566 @@ +#include "gpu_test_kernels.H" +#include "test_utilities.H" +#include "gpuReaction.H" +#include "gpuODESystem.H" +#include "gpuKernelEvaluator.H" +#include "for_each_index.H" +#include "gpuMemoryResource.H" + +#include "ludecompose.H" +#include "create_gpu_inputs.H" +#include "makeGpuOdeSolver.H" +#include "host_device_vectors.H" + + +namespace GpuTestKernels{ + + + + + + +using memoryResource_t = FoamGpu::gpuMemoryResource; + + + + + +template +__global__ void on_device(T t, R* r) +{ + *r = t(); +} + + + +#ifdef __NVIDIA_BACKEND__ + + template + static inline gScalar eval(T t) + { + + gScalar *d_result; + gpuErrorCheck(cudaMalloc(&d_result, sizeof(gScalar))); + on_device<<<1,1>>>(t, d_result); + gpuErrorCheck(cudaGetLastError()); + gpuErrorCheck(cudaDeviceSynchronize()); + gScalar h_result; + gpuErrorCheck(cudaMemcpy(&h_result, d_result, sizeof(gScalar), cudaMemcpyDeviceToHost)); + gpuErrorCheck(cudaDeviceSynchronize()); + gpuErrorCheck(cudaFree(d_result)); + gpuErrorCheck(cudaDeviceSynchronize()); + return h_result; + + } + + //AMD-backend + #else + + template + static inline gScalar eval(T t) + { + + gScalar *d_result; + 
gpuErrorCheck(hipMalloc(&d_result, sizeof(gScalar))); + hipLaunchKernelGGL + ( + on_device, dim3(1), dim3(1), 0, 0, t, d_result + ); + gpuErrorCheck(hipGetLastError()); + gpuErrorCheck(hipDeviceSynchronize()); + gScalar h_result; + gpuErrorCheck(hipMemcpy(&h_result, d_result, sizeof(gScalar), hipMemcpyDeviceToHost)); + gpuErrorCheck(hipDeviceSynchronize()); + gpuErrorCheck(hipFree(d_result)); + gpuErrorCheck(hipDeviceSynchronize()); + return h_result; + + } + +#endif + + + +TestData::constantResults constants(){ + + using namespace FoamGpu; + + TestData::constantResults ret; + ret.RR = eval([] DEVICE (){return gpuRR;}); + ret.Pstd = eval([] DEVICE (){return gpuPstd;}); + ret.Tstd = eval([] DEVICE (){return gpuTstd;}); + ret.NA = eval([] DEVICE (){return gpuNA;}); + ret.k = eval([] DEVICE (){return gpuk;}); + ret.vGreat = eval([] DEVICE (){return gpuVGreat;}); + ret.vSmall = eval([] DEVICE (){return gpuVSmall;}); + ret.small = eval([] DEVICE (){return gpuSmall;}); + ret.great = eval([] DEVICE (){return gpuGreat;}); + return ret; + +} + + +TestData::perfectGasResult perfect_gas(gScalar p, gScalar T, gScalar Y, gScalar molWeight) +{ + using namespace FoamGpu; + + const gpuPerfectGas eos + ( + Y, molWeight + ); + TestData::perfectGasResult ret; + + ret.R = eval([=] DEVICE (){return eos.R();}); + ret.rho = eval([=] DEVICE (){return eos.rho(p, T);}); + ret.h = eval([=] DEVICE (){return eos.H(p, T);}); + ret.Cp = eval([=] DEVICE (){return eos.Cp(p, T);}); + ret.e = eval([=] DEVICE (){return eos.E(p, T);}); + ret.Cv = eval([=] DEVICE (){return eos.Cv(p, T);}); + ret.sp = eval([=] DEVICE (){return eos.Sp(p, T);}); + //ret.sv = eval([=] DEVICE (){return eos.Sv(p, T);}); + ret.psi = eval([=] DEVICE (){return eos.psi(p, T);}); + ret.Z = eval([=] DEVICE (){return eos.Z(p, T);}); + ret.CpMCv = eval([=] DEVICE (){return eos.CpMCv(p, T);}); + ret.alphav = eval([=] DEVICE (){return eos.alphav(p, T);}); + + + return ret; +} + + +TestData::thermoResults thermo(TestData::Mechanism 
mech) +{ + using namespace FoamGpu; + + const auto p = TestData::pInf(mech); + const auto T = TestData::TInf(mech); + + auto thermos_temp = TestData::makeGpuThermos(mech); + auto thermos = toDeviceVector(thermos_temp); + + + const gLabel nThermo = thermos.size(); + + TestData::thermoResults ret(nThermo); + + for (size_t i = 0; i < thermos.size(); ++i) + { + gpuThermo* thermo = make_raw_pointer(thermos.data()) + i; + + ret.W[i] = eval([=] DEVICE (){return thermo->W();}); + ret.Y[i] = eval([=] DEVICE (){return thermo->Y();}); + ret.R[i] = eval([=] DEVICE (){return thermo->R();}); + ret.Cp[i] = eval([=] DEVICE (){return thermo->Cp(p, T);}); + ret.ha[i] = eval([=] DEVICE (){return thermo->Ha(p, T);}); + ret.hs[i] = eval([=] DEVICE (){return thermo->Hs(p, T);}); + ret.hf[i] = eval([=] DEVICE (){return thermo->Hf( );}); + ret.s[i] = eval([=] DEVICE (){return thermo->S(p, T);}); + ret.gStd[i] = eval([=] DEVICE (){return thermo->Gstd(T);}); + ret.dCpdT[i] = eval([=] DEVICE (){return thermo->dCpdT(p, T);}); + ret.Cv[i] = eval([=] DEVICE (){return thermo->Cv(p, T);}); + ret.es[i] = eval([=] DEVICE (){return thermo->Es(p, T);}); + ret.ea[i] = eval([=] DEVICE (){return thermo->Ea(p, T);}); + ret.K[i] = eval([=] DEVICE (){return thermo->K(p, T);}); + ret.Kp[i] = eval([=] DEVICE (){return thermo->Kp(p, T);}); + ret.Kc[i] = eval([=] DEVICE (){return thermo->Kc(p, T);}); + ret.dKcdTbyKc[i] = eval([=] DEVICE (){return thermo->dKcdTbyKc(p, T);}); + + } + + + return ret; + +} + + + +TestData::reactionResults reaction(TestData::Mechanism mech) +{ + + using namespace FoamGpu; + + auto reactions_temp = makeGpuReactions(mech); + auto reactions = toDeviceVector(reactions_temp); + + const gLabel nSpecie = TestData::speciesCount(mech); + const gLabel nEqns = TestData::equationCount(mech); + const size_t nReactions = reactions.size(); + const gScalar p = TestData::pInf(mech); + const gScalar T = TestData::TInf(mech); + + device_vector cc = + [&](){ + std::vector t(nSpecie); + fill_linear(t); 
+ return toDeviceVector(t); + }(); + + std::vector Thigh(nReactions); + std::vector Tlow(nReactions); + std::vector Kc(nReactions); + std::vector kf(nReactions); + std::vector kr(nReactions); + std::vector omega(nReactions); + std::vector> dNdtByV(nReactions); + std::vector> ddNdtByVdcTp(nReactions); + + for (size_t i = 0; i < nReactions; ++i){ + + gpuReaction* reaction = make_raw_pointer(reactions.data()) + i; + + Thigh[i] = eval([=] DEVICE (){return reaction->Thigh();}); + Tlow[i] = eval([=] DEVICE (){return reaction->Tlow();}); + + auto c = make_mdspan(cc, extents<1>{nSpecie}); + + Kc[i] = eval([=] DEVICE (){return reaction->Kc(p, T);}); + kf[i] = eval([=] DEVICE (){return reaction->kf(p, T, c);}); + kr[i] = eval([=] DEVICE (){return reaction->kr(p, T, c);}); + omega[i] = eval([=] DEVICE (){return reaction->omega(p, T, c);}); + + + + ///==================dNdtByV================== + device_vector dNdtByV_i = toDeviceVector(std::vector(nSpecie, 0)); + auto f = + [ + =, + res = make_mdspan(dNdtByV_i, extents<1>{nSpecie}) + ] DEVICE + (){ + reaction->dNdtByV(p, T, c, res); + return 0; + }; + eval(f); + dNdtByV[i] = toStdVector(dNdtByV_i); + + + + ///==================ddNdtByVdcTp================== + device_vector ddNdtByVdcTp_i = toDeviceVector(std::vector(nEqns*nEqns, 0)); + device_vector cTpWork0_i(nSpecie); + auto f2 = + [ + =, + res = make_mdspan(ddNdtByVdcTp_i, extents<2>{nEqns, nEqns}), + cTpWork0 = make_mdspan(cTpWork0_i, extents<1>{nSpecie}) + ] DEVICE + () + { + auto params = computeReactionParameters(*reaction, c, p, T, cTpWork0); + + reaction->ddNdtByVdcTp + ( + p, + T, + c, + res, + params + + ); + + return 0; + }; + + eval(f2); + ddNdtByVdcTp[i] = toStdVector(ddNdtByVdcTp_i); + + + } + + TestData::reactionResults ret; + ret.Thigh = Thigh; + ret.Tlow = Tlow; + ret.Kc = Kc; + ret.kf = kf; + ret.kr = kr; + ret.omega = omega; + ret.dNdtByV = dNdtByV; + ret.ddNdtByVdcTp = ddNdtByVdcTp; + return ret; + +} + + +std::tuple, std::vector, std::vector> +lu(const 
std::vector& m_vals, const std::vector& s_vals) +{ + + gLabel size = std::sqrt(m_vals.size()); + + device_vector matrix(m_vals.begin(), m_vals.end()); + device_vector pivot = toDeviceVector(std::vector(size, 0)); + //device_vector pivot(size, 0); + //device_vector v(size, 0); + device_vector v = toDeviceVector(std::vector(size, 0)); + device_vector source(s_vals.begin(), s_vals.end()); + + auto m_span = make_mdspan(matrix, extents<2>{size, size}); + auto p_span = make_mdspan(pivot, extents<1>{size}); + auto v_span = make_mdspan(v, extents<1>{size}); + + eval + ( + [=] DEVICE (){FoamGpu::LUDecompose(m_span, p_span, v_span); return 0;} + ); + + auto s_span = make_mdspan(source, extents<1>{size}); + + eval + ( + [=] DEVICE (){FoamGpu::LUBacksubstitute(m_span, p_span, s_span); return 0;} + ); + + + auto m_ret = toStdVector(matrix); + auto p_ret = toStdVector(pivot); + auto s_ret = toStdVector(source); + + return std::make_tuple(m_ret, p_ret, s_ret); + +} + + +TestData::odeSystemResults odesystem(TestData::Mechanism mech) +{ + using namespace FoamGpu; + + auto gpu_thermos = toDeviceVector(makeGpuThermos(mech)); + auto gpu_reactions = toDeviceVector(makeGpuReactions(mech)); + const auto nEqns = TestData::equationCount(mech); + const auto nSpecie = TestData::speciesCount(mech); + + gpuODESystem gpu + ( + nEqns, + gLabel(gpu_reactions.size()), + make_raw_pointer(gpu_thermos.data()), + make_raw_pointer(gpu_reactions.data()) + ); + + + const device_vector y0_v + = [&](){ + + std::vector t(nEqns); + fill_linear(t); + t[nSpecie] = TestData::TInf(mech); + t[nSpecie + 1] = TestData::pInf(mech); + return toDeviceVector(t); + }(); + + + + TestData::odeSystemResults ret; + + { + memoryResource_t memory(1, nSpecie); + auto buffers = toDeviceVector(splitToBuffers(memory)); + + device_vector dy_v(nEqns); + auto f = + [ + =, + buffers = make_mdspan(buffers, extents<1>{1}), + y = make_mdspan(y0_v, extents<1>{nEqns}), + dy = make_mdspan(dy_v, extents<1>{nEqns}) + ] DEVICE + () + { + 
gpu.derivatives(y, dy, buffers[0]); + return 0; + }; + eval(f); + ret.derivative = toStdVector(dy_v); + + } + + { + + memoryResource_t memory(1, nSpecie); + auto buffers = toDeviceVector(splitToBuffers(memory)); + device_vector dy_v(nEqns); + device_vector J_v(nEqns*nEqns); + + auto f = + [ + =, + buffers = make_mdspan(buffers, extents<1>{1}), + y = make_mdspan(y0_v, extents<1>{nEqns}), + dy = make_mdspan(dy_v, extents<1>{nEqns}), + J = make_mdspan(J_v, extents<2>{nEqns, nEqns}) + ] DEVICE + () + { + gpu.jacobian(y, dy, J, buffers[0]); + return 0; + }; + eval(f); + + ret.jacobian = toStdVector(J_v); + + } + + + + return ret; + +} + + +std::vector ode_solve(TestData::Mechanism mech, std::string solver_name, gScalar xStart, gScalar xEnd, gScalar dxTry) +{ + using namespace FoamGpu; + + auto thermos = toDeviceVector(makeGpuThermos(mech)); + auto reactions = toDeviceVector(makeGpuReactions(mech)); + + const gLabel nEqns = TestData::equationCount(mech); + const gLabel nSpecie = TestData::speciesCount(mech); + + gpuODESystem system + ( + nEqns, + gLabel(reactions.size()), + make_raw_pointer(thermos.data()), + make_raw_pointer(reactions.data()) + ); + + gpuODESolverInputs i = TestData::makeGpuODEInputs(solver_name, mech); + + + auto solver = make_gpuODESolver(system, i); + + /* + device_vector y0_v + = [&](){ + + std::vector t(nEqns); + fill_linear(t); + t[nSpecie] = TestData::TInf(mech); + t[nSpecie + 1] = TestData::pInf(mech); + return toDeviceVector(t); + }(); + */ + + device_vector y0_v = toDeviceVector(TestData::get_solution_vector(mech)); + + memoryResource_t memory(1, nSpecie); + auto buffers = toDeviceVector(splitToBuffers(memory)); + + auto f = [ + ode = solver, + xStart = xStart, + xEnd = xEnd, + y = make_mdspan(y0_v, extents<1>{nEqns}), + dxTry = dxTry, + buffers = make_mdspan(buffers, extents<1>{1}) + ] DEVICE () + { + gScalar dxTry_temp = dxTry; + ode.solve(xStart, xEnd, y, dxTry_temp, buffers[0]); + return dxTry_temp; + }; + + eval(f); + + return 
toStdVector(y0_v); + +} + + +bool test_for_each_index(){ + + using namespace FoamGpu; + + device_vector v1 = toDeviceVector(std::vector(100, 1.0)); + device_vector v2 = toDeviceVector(std::vector(100, 2.0)); + device_vector v3 = toDeviceVector(std::vector(100, 3.0)); + + + + auto op = [v1 = v1.data(), v2 = v2.data(), v3 = v3.data()] DEVICE (gLabel idx){ + + v1[idx] = v2[idx] + v3[idx] + 4.0; + + }; + + for_each_index(op, 100); + + std::vector correct(100, 2.0 + 3.0 + 4.0); + return toStdVector(v1) == correct; + +} + +/* NOTE(review): test_evaluator is disabled here but is still declared in gpu_test_kernels.H, so any caller will fail at link time until this definition is restored. +bool test_evaluator(gLabel nCells){ + + using namespace FoamGpu; + + const auto m = TestData::GRI; + auto thermos = TestData::makeGpuThermos(m); + auto reactions = TestData::makeGpuReactions(m); + gLabel nSpecie = TestData::speciesCount(m); + gLabel nEqns = TestData::equationCount(m); + gpuODESolverInputs inputs; + inputs.name = "Rosenbrock34"; + inputs.absTol = 1E-6; + inputs.relTol = 1e-1; + GpuKernelEvaluator evaluator(nCells, nEqns, nSpecie, thermos, reactions, inputs); + + gScalar deltaT = 1e-5; + gScalar deltaTChemMax = deltaT/4; + std::vector deltaTChem(nCells, deltaT/10); + + + std::vector rho(nCells, 1.0); + + + std::vector Yvf(nCells*nEqns); + + auto s = make_mdspan(Yvf, extents<2>{nCells, nEqns}); + + auto Yi = TestData::get_solution_vector(m); + + for (gLabel celli = 0; celli < nCells; ++celli) { + for (gLabel i = 0; i < nEqns; ++i){ + s(celli, i) = Yi[i]; + } + + } + + auto tuple = evaluator.computeYNew + ( + deltaT, + deltaTChemMax, + deltaTChem, + Yvf + ); + + auto newY = std::get<0>(tuple); + auto newDeltaTs = std::get<1>(tuple); // was get<0> (same element as newY); presumably element 1 holds the updated chemistry time steps - confirm against computeYNew + + + + auto s2 = make_mdspan(newY, extents<2>{nCells, nEqns}); + + for (gLabel i = 0; i < nEqns; ++i){ + std::cout << s2(0, i) << std::endl; + } + + + return newY[0] != 0; + + + + //return true; + + +} +*/ + + +} + + + diff --git a/gpu_chemistry/unittest/testHelpers/Make/files b/gpu_chemistry/unittest/testHelpers/Make/files index e1b8a1a..68451ef 100644 --- a/gpu_chemistry/unittest/testHelpers/Make/files 
+++ b/gpu_chemistry/unittest/testHelpers/Make/files @@ -1,6 +1,4 @@ -testHelpers.C create_foam_inputs.C create_gpu_inputs.C - LIB = $(FOAM_USER_LIBBIN)/libGpuChemistryTestHelpers diff --git a/gpu_chemistry/unittest/testHelpers/create_foam_inputs.H b/gpu_chemistry/unittest/testHelpers/create_foam_inputs.H index 27a2d4f..4f7ff3f 100644 --- a/gpu_chemistry/unittest/testHelpers/create_foam_inputs.H +++ b/gpu_chemistry/unittest/testHelpers/create_foam_inputs.H @@ -1,7 +1,7 @@ #pragma once #include "mechanisms.H" -#include "testThermoType.H" +#include "foam_thermo_types.H" #include "speciesTable.H" diff --git a/gpu_chemistry/unittest/testHelpers/testThermoType.H b/gpu_chemistry/unittest/testHelpers/foam_thermo_types.H similarity index 100% rename from gpu_chemistry/unittest/testHelpers/testThermoType.H rename to gpu_chemistry/unittest/testHelpers/foam_thermo_types.H diff --git a/gpu_chemistry/unittest/testHelpers/mechanisms.H b/gpu_chemistry/unittest/testHelpers/mechanisms.H index cb8b603..896be58 100644 --- a/gpu_chemistry/unittest/testHelpers/mechanisms.H +++ b/gpu_chemistry/unittest/testHelpers/mechanisms.H @@ -17,6 +17,18 @@ static inline gLabel equationCount(Mechanism m) { return speciesCount(m) + 2; } +static inline gScalar pInf(Mechanism m){ + if (m == Mechanism::GRI) { + return 1.36789e+06; + } else { + return 202650.0; + } +} + +static inline gScalar TInf(Mechanism m){ + return 1000.0; +} + template static inline void assign_gri(T& f) { constexpr int CH4_idx = 0; @@ -26,8 +38,8 @@ template static inline void assign_gri(T& f) { constexpr double CH4_val = 0.2; constexpr double O2_val = 0.5; constexpr double N2_val = 0.3; - constexpr double T_inf = 1000.0; - constexpr double p_inf = 1.36789e+06; + double T_inf = TInf(Mechanism::GRI); + double p_inf = pInf(Mechanism::GRI); gLabel nSpecie = speciesCount(GRI); @@ -49,8 +61,8 @@ template static inline void assign_h2(T& f) { constexpr double H2_val = 0.2; constexpr double O2_val = 0.2; constexpr double N2_val = 0.6; - 
constexpr double T_inf = 1000.0; - constexpr double p_inf = 202650; + double T_inf = TInf(Mechanism::H2); + double p_inf = pInf(Mechanism::H2); gLabel nSpecie = speciesCount(H2); @@ -98,4 +110,24 @@ static inline void assign_test_concentration(T& f, Mechanism m) } +static inline std::vector get_concentration_vector(Mechanism m){ + + gLabel nSpecie = speciesCount(m); + std::vector c(nSpecie); + assign_test_concentration(c, m); + return c; + +} + +static inline std::vector get_solution_vector(Mechanism m){ + + gLabel nEqns = equationCount(m); + std::vector y(nEqns); + assign_test_condition(y, m); + return y; + +} + + + } // namespace TestData diff --git a/gpu_chemistry/unittest/testHelpers/results.H b/gpu_chemistry/unittest/testHelpers/results.H new file mode 100644 index 0000000..41da112 --- /dev/null +++ b/gpu_chemistry/unittest/testHelpers/results.H @@ -0,0 +1,88 @@ +#pragma once + +#include +#include "gpu_constants.H" + + +namespace TestData{ + +struct constantResults{ + gScalar RR; + gScalar Pstd; + gScalar Tstd; + gScalar NA; + gScalar k; + gScalar vGreat; + gScalar vSmall; + gScalar small; + gScalar great; + +}; + +struct perfectGasResult +{ + gScalar R; + gScalar rho; + gScalar h; + gScalar Cp; + gScalar e; + gScalar Cv; + gScalar sp; + //gScalar sv; + gScalar psi; + gScalar Z; + gScalar CpMCv; + gScalar alphav; + +}; + +struct thermoResults{ + + thermoResults() = default; + + thermoResults(gLabel n) : + W(n), Y(n), R(n), Cp(n), ha(n), hs(n), hf(n), s(n), gStd(n), dCpdT(n), + Cv(n), es(n), ea(n), K(n), Kp(n), Kc(n), dKcdTbyKc(n) + {} + + std::vector W; + std::vector Y; + std::vector R; + std::vector Cp; + std::vector ha; + std::vector hs; + std::vector hf; + std::vector s; + std::vector gStd; + std::vector dCpdT; + std::vector Cv; + std::vector es; + std::vector ea; + std::vector K; + std::vector Kp; + std::vector Kc; + std::vector dKcdTbyKc; + +}; + +struct reactionResults{ + + std::vector Thigh; + std::vector Tlow; + std::vector Kc; + std::vector kf; + 
std::vector kr; + std::vector omega; + std::vector> dNdtByV; + std::vector> ddNdtByVdcTp; +}; + +struct odeSystemResults{ + + std::vector derivative; + std::vector jacobian; + +}; + + +} \ No newline at end of file diff --git a/gpu_chemistry/unittest/testHelpers/testHelpers.C b/gpu_chemistry/unittest/testHelpers/testHelpers.C deleted file mode 100644 index 09e34d4..0000000 --- a/gpu_chemistry/unittest/testHelpers/testHelpers.C +++ /dev/null @@ -1,3 +0,0 @@ -#include "test_utilities.H" -#include "create_gpu_inputs.H" -#include "create_foam_inputs.H" diff --git a/gpu_chemistry/unittest/testHelpers/test_utilities.H b/gpu_chemistry/unittest/testHelpers/test_utilities.H index d90d35d..a5ef04b 100644 --- a/gpu_chemistry/unittest/testHelpers/test_utilities.H +++ b/gpu_chemistry/unittest/testHelpers/test_utilities.H @@ -1,99 +1,45 @@ #pragma once -#include -#include -#include "error_handling.H" +#include //rand -#include "host_device_vectors.H" -#include "gpuThermo.H" -#include "gpuMemoryResource.H" -#include "cpuMemoryResource.H" - - - -#ifdef __NVIDIA_COMPILER__ - - -using memoryResource_t = FoamGpu::gpuMemoryResource; - -#else - - -using memoryResource_t = FoamGpu::cpuMemoryResource; - -#endif - - - -//Note, here the "error" is a comparison against a cpu result. -//When compiled with an nvidia compiler, the arithmetic operations are computed -//differently since nvcc and nvc++ use fused multiply add (fma) in many places. -//Therefore a slightly more tolerance is allowed when comparing gpu result agains -//cpu result. For better match use the -nofma switch. However, we want to allow -//for the compiler to do optimizations. 
-#ifdef __NVIDIA_COMPILER__ -constexpr double errorTol = 1E-7; -#else -constexpr double errorTol = 1E-9; -#endif - -#ifdef __NVIDIA_COMPILER__ - - -template -__global__ void on_device(T t, R* r) -{ - *r = t(); +static inline double random_number(double LO, double HI){ + double r = LO + static_cast (rand()) /( static_cast (RAND_MAX/(HI-LO))); + return r; } - template -static inline gScalar eval(T t) +static inline void fill_random(T& v, double LO = 0.0, double HI = 1.0) { - gScalar *d_result; - gpuErrorCheck(cudaMalloc(&d_result, sizeof(gScalar))); - on_device<<<1,1>>>(t, d_result); - gpuErrorCheck(cudaGetLastError()) - gpuErrorCheek(cudaDeviceSynchronize()); - gScalar h_result; - gpuErrorCheck(cudaMemcpy(&h_result, d_result, sizeof(gScalar), cudaMemcpyDeviceToHost)); - gpuErrorCheck(cudaDeviceSynchronize()); - gpuErrorCheck(cudaFree(d_result)); - gpuErrorCheck(cudaDeviceSynchronize()); - return h_result; -} - -#else + for (auto& e : v) + { + e = random_number(LO, HI); + } -template -static inline gScalar eval(T t) -{ - return t(); } -#endif - +template +static inline void fill_linear(T& v){ + double dx = 1.0 / v.size(); + for (int i = 0; i < static_cast(v.size()); ++i){ + v[i] = dx*i + dx; + } -static inline double random_number(double LO, double HI){ - double r = LO + static_cast (rand()) /( static_cast (RAND_MAX/(HI-LO))); - return r; } -template -static inline void fill_random(T& v, double LO = 0.0, double HI = 1.0) -{ - +template +static inline void remove_negative(C& c, double tolerance){ - for (auto& e : v) - { - e = random_number(LO, HI); + for (auto& e : c){ + if (e < tolerance){ + e = double(0); + } } } diff --git a/gpu_chemistry/unittest/tests/Make/files b/gpu_chemistry/unittest/tests/Make/files index 1121bde..f0127e3 100644 --- a/gpu_chemistry/unittest/tests/Make/files +++ b/gpu_chemistry/unittest/tests/Make/files @@ -1,12 +1,3 @@ -Test-eos.C -Test-thermo.C -Test-reaction.C -Test-memoryResource.C -Test-ludecompose.C -Test-odeSystem.C -Test-ode.C 
-Test-evaluator.C -Test-utilities.C - -EXE = $(FOAM_USER_APPBIN)/testGpuChemistry +Test-gpuChemistry.C +EXE = $(FOAM_USER_APPBIN)/Test-gpuChemistry diff --git a/gpu_chemistry/unittest/tests/Make/options b/gpu_chemistry/unittest/tests/Make/options index d98a704..2ee1001 100644 --- a/gpu_chemistry/unittest/tests/Make/options +++ b/gpu_chemistry/unittest/tests/Make/options @@ -1,5 +1,8 @@ EXE_INC = \ -I../testHelpers/lnInclude \ + -I../OpenFOAMReferenceKernels/lnInclude \ + -I../gpuTestKernels/lnInclude \ + -I../cpuTestKernels/lnInclude \ -I../../catchMain/lnInclude \ -I../../src/lnInclude \ -I../../src/gpuChemistryModels/lnInclude \ @@ -22,6 +25,10 @@ EXE_LIBS = \ -L$(FOAM_USER_LIBBIN) \ -lGpuChemistryTestHelpers \ -lGpuKernelEvaluator \ + -lGpuFoamCpuTestKernels \ + -lGpuFoamGpuTestKernels \ + -lGpuFoamOpenFOAMReferenceKernels \ + -lGpuFoamCpuResults \ -lCatchMain \ -lmeshTools \ -lthermophysicalProperties \ @@ -34,5 +41,3 @@ EXE_LIBS = \ -lfiniteVolume \ -lchemistryModel \ -lblockMesh - -#include ../nvcpp \ No newline at end of file diff --git a/gpu_chemistry/unittest/tests/Test-eos.C b/gpu_chemistry/unittest/tests/Test-eos.C deleted file mode 100644 index 70a6015..0000000 --- a/gpu_chemistry/unittest/tests/Test-eos.C +++ /dev/null @@ -1,148 +0,0 @@ -#include "catch.H" - - -#include "test_utilities.H" -#include "create_foam_inputs.H" -#include "create_gpu_inputs.H" - -#include "thermodynamicConstants.H" -#include "fundamentalConstants.H" -#include "physicoChemicalConstants.H" -#include "specieExponent.H" - -TEST_CASE("Test gpuConstans") -{ - SECTION("Physical") - { - - CHECK(eval([](){return gpuRR;}) == Foam::constant::thermodynamic::RR); - CHECK(eval([](){return gpuPstd;}) == Foam::constant::thermodynamic::Pstd); - CHECK(eval([](){return gpuTstd;}) == Foam::constant::thermodynamic::Tstd); - CHECK(eval([](){return gpuNA;}) == Foam::constant::physicoChemical::NA.value()); - CHECK(eval([](){return gpuk;}) == Foam::constant::physicoChemical::k.value()); - - } - - 
SECTION("Numeric") - { - CHECK(eval([](){return gpuVGreat;}) == Foam::vGreat); - CHECK(eval([](){return gpuVSmall;}) == Foam::vSmall); - CHECK(eval([](){return gpuSmall;}) == Foam::small); - CHECK(eval([](){return gpuGreat;}) == Foam::great); - } -} - - -TEST_CASE("Test perfectGas"){ - - using namespace FoamGpu; - - SECTION("Constructors") - { - REQUIRE_NOTHROW(gpuPerfectGas()); - } - - - gScalar molWeight = 0.32; - gScalar Y = 0.1; - - const Foam::perfectGas cpu - ( - Foam::specie("temp", Y, molWeight) - //specie(dict) //Can not construct from dict because of implicit conversions... - ); - - const gpuPerfectGas gpu - ( - Y, molWeight - ); - - - SECTION("thermo properties") - { - const gScalar p = 1E5; - const gScalar T = 3542.324; - - CHECK(eval([=](){return gpu.R();}) == Approx(cpu.R()).epsilon(errorTol)); - CHECK(eval([=](){return gpu.rho(p, T);}) == Approx(cpu.rho(p, T)).epsilon(errorTol)); - CHECK(eval([=](){return gpu.H(p, T);}) == Approx(cpu.h(p, T)).epsilon(errorTol)); - CHECK(eval([=](){return gpu.Cp(p, T);}) == Approx(cpu.Cp(p, T)).epsilon(errorTol)); - CHECK(eval([=](){return gpu.E(p, T);}) == Approx(cpu.e(p, T)).epsilon(errorTol)); - CHECK(eval([=](){return gpu.Cv(p, T);}) == Approx(cpu.Cv(p, T)).epsilon(errorTol)); - CHECK(eval([=](){return gpu.Sp(p, T);}) == Approx(cpu.sp(p, T)).epsilon(errorTol)); - //CHECK(eval([=](){return gpu.Sv(p, T);}) == cpu.Sv(p, T)); //throws - CHECK(eval([=](){return gpu.psi(p, T);}) == Approx(cpu.psi(p, T)).epsilon(errorTol)); - CHECK(eval([=](){return gpu.Z(p, T);}) == Approx(cpu.Z(p, T)).epsilon(errorTol)); - CHECK(eval([=](){return gpu.CpMCv(p, T);}) == Approx(cpu.CpMCv(p, T)).epsilon(errorTol)); - CHECK(eval([=](){return gpu.alphav(p, T);}) == Approx(cpu.alphav(p, T)).epsilon(errorTol)); - - } - - - //Arithmetic operations are not run on kernels, only upon construction. 
- //Therefore only tested here on the host - - SECTION("operator+=") - { - - SECTION("self assignment") - { - auto gpu1(gpu); - auto cpu1(cpu); - - gpu1 += gpu1; - cpu1 += cpu1; - - CHECK(gpu1.W() == cpu1.W()); - CHECK(gpu1.Y() == cpu1.Y()); - } - - SECTION("non-self assignment") - { - auto gpu1(gpu); - auto cpu1(cpu); - auto gpu2(gpu); - auto cpu2(cpu); - - gpu1 += gpu2; - cpu1 += cpu2; - - CHECK(gpu1.W() == cpu1.W()); - CHECK(gpu1.Y() == cpu1.Y()); - } - - } - - SECTION("operator+") - { - auto gpu1(gpu); - auto cpu1(cpu); - auto gpu2(gpu); - auto cpu2(cpu); - - CHECK((gpu1+gpu2).W() == (cpu1+cpu2).W()); - CHECK((gpu1+gpu2).Y() == (cpu1+cpu2).Y()); - - } - SECTION("operator*") - { - auto gpu1(gpu); - auto cpu1(cpu); - CHECK((3*gpu1).W() == (3*cpu1).W()); - CHECK((3*gpu1).Y() == (3*cpu1).Y()); - - } - - SECTION("operator==") - { - auto gpu1(gpu); - auto cpu1(cpu); - auto gpu2(gpu); - auto cpu2(cpu); - - CHECK((gpu1==gpu2).W() == (cpu1==cpu2).W()); - CHECK((gpu1==gpu2).Y() == (cpu1==cpu2).Y()); - - } - -} \ No newline at end of file diff --git a/gpu_chemistry/unittest/tests/Test-evaluator.C b/gpu_chemistry/unittest/tests/Test-evaluator.C deleted file mode 100644 index c8b6ec1..0000000 --- a/gpu_chemistry/unittest/tests/Test-evaluator.C +++ /dev/null @@ -1,36 +0,0 @@ -#include "catch.H" - - -#include "gpuKernelEvaluator.H" -#include "test_utilities.H" -#include "create_foam_inputs.H" -#include "create_gpu_inputs.H" - -TEST_CASE("Test GpuKernelEvaluator") -{ - using namespace FoamGpu; - - SECTION("Constructors") - { - REQUIRE_NOTHROW(GpuKernelEvaluator()); - - const auto m = TestData::GRI; - auto thermos = TestData::makeGpuThermos(m); - auto reactions = TestData::makeGpuReactions(m); - gLabel nSpecie = TestData::speciesCount(m); - gLabel nEqns = TestData::equationCount(m); - gLabel nCells = 10; - gpuODESolverInputs inputs; - inputs.name = "Rosenbrock34"; - - REQUIRE_NOTHROW(GpuKernelEvaluator(nCells, nEqns, nSpecie, thermos, reactions, inputs)); - - - - - } - - - - -} 
\ No newline at end of file diff --git a/gpu_chemistry/unittest/tests/Test-gpuChemistry.C b/gpu_chemistry/unittest/tests/Test-gpuChemistry.C new file mode 100644 index 0000000..8e6a776 --- /dev/null +++ b/gpu_chemistry/unittest/tests/Test-gpuChemistry.C @@ -0,0 +1,807 @@ +#include "catch.H" + +#include "openfoam_reference_kernels.H" +#include "gpu_test_kernels.H" +#include "cpu_test_kernels.H" +#include "test_utilities.H" +#include "mechanisms.H" +#include "mdspan.H" + + +TEST_CASE("make_mdspan", "[CPU]"){ + + std::vector v = {1,2,3,4,5,6}; + auto s = make_mdspan(v, extents<2>(2,3)); + CHECK(s(0,0) == 1); + + CHECK(s.size() == 2*3); + +} + + + +TEST_CASE("Test gpuConstants (on CPU)", "[CPU]") +{ + auto reference = OFReferenceKernels::constants(); + auto test_result = CpuTestKernels::constants(); + + SECTION("Physical") + { + CHECK(test_result.RR == reference.RR); + CHECK(test_result.Pstd == reference.Pstd); + CHECK(test_result.Tstd == reference.Tstd); + CHECK(test_result.NA == reference.NA); + CHECK(test_result.k == reference.k); + + } + + SECTION("Numeric") + { + CHECK(test_result.vGreat == reference.vGreat); + CHECK(test_result.vSmall == reference.vSmall); + CHECK(test_result.small == reference.small); + CHECK(test_result.great == reference.great); + + } +} + +TEST_CASE("Test gpuConstants (on GPU)", "[GPU]") +{ + auto reference = OFReferenceKernels::constants(); + auto test_result = GpuTestKernels::constants(); + + SECTION("Physical") + { + CHECK(test_result.RR == reference.RR); + CHECK(test_result.Pstd == reference.Pstd); + CHECK(test_result.Tstd == reference.Tstd); + CHECK(test_result.NA == reference.NA); + CHECK(test_result.k == reference.k); + + } + + SECTION("Numeric") + { + CHECK(test_result.vGreat == reference.vGreat); + CHECK(test_result.vSmall == reference.vSmall); + CHECK(test_result.small == reference.small); + CHECK(test_result.great == reference.great); + + } +} + + + +TEST_CASE("Test perfectGas (on CPU)", "[CPU]"){ + + using namespace FoamGpu; + + 
SECTION("thermo properties") + { + const gScalar p = 1E5; + const gScalar T = 3542.324; + const gScalar molWeight = 0.32; + const gScalar Y = 0.1; + + auto reference = OFReferenceKernels::perfect_gas(p, T, Y, molWeight); + auto test_result = CpuTestKernels::perfect_gas(p, T, Y, molWeight); + + CHECK(test_result.R == reference.R); + CHECK(test_result.rho == reference.rho); + CHECK(test_result.h == reference.h); + CHECK(test_result.Cp == reference.Cp); + CHECK(test_result.e == reference.e); + CHECK(test_result.Cv == reference.Cv); + CHECK(test_result.sp == reference.sp); + CHECK(test_result.psi == reference.psi); + CHECK(test_result.Z == reference.Z); + CHECK(test_result.CpMCv == reference.CpMCv); + CHECK(test_result.alphav == reference.alphav); + + } + +} + +TEST_CASE("Test perfectGas (on GPU)", "[GPU]"){ + + using namespace FoamGpu; + + SECTION("thermo properties") + { + const gScalar p = 1E5; + const gScalar T = 3542.324; + const gScalar molWeight = 0.32; + const gScalar Y = 0.1; + + auto reference = OFReferenceKernels::perfect_gas(p, T, Y, molWeight); + auto test_result = GpuTestKernels::perfect_gas(p, T, Y, molWeight); + + CHECK(test_result.R == reference.R); + CHECK(test_result.rho == reference.rho); + CHECK(test_result.h == reference.h); + CHECK(test_result.Cp == reference.Cp); + CHECK(test_result.e == reference.e); + CHECK(test_result.Cv == reference.Cv); + CHECK(test_result.sp == reference.sp); + CHECK(test_result.psi == reference.psi); + CHECK(test_result.Z == reference.Z); + CHECK(test_result.CpMCv == reference.CpMCv); + CHECK(test_result.alphav == reference.alphav); + + } + +} + + + + +static inline void thermoTests(TestData::thermoResults& test_result, TestData::thermoResults& reference, double errorTol) +{ + + CHECK_THAT + ( + reference.W, + Catch::Matchers::Approx(test_result.W).epsilon(errorTol) + ); + CHECK_THAT + ( + reference.Y, + Catch::Matchers::Approx(test_result.Y).epsilon(errorTol) + ); + CHECK_THAT + ( + reference.R, + 
Catch::Matchers::Approx(test_result.R).epsilon(errorTol) + ); + CHECK_THAT + ( + reference.Cp, + Catch::Matchers::Approx(test_result.Cp).epsilon(errorTol) + ); + CHECK_THAT + ( + reference.ha, + Catch::Matchers::Approx(test_result.ha).epsilon(errorTol) + ); + CHECK_THAT + ( + reference.hs, + Catch::Matchers::Approx(test_result.hs).epsilon(errorTol) + ); + + + remove_negative(reference.hf, errorTol); + remove_negative(test_result.hf, errorTol); + + CHECK_THAT + ( + reference.hf, + Catch::Matchers::Approx(test_result.hf).epsilon(errorTol) + ); + + CHECK_THAT + ( + reference.s, + Catch::Matchers::Approx(test_result.s).epsilon(errorTol) + ); + CHECK_THAT + ( + reference.gStd, + Catch::Matchers::Approx(test_result.gStd).epsilon(errorTol) + ); + CHECK_THAT + ( + reference.dCpdT, + Catch::Matchers::Approx(test_result.dCpdT).epsilon(errorTol) + ); + CHECK_THAT + ( + reference.Cv, + Catch::Matchers::Approx(test_result.Cv).epsilon(errorTol) + ); + CHECK_THAT + ( + reference.es, + Catch::Matchers::Approx(test_result.es).epsilon(errorTol) + ); + CHECK_THAT + ( + reference.ea, + Catch::Matchers::Approx(test_result.ea).epsilon(errorTol) + ); + CHECK_THAT + ( + reference.K, + Catch::Matchers::Approx(test_result.K).epsilon(errorTol) + ); + CHECK_THAT + ( + reference.Kp, + Catch::Matchers::Approx(test_result.Kp).epsilon(errorTol) + ); + CHECK_THAT + ( + reference.Kc, + Catch::Matchers::Approx(test_result.Kc).epsilon(errorTol) + ); + CHECK_THAT + ( + reference.dKcdTbyKc, + Catch::Matchers::Approx(test_result.dKcdTbyKc).epsilon(errorTol) + ); + + +} + +TEST_CASE("Test gpuThermo (on CPU)", "[CPU]") +{ + + constexpr double errorTol = 1E-9; + SECTION("GRI") + { + auto test_result = CpuTestKernels::thermo(TestData::GRI); + auto reference = OFReferenceKernels::thermo(TestData::GRI); + thermoTests(test_result, reference, errorTol); + } + + SECTION("H2") + { + auto test_result = CpuTestKernels::thermo(TestData::H2); + auto reference = OFReferenceKernels::thermo(TestData::H2); + 
thermoTests(test_result, reference, errorTol); + } + +} + +TEST_CASE("Test gpuThermo (on GPU)", "[GPU]") +{ + + constexpr double errorTol = 1E-7; + SECTION("GRI") + { + auto test_result = GpuTestKernels::thermo(TestData::GRI); + auto reference = OFReferenceKernels::thermo(TestData::GRI); + thermoTests(test_result, reference, errorTol); + } + + SECTION("H2") + { + auto test_result = GpuTestKernels::thermo(TestData::H2); + auto reference = OFReferenceKernels::thermo(TestData::H2); + thermoTests(test_result, reference, errorTol); + } + +} + + + + + + + + + +static inline void reactionTests(const TestData::reactionResults& test_result, const TestData::reactionResults& reference, double errorTol) +{ + + using namespace FoamGpu; + + CHECK_THAT + ( + reference.Thigh, + Catch::Matchers::Approx(test_result.Thigh).epsilon(errorTol) + ); + CHECK_THAT + ( + reference.Tlow, + Catch::Matchers::Approx(test_result.Tlow).epsilon(errorTol) + ); + CHECK_THAT + ( + reference.Kc, + Catch::Matchers::Approx(test_result.Kc).epsilon(errorTol) + ); + CHECK_THAT + ( + reference.kf, + Catch::Matchers::Approx(test_result.kf).epsilon(errorTol) + ); + CHECK_THAT + ( + reference.kr, + Catch::Matchers::Approx(test_result.kr).epsilon(errorTol) + ); + CHECK_THAT + ( + reference.omega, + Catch::Matchers::Approx(test_result.omega).epsilon(errorTol) + ); + + + + for (size_t i = 0; i < reference.dNdtByV.size(); ++i) + { + REQUIRE_THAT + ( + reference.dNdtByV[i], + Catch::Matchers::Approx(test_result.dNdtByV[i]).epsilon(errorTol) + ); + } + + + for (size_t i = 0; i < reference.ddNdtByVdcTp.size(); ++i) + { + REQUIRE_THAT + ( + reference.ddNdtByVdcTp[i], + Catch::Matchers::Approx(test_result.ddNdtByVdcTp[i]).epsilon(errorTol) + ); + } + + +} + +TEST_CASE("Test gpuReaction (on CPU)", "[CPU]") +{ + constexpr double errorTol = 1E-9; + + + SECTION("GRI") + { + + auto test_result = CpuTestKernels::reaction(TestData::GRI); + auto reference = OFReferenceKernels::reaction(TestData::GRI); + 
reactionTests(test_result, reference, errorTol); + } + + SECTION("H2") + { + + auto test_result = CpuTestKernels::reaction(TestData::H2); + auto reference = OFReferenceKernels::reaction(TestData::H2); + reactionTests(test_result, reference, errorTol); + } + + +} + +TEST_CASE("Test gpuReaction (on GPU)", "[GPU]") +{ + constexpr double errorTol = 1E-9; + + SECTION("GRI") + { + + auto test_result = GpuTestKernels::reaction(TestData::GRI); + auto reference = OFReferenceKernels::reaction(TestData::GRI); + reactionTests(test_result, reference, errorTol); + } + + SECTION("H2") + { + + auto test_result = GpuTestKernels::reaction(TestData::H2); + auto reference = OFReferenceKernels::reaction(TestData::H2); + reactionTests(test_result, reference, errorTol); + } + + +} + + + +TEST_CASE("Test ludecompose (on CPU)", "[CPU]") +{ + using namespace FoamGpu; + + constexpr double errorTol = 1E-9; + + + for (int i = 3; i < 50; ++i) + { + int size = i; + + + std::vector vals(size*size); + fill_random(vals); + std::vector source(size, 1); + + + auto [m_test, p_test, s_test] = CpuTestKernels::lu(vals, source); + auto [m_ref, p_ref, s_ref] = OFReferenceKernels::lu(vals, source); + + REQUIRE_THAT + ( + m_test, + Catch::Matchers::Approx(m_ref).epsilon(errorTol) + ); + REQUIRE_THAT + ( + p_test, + Catch::Matchers::Approx(p_ref).epsilon(errorTol) + ); + REQUIRE_THAT + ( + s_test, + Catch::Matchers::Approx(s_ref).epsilon(errorTol) + ); + + } +} + +TEST_CASE("Test ludecompose (on GPU)", "[GPU]") +{ + using namespace FoamGpu; + + constexpr double errorTol = 1E-9; + + + for (int i = 3; i < 50; ++i) + { + int size = i; + + std::vector vals(size*size); + fill_random(vals); + std::vector source(size, 1); + + + auto [m_test, p_test, s_test] = GpuTestKernels::lu(vals, source); + auto [m_ref, p_ref, s_ref] = OFReferenceKernels::lu(vals, source); + + REQUIRE_THAT + ( + m_test, + Catch::Matchers::Approx(m_ref).epsilon(errorTol) + ); + REQUIRE_THAT + ( + p_test, + 
Catch::Matchers::Approx(p_ref).epsilon(errorTol) + ); + REQUIRE_THAT + ( + s_test, + Catch::Matchers::Approx(s_ref).epsilon(errorTol) + ); + + } +} + + + +TEST_CASE("Test gpuOdeSystem (on CPU)", "[CPU]") +{ + using namespace FoamGpu; + + constexpr double errorTol = 1E-9; + + + SECTION("H2"){ + + auto test_result = CpuTestKernels::odesystem(TestData::Mechanism::H2); + auto reference = OFReferenceKernels::odesystem(TestData::Mechanism::H2); + + + CHECK_THAT + ( + test_result.derivative, + Catch::Matchers::Approx(reference.derivative).epsilon(errorTol) + ); + + CHECK_THAT + ( + test_result.jacobian, + Catch::Matchers::Approx(reference.jacobian).epsilon(errorTol) + ); + + + } + + SECTION("GRI"){ + + auto test_result = CpuTestKernels::odesystem(TestData::Mechanism::GRI); + auto reference = OFReferenceKernels::odesystem(TestData::Mechanism::GRI); + + CHECK_THAT + ( + test_result.derivative, + Catch::Matchers::Approx(reference.derivative).epsilon(errorTol) + ); + + CHECK_THAT + ( + test_result.jacobian, + Catch::Matchers::Approx(reference.jacobian).epsilon(errorTol) + ); + + + } +} + +TEST_CASE("Test gpuOdeSystem (on GPU)", "[GPU]") +{ + using namespace FoamGpu; + + constexpr double errorTol = 1E-9; + + + SECTION("H2"){ + + auto test_result = GpuTestKernels::odesystem(TestData::Mechanism::H2); + auto reference = OFReferenceKernels::odesystem(TestData::Mechanism::H2); + + + CHECK_THAT + ( + test_result.derivative, + Catch::Matchers::Approx(reference.derivative).epsilon(errorTol) + ); + + CHECK_THAT + ( + test_result.jacobian, + Catch::Matchers::Approx(reference.jacobian).epsilon(errorTol) + ); + + + } + + SECTION("GRI"){ + + auto test_result = GpuTestKernels::odesystem(TestData::Mechanism::GRI); + auto reference = OFReferenceKernels::odesystem(TestData::Mechanism::GRI); + + CHECK_THAT + ( + test_result.derivative, + Catch::Matchers::Approx(reference.derivative).epsilon(errorTol) + ); + + CHECK_THAT + ( + test_result.jacobian, + 
Catch::Matchers::Approx(reference.jacobian).epsilon(errorTol) + ); + + + } +} + + +TEST_CASE("Test gpuOdeSolver (on CPU)", "[CPU]") +{ + using namespace FoamGpu; + + const gScalar xStart = 0.0; + const gScalar xEnd = 1E-6; + const gScalar dxTry = 1E-7; + + constexpr double errorTol = 1E-8; + + + SECTION("GRI"){ + + TestData::Mechanism mech = TestData::Mechanism::GRI; + + SECTION("Rosenbrock12") + { + std::string name = "Rosenbrock12"; + auto test_result = CpuTestKernels::ode_solve(mech, name, xStart, xEnd, dxTry); + auto reference = OFReferenceKernels::ode_solve(mech, name, xStart, xEnd, dxTry); + remove_negative(test_result, errorTol); + remove_negative(reference, errorTol); + CHECK_THAT + ( + test_result, + Catch::Matchers::Approx(reference).epsilon(errorTol) + ); + + } + SECTION("Rosenbrock23") + { + std::string name = "Rosenbrock23"; + auto test_result = CpuTestKernels::ode_solve(mech, name, xStart, xEnd, dxTry); + auto reference = OFReferenceKernels::ode_solve(mech, name, xStart, xEnd, dxTry); + remove_negative(test_result, errorTol); + remove_negative(reference, errorTol); + CHECK_THAT + ( + test_result, + Catch::Matchers::Approx(reference).epsilon(errorTol) + ); + + } + SECTION("Rosenbrock34") + { + std::string name = "Rosenbrock34"; + auto test_result = CpuTestKernels::ode_solve(mech, name, xStart, xEnd, dxTry); + auto reference = OFReferenceKernels::ode_solve(mech, name, xStart, xEnd, dxTry); + remove_negative(test_result, errorTol); + remove_negative(reference, errorTol); + CHECK_THAT + ( + test_result, + Catch::Matchers::Approx(reference).epsilon(errorTol) + ); + + } + + } + + SECTION("H2"){ + + TestData::Mechanism mech = TestData::Mechanism::H2; + + SECTION("Rosenbrock12") + { + std::string name = "Rosenbrock12"; + auto test_result = CpuTestKernels::ode_solve(mech, name, xStart, xEnd, dxTry); + auto reference = OFReferenceKernels::ode_solve(mech, name, xStart, xEnd, dxTry); + remove_negative(test_result, errorTol); + remove_negative(reference, 
errorTol); + CHECK_THAT + ( + test_result, + Catch::Matchers::Approx(reference).epsilon(errorTol) + ); + + } + SECTION("Rosenbrock23") + { + std::string name = "Rosenbrock23"; + auto test_result = CpuTestKernels::ode_solve(mech, name, xStart, xEnd, dxTry); + auto reference = OFReferenceKernels::ode_solve(mech, name, xStart, xEnd, dxTry); + remove_negative(test_result, errorTol); + remove_negative(reference, errorTol); + CHECK_THAT + ( + test_result, + Catch::Matchers::Approx(reference).epsilon(errorTol) + ); + + } + SECTION("Rosenbrock34") + { + std::string name = "Rosenbrock34"; + auto test_result = CpuTestKernels::ode_solve(mech, name, xStart, xEnd, dxTry); + auto reference = OFReferenceKernels::ode_solve(mech, name, xStart, xEnd, dxTry); + remove_negative(test_result, errorTol); + remove_negative(reference, errorTol); + CHECK_THAT + ( + test_result, + Catch::Matchers::Approx(reference).epsilon(errorTol) + ); + + } + + } + +} + +TEST_CASE("Test gpuOdeSolver (on GPU)", "[GPU]") +{ + using namespace FoamGpu; + + const gScalar xStart = 0.0; + const gScalar xEnd = 1E-6; + const gScalar dxTry = 1E-7; + + constexpr double errorTol = 1E-8; + + + SECTION("GRI"){ + + TestData::Mechanism mech = TestData::Mechanism::GRI; + + SECTION("Rosenbrock12") + { + std::string name = "Rosenbrock12"; + auto test_result = GpuTestKernels::ode_solve(mech, name, xStart, xEnd, dxTry); + auto reference = OFReferenceKernels::ode_solve(mech, name, xStart, xEnd, dxTry); + remove_negative(test_result, errorTol); + remove_negative(reference, errorTol); + CHECK_THAT + ( + test_result, + Catch::Matchers::Approx(reference).epsilon(errorTol) + ); + + } + SECTION("Rosenbrock23") + { + std::string name = "Rosenbrock23"; + auto test_result = GpuTestKernels::ode_solve(mech, name, xStart, xEnd, dxTry); + auto reference = OFReferenceKernels::ode_solve(mech, name, xStart, xEnd, dxTry); + remove_negative(test_result, errorTol); + remove_negative(reference, errorTol); + CHECK_THAT + ( + test_result, + 
Catch::Matchers::Approx(reference).epsilon(errorTol) + ); + + } + SECTION("Rosenbrock34") + { + std::string name = "Rosenbrock34"; + auto test_result = GpuTestKernels::ode_solve(mech, name, xStart, xEnd, dxTry); + auto reference = OFReferenceKernels::ode_solve(mech, name, xStart, xEnd, dxTry); + remove_negative(test_result, errorTol); + remove_negative(reference, errorTol); + CHECK_THAT + ( + test_result, + Catch::Matchers::Approx(reference).epsilon(errorTol) + ); + + } + + } + + SECTION("H2"){ + + TestData::Mechanism mech = TestData::Mechanism::H2; + + SECTION("Rosenbrock12") + { + std::string name = "Rosenbrock12"; + auto test_result = GpuTestKernels::ode_solve(mech, name, xStart, xEnd, dxTry); + auto reference = OFReferenceKernels::ode_solve(mech, name, xStart, xEnd, dxTry); + remove_negative(test_result, errorTol); + remove_negative(reference, errorTol); + CHECK_THAT + ( + test_result, + Catch::Matchers::Approx(reference).epsilon(errorTol) + ); + + } + SECTION("Rosenbrock23") + { + std::string name = "Rosenbrock23"; + auto test_result = GpuTestKernels::ode_solve(mech, name, xStart, xEnd, dxTry); + auto reference = OFReferenceKernels::ode_solve(mech, name, xStart, xEnd, dxTry); + remove_negative(test_result, errorTol); + remove_negative(reference, errorTol); + CHECK_THAT + ( + test_result, + Catch::Matchers::Approx(reference).epsilon(errorTol) + ); + + } + SECTION("Rosenbrock34") + { + std::string name = "Rosenbrock34"; + auto test_result = GpuTestKernels::ode_solve(mech, name, xStart, xEnd, dxTry); + auto reference = OFReferenceKernels::ode_solve(mech, name, xStart, xEnd, dxTry); + remove_negative(test_result, errorTol); + remove_negative(reference, errorTol); + CHECK_THAT + ( + test_result, + Catch::Matchers::Approx(reference).epsilon(errorTol) + ); + + } + + } + +} + + + +TEST_CASE("for_each_index (on GPU)", "[GPU]"){ + + CHECK(GpuTestKernels::test_for_each_index() == true); + +} +/* +TEST_CASE("Test gpuKernelEvaluator"){ + + CHECK(TestData::test_evaluator(1) 
== true); + CHECK(TestData::test_evaluator(2) == true); + +} +*/ \ No newline at end of file diff --git a/gpu_chemistry/unittest/tests/Test-ludecompose.C b/gpu_chemistry/unittest/tests/Test-ludecompose.C deleted file mode 100644 index f9e88b5..0000000 --- a/gpu_chemistry/unittest/tests/Test-ludecompose.C +++ /dev/null @@ -1,394 +0,0 @@ -#define CATCH_CONFIG_ENABLE_BENCHMARKING - -#include "catch.H" -#include "mdspan.H" -#include "test_utilities.H" -#include "ludecompose.H" -#include "scalarMatrices.H" - -#include -#include - -//#include "gsl_functions.hpp" - -inline auto call_lu_gpu(const std::vector& m_vals, const std::vector& s_vals) -{ - - gLabel size = std::sqrt(m_vals.size()); - - device_vector matrix(m_vals.begin(), m_vals.end()); - device_vector pivot(size, 0); - device_vector v(size, 0); - device_vector source(s_vals.begin(), s_vals.end()); - - auto m_span = make_mdspan(matrix, extents<2>{size, size}); - auto p_span = make_mdspan(pivot, extents<1>{size}); - auto v_span = make_mdspan(v, extents<1>{size}); - - eval - ( - [=](){FoamGpu::LUDecompose(m_span, p_span, v_span); return 0;} - ); - - auto s_span = make_mdspan(source, extents<1>{size}); - - eval - ( - [=](){FoamGpu::LUBacksubstitute(m_span, p_span, s_span); return 0;} - ); - - - auto m_ret = toStdVector(matrix); - auto p_ret = toStdVector(pivot); - auto s_ret = toStdVector(source); - - return std::make_tuple(m_ret, p_ret, s_ret); - -} - - - - -inline auto call_lu_cpu(const std::vector& m_vals, const std::vector s_vals) -{ - - gLabel size = std::sqrt(m_vals.size()); - - Foam::scalarSquareMatrix matrix(size, 0); - std::copy(m_vals.begin(), m_vals.end(), matrix.v()); - Foam::List pivot(size, 0); - Foam::scalarField source(size); - std::copy(s_vals.begin(), s_vals.end(), source.begin()); - - gLabel sign; - Foam::LUDecompose(matrix, pivot, sign); - Foam::LUBacksubstitute(matrix, pivot, source); - - - auto m_ret = std::vector(matrix.v(), matrix.v() + size*size); - auto p_ret = toStdVector(pivot); - auto 
s_ret = toStdVector(source); - - return std::make_tuple(m_ret, p_ret, s_ret); - -} - - - -TEST_CASE("Test ludecompose") -{ - using namespace FoamGpu; - - for (int i = 3; i < 50; ++i) - { - int size = i; - - - std::vector vals(size*size); - fill_random(vals); - std::vector source(size, 1); - - - auto [m_gpu, p_gpu, s_gpu] = call_lu_gpu(vals, source); - auto [m_cpu, p_cpu, s_cpu] = call_lu_cpu(vals, source); - - REQUIRE_THAT - ( - m_gpu, - Catch::Matchers::Approx(m_cpu).epsilon(errorTol) - ); - REQUIRE_THAT - ( - p_gpu, - Catch::Matchers::Approx(p_cpu).epsilon(errorTol) - ); - REQUIRE_THAT - ( - s_gpu, - Catch::Matchers::Approx(s_cpu).epsilon(errorTol) - ); - - } -} - - - - - - - - - -/* - -TEST_CASE("Test gsl tutorial constant pivot") -{ - const std::vector m_vals = { 0.18, 0.60, 0.57, 0.96, - 0.41, 0.24, 0.99, 0.58, - 0.14, 0.30, 0.97, 0.66, - 0.51, 0.13, 0.19, 0.85 }; - - gLabel size = std::sqrt(m_vals.size()); - - - Foam::scalarSquareMatrix foam_matrix(size, 0); - std::copy(m_vals.begin(), m_vals.end(), foam_matrix.v()); - Foam::List foam_pivot(size, 0); - gLabel sign; - Foam::LUDecompose(foam_matrix, foam_pivot, sign); - - std::vector gsl_matrix(m_vals.begin(), m_vals.end()); - gsl_permutation *gsl_pivot = gsl_permutation_alloc(size); - GSL::LUDecompose(gsl_matrix, gsl_pivot); - - std::vector gsl_solution(size); - - SECTION("Test 1"){ - const std::vector s_vals(size, 1); - - Foam::scalarField foam_source(size); - std::copy(s_vals.begin(), s_vals.end(), foam_source.begin()); - - std::vector gsl_source(s_vals.begin(), s_vals.end()); - - - - Foam::LUBacksubstitute(foam_matrix, foam_pivot, foam_source); - GSL::LUBacksubstitute(gsl_matrix, gsl_pivot, gsl_source, gsl_solution); - - - REQUIRE_THAT - ( - gsl_solution, - Catch::Matchers::Approx(toStdVector(foam_source)).epsilon(errorTol) - ); - - - } - - SECTION("Test 2"){ - const std::vector s_vals = [&](){ - - std::vector ret(size); - fill_random(ret); - return ret; - - }(); - - Foam::scalarField foam_source(size); - 
std::copy(s_vals.begin(), s_vals.end(), foam_source.begin()); - - std::vector gsl_source(s_vals.begin(), s_vals.end()); - - - - Foam::LUBacksubstitute(foam_matrix, foam_pivot, foam_source); - GSL::LUBacksubstitute(gsl_matrix, gsl_pivot, gsl_source, gsl_solution); - - - REQUIRE_THAT - ( - gsl_solution, - Catch::Matchers::Approx(toStdVector(foam_source)).epsilon(errorTol) - ); - - } - - -} - -TEST_CASE("Benchmark gsl"){ - - SECTION("N = 10"){ - int size = 10; - - const std::vector m_vals = [=](){ - std::vector ret(size*size); - fill_random(ret); - return ret; - }(); - - const std::vector s_vals(size, 1); - - BENCHMARK_ADVANCED("gsl")(Catch::Benchmark::Chronometer meter) { - std::vector matrix(m_vals.begin(), m_vals.end()); - gsl_permutation *pivot= gsl_permutation_alloc(size); - std::vector result(size); - std::vector source(s_vals.begin(), s_vals.end()); - meter.measure([&] { - GSL::LUDecompose(matrix, pivot); - GSL::LUBacksubstitute(matrix, pivot, source, result); - return result[0] + result[4]; - }); - gsl_permutation_free(pivot); - }; - - - BENCHMARK_ADVANCED("foamGpu")(Catch::Benchmark::Chronometer meter) { - std::vector matrix(m_vals.begin(), m_vals.end()); - std::vector pivot(size); - std::vector v(size); - std::vector source(s_vals.begin(), s_vals.end()); - meter.measure([&] { - auto m_span = make_mdspan(matrix, extents<2>{size, size}); - auto p_span = make_mdspan(pivot, extents<1>{size}); - auto v_span = make_mdspan(v, extents<1>{size}); - auto s_span = make_mdspan(source, extents<1>{size}); - FoamGpu::LUDecompose(m_span, p_span, v_span); - FoamGpu::LUBacksubstitute(m_span, p_span, s_span); - return source[0] + source[4]; - }); - }; - - - - BENCHMARK_ADVANCED("of")(Catch::Benchmark::Chronometer meter) { - Foam::scalarSquareMatrix matrix(size, 0); - std::copy(m_vals.begin(), m_vals.end(), matrix.v()); - - Foam::List pivot(size, 0); - Foam::scalarField source(size); - std::copy(s_vals.begin(), s_vals.end(), source.begin()); - - meter.measure([&] { - gLabel 
sign; - Foam::LUDecompose(matrix, pivot, sign); - Foam::LUBacksubstitute(matrix, pivot, source); - return source[0] + source[4]; - }); - - }; - - - } - - SECTION("N = 50"){ - int size = 50; - - const std::vector m_vals = [=](){ - std::vector ret(size*size); - fill_random(ret); - return ret; - }(); - - const std::vector s_vals(size, 1); - - BENCHMARK_ADVANCED("gsl")(Catch::Benchmark::Chronometer meter) { - std::vector matrix(m_vals.begin(), m_vals.end()); - gsl_permutation *pivot= gsl_permutation_alloc(size); - std::vector result(size); - std::vector source(s_vals.begin(), s_vals.end()); - meter.measure([&] { - GSL::LUDecompose(matrix, pivot); - GSL::LUBacksubstitute(matrix, pivot, source, result); - return result[0] + result[4]; - }); - gsl_permutation_free(pivot); - }; - - - BENCHMARK_ADVANCED("foamGpu")(Catch::Benchmark::Chronometer meter) { - std::vector matrix(m_vals.begin(), m_vals.end()); - std::vector pivot(size); - std::vector v(size); - std::vector source(s_vals.begin(), s_vals.end()); - meter.measure([&] { - auto m_span = make_mdspan(matrix, extents<2>{size, size}); - auto p_span = make_mdspan(pivot, extents<1>{size}); - auto v_span = make_mdspan(v, extents<1>{size}); - auto s_span = make_mdspan(source, extents<1>{size}); - FoamGpu::LUDecompose(m_span, p_span, v_span); - FoamGpu::LUBacksubstitute(m_span, p_span, s_span); - return source[0] + source[4]; - }); - }; - - - - BENCHMARK_ADVANCED("of")(Catch::Benchmark::Chronometer meter) { - Foam::scalarSquareMatrix matrix(size, 0); - std::copy(m_vals.begin(), m_vals.end(), matrix.v()); - - Foam::List pivot(size, 0); - Foam::scalarField source(size); - std::copy(s_vals.begin(), s_vals.end(), source.begin()); - - meter.measure([&] { - gLabel sign; - Foam::LUDecompose(matrix, pivot, sign); - Foam::LUBacksubstitute(matrix, pivot, source); - return source[0] + source[4]; - }); - - }; - - - } - - SECTION("N = 100"){ - int size = 100; - - const std::vector m_vals = [=](){ - std::vector ret(size*size); - 
fill_random(ret); - return ret; - }(); - - const std::vector s_vals(size, 1); - - BENCHMARK_ADVANCED("gsl")(Catch::Benchmark::Chronometer meter) { - std::vector matrix(m_vals.begin(), m_vals.end()); - gsl_permutation *pivot= gsl_permutation_alloc(size); - std::vector result(size); - std::vector source(s_vals.begin(), s_vals.end()); - meter.measure([&] { - GSL::LUDecompose(matrix, pivot); - GSL::LUBacksubstitute(matrix, pivot, source, result); - return result[0] + result[4]; - }); - gsl_permutation_free(pivot); - }; - - - BENCHMARK_ADVANCED("foamGpu")(Catch::Benchmark::Chronometer meter) { - std::vector matrix(m_vals.begin(), m_vals.end()); - std::vector pivot(size); - std::vector v(size); - std::vector source(s_vals.begin(), s_vals.end()); - meter.measure([&] { - auto m_span = make_mdspan(matrix, extents<2>{size, size}); - auto p_span = make_mdspan(pivot, extents<1>{size}); - auto v_span = make_mdspan(v, extents<1>{size}); - auto s_span = make_mdspan(source, extents<1>{size}); - FoamGpu::LUDecompose(m_span, p_span, v_span); - FoamGpu::LUBacksubstitute(m_span, p_span, s_span); - return source[0] + source[4]; - }); - }; - - - - BENCHMARK_ADVANCED("of")(Catch::Benchmark::Chronometer meter) { - Foam::scalarSquareMatrix matrix(size, 0); - std::copy(m_vals.begin(), m_vals.end(), matrix.v()); - - Foam::List pivot(size, 0); - Foam::scalarField source(size); - std::copy(s_vals.begin(), s_vals.end(), source.begin()); - - meter.measure([&] { - gLabel sign; - Foam::LUDecompose(matrix, pivot, sign); - Foam::LUBacksubstitute(matrix, pivot, source); - return source[0] + source[4]; - }); - - }; - - - } - -} -*/ - - diff --git a/gpu_chemistry/unittest/tests/Test-memoryResource.C b/gpu_chemistry/unittest/tests/Test-memoryResource.C deleted file mode 100644 index 6517628..0000000 --- a/gpu_chemistry/unittest/tests/Test-memoryResource.C +++ /dev/null @@ -1,77 +0,0 @@ -#include "catch.H" -#include "gpuMemoryResource.H" -#include "test_utilities.H" - - -TEST_CASE("memoryResource"){ - 
using namespace FoamGpu; - - using MR_t = memoryResource_t; - - SECTION("Constructors") - { - REQUIRE_NOTHROW(MR_t()); - - REQUIRE_NOTHROW(MR_t(10, 1)); - } - - SECTION("resize"){ - - MR_t m(10, 3); - REQUIRE_NOTHROW(m.resize(12, 1)); - REQUIRE_NOTHROW(m.resize(0, 0)); - REQUIRE_NOTHROW(m.resize(3, 5)); - CHECK(m.nCells() == 3); - CHECK(m.nEqns() == 7); - CHECK(m.nSpecie() == 5); - } - - SECTION("splitToBuffers") - { - - gLabel nCells = 3; - gLabel nSpecie = 6; - gLabel nEqns = nSpecie +2; - MR_t mr(nCells, nSpecie); - - auto buffers_arr = toDeviceVector(splitToBuffers(mr)); - - CHECK(gLabel(buffers_arr.size()) == nCells); - - - - auto f = [ - nCells = nCells, - nEqns = nEqns, - buffers = make_mdspan(buffers_arr, extents<1>{nCells}) - ]() - { - - for (gLabel i = 0; i < nCells; ++i) - { - for (gLabel j = 0; j < nEqns; ++j) - { - buffers[i].pivotIndices()[j] = i; - buffers[i].dydx0()[j] = gScalar(i); - buffers[i].yTemp()[j] = gScalar(i); - buffers[i].dydx()[j] = gScalar(i); - buffers[i].dfdx()[j] = gScalar(i); - buffers[i].k1()[j] = gScalar(i); - buffers[i].k2()[j] = gScalar(i); - buffers[i].k3()[j] = gScalar(i); - buffers[i].k4()[j] = gScalar(i); - buffers[i].err()[j] = gScalar(i); - buffers[i].lubuffer()[j] = gScalar(i); - buffers[i].c()[j] = gScalar(i); - buffers[i].tempField1()[j] = gScalar(i); - buffers[i].tempField2()[j] = gScalar(i); - } - } - return buffers[0].pivotIndices()[2]; - }; - - CHECK(eval(f) == 0); - - } - -} diff --git a/gpu_chemistry/unittest/tests/Test-ode.C b/gpu_chemistry/unittest/tests/Test-ode.C deleted file mode 100644 index f865f42..0000000 --- a/gpu_chemistry/unittest/tests/Test-ode.C +++ /dev/null @@ -1,195 +0,0 @@ - -#include "catch.H" -#include "test_utilities.H" -#include "create_foam_inputs.H" -#include "create_gpu_inputs.H" -#include "mock_of_odesystem.H" -#include "gpuODESystem.H" -#include "makeGpuOdeSolver.H" -#include "readGpuOdeInputs.H" -#include "Rosenbrock34.H" -#include "Rosenbrock23.H" -#include "mdspan.H" - - - - - - 
-struct TestParams{ - const gScalar xStart; - const gScalar xEnd; - const gScalar dxTry; -}; - -auto callGpuSolve -( - const Foam::scalarField& y0, - const FoamGpu::gpuODESolver& ode, - TestParams p -) -{ - using namespace FoamGpu; - - const gLabel nEqns = y0.size(); - const gLabel nSpecie = nEqns - 2; - - const gScalar xStart = p.xStart; - const gScalar xEnd = p.xEnd; - const gScalar dxTry = p.dxTry; - - auto y = toDeviceVector(y0); - - memoryResource_t memory(1, nSpecie); - auto buffers = toDeviceVector(splitToBuffers(memory)); - - - auto f = [ - ode = ode, - xStart = xStart, - xEnd = xEnd, - y = make_mdspan(y, extents<1>{nEqns}), - dxTry = dxTry, - buffers = make_mdspan(buffers, extents<1>{1}) - ]() - { - gScalar dxTry_temp = dxTry; - ode.solve(xStart, xEnd, y, dxTry_temp, buffers[0]); - return dxTry_temp; - }; - - auto unused = eval(f); - (void) unused; - - auto ret = toStdVector(y); - - //Round small values to zero to avoid -0 == 0 comparisons - for (auto& e : ret) - { - if (std::abs(e) < gpuSmall) - { - e = 0.0; - } - } - return ret; - -} - -auto callCpuSolve(const Foam::scalarField& y0, const Foam::ODESolver& ode, TestParams p) -{ - using namespace Foam; - - const scalar xStart = p.xStart; - const scalar xEnd = p.xEnd; //1E-5; - const scalar dxTry = p.dxTry; - const label li = 0; - - scalarField y = y0; - scalar dxTry_temp = dxTry; - ode.solve(xStart, xEnd, y, li, dxTry_temp); - - auto ret = toStdVector(y); - - //Round small values to zero to avoid -0 == 0 comparisons - for (auto& e : ret) - { - if (std::abs(e) < gpuSmall) - { - e = 0.0; - } - } - - return ret; - -} - - - - -static inline void runMechanismTests(TestData::Mechanism mech) -{ - - using namespace FoamGpu; - - Foam::MockOFSystem cpu_system(mech); - - auto gpu_thermos = toDeviceVector(makeGpuThermos(mech)); - auto gpu_reactions = toDeviceVector(makeGpuReactions(mech)); - - - gpuODESystem gpu_system - ( - cpu_system.nEqns(), - gLabel(gpu_reactions.size()), - make_raw_pointer(gpu_thermos.data()), - 
make_raw_pointer(gpu_reactions.data()) - ); - - const Foam::scalarField y0 = [=](){ - gLabel nEqns = TestData::equationCount(mech); - Foam::scalarField y0(nEqns); - assign_test_condition(y0, mech); - return y0; - }(); - - TestParams params{0.0, 1E-5, 1E-7}; - - { - Foam::dictionary dict; - dict.add("solver", "Rosenbrock12"); - - auto cpu = Foam::ODESolver::New(cpu_system, dict); - auto gpu = make_gpuODESolver(gpu_system, read_gpuODESolverInputs(dict)); - auto y_gpu = callGpuSolve(y0, gpu, params); - auto y_cpu = callCpuSolve(y0, cpu, params); - - REQUIRE_THAT - ( - y_gpu, - Catch::Matchers::Approx(toStdVector(y_cpu)).epsilon(errorTol) - ); - } - - { - Foam::dictionary dict; - dict.add("solver", "Rosenbrock23"); - - auto cpu = Foam::ODESolver::New(cpu_system, dict); - auto gpu = make_gpuODESolver(gpu_system, read_gpuODESolverInputs(dict)); - auto y_gpu = callGpuSolve(y0, gpu, params); - auto y_cpu = callCpuSolve(y0, cpu, params); - - REQUIRE_THAT - ( - y_gpu, - Catch::Matchers::Approx(toStdVector(y_cpu)).epsilon(errorTol) - ); - } - - - { - - Foam::dictionary dict; - dict.add("solver", "Rosenbrock34"); - auto cpu = Foam::ODESolver::New(cpu_system, dict); - auto gpu = make_gpuODESolver(gpu_system, read_gpuODESolverInputs(dict)); - auto y_gpu = callGpuSolve(y0, gpu, params); - auto y_cpu = callCpuSolve(y0, cpu, params); - - REQUIRE_THAT - ( - y_gpu, - Catch::Matchers::Approx(toStdVector(y_cpu)).epsilon(errorTol) - ); - } - -} - - -TEST_CASE("Test ODE") -{ - runMechanismTests(TestData::GRI); - runMechanismTests(TestData::H2); -} - - diff --git a/gpu_chemistry/unittest/tests/Test-odeSystem.C b/gpu_chemistry/unittest/tests/Test-odeSystem.C deleted file mode 100644 index 7325180..0000000 --- a/gpu_chemistry/unittest/tests/Test-odeSystem.C +++ /dev/null @@ -1,170 +0,0 @@ - -#include "catch.H" - -#include "mdspan.H" -#include "test_utilities.H" -#include "mock_of_odesystem.H" -#include "gpuODESystem.H" -#include "create_foam_inputs.H" -#include "create_gpu_inputs.H" - - - 
-static inline void runMechanismTests(TestData::Mechanism mech) -{ - - using namespace FoamGpu; - - - - Foam::MockOFSystem cpu(mech); - - auto gpu_thermos = toDeviceVector(makeGpuThermos(mech)); - auto gpu_reactions = toDeviceVector(makeGpuReactions(mech)); - - gpuODESystem gpu - ( - cpu.nEqns(), - gLabel(gpu_reactions.size()), - make_raw_pointer(gpu_thermos.data()), - make_raw_pointer(gpu_reactions.data()) - ); - - const gLabel nSpecie = TestData::speciesCount(mech); - const gLabel nEqns = TestData::equationCount(mech); - - const gLabel li = 0; - - const Foam::scalarField y0 = [=](){ - gLabel nEqns = TestData::equationCount(mech); - Foam::scalarField y0(nEqns); - assign_test_condition(y0, mech); - return y0; - }(); - - { - - const Foam::scalarField y_cpu = y0; - const auto y_gpu = toDeviceVector(y0); - - Foam::scalarField dy_cpu(nEqns, 0.31); - auto dy_gpu = toDeviceVector(dy_cpu); - - - cpu.derivatives(0.0, y_cpu, li, dy_cpu); - - memoryResource_t memory(1, nSpecie); - auto buffers = toDeviceVector(splitToBuffers(memory)); - - auto f = - [ - =, - buffers = make_mdspan(buffers, extents<1>{1}), - y = make_mdspan(y_gpu, extents<1>{nEqns}), - dy = make_mdspan(dy_gpu, extents<1>{nEqns}) - ] - () - { - gpu.derivatives(y, dy, buffers[0]); - return 0; - }; - - eval(f); - - - REQUIRE_THAT - ( - toStdVector(dy_gpu), - Catch::Matchers::Approx(toStdVector(dy_cpu)).epsilon(errorTol) - ); - - } - - - - //Jacobian tests - { - const gLabel li = 0; - - const gScalar time = 0.1; - - const Foam::scalarField y_cpu = y0; - const auto y_gpu = toDeviceVector(y0); - - Foam::scalarField dy_cpu(nEqns, 0.31); - auto dy_gpu = toDeviceVector(dy_cpu); - - Foam::scalarSquareMatrix J_cpu(nEqns, 0.1); - device_vector J_gpu(J_cpu.size(), 0.2); - - memoryResource_t memory(1, nSpecie); - auto buffers = toDeviceVector(splitToBuffers(memory)); - - cpu.jacobian(time, y_cpu, li, dy_cpu, J_cpu); - - - auto f = - [ - =, - buffers = make_mdspan(buffers, extents<1>{1}), - y = make_mdspan(y_gpu, 
extents<1>{nEqns}), - dy = make_mdspan(dy_gpu, extents<1>{nEqns}), - J = make_mdspan(J_gpu, extents<2>{nEqns, nEqns}) - ] - () - { - gpu.jacobian(y, dy, J, buffers[0]); - return 0; - }; - - eval(f); - - /* - auto Jtemp = make_mdspan(J_gpu, extents<2>{nEqns, nEqns}); - for (gLabel j = 0; j < nEqns; ++j) - { - for (gLabel i = 0; i < nEqns; ++i) - { - if (std::abs(J_cpu(j, i) - Jtemp(j, i)) > gpuSmall ) - { - std::cout << J_cpu(j, i) << " " << Jtemp(j, i) << " " << j << " " << i << std::endl; - } - } - } - */ - - REQUIRE_THAT - ( - toStdVector(dy_gpu), - Catch::Matchers::Approx(toStdVector(dy_cpu)).epsilon(errorTol) - ); - - auto Jacobian_cpu = std::vector(J_cpu.v(), J_cpu.v()+J_cpu.size()); - auto Jacobian_gpu = toStdVector(J_gpu); - - REQUIRE_THAT - ( - toStdVector(Jacobian_gpu), - Catch::Matchers::Approx(Jacobian_cpu).epsilon(errorTol) - ); - - } -} - -TEST_CASE("Test gpuOdeSystem") -{ - - - SECTION("GRI") - { - runMechanismTests(TestData::GRI); - } - - SECTION("H2") - { - runMechanismTests(TestData::H2); - } - - - -} \ No newline at end of file diff --git a/gpu_chemistry/unittest/tests/Test-reaction.C b/gpu_chemistry/unittest/tests/Test-reaction.C deleted file mode 100644 index e4921b8..0000000 --- a/gpu_chemistry/unittest/tests/Test-reaction.C +++ /dev/null @@ -1,564 +0,0 @@ -#include "catch.H" - -#include "speciesTable.H" -#include "ArrheniusReactionRate.H" - -#include "gpuReaction.H" - -#include "test_utilities.H" -#include "create_gpu_inputs.H" -#include "create_foam_inputs.H" - -TEST_CASE("variant") -{ - using namespace FoamGpu; - - using Arrhenius = gpuArrheniusReactionRate; - using ThirdBodyArrhenius = gpuThirdBodyArrheniusReactionRate; - using ArrheniusLindemannFallOff = gpuFallOffReactionRate; - using ArrheniusTroeFallOff = gpuFallOffReactionRate; - - - using ReactionRate = - variant::variant - < - Arrhenius, - ThirdBodyArrhenius, - ArrheniusLindemannFallOff, - ArrheniusTroeFallOff - >; - - - SECTION("Constructors") - { - REQUIRE_NOTHROW - ( - 
ReactionRate() - ); - } - -} - -TEST_CASE("Test gpuReactionRate") -{ - using namespace FoamGpu; - - - SECTION("Constructors") - { - SECTION("Default") - { - auto f = [](){ - - gpuReactionRate r; - return gScalar(r.hasDdc()); - }; - - CHECK(eval(f) == gScalar(false)); - } - - SECTION("Assignment") - { - gpuReactionRate rhs(gpuArrheniusReactionRate(0.1, 0.2, 0.3), false); - auto f = [rhs](){ - - gpuReactionRate lhs = rhs; - return gScalar(lhs.hasDdc()); - }; - - CHECK(eval(f) == gScalar(false)); - } - - } - - - SECTION("operator()") - { - auto nSpecie = makeSpeciesTable(TestData::GRI).size(); - const Foam::scalarField c_cpu(nSpecie, 0.123); - const device_vector c_gpu = host_vector(c_cpu.begin(), c_cpu.end()); - const auto c = make_mdspan(c_gpu, extents<1>{nSpecie}); - - const gScalar p = 1E5; - const gScalar T = 431.4321; - const gLabel li = 0; - SECTION("Arrhenius") - { - SECTION("Test 1") - { - gScalar A = 4.6e16; - gScalar beta = -1.41; - gScalar Ta = 14567.38636; - gpuArrheniusReactionRate gpu(A, beta, Ta); - Foam::ArrheniusReactionRate cpu(A, beta, Ta); - REQUIRE(eval([=](){return gpu(p, T, c);}) == Approx(cpu(p, T, c_cpu, li)).epsilon(errorTol)); - - } - SECTION("Test 1") - { - gScalar A = 1E+11; - gScalar beta = 0; - gScalar Ta = 20127.64955; - gpuArrheniusReactionRate gpu(A, beta, Ta); - Foam::ArrheniusReactionRate cpu(A, beta, Ta); - REQUIRE(eval([=](){return gpu(p, T, c);}) == Approx(cpu(p, T, c_cpu, li)).epsilon(errorTol)); - - } - } - - } - - - -} - - -TEST_CASE("gpuSpeciesCoeffs pow") -{ - - using namespace FoamGpu; - - SECTION("Test1") - { - const gScalar base = 43.421; - const gScalar exp = 1.0; - - Foam::specieExponent cpu(exp); - gpuSpecieExponent gpu(exp); - - REQUIRE - ( - eval([=](){return FoamGpu::speciePow(base, exp); }) - == Approx(Foam::pow(base, cpu)).epsilon(errorTol) - ); - } - - - - SECTION("Test2") - { - const gScalar base = 43.421; - const gLabel exp = 1; - - const Foam::specieExponent cpu(exp); - const gpuSpecieExponent gpu(exp); - 
REQUIRE - ( - eval([=](){return FoamGpu::speciePow(base, exp); }) - == Approx(Foam::pow(base, cpu)).epsilon(errorTol) - ); - } - - SECTION("Test3") - { - const gScalar base = 43.421; - const gLabel exp = 5; - - const Foam::specieExponent cpu(exp); - const gpuSpecieExponent gpu(exp); - - REQUIRE - ( - eval([=](){return FoamGpu::speciePow(base, exp); }) - == Approx(Foam::pow(base, cpu)).epsilon(errorTol) - ); - } - - - SECTION("Test4") - { - - const gScalar base = 1.32; - const gLabel exp = 2; - const gLabel n = 60; - const Foam::specieExponent cpu(exp); - const FoamGpu::gpuSpecieExponent gpu(exp); - - auto f = [=]() - { - - const Foam::specieExponent er = cpu; - gScalar dCrcj = 1.0; - for (gLabel i = 0; i < n; ++i) - { - dCrcj *= er*Foam::pow(base, er - Foam::specieExponent(gLabel(1))); - } - return dCrcj; - }; - - auto f2 = [=]() - { - - const FoamGpu::gpuSpecieExponent er =gpu; - gScalar dCrcj = 1.0; - for (gLabel i = 0; i < n; ++i) - { - dCrcj *= er*FoamGpu::speciePow(base, er - FoamGpu::gpuSpecieExponent(gLabel(1))); - - } - return dCrcj; - }; - - REQUIRE - ( - eval(f) == Approx(f2()).epsilon(errorTol) - ); - - } - - -} - - -static inline void reactionTests(TestData::Mechanism mech) -{ - - using namespace FoamGpu; - - - const Foam::ReactionList cpu_reactions( - TestData::makeSpeciesTable(mech), - TestData::makeCpuThermos(mech), - TestData::makeReactionDict(mech) - ); - - - //auto cpu_reactions = TestData::makeCpuReactions(mech); - auto gpu_reactions_temp = makeGpuReactions(mech); - - const gLabel nSpecie = TestData::speciesCount(mech); - const gLabel nEqns = TestData::equationCount(mech); - const gScalar p = 1E5; - const gScalar T = 431.4321; - const gLabel li = 0; - - const Foam::scalarField c_cpu(nSpecie, 0.123); - auto c_gpu = toDeviceVector(c_cpu); - //const device_vector c_gpu = host_vector(c_cpu.begin(), c_cpu.end()); - - //device_vector gpu_reactions(gpu_reactions_temp.begin(), gpu_reactions_temp.end()); - - auto gpu_reactions = 
toDeviceVector(gpu_reactions_temp); - CHECK(cpu_reactions.size() == gLabel(gpu_reactions.size())); - - SECTION("Thigh/Tlow") - { - for (gLabel i = 0; i < cpu_reactions.size(); ++i) - { - const auto& cpu = cpu_reactions[i]; - const auto gpu = &(gpu_reactions[i]); - - REQUIRE(eval([=](){return gpu->Tlow();}) == Approx(cpu.Tlow()).epsilon(errorTol)); - REQUIRE(eval([=](){return gpu->Thigh();}) == Approx(cpu.Thigh()).epsilon(errorTol)); - } - - } - - - SECTION("omega") - { - - const auto c = make_mdspan(c_gpu, extents<1>{nSpecie}); - - for (gLabel i = 0; i < cpu_reactions.size(); ++i) - { - const auto& cpu = cpu_reactions[i]; - const auto gpu =&(gpu_reactions[i]); - - auto f_gpu = [=](){ - return gpu->omega(p, T, c); - }; - - auto f_cpu = [&](){ - Foam::scalar omegaf = 0.3; - Foam::scalar omegar = 0.4; - return cpu.omega(p, T, c_cpu, li, omegaf, omegar); - }; - - REQUIRE(eval(f_gpu) == Approx(f_cpu()).epsilon(errorTol)); - } - - } - - - - SECTION("Kc/kf/kr/dkfdT/dkrdT") - { - - - const auto c = make_mdspan(c_gpu, extents<1>{nSpecie}); - - for (gLabel i = 0; i < cpu_reactions.size(); ++i) - { - const auto& cpu = cpu_reactions[i]; - const auto gpu =&(gpu_reactions[i]); - - - REQUIRE - ( - eval([=](){return gpu->Kc(p, T);}) - == Approx(cpu.Kc(p, T)).epsilon(errorTol) - ); - - REQUIRE - ( - eval([=](){return gpu->kf(p, T, c);}) - == Approx(cpu.kf(p, T, c_cpu, li)).epsilon(errorTol) - ); - REQUIRE - ( - eval([=](){return gpu->kr(p, T, c);}) - == Approx(cpu.kr(p, T, c_cpu, li)).epsilon(errorTol) - ); - - REQUIRE - ( - eval([=](){ - gScalar Kc = fmax(gpu->RSMALL, gpu->Kc(p, T)); - - return gpu->kr(32.0, p, T, Kc, c);}) - == Approx(cpu.kr(32.0, p, T, c_cpu, li)).epsilon(errorTol) - ); - - - - - } - } - - - - - SECTION("dNdtByV") - { - - - //Here it is important to have same initial condition as the function only modifies - //certain values - Foam::scalarField res_cpu(nSpecie, 0.435); - device_vector res_gpu(nSpecie, 0.435); - - for (gLabel i = 0; i < cpu_reactions.size(); ++i) 
- { - const auto& cpu = cpu_reactions[i]; - const auto gpu = &(gpu_reactions[i]); - - - auto c = make_mdspan(c_gpu, extents<1>{nSpecie}); - auto res = make_mdspan(res_gpu, extents<1>{nSpecie}); - - auto f = [=](){ - gpu->dNdtByV(p, T, c, res); - return 0; - }; - eval(f); - - cpu.dNdtByV(p, T, c_cpu, li, res_cpu, false, Foam::List{}, 0); - - auto dNdtByV_cpu = toStdVector(res_gpu); - auto dNdtByV_gpu = toStdVector(res_cpu); - - for (gLabel i = 0; i < gLabel(dNdtByV_cpu.size()); ++i) - { - REQUIRE(dNdtByV_gpu[i] == Approx(dNdtByV_cpu[i]).epsilon(errorTol)); - } - } - - } - - SECTION("ddNdtByVdcTp") - { - - Foam::List c2s; - gLabel csi0 = 0; - gLabel Tsi = nSpecie; - - - - Foam::scalarField dNdtByV_cpu(nSpecie, 1.0); - Foam::scalarSquareMatrix ddNdtByVdcTp_cpu(nEqns, 11.3); - Foam::scalarField cTpWork0_cpu(nSpecie, 11.1); - Foam::scalarField cTpWork1_cpu(nSpecie, 13.5); - - auto dNdtByV_gpu = toDeviceVector(dNdtByV_cpu); - auto ddNdtByVdcTp_gpu = device_vector - ( - ddNdtByVdcTp_cpu.v(), - ddNdtByVdcTp_cpu.v() + ddNdtByVdcTp_cpu.size() - ); - - auto cTpWork0_gpu = toDeviceVector(cTpWork0_cpu); - - for (gLabel i = 0; i < cpu_reactions.size(); ++i) - { - const auto& cpu = cpu_reactions[i]; - const auto gpu = &(gpu_reactions[i]); - - - cpu.ddNdtByVdcTp - ( - p, - T, - c_cpu, - li, - dNdtByV_cpu, - ddNdtByVdcTp_cpu, - false, - c2s, - csi0, - Tsi, - cTpWork0_cpu, - cTpWork1_cpu - ); - - auto f = - [ - =, - c = make_mdspan(c_gpu, extents<1>{nSpecie}), - ddNdtByVdcTp = make_mdspan(ddNdtByVdcTp_gpu, extents<2>{nEqns, nEqns}), - cTpWork0 = make_mdspan(cTpWork0_gpu, extents<1>{nSpecie}) - ] - () - { - auto params = computeReactionParameters(*gpu, c, p, T, cTpWork0); - - gpu->ddNdtByVdcTp - ( - p, - T, - c, - ddNdtByVdcTp, - params - - ); - - return 0; - }; - - eval(f); - - std::vector r_cpu(ddNdtByVdcTp_cpu.v(), ddNdtByVdcTp_cpu.v() + ddNdtByVdcTp_cpu.size()); - std::vector r_gpu = toStdVector(ddNdtByVdcTp_gpu); - REQUIRE_THAT(r_gpu, 
Catch::Matchers::Approx(r_cpu).epsilon(errorTol)); - - - } - - } - - - SECTION("ddNdtByVdcTp with small concentrations") - { - Foam::List c2s; - gLabel csi0 = 0; - gLabel Tsi = nSpecie; - - Foam::scalarField c_cpu2(nSpecie); - fill_random(c_cpu2); - c_cpu2[3] = 1E-4; - c_cpu2[5] = 1E-5; - c_cpu2[6] = 1E-7; - c_cpu2[7] = gpuSmall; - c_cpu2[8] = 0.9*gpuSmall; - c_cpu2[9] = gpuVSmall; - - auto c_gpu2 = toDeviceVector(c_cpu2); - - Foam::scalarField dNdtByV_cpu(nSpecie, 1.0); - Foam::scalarSquareMatrix ddNdtByVdcTp_cpu(nEqns, 11.3); - Foam::scalarField cTpWork0_cpu(nSpecie, 11.1); - Foam::scalarField cTpWork1_cpu(nSpecie, 13.5); - - - auto dNdtByV_gpu = toDeviceVector(dNdtByV_cpu); - auto ddNdtByVdcTp_gpu = device_vector - ( - ddNdtByVdcTp_cpu.v(), - ddNdtByVdcTp_cpu.v() + ddNdtByVdcTp_cpu.size() - ); - - auto cTpWork0_gpu = toDeviceVector(cTpWork0_cpu); - - - - for (gLabel i = 0; i < cpu_reactions.size(); ++i) - { - const auto& cpu = cpu_reactions[i]; - const auto gpu = &(gpu_reactions[i]); - - cpu.ddNdtByVdcTp - ( - p, - T, - c_cpu2, - li, - dNdtByV_cpu, - ddNdtByVdcTp_cpu, - false, - c2s, - csi0, - Tsi, - cTpWork0_cpu, - cTpWork1_cpu - ); - - auto f = - [ - =, - c = make_mdspan(c_gpu2, extents<1>{nSpecie}), - ddNdtByVdcTp = make_mdspan(ddNdtByVdcTp_gpu, extents<2>{nEqns, nEqns}), - cTpWork0 = make_mdspan(cTpWork0_gpu, extents<1>{nSpecie}) - ] - () - { - auto params = computeReactionParameters(*gpu, c, p, T, cTpWork0); - - gpu->ddNdtByVdcTp - ( - p, - T, - c, - ddNdtByVdcTp, - params - ); - - return 0; - }; - - - eval(f); - - - - - std::vector r_cpu(ddNdtByVdcTp_cpu.v(), ddNdtByVdcTp_cpu.v() + ddNdtByVdcTp_cpu.size()); - std::vector r_gpu = toStdVector(ddNdtByVdcTp_gpu); - - REQUIRE_THAT(r_gpu, Catch::Matchers::Approx(r_cpu).epsilon(errorTol)); - - - } - - - } - -} - - - -TEST_CASE("Test gpuReaction functions") -{ - - - SECTION("GRI") - { - reactionTests(TestData::GRI); - } - - SECTION("H2") - { - reactionTests(TestData::H2); - } - - -} diff --git 
a/gpu_chemistry/unittest/tests/Test-thermo.C b/gpu_chemistry/unittest/tests/Test-thermo.C deleted file mode 100644 index a6b4e45..0000000 --- a/gpu_chemistry/unittest/tests/Test-thermo.C +++ /dev/null @@ -1,273 +0,0 @@ -#include "catch.H" - - -#include "test_utilities.H" -#include "create_gpu_inputs.H" -#include "create_foam_inputs.H" -#include "makeGpuThermo.H" - -template -static inline auto toArray(Foam::FixedList a) -{ - - std::array ret{}; - for (size_t i = 0; i < N; ++i) - { - ret[i] = a[i]; - } - return ret; -} - - - -static inline auto toArray(typename FoamGpu::gpuThermo::coeffArray a) -{ - std::array ret{}; - for (size_t i = 0; i < 7; ++i) - { - ret[i] = a[i]; - } - return ret; -} - -Foam::dictionary get_dictionary() -{ - using namespace Foam; - - OStringStream os; - os << "OH" << endl; - os << "{" << endl; - os << "specie {molWeight 17; massFraction 1;}" << endl; - os << "thermodynamics { Tlow 200; Thigh 3500; Tcommon 1000; " << endl; - os << "highCpCoeffs (1 1 1 1 1 1 1);" << endl; - os << "lowCpCoeffs (1 1 1 1 1 1 1);" << endl; - - os << "}" << endl; - os << "}" << endl; - - - IStringStream is(os.str()); - - dictionary dict(is); - - auto sdict = dict.subDict("OH"); - - - return dict.subDict("OH"); - -} - -template -static T arithmetic_kernel(T& lhs, T& rhs){ - lhs += rhs; - auto r1 = lhs + rhs; - auto r2 = 3*r1; - auto r3 = (r1 == r2); - return r3; -} - -TEST_CASE("Test gpuThermo"){ - - using namespace FoamGpu; - - - SECTION("Constructors") - { - REQUIRE_NOTHROW(gpuThermo()); - } - - - auto dict = get_dictionary(); - - - - const Foam::species::thermo >,Foam::sensibleEnthalpy> - cpu - ( - "Something", - dict - ); - - const gpuThermo gpu = makeGpuThermo(cpu, dict); - - - CHECK(toArray(cpu.highCpCoeffs()) == toArray(gpu.highCpCoeffs())); - CHECK(toArray(cpu.lowCpCoeffs()) == toArray(gpu.lowCpCoeffs())); - - - - SECTION("gpuThermo operator +=") - { - - SECTION("non self assignment"){ - auto cpu1(cpu); - auto cpu2(cpu1); - - auto gpu1(gpu); - auto 
gpu2(gpu1); - - gpu1 += gpu2; - cpu1 += cpu2; - - CHECK(cpu1.W() == gpu1.W()); - CHECK(cpu1.Y() == gpu1.Y()); - CHECK(toArray(cpu1.highCpCoeffs()) == toArray(gpu1.highCpCoeffs())); - CHECK(toArray(cpu1.lowCpCoeffs()) == toArray(gpu1.lowCpCoeffs())); - - } - - - SECTION("self assignment"){ - - auto cpu1(cpu); - auto gpu1(gpu); - - cpu1 += cpu1; - gpu1 += gpu1; - - CHECK(cpu1.W() == gpu1.W()); - CHECK(cpu1.Y() == gpu1.Y()); - CHECK(toArray(cpu1.highCpCoeffs()) == toArray(gpu1.highCpCoeffs())); - CHECK(toArray(cpu1.lowCpCoeffs()) == toArray(gpu1.lowCpCoeffs())); - - - } - - } - - SECTION("gpuThermo arithmetic_kernel") - { - auto cpu1(cpu); - auto cpu2(cpu1); - - auto gpu1(gpu); - auto gpu2(gpu1); - - auto rgpu = arithmetic_kernel(gpu1, gpu2); - auto rcpu = arithmetic_kernel(cpu1, cpu2); - - - - CHECK(rcpu.W() == rgpu.W()); - CHECK(rcpu.Y() == rgpu.Y()); - CHECK(toArray(rcpu.highCpCoeffs()) == toArray(rgpu.highCpCoeffs())); - CHECK(toArray(rcpu.lowCpCoeffs()) == toArray(rgpu.lowCpCoeffs())); - - } - -} - - - - -TEST_CASE("Test gpuThermo properties") -{ - using namespace FoamGpu; - - - SECTION("Gri") - { - auto cpuThermos = TestData::makeCpuThermos(TestData::GRI); - auto gpuThermos_temp = TestData::makeGpuThermos(TestData::GRI); - device_vector gpuThermos(gpuThermos_temp.begin(), gpuThermos_temp.end()); - CHECK(cpuThermos.size() == gLabel(gpuThermos.size())); - - for (gLabel i = 0; i < cpuThermos.size(); ++i) - { - const auto& cpu = cpuThermos[i]; - const auto gpu = &(gpuThermos[i]); - - gScalar p = 1E5; - gScalar T = 431.4321; - - REQUIRE(eval([=](){return gpu->W();}) == Approx(cpu.W()).epsilon(errorTol)); - REQUIRE(eval([=](){return gpu->Y();}) == Approx(cpu.Y())); - REQUIRE(eval([=](){return gpu->R();}) == Approx(cpu.R())); - REQUIRE(eval([=](){return gpu->Cp(p, T);}) == Approx(cpu.Cp(p, T)).epsilon(errorTol)); - REQUIRE(eval([=](){return gpu->Ha(p, T);}) == Approx(cpu.ha(p, T)).epsilon(errorTol)); - REQUIRE(eval([=](){return gpu->Hs(p, T);}) == Approx(cpu.hs(p, 
T)).epsilon(errorTol)); - REQUIRE(eval([=](){return gpu->Hf( );}) == Approx(cpu.hf( )).epsilon(errorTol)); - REQUIRE(eval([=](){return gpu->S(p, T);}) == Approx(cpu.s(p, T)).epsilon(errorTol)); - REQUIRE(eval([=](){return gpu->Gstd(T);}) == Approx(cpu.gStd(T)).epsilon(errorTol)); - REQUIRE(eval([=](){return gpu->dCpdT(p, T);}) == Approx(cpu.dCpdT(p, T)).epsilon(errorTol)); - - REQUIRE(eval([=](){return gpu->Cv(p, T);}) == Approx(cpu.Cv(p, T)).epsilon(errorTol)); - REQUIRE(eval([=](){return gpu->Es(p, T);}) == Approx(cpu.es(p, T)).epsilon(errorTol)); - REQUIRE(eval([=](){return gpu->Ea(p, T);}) == Approx(cpu.ea(p, T)).epsilon(errorTol)); - - REQUIRE(eval([=](){return gpu->K(p, T);}) == Approx(cpu.K(p, T)).epsilon(errorTol)); - REQUIRE(eval([=](){return gpu->Kp(p, T);}) == Approx(cpu.Kp(p, T)).epsilon(errorTol)); - REQUIRE(eval([=](){return gpu->Kc(p, T);}) == Approx(cpu.Kc(p, T)).epsilon(errorTol)); - REQUIRE(eval([=](){return gpu->dKcdTbyKc(p, T);}) == Approx(cpu.dKcdTbyKc(p, T)).epsilon(errorTol)); - - auto gpu_op = [=](){ - auto pair = gpu->KcAnddKcTbyKc(p, T); - return std::get<0>(pair) + std::get<1>(pair); - }; - auto cpu_op = [&](){ - return cpu.Kc(p, T) + cpu.dKcdTbyKc(p, T); - }; - - REQUIRE(eval(gpu_op) == Approx(cpu_op()).epsilon(errorTol)); - - - - } - } - - SECTION("H2") - { - auto cpuThermos = TestData::makeCpuThermos(TestData::H2); - auto gpuThermos_temp = TestData::makeGpuThermos(TestData::H2); - device_vector gpuThermos(gpuThermos_temp.begin(), gpuThermos_temp.end()); - CHECK(cpuThermos.size() == gLabel(gpuThermos.size())); - - for (gLabel i = 0; i < cpuThermos.size(); ++i) - { - const auto& cpu = cpuThermos[i]; - const auto gpu = &(gpuThermos[i]); - - gScalar p = 1E5; - gScalar T = 431.4321; - - REQUIRE(eval([=](){return gpu->W();}) == Approx(cpu.W()).epsilon(errorTol)); - REQUIRE(eval([=](){return gpu->Y();}) == Approx(cpu.Y())); - REQUIRE(eval([=](){return gpu->R();}) == Approx(cpu.R())); - REQUIRE(eval([=](){return gpu->Cp(p, T);}) == 
Approx(cpu.Cp(p, T)).epsilon(errorTol)); - REQUIRE(eval([=](){return gpu->Ha(p, T);}) == Approx(cpu.ha(p, T)).epsilon(errorTol)); - REQUIRE(eval([=](){return gpu->Hs(p, T);}) == Approx(cpu.hs(p, T)).epsilon(errorTol)); - REQUIRE(eval([=](){return gpu->Hf( );}) == Approx(cpu.hf( )).epsilon(errorTol)); - REQUIRE(eval([=](){return gpu->S(p, T);}) == Approx(cpu.s(p, T)).epsilon(errorTol)); - REQUIRE(eval([=](){return gpu->Gstd(T);}) == Approx(cpu.gStd(T)).epsilon(errorTol)); - REQUIRE(eval([=](){return gpu->dCpdT(p, T);}) == Approx(cpu.dCpdT(p, T)).epsilon(errorTol)); - - REQUIRE(eval([=](){return gpu->Cv(p, T);}) == Approx(cpu.Cv(p, T)).epsilon(errorTol)); - REQUIRE(eval([=](){return gpu->Es(p, T);}) == Approx(cpu.es(p, T)).epsilon(errorTol)); - REQUIRE(eval([=](){return gpu->Ea(p, T);}) == Approx(cpu.ea(p, T)).epsilon(errorTol)); - - REQUIRE(eval([=](){return gpu->K(p, T);}) == Approx(cpu.K(p, T)).epsilon(errorTol)); - REQUIRE(eval([=](){return gpu->Kp(p, T);}) == Approx(cpu.Kp(p, T)).epsilon(errorTol)); - REQUIRE(eval([=](){return gpu->Kc(p, T);}) == Approx(cpu.Kc(p, T)).epsilon(errorTol)); - REQUIRE(eval([=](){return gpu->dKcdTbyKc(p, T);}) == Approx(cpu.dKcdTbyKc(p, T)).epsilon(errorTol)); - - auto gpu_op = [=](){ - auto pair = gpu->KcAnddKcTbyKc(p, T); - return std::get<0>(pair) + std::get<1>(pair); - }; - auto cpu_op = [&](){ - return cpu.Kc(p, T) + cpu.dKcdTbyKc(p, T); - }; - - REQUIRE(eval(gpu_op) == Approx(cpu_op()).epsilon(errorTol)); - - } - } - - - -} - - - - - diff --git a/gpu_chemistry/unittest/tests/Test-utilities.C b/gpu_chemistry/unittest/tests/Test-utilities.C deleted file mode 100644 index 45f2af4..0000000 --- a/gpu_chemistry/unittest/tests/Test-utilities.C +++ /dev/null @@ -1,55 +0,0 @@ -#include "catch.H" -#include "mdspan.H" -#include "timer.H" -#include "test_utilities.H" - -TEST_CASE("make_mdspan"){ - - - std::vector v = {1,2,3,4,5,6}; - - auto s = make_mdspan(v, extents<2>(2,3)); - - CHECK(s(0,0) == 1); - - //REQUIRE_THROWS(make_mdspan(v, 
extents<2>(2,1))); - - CHECK(s.size() == 2*3); - -} - - -TEST_CASE("Test timer"){ - - SECTION("Constructors"){ - - REQUIRE_NOTHROW(Timer()); - - - - - - - auto op = []() { - Timer t; - t.start("ASD"); - - t.stop("ASD"); - t.print(); - return 0.0; - - }; - - - eval(op); - - - - - - } - - - -} - diff --git a/gpu_utils/common/cuda_host_dev.H b/gpu_utils/common/cuda_host_dev.H index 37a1c9c..bd72a55 100644 --- a/gpu_utils/common/cuda_host_dev.H +++ b/gpu_utils/common/cuda_host_dev.H @@ -1,18 +1,27 @@ #pragma once +#if defined(__HIP__) + #define __AMD_BACKEND__ +#endif #if defined(__NVCOMPILER) || defined(__NVCC__) - #define __NVIDIA_COMPILER__ + #define __NVIDIA_BACKEND__ +#endif + +#if defined(__AMD_BACKEND__) || defined(__NVIDIA_BACKEND__) + #define __USING_GPU__ #endif -#ifdef __NVIDIA_COMPILER__ + +#ifdef __USING_GPU__ #define DEVICE __device__ +#define HOST __host__ #else #define DEVICE +#define HOST #endif - -#ifdef __NVIDIA_COMPILER__ +#ifdef __USING_GPU__ #define CUDA_HOSTDEV __host__ __device__ #else #define CUDA_HOSTDEV diff --git a/gpu_utils/common/device_allocate.H b/gpu_utils/common/device_allocate.H index 532ff98..5c8d5a3 100644 --- a/gpu_utils/common/device_allocate.H +++ b/gpu_utils/common/device_allocate.H @@ -2,11 +2,25 @@ #include "error_handling.H" -template -static inline T* device_allocate(size_t length){ - - T* ptr; - const auto bytesize = length * sizeof(T); - gpuErrorCheck(cudaMalloc((void**)&ptr, bytesize)); - return ptr; -} \ No newline at end of file +#ifdef __USING_GPU__ + #ifdef __NVIDIA_BACKEND__ + template + static inline T* device_allocate(size_t length){ + + T* ptr; + const auto bytesize = length * sizeof(T); + gpuErrorCheck(cudaMalloc((void**)&ptr, bytesize)); + return ptr; + } + #else //AMD + template + static inline T* device_allocate(size_t length){ + + T* ptr; + const auto bytesize = length * sizeof(T); + gpuErrorCheck(hipMalloc((void**)&ptr, bytesize)); + gpuErrorCheck(hipMemset(ptr, 0, bytesize)); + return ptr; + } + #endif 
+#endif
\ No newline at end of file
diff --git a/gpu_utils/common/device_array.H b/gpu_utils/common/device_array.H
new file mode 100644
index 0000000..e467f76
--- /dev/null
+++ b/gpu_utils/common/device_array.H
@@ -0,0 +1,51 @@
+#pragma once
+
+#include <cstddef>
+
+#include "cuda_host_dev.H"
+
+///
+///@brief Fixed-size array of N elements of type T whose member functions
+///       are all marked CUDA_HOSTDEV.
+///
+///       No constructors are declared and the storage is public, so the
+///       struct is an aggregate and can be brace-initialised.
+///
+///@tparam T The element type.
+///@tparam N The number of elements.
+///
+template<class T, size_t N>
+struct device_array{
+
+    T data_[N];
+
+    using size_type       = size_t;
+    using value_type      = T;
+    using pointer         = value_type*;
+    using const_pointer   = const value_type*;
+    using iterator        = pointer;
+    using const_iterator  = const_pointer;
+    using reference       = value_type&;
+    using const_reference = const value_type&;
+
+    /// Number of elements; always N.
+    inline constexpr CUDA_HOSTDEV size_type size() const {return N;}
+
+    /// Pointer to the first element.
+    inline constexpr CUDA_HOSTDEV pointer data() {return data_;}
+    inline constexpr CUDA_HOSTDEV const_pointer data() const {return data_;}
+
+    // The const overloads return pointers to const: inside a const member
+    // function data_ decays to const T*, which does not convert to T*.
+    inline constexpr CUDA_HOSTDEV iterator begin() {return data_;}
+    inline constexpr CUDA_HOSTDEV const_iterator begin() const {return data_;}
+
+    inline constexpr CUDA_HOSTDEV iterator end() {return begin() + N;}
+    inline constexpr CUDA_HOSTDEV const_iterator end() const {return begin() + N;}
+
+    /// Unchecked element access.
+    inline constexpr CUDA_HOSTDEV reference operator[](size_t i) {return data_[i];}
+    inline constexpr CUDA_HOSTDEV const_reference operator[](size_t i) const {return data_[i];}
+
+};
+
diff --git a/gpu_utils/common/device_free.H b/gpu_utils/common/device_free.H
index 2df74b8..b841eff 100644
--- a/gpu_utils/common/device_free.H
+++ b/gpu_utils/common/device_free.H
@@ -2,7 +2,18 @@
 
 #include "error_handling.H"
 
-template
-static inline void device_free(T* ptr){
-    gpuErrorCheck(cudaFree(ptr));
-}
\ No newline at end of file
+#ifdef __USING_GPU__
+
+    #ifdef __NVIDIA_BACKEND__
+    template<class T>
+    static inline void device_free(T* ptr){
+        gpuErrorCheck(cudaFree(ptr));
+    }
+    #else //AMD
+    template<class T>
+    static inline void device_free(T* ptr){
+        gpuErrorCheck(hipFree(ptr));
+    }
+    #endif
+
+#endif
\ No newline at end of file
diff --git a/gpu_utils/common/device_vector.H b/gpu_utils/common/device_vector.H
new file mode 100644
index 0000000..510b538
--- /dev/null
+++
b/gpu_utils/common/device_vector.H
@@ -0,0 +1,149 @@
+#pragma once
+
+#include <cstddef>
+#include <iterator>
+#include <utility>
+#include <vector>
+
+#include "device_allocate.H"
+#include "device_free.H"
+#include "error_handling.H"
+#include "host_device_transfers.H"
+#include "cuda_host_dev.H"
+
+/*
+// Thrust-based alternative, currently disabled.
+#include <thrust/device_vector.h>
+#include <thrust/host_vector.h>
+template<class T>
+using device_vector = thrust::device_vector<T>;
+
+template<class T>
+static inline std::vector<T> toStdVector(const device_vector<T>& v){
+
+    thrust::host_vector<T> tmp(v.begin(), v.end());
+    return std::vector<T>(tmp.begin(), tmp.end());
+}
+*/
+
+
+///
+///@brief Owning handle to a buffer of T obtained from device_allocate and
+///       released with device_free.
+///
+///       The handle is move-only: a member-wise copy would leave two objects
+///       releasing the same buffer in their destructors.
+///
+///@tparam T The element type.
+///
+template<class T>
+struct device_vector{
+
+    using size_type       = size_t;
+    using value_type      = T;
+    using pointer         = value_type*;
+    using iterator        = pointer;
+    using reference       = value_type&;
+    using const_reference = const value_type&;
+
+    /// Creates an empty vector that owns no buffer.
+    device_vector() : size_(0), data_(nullptr)
+    {}
+
+    /// Allocates a buffer of size elements through device_allocate.
+    explicit device_vector(size_type size) : size_(size), data_(device_allocate<T>(size))
+    {}
+
+    /// Allocates host.size() elements and fills them from host.
+    explicit device_vector(const std::vector<T>& host)
+    : device_vector(host.begin(), host.end())
+    {}
+
+    ///
+    ///@brief Allocates one element per element of [h_begin, h_end[ and
+    ///       fills the buffer from that host range.
+    ///
+    template<class Iter>
+    device_vector(Iter h_begin, Iter h_end) :
+    device_vector(static_cast<size_type>(std::distance(h_begin, h_end)))
+    {
+        // host_to_device takes the address of *h_begin, which must not be
+        // done for an empty range.
+        if (size_ > 0)
+        {
+            host_to_device(h_begin, h_end, this->begin());
+        }
+    }
+
+    // Copying is disabled: both copies would call device_free on one buffer.
+    device_vector(const device_vector&)            = delete;
+    device_vector& operator=(const device_vector&) = delete;
+
+    /// Takes over the buffer of other and leaves other empty.
+    device_vector(device_vector&& other) noexcept :
+    size_(std::exchange(other.size_, size_type(0))),
+    data_(std::exchange(other.data_, nullptr))
+    {}
+
+    /// Releases the current buffer, then takes over the buffer of other.
+    device_vector& operator=(device_vector&& other) noexcept
+    {
+        if (this != &other)
+        {
+            release();
+            size_ = std::exchange(other.size_, size_type(0));
+            data_ = std::exchange(other.data_, nullptr);
+        }
+        return *this;
+    }
+
+    ~device_vector(){ release(); }
+
+    /// Number of elements in the buffer.
+    inline size_type size() const { return size_; }
+
+    // The accessors below are const-qualified but hand out pointers to
+    // non-const T: constness of the handle does not extend to the buffer.
+    pointer data() const { return data_; }
+
+    iterator begin() const {return data_;}
+    iterator end() const {return data_ + size_;}
+
+private:
+
+    /// Frees the buffer, if any, and resets the handle to the empty state.
+    void release()
+    {
+        if (data_ != nullptr)
+        {
+            device_free(data_);
+            data_ = nullptr;
+        }
+        size_ = 0;
+    }
+
+    size_type size_;
+    pointer   data_;
+
+};
+
+
+///
+///@brief Copies the contents of a device_vector into a new std::vector.
+///
+///@param v The vector to copy from.
+///@return A std::vector holding v.size() elements.
+///
+template<class T>
+static inline std::vector<T> toStdVector(const device_vector<T>& v){
+
+    std::vector<T> ret(v.size());
+    // device_to_host takes the address of *h_begin, which must not be done
+    // for an empty destination.
+    if (!ret.empty())
+    {
+        device_to_host(v.begin(), v.end(), ret.begin());
+    }
+    return ret;
+
+}
\ No newline at end of file
diff --git a/gpu_utils/common/double_intrinsics.H
b/gpu_utils/common/double_intrinsics.H index 5c6fc75..6e02c71 100644 --- a/gpu_utils/common/double_intrinsics.H +++ b/gpu_utils/common/double_intrinsics.H @@ -3,7 +3,7 @@ #include "cuda_host_dev.H" -#ifdef __NVIDIA_COMPILER__ +#ifdef __NVIDIA_BACKEND__ #define dADD(x,y) __dadd_rd(x,y) #define dDIV(x,y) __ddiv_rd(x,y) diff --git a/gpu_utils/common/error_handling.H b/gpu_utils/common/error_handling.H index c2bac0e..dda3877 100644 --- a/gpu_utils/common/error_handling.H +++ b/gpu_utils/common/error_handling.H @@ -4,28 +4,32 @@ #include - -#ifdef __NVIDIA_COMPILER__ -#include -#include "cuda_runtime.h" - -#define gpuErrorCheck(call) \ -do{ \ - cudaError_t gpuErr = call; \ - if(cudaSuccess != gpuErr){ \ - printf("GPU Error - %s:%d: '%s'\n", __FILE__, __LINE__, cudaGetErrorString(gpuErr)); \ - exit(1); \ - } \ -}while(0) - - - - +#ifdef __USING_GPU__ + #ifdef __NVIDIA_BACKEND__ + #include "cuda_runtime.h" + + #define gpuErrorCheck(call) \ + do{ \ + cudaError_t gpuErr = call; \ + if(cudaSuccess != gpuErr){ \ + printf("GPU Error - %s:%d: '%s'\n", __FILE__, __LINE__, cudaGetErrorString(gpuErr)); \ + exit(1); \ + } \ + }while(0) + + + #else //AMD + #include + #define gpuErrorCheck(call) \ + do{ \ + hipError_t gpuErr = call; \ + if(hipSuccess != gpuErr){ \ + printf("GPU Error - %s:%d: '%s'\n", __FILE__, __LINE__, hipGetErrorString(gpuErr)); \ + exit(1); \ + } \ + }while(0) + #endif #else - -#define gpuErrorCheck(val) - - - + #define gpuErrorCheck(val) #endif diff --git a/gpu_utils/common/for_each_index.H b/gpu_utils/common/for_each_index.H index 186b29f..75f6223 100644 --- a/gpu_utils/common/for_each_index.H +++ b/gpu_utils/common/for_each_index.H @@ -3,23 +3,22 @@ #include "gpu_constants.H" #include "error_handling.H" -namespace detail{ +template +__global__ void for_each_index_kernel(gLabel n, UnaryOperation op) +{ + int i = blockIdx.x*blockDim.x + threadIdx.x; + if (i < n) op(i); +} -template -__global__ void cuda_backend(gLabel n, UnaryOperation op) { - int i = blockIdx.x 
* blockDim.x + threadIdx.x; - if (i < n) { op(i); } -} -} /// ///@brief Evaluates op(i) for all i in range [0, n[ in parallel. /// -///@param op A unary opeartion taking a gLabel index as a parameter. +///@param op A unary operation taking a gLabel index as a parameter. ///@param n The maximum i index (non-inclusive). /// template @@ -27,9 +26,23 @@ static inline void for_each_index(UnaryOperation op, gLabel n){ gLabel NTHREADS = 32; gLabel NBLOCKS = (n + NTHREADS - 1) / NTHREADS; - detail::cuda_backend<<>>(n, op); - gpuErrorCheck(cudaGetLastError()); - gpuErrorCheck(cudaDeviceSynchronize()); + + + #ifdef __USING_GPU__ + #ifdef __NVIDIA_BACKEND__ + for_each_index_kernel<<>>(n, op); + gpuErrorCheck(cudaGetLastError()); + gpuErrorCheck(cudaDeviceSynchronize()); + #else + hipLaunchKernelGGL(for_each_index_kernel, dim3(NBLOCKS), + dim3(NTHREADS), 0, 0, n, op); + + gpuErrorCheck(hipGetLastError()); + gpuErrorCheck(hipDeviceSynchronize()); + #endif + #endif + //Should probably throw here, but this file won't compile anyway without a hip or cuda compiler + } \ No newline at end of file diff --git a/gpu_utils/common/gpu_constants.H b/gpu_utils/common/gpu_constants.H index 7e9ac4e..00323ba 100644 --- a/gpu_utils/common/gpu_constants.H +++ b/gpu_utils/common/gpu_constants.H @@ -23,13 +23,59 @@ using gScalar = double; // Standard temperature #define gpuTstd double(298.15) -#define gpuLabelMax gLabel(std::numeric_limits::max() / 10) +#ifdef __USING_GPU__ + + + #ifdef __NVIDIA_BACKEND__ + + #include + + //::cuda::std::numeric_limits::max(); + + #define gpuLabelMax
gLabel(std::numeric_limits::max() / 10) + + #define gpuVGreat double(std::numeric_limits::max() / 10.0) + + #define gpuVSmall double(std::numeric_limits::min()) + + #define gpuSmall double(std::numeric_limits::epsilon()) + + #define gpuScalarNaN double(std::numeric_limits::signaling_NaN()) + + #endif + +#else + + #define gpuLabelMax gLabel(std::numeric_limits::max() / 10) + + #define gpuVGreat double(std::numeric_limits::max() / 10.0) + + #define gpuVSmall double(std::numeric_limits::min()) + + #define gpuSmall double(std::numeric_limits::epsilon()) + + #define gpuScalarNaN double(std::numeric_limits::signaling_NaN()) + + +#endif -#define gpuVGreat double(std::numeric_limits::max() / 10.0) -#define gpuVSmall double(std::numeric_limits::min()) -#define gpuSmall double(std::numeric_limits::epsilon()) #define gpuGreat double(1.0 / gpuSmall) diff --git a/gpu_utils/common/host_device_transfers.H b/gpu_utils/common/host_device_transfers.H index 4a88c50..4b261d1 100644 --- a/gpu_utils/common/host_device_transfers.H +++ b/gpu_utils/common/host_device_transfers.H @@ -1,22 +1,78 @@ #pragma once #include "error_handling.H" -#include "thrust/copy.h" -template -static inline void host_to_device(InputIter h_begin, InputIter h_end, OutputIter d_begin){ - - auto length = std::distance(h_begin, h_end); - using T = typename std::iterator_traits::value_type; - using T2 = typename std::iterator_traits::value_type; - static_assert(std::is_same_v, "Mismatching types in host_to_device"); +#ifdef __USING_GPU__ + #ifdef __NVIDIA_BACKEND__ + template + static inline void host_to_device(InputIter h_begin, InputIter h_end, OutputIter d_begin){ - auto bytesize = length * sizeof(T); - gpuErrorCheck( - cudaMemcpy(d_begin, &(*h_begin), bytesize, cudaMemcpyHostToDevice)); - + auto length = std::distance(h_begin, h_end); + using T = typename std::iterator_traits::value_type; + using T2 = typename std::iterator_traits::value_type; -} + static_assert(std::is_same_v, "Mismatching types in 
host_to_device"); + auto bytesize = length * sizeof(T); + gpuErrorCheck( + cudaMemcpy(d_begin, &(*h_begin), bytesize, cudaMemcpyHostToDevice)); + + } + #else //AMD + #include "hip/hip_runtime.h" + template + static inline void host_to_device(InputIter h_begin, InputIter h_end, OutputIter d_begin){ + + auto length = std::distance(h_begin, h_end); + using T = typename std::iterator_traits::value_type; + using T2 = typename std::iterator_traits::value_type; + + static_assert(std::is_same_v, "Mismatching types in host_to_device"); + + auto bytesize = length * sizeof(T); + gpuErrorCheck( + hipMemcpy(d_begin, &(*h_begin), bytesize, hipMemcpyHostToDevice)); + + } + #endif +#endif + + + +#ifdef __USING_GPU__ + #ifdef __NVIDIA_BACKEND__ + template + static inline void device_to_host(InputIter d_begin, InputIter d_end, OutputIter h_begin){ + + auto length = std::distance(d_begin, d_end); + using T = typename std::iterator_traits::value_type; + using T2 = typename std::iterator_traits::value_type; + + static_assert(std::is_same_v, "Mismatching types in device_to_host"); + + auto bytesize = length * sizeof(T); + gpuErrorCheck( + cudaMemcpy(&(*h_begin), &(*d_begin), bytesize, cudaMemcpyDeviceToHost)); + + + } + #else //AMD + #include "hip/hip_runtime.h" + template + static inline void device_to_host(InputIter d_begin, InputIter d_end, OutputIter h_begin){ + + auto length = std::distance(d_begin, d_end); + using T = typename std::iterator_traits::value_type; + using T2 = typename std::iterator_traits::value_type; + + static_assert(std::is_same_v, "Mismatching types in device_to_host"); + + auto bytesize = length * sizeof(T); + gpuErrorCheck( + hipMemcpy(&(*h_begin), &(*d_begin), bytesize, hipMemcpyDeviceToHost)); + + } + #endif +#endif diff --git a/gpu_utils/common/host_device_vectors.H b/gpu_utils/common/host_device_vectors.H index cdbbdac..024d5f5 100644 --- a/gpu_utils/common/host_device_vectors.H +++ b/gpu_utils/common/host_device_vectors.H @@ -3,24 +3,20 @@ #include
#include "cuda_host_dev.H" -#ifdef __NVIDIA_COMPILER__ -#include "thrust/device_vector.h" -#include "thrust/host_vector.h" template -using host_vector = thrust::host_vector; +using host_vector = std::vector; -template -using device_vector = thrust::device_vector; +#ifdef __USING_GPU__ +#include "device_vector.H" +//template +//using device_vector = device_vector; #else -template -using host_vector = std::vector; - template using device_vector = std::vector; @@ -28,6 +24,22 @@ using device_vector = std::vector; #endif +template +static inline auto toDeviceVector(const Container& c) +{ + using value = typename Container::value_type; + return device_vector(c.begin(), c.end()); +} + +template +static inline std::vector toStdVector(const std::vector& v) +{ + return v; + +} + + +/* template static inline auto toDeviceVector(const Container& c) { @@ -43,3 +55,4 @@ static inline auto toStdVector(const Container& c) host_vector temp(c.begin(), c.end()); return std::vector(temp.begin(), temp.end()); } +*/ \ No newline at end of file diff --git a/gpu_utils/common/mdspan.H b/gpu_utils/common/mdspan.H index 5dc6b65..92b6093 100644 --- a/gpu_utils/common/mdspan.H +++ b/gpu_utils/common/mdspan.H @@ -4,13 +4,9 @@ #include "cuda_host_dev.H" #include "pointer_casts.hpp" -#ifdef __NVIDIA_COMPILER__ -#include "thrust/device_vector.h" //thrust::raw_pointer_cast - -#endif - -namespace stdex = std::experimental; +//namespace stdex = std::experimental; +namespace stdex = std; @@ -62,7 +58,7 @@ static CUDA_HOSTDEV std::size_t flat_size(Extents ext) { /// @param dims dimensions of the multi-dimensional span /// @return a multi-dimensional span template -static constexpr auto make_mdspan(Container& c, Extents ext) { +static constexpr HOST auto make_mdspan(Container& c, Extents ext) { using value_type = typename Container::value_type; //topaz::runtime_assert(flat_size(ext) == std::size(c), // "Dimension mismatch in make_mdspan"); @@ -76,7 +72,7 @@ static constexpr auto make_mdspan(Container& 
c, Extents ext) { /// @param dims dimensions of the multi-dimensional span /// @return a multi-dimensional span template -static constexpr auto make_mdspan(const Container& c, Extents ext) { +static constexpr HOST auto make_mdspan(const Container& c, Extents ext) { using value_type = const typename Container::value_type; //topaz::runtime_assert(flat_size(ext) == std::size(c), // "Dimension mismatch in make_mdspan"); diff --git a/gpu_utils/common/mdspan/mdspan_impl.hpp b/gpu_utils/common/mdspan/mdspan_impl.hpp index 87111b3..efaad13 100644 --- a/gpu_utils/common/mdspan/mdspan_impl.hpp +++ b/gpu_utils/common/mdspan/mdspan_impl.hpp @@ -1,267 +1,147 @@ -#pragma GCC system_header #ifndef _MDSPAN_SINGLE_HEADER_INCLUDE_GUARD_ #define _MDSPAN_SINGLE_HEADER_INCLUDE_GUARD_ -# include -# include -# include -#include // std::is_void -#include // size_t -#include -#include // integer_sequence -#include // size_t -#include // numeric_limits -#include -//#include -#include // integer_sequence -#include -#include // size_t -#include // integer_sequence -#include -#include -#include -#include -#include -#include -#include -//#include -//#include -#include // std::apply -#include // std::pair -#include -#include //BEGIN_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/mdarray -/* //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2019) Sandia Corporation +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) -// -// ************************************************************************ //@HEADER -*/ +#ifndef MDSPAN_IMPL_STANDARD_NAMESPACE + #define MDSPAN_IMPL_STANDARD_NAMESPACE std +#endif + +#ifndef MDSPAN_IMPL_PROPOSED_NAMESPACE + #define MDSPAN_IMPL_PROPOSED_NAMESPACE experimental +#endif + //BEGIN_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/mdspan -/* //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2019) Sandia Corporation +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. +//@HEADER + + +#ifndef MDSPAN_IMPL_STANDARD_NAMESPACE + #define MDSPAN_IMPL_STANDARD_NAMESPACE std +#endif + +#ifndef MDSPAN_IMPL_PROPOSED_NAMESPACE + #define MDSPAN_IMPL_PROPOSED_NAMESPACE experimental +#endif + +//BEGIN_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/mdspan/mdspan.hpp +//@HEADER +// ************************************************************************ // -// 3. 
Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. // -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// ************************************************************************ //@HEADER -*/ +#ifndef MDSPAN_HPP_ +#define MDSPAN_HPP_ + +#ifndef MDSPAN_IMPL_STANDARD_NAMESPACE + #define MDSPAN_IMPL_STANDARD_NAMESPACE Kokkos +#endif + +#ifndef MDSPAN_IMPL_PROPOSED_NAMESPACE + #define MDSPAN_IMPL_PROPOSED_NAMESPACE Experimental +#endif //BEGIN_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/default_accessor.hpp -/* //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2019) Sandia Corporation +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ //@HEADER -*/ - //BEGIN_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/macros.hpp -/* //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2019) Sandia Corporation +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. 
Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ //@HEADER -*/ - //BEGIN_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/config.hpp -/* //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2019) Sandia Corporation +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). 
// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) -// -// ************************************************************************ //@HEADER -*/ - #ifndef __has_include # define __has_include(x) 0 #endif #if __has_include() +# include #else +# include +# include #endif #ifdef _MSVC_LANG @@ -273,10 +153,17 @@ #define MDSPAN_CXX_STD_14 201402L #define MDSPAN_CXX_STD_17 201703L #define MDSPAN_CXX_STD_20 202002L +// Note GCC has not updated this in version 13 +#ifdef __clang__ +#define MDSPAN_CXX_STD_23 202302L +#else +#define MDSPAN_CXX_STD_23 202100L +#endif #define MDSPAN_HAS_CXX_14 (_MDSPAN_CPLUSPLUS >= MDSPAN_CXX_STD_14) #define MDSPAN_HAS_CXX_17 (_MDSPAN_CPLUSPLUS >= MDSPAN_CXX_STD_17) #define MDSPAN_HAS_CXX_20 (_MDSPAN_CPLUSPLUS >= MDSPAN_CXX_STD_20) +#define MDSPAN_HAS_CXX_23 (_MDSPAN_CPLUSPLUS >= MDSPAN_CXX_STD_23) static_assert(_MDSPAN_CPLUSPLUS >= MDSPAN_CXX_STD_14, "mdspan requires C++14 or later."); @@ -320,6 +207,12 @@ static_assert(_MDSPAN_CPLUSPLUS >= MDSPAN_CXX_STD_14, "mdspan requires C++14 or # endif #endif +#ifndef _MDSPAN_HAS_SYCL +# if defined(SYCL_LANGUAGE_VERSION) +# define _MDSPAN_HAS_SYCL SYCL_LANGUAGE_VERSION +# endif +#endif + #ifndef __has_cpp_attribute # define __has_cpp_attribute(x) 0 #endif @@ -353,11 +246,15 @@ static_assert(_MDSPAN_CPLUSPLUS >= MDSPAN_CXX_STD_14, "mdspan requires C++14 or # define _MDSPAN_NO_UNIQUE_ADDRESS #endif +// AMDs HIP compiler seems to have issues with concepts +// it pretends concepts exist, but doesn't ship +#ifndef __HIPCC__ #ifndef _MDSPAN_USE_CONCEPTS # if defined(__cpp_concepts) && __cpp_concepts >= 201507L # define _MDSPAN_USE_CONCEPTS 1 # endif #endif +#endif #ifndef _MDSPAN_USE_FOLD_EXPRESSIONS # if (defined(__cpp_fold_expressions) && __cpp_fold_expressions >= 201603L) \ @@ -426,23 +323,13 @@ static_assert(_MDSPAN_CPLUSPLUS >= MDSPAN_CXX_STD_14, "mdspan requires C++14 or #endif #ifndef _MDSPAN_USE_CLASS_TEMPLATE_ARGUMENT_DEDUCTION -// GCC 10's CTAD seems sufficiently broken to prevent its use. 
-# if (defined(_MDSPAN_COMPILER_CLANG) || !defined(__GNUC__) || __GNUC__ >= 11) \ - && ((defined(__cpp_deduction_guides) && __cpp_deduction_guides >= 201703) \ - || (!defined(__cpp_deduction_guides) && MDSPAN_HAS_CXX_17)) +# if (!defined(__NVCC__) || (__CUDACC_VER_MAJOR__ * 100 + __CUDACC_VER_MINOR__ * 10 >= 1170)) && \ + ((defined(__cpp_deduction_guides) && __cpp_deduction_guides >= 201703) || \ + (!defined(__cpp_deduction_guides) && MDSPAN_HAS_CXX_17)) # define _MDSPAN_USE_CLASS_TEMPLATE_ARGUMENT_DEDUCTION 1 # endif #endif -#ifndef _MDSPAN_USE_ALIAS_TEMPLATE_ARGUMENT_DEDUCTION -// GCC 10's CTAD seems sufficiently broken to prevent its use. -# if (defined(_MDSPAN_COMPILER_CLANG) || !defined(__GNUC__) || __GNUC__ >= 11) \ - && ((defined(__cpp_deduction_guides) && __cpp_deduction_guides >= 201907) \ - || (!defined(__cpp_deduction_guides) && MDSPAN_HAS_CXX_20)) -# define _MDSPAN_USE_ALIAS_TEMPLATE_ARGUMENT_DEDUCTION 1 -# endif -#endif - #ifndef _MDSPAN_USE_STANDARD_TRAIT_ALIASES # if (defined(__cpp_lib_transformation_trait_aliases) && __cpp_lib_transformation_trait_aliases >= 201304) \ || (!defined(__cpp_lib_transformation_trait_aliases) && MDSPAN_HAS_CXX_14) @@ -462,7 +349,7 @@ static_assert(_MDSPAN_CPLUSPLUS >= MDSPAN_CXX_STD_14, "mdspan requires C++14 or #endif #ifndef MDSPAN_CONDITIONAL_EXPLICIT -# if MDSPAN_HAS_CXX_20 && !defined(_MDSPAN_COMPILER_MSVC) +# if MDSPAN_HAS_CXX_20 # define MDSPAN_CONDITIONAL_EXPLICIT(COND) explicit(COND) # else # define MDSPAN_CONDITIONAL_EXPLICIT(COND) @@ -512,6 +399,12 @@ static_assert(_MDSPAN_CPLUSPLUS >= MDSPAN_CXX_STD_14, "mdspan requires C++14 or #endif //END_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/config.hpp +#include +#include +#include // std::is_void +#if defined(_MDSPAN_HAS_CUDA) || defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_SYCL) +#include "assert.h" +#endif #ifndef _MDSPAN_HOST_DEVICE # if defined(_MDSPAN_HAS_CUDA) || defined(_MDSPAN_HAS_HIP) @@ -533,6 +426,16 @@ 
static_assert(_MDSPAN_CPLUSPLUS >= MDSPAN_CXX_STD_14, "mdspan requires C++14 or # define MDSPAN_INLINE_FUNCTION inline _MDSPAN_HOST_DEVICE #endif +#ifndef MDSPAN_FUNCTION +# define MDSPAN_FUNCTION _MDSPAN_HOST_DEVICE +#endif + +#ifdef _MDSPAN_HAS_HIP +# define MDSPAN_DEDUCTION_GUIDE _MDSPAN_HOST_DEVICE +#else +# define MDSPAN_DEDUCTION_GUIDE +#endif + // In CUDA defaulted functions do not need host device markup #ifndef MDSPAN_INLINE_FUNCTION_DEFAULTED # define MDSPAN_INLINE_FUNCTION_DEFAULTED @@ -581,6 +484,72 @@ static_assert(_MDSPAN_CPLUSPLUS >= MDSPAN_CXX_STD_14, "mdspan requires C++14 or #define MDSPAN_PP_REMOVE_PARENS_IMPL(...) __VA_ARGS__ #define MDSPAN_PP_REMOVE_PARENS(...) MDSPAN_PP_REMOVE_PARENS_IMPL __VA_ARGS__ +#define MDSPAN_IMPL_STANDARD_NAMESPACE_STRING MDSPAN_PP_STRINGIFY(MDSPAN_IMPL_STANDARD_NAMESPACE) +#define MDSPAN_IMPL_PROPOSED_NAMESPACE_STRING MDSPAN_PP_STRINGIFY(MDSPAN_IMPL_STANDARD_NAMESPACE) "::" MDSPAN_PP_STRINGIFY(MDSPAN_IMPL_PROPOSED_NAMESPACE) + +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { +namespace detail { + +#if defined(_MDSPAN_HAS_CUDA) || defined(_MDSPAN_HAS_HIP) +MDSPAN_FUNCTION inline void default_precondition_violation_handler(const char* cond, const char* file, unsigned line) +{ + printf("%s:%u: precondition failure: `%s`\n", file, line, cond); + assert(0); +} +#elif defined(_MDSPAN_HAS_SYCL) +MDSPAN_FUNCTION inline void default_precondition_violation_handler(const char* cond, const char* file, unsigned line) +{ + sycl::ext::oneapi::experimental::printf("%s:%u: precondition failure: `%s`\n", file, line, cond); + assert(0); +} +#else +MDSPAN_FUNCTION inline void default_precondition_violation_handler(const char* cond, const char* file, unsigned line) +{ + std::fprintf(stderr, "%s:%u: precondition failure: `%s`\n", file, line, cond); + std::abort(); +} +#endif + +} // namespace detail +} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE + +#ifndef MDSPAN_IMPL_PRECONDITION_VIOLATION_HANDLER +#define 
MDSPAN_IMPL_PRECONDITION_VIOLATION_HANDLER(cond, file, line) \ + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::default_precondition_violation_handler(cond, file, line) +#endif + +#ifndef MDSPAN_IMPL_CHECK_PRECONDITION + #ifndef NDEBUG + #define MDSPAN_IMPL_CHECK_PRECONDITION 0 + #else + #define MDSPAN_IMPL_CHECK_PRECONDITION 1 + #endif +#endif + +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { +namespace detail { + +template +MDSPAN_FUNCTION constexpr void precondition(const char* cond, const char* file, unsigned line) +{ + if (not check) { return; } + // in case the macro doesn't use the arguments for custom macros + (void) cond; + (void) file; + (void) line; + MDSPAN_IMPL_PRECONDITION_VIOLATION_HANDLER(cond, file, line); +} + +} // namespace detail +} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE + +#define MDSPAN_IMPL_PRECONDITION(...) \ + do { \ + if (not (__VA_ARGS__)) { \ + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::precondition(#__VA_ARGS__, __FILE__, __LINE__); \ + } \ + } while (0) + // end Preprocessor helpers }}}1 //============================================================================== @@ -604,8 +573,7 @@ static_assert(_MDSPAN_CPLUSPLUS >= MDSPAN_CXX_STD_14, "mdspan requires C++14 or /**/ #endif - -#if defined(_MDSPAN_COMPILER_MSVC) +#if defined(_MDSPAN_COMPILER_MSVC) && (!defined(_MSVC_TRADITIONAL) || _MSVC_TRADITIONAL) # define MDSPAN_TEMPLATE_REQUIRES(...) \ MDSPAN_PP_CAT( \ MDSPAN_PP_CAT(MDSPAN_TEMPLATE_REQUIRES_, MDSPAN_PP_COUNT(__VA_ARGS__))\ @@ -759,7 +727,7 @@ struct __mdspan_enable_fold_comma { }; # define _MDSPAN_FOLD_COMMA(...) ((__VA_ARGS__), ...) #else -namespace std { +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { namespace __fold_compatibility_impl { @@ -1062,15 +1030,15 @@ struct __bools; } // __fold_compatibility_impl -} // end namespace std +} // end namespace MDSPAN_IMPL_STANDARD_NAMESPACE -# define _MDSPAN_FOLD_AND(...) std::__fold_compatibility_impl::__fold_right_and_impl((__VA_ARGS__)...) -# define _MDSPAN_FOLD_OR(...) 
std::__fold_compatibility_impl::__fold_right_or_impl((__VA_ARGS__)...) -# define _MDSPAN_FOLD_ASSIGN_LEFT(INIT, ...) std::__fold_compatibility_impl::__fold_left_assign_impl(INIT, (__VA_ARGS__)...) -# define _MDSPAN_FOLD_ASSIGN_RIGHT(PACK, ...) std::__fold_compatibility_impl::__fold_right_assign_impl((PACK)..., __VA_ARGS__) -# define _MDSPAN_FOLD_TIMES_RIGHT(PACK, ...) std::__fold_compatibility_impl::__fold_right_times_impl((PACK)..., __VA_ARGS__) -# define _MDSPAN_FOLD_PLUS_RIGHT(PACK, ...) std::__fold_compatibility_impl::__fold_right_plus_impl((PACK)..., __VA_ARGS__) -# define _MDSPAN_FOLD_COMMA(...) std::__fold_compatibility_impl::__fold_comma_impl((__VA_ARGS__)...) +# define _MDSPAN_FOLD_AND(...) MDSPAN_IMPL_STANDARD_NAMESPACE::__fold_compatibility_impl::__fold_right_and_impl((__VA_ARGS__)...) +# define _MDSPAN_FOLD_OR(...) MDSPAN_IMPL_STANDARD_NAMESPACE::__fold_compatibility_impl::__fold_right_or_impl((__VA_ARGS__)...) +# define _MDSPAN_FOLD_ASSIGN_LEFT(INIT, ...) MDSPAN_IMPL_STANDARD_NAMESPACE::__fold_compatibility_impl::__fold_left_assign_impl(INIT, (__VA_ARGS__)...) +# define _MDSPAN_FOLD_ASSIGN_RIGHT(PACK, ...) MDSPAN_IMPL_STANDARD_NAMESPACE::__fold_compatibility_impl::__fold_right_assign_impl((PACK)..., __VA_ARGS__) +# define _MDSPAN_FOLD_TIMES_RIGHT(PACK, ...) MDSPAN_IMPL_STANDARD_NAMESPACE::__fold_compatibility_impl::__fold_right_times_impl((PACK)..., __VA_ARGS__) +# define _MDSPAN_FOLD_PLUS_RIGHT(PACK, ...) MDSPAN_IMPL_STANDARD_NAMESPACE::__fold_compatibility_impl::__fold_right_plus_impl((PACK)..., __VA_ARGS__) +# define _MDSPAN_FOLD_COMMA(...) MDSPAN_IMPL_STANDARD_NAMESPACE::__fold_compatibility_impl::__fold_comma_impl((__VA_ARGS__)...) # define _MDSPAN_FOLD_AND_TEMPLATE(...) 
\ _MDSPAN_TRAIT(std::is_same, __fold_compatibility_impl::__bools<(__VA_ARGS__)..., true>, __fold_compatibility_impl::__bools) @@ -1112,9 +1080,9 @@ struct __bools; //============================================================================== //END_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/macros.hpp +#include // size_t -namespace std { -namespace experimental { +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { template struct default_accessor { @@ -1129,7 +1097,7 @@ struct default_accessor { MDSPAN_TEMPLATE_REQUIRES( class OtherElementType, /* requires */ ( - _MDSPAN_TRAIT(is_convertible, OtherElementType(*)[], element_type(*)[]) + _MDSPAN_TRAIT(std::is_convertible, OtherElementType(*)[], element_type(*)[]) ) ) MDSPAN_INLINE_FUNCTION @@ -1148,204 +1116,91 @@ struct default_accessor { }; -} // end namespace experimental -} // end namespace std +} // end namespace MDSPAN_IMPL_STANDARD_NAMESPACE //END_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/default_accessor.hpp //BEGIN_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/full_extent_t.hpp -/* //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2019) Sandia Corporation +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. 
Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// ************************************************************************ //@HEADER -*/ - -namespace std { -namespace experimental { +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { struct full_extent_t { explicit full_extent_t() = default; }; _MDSPAN_INLINE_VARIABLE constexpr auto full_extent = full_extent_t{ }; -} // end namespace experimental -} // namespace std +} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE //END_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/full_extent_t.hpp //BEGIN_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/mdspan.hpp -/* //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2019) Sandia Corporation +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// 3. 
Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ //@HEADER -*/ - //BEGIN_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/layout_right.hpp -/* //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2019) Sandia Corporation +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ //@HEADER -*/ - //BEGIN_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/trait_backports.hpp -/* //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2019) Sandia Corporation +// Kokkos v. 
4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ //@HEADER -*/ - #ifndef MDSPAN_INCLUDE_EXPERIMENTAL_BITS_TRAIT_BACKPORTS_HPP_ #define MDSPAN_INCLUDE_EXPERIMENTAL_BITS_TRAIT_BACKPORTS_HPP_ +#include +#include // integer_sequence //============================================================================== // {{{1 @@ -1353,7 +1208,7 @@ _MDSPAN_INLINE_VARIABLE constexpr auto full_extent = full_extent_t{ }; #ifdef _MDSPAN_NEEDS_TRAIT_VARIABLE_TEMPLATE_BACKPORTS #if _MDSPAN_USE_VARIABLE_TEMPLATES -namespace std { +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { #define _MDSPAN_BACKPORT_TRAIT(TRAIT) \ template _MDSPAN_INLINE_VARIABLE constexpr auto TRAIT##_v = TRAIT::value; @@ -1369,7 +1224,7 @@ _MDSPAN_BACKPORT_TRAIT(is_void) #undef _MDSPAN_BACKPORT_TRAIT -} // end namespace std +} // end namespace MDSPAN_IMPL_STANDARD_NAMESPACE #endif // _MDSPAN_USE_VARIABLE_TEMPLATES @@ -1383,16 +1238,16 @@ _MDSPAN_BACKPORT_TRAIT(is_void) #if !defined(_MDSPAN_USE_INTEGER_SEQUENCE) || !_MDSPAN_USE_INTEGER_SEQUENCE -namespace std { +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { template struct integer_sequence { - static constexpr std::size_t size() noexcept { return sizeof...(Vals); } + static constexpr size_t size() noexcept { return sizeof...(Vals); } using value_type = T; }; -template -using index_sequence = std::integer_sequence; +template +using index_sequence = std::integer_sequence; namespace __detail { @@ -1416,13 +1271,13 @@ struct __make_int_seq_impl< template using make_integer_sequence = typename __detail::__make_int_seq_impl>::type; -template +template using make_index_sequence = typename __detail::__make_int_seq_impl>::type; template using index_sequence_for = make_index_sequence; -} // end namespace std +} // end namespace MDSPAN_IMPL_STANDARD_NAMESPACE #endif @@ -1434,7 +1289,7 @@ using index_sequence_for = make_index_sequence; #if !defined(_MDSPAN_USE_STANDARD_TRAIT_ALIASES) 
|| !_MDSPAN_USE_STANDARD_TRAIT_ALIASES -namespace std { +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { #define _MDSPAN_BACKPORT_TRAIT_ALIAS(TRAIT) \ template using TRAIT##_t = typename TRAIT::type; @@ -1447,7 +1302,7 @@ using enable_if_t = typename enable_if<_B, _T>::type; #undef _MDSPAN_BACKPORT_TRAIT_ALIAS -} // end namespace std +} // end namespace MDSPAN_IMPL_STANDARD_NAMESPACE #endif @@ -1457,2428 +1312,1926 @@ using enable_if_t = typename enable_if<_B, _T>::type; #endif //MDSPAN_INCLUDE_EXPERIMENTAL_BITS_TRAIT_BACKPORTS_HPP_ //END_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/trait_backports.hpp //BEGIN_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/extents.hpp -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2019) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - - -//BEGIN_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/static_array.hpp -/* //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2019) Sandia Corporation +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. 
Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ //@HEADER -*/ - - //BEGIN_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/dynamic_extent.hpp -/* //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2019) Sandia Corporation +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). 
// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) -// -// ************************************************************************ //@HEADER -*/ - +#if defined(__cpp_lib_span) +#include +#endif -namespace std { -namespace experimental { +#include // size_t +#include // numeric_limits +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { +#if defined(__cpp_lib_span) +using std::dynamic_extent; +#else _MDSPAN_INLINE_VARIABLE constexpr auto dynamic_extent = std::numeric_limits::max(); - -} // end namespace experimental -} // namespace std +#endif +} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE //============================================================================================================== //END_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/dynamic_extent.hpp -//BEGIN_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/maybe_static_value.hpp -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2019) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ +//BEGIN_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/utility.hpp +#include +#include +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { +namespace detail { -#if !defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) -//BEGIN_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/no_unique_address.hpp -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2019) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: +// type alias used for rank-based tag dispatch // -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
+// this is used to enable alternatives to constexpr if when building for C++14 // -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ +template +using with_rank = std::integral_constant; +template +constexpr bool common_integral_compare(I1 x, I2 y) +{ + static_assert(std::is_integral::value and + std::is_integral::value, ""); + using I = std::common_type_t; + return static_cast(x) == static_cast(y); +} -namespace std { -namespace experimental { -namespace detail { +template +constexpr bool rankwise_equal(with_rank<0>, const T1&, const T2&, F) +{ + return true; +} +template +constexpr bool rankwise_equal(with_rank, const T1& x, const T2& y, F func) +{ + bool match = true; -//============================================================================== + for (std::size_t r = 0; r < N; r++) { + match = match && common_integral_compare(func(x, r), func(y, r)); + } -template -struct __no_unique_address_emulation { - using __stored_type = _T; - _T __v; - MDSPAN_FORCE_INLINE_FUNCTION constexpr _T const &__ref() const noexcept { - return __v; + return match; +} + +constexpr struct +{ + template + constexpr auto operator()(const T& x, I i) const + { + return x.extent(i); } - MDSPAN_FORCE_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 _T &__ref() noexcept { - return __v; +} extent; + +constexpr struct +{ + template + constexpr auto operator()(const T& x, I i) const + { + return x.stride(i); } -}; +} stride; -// Empty case -// This doesn't work if _T is final, of course, but we're not using anything -// like that currently. 
That kind of thing could be added pretty easily though -template -struct __no_unique_address_emulation< - _T, _Disambiguator, - enable_if_t<_MDSPAN_TRAIT(is_empty, _T) && - // If the type isn't trivially destructible, its destructor - // won't be called at the right time, so don't use this - // specialization - _MDSPAN_TRAIT(is_trivially_destructible, _T)>> : -#ifdef _MDSPAN_COMPILER_MSVC - // MSVC doesn't allow you to access public static member functions of a type - // when you *happen* to privately inherit from that type. - protected -#else - // But we still want this to be private if possible so that we don't accidentally - // access members of _T directly rather than calling __ref() first, which wouldn't - // work if _T happens to be stateful and thus we're using the unspecialized definition - // of __no_unique_address_emulation above. - private -#endif - _T { - using __stored_type = _T; - MDSPAN_FORCE_INLINE_FUNCTION constexpr _T const &__ref() const noexcept { - return *static_cast<_T const *>(this); - } - MDSPAN_FORCE_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 _T &__ref() noexcept { - return *static_cast<_T *>(this); - } - - MDSPAN_INLINE_FUNCTION_DEFAULTED - constexpr __no_unique_address_emulation() noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - constexpr __no_unique_address_emulation( - __no_unique_address_emulation const &) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - constexpr __no_unique_address_emulation( - __no_unique_address_emulation &&) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - _MDSPAN_CONSTEXPR_14_DEFAULTED __no_unique_address_emulation & - operator=(__no_unique_address_emulation const &) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - _MDSPAN_CONSTEXPR_14_DEFAULTED __no_unique_address_emulation & - operator=(__no_unique_address_emulation &&) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - ~__no_unique_address_emulation() noexcept = default; +} // namespace detail - // Explicitly make this not a 
reference so that the copy or move - // constructor still gets called. - MDSPAN_INLINE_FUNCTION - explicit constexpr __no_unique_address_emulation(_T const& __v) noexcept : _T(__v) {} - MDSPAN_INLINE_FUNCTION - explicit constexpr __no_unique_address_emulation(_T&& __v) noexcept : _T(::std::move(__v)) {} -}; +constexpr struct mdspan_non_standard_tag { +} mdspan_non_standard; -//============================================================================== +} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE +//END_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/utility.hpp -} // end namespace detail -} // end namespace experimental -} // end namespace std -//END_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/no_unique_address.hpp +#ifdef __cpp_lib_span +#include #endif +#include +#include -// This is only needed for the non-standard-layout version of partially -// static array. -// Needs to be after the includes above to work with the single header generator -#if !_MDSPAN_PRESERVE_STANDARD_LAYOUT -namespace std { -namespace experimental { - -//============================================================================== +#include +#include +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { namespace detail { -// static case -template -struct __maybe_static_value { - static constexpr _static_t __static_value = __v; - MDSPAN_FORCE_INLINE_FUNCTION constexpr _dynamic_t __value() const noexcept { - return static_cast<_dynamic_t>(__v); - } - template - MDSPAN_FORCE_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 - __mdspan_enable_fold_comma - __set_value(_U&& /*__rhs*/) noexcept { - // Should we assert that the value matches the static value here? - return {}; - } +// Function used to check compatibility of extents in converting constructor +// can't be a private member function for some reason. 
+template +static constexpr std::integral_constant __check_compatible_extents( + std::integral_constant, + std::integer_sequence, + std::integer_sequence) noexcept { + return {}; +} - //-------------------------------------------------------------------------- +// This helper prevents ICE's on MSVC. +template +struct __compare_extent_compatible : std::integral_constant +{}; - MDSPAN_INLINE_FUNCTION_DEFAULTED - constexpr __maybe_static_value() noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - constexpr __maybe_static_value(__maybe_static_value const&) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - constexpr __maybe_static_value(__maybe_static_value&&) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - _MDSPAN_CONSTEXPR_14_DEFAULTED __maybe_static_value& operator=(__maybe_static_value const&) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - _MDSPAN_CONSTEXPR_14_DEFAULTED __maybe_static_value& operator=(__maybe_static_value&&) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - ~__maybe_static_value() noexcept = default; +template +static constexpr std::integral_constant< + bool, _MDSPAN_FOLD_AND(__compare_extent_compatible::value)> +__check_compatible_extents( + std::integral_constant, + std::integer_sequence, + std::integer_sequence) noexcept { + return {}; +} - MDSPAN_INLINE_FUNCTION - constexpr explicit __maybe_static_value(_dynamic_t const&) noexcept { - // Should we assert that the value matches the static value here? 
- } +template +MDSPAN_INLINE_FUNCTION +static constexpr bool are_valid_indices() { + return + _MDSPAN_FOLD_AND(std::is_convertible::value) && + _MDSPAN_FOLD_AND(std::is_nothrow_constructible::value); +} - //-------------------------------------------------------------------------- +// ------------------------------------------------------------------ +// ------------ static_array ---------------------------------------- +// ------------------------------------------------------------------ -}; +// array like class which provides an array of static values with get +// function and operator []. -// dynamic case -template -struct __maybe_static_value<_dynamic_t, _static_t, __is_dynamic_sentinal, __is_dynamic_sentinal, - __array_entry_index> -#if !defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - : __no_unique_address_emulation<_T> -#endif -{ - static constexpr _static_t __static_value = __is_dynamic_sentinal; -#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - _MDSPAN_NO_UNIQUE_ADDRESS _dynamic_t __v = {}; - MDSPAN_FORCE_INLINE_FUNCTION constexpr _dynamic_t __value() const noexcept { - return __v; - } - MDSPAN_FORCE_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 _dynamic_t &__ref() noexcept { - return __v; - } - template - MDSPAN_FORCE_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 - __mdspan_enable_fold_comma - __set_value(_U&& __rhs) noexcept { - __v = (_U &&)rhs; - return {}; +// Implementation of Static Array with recursive implementation of get. 
+template struct static_array_impl; + +template +struct static_array_impl { + MDSPAN_INLINE_FUNCTION + constexpr static T get(size_t r) { + if (r == R) + return FirstExt; + else + return static_array_impl::get(r); } + template MDSPAN_INLINE_FUNCTION constexpr static T get() { +#if MDSPAN_HAS_CXX_17 + if constexpr (r == R) + return FirstExt; + else + return static_array_impl::template get(); #else - MDSPAN_FORCE_INLINE_FUNCTION constexpr _dynamic_t __value() const noexcept { - return this->__no_unique_address_emulation<_dynamic_t>::__ref(); + get(r); +#endif } - MDSPAN_FORCE_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 _dynamic_t &__ref() noexcept { - return this->__no_unique_address_emulation<_dynamic_t>::__ref(); +}; + +// End the recursion +template +struct static_array_impl { + MDSPAN_INLINE_FUNCTION + constexpr static T get(size_t) { return FirstExt; } + template MDSPAN_INLINE_FUNCTION constexpr static T get() { + return FirstExt; } - template - MDSPAN_FORCE_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 - __mdspan_enable_fold_comma - __set_value(_U&& __rhs) noexcept { - this->__no_unique_address_emulation<_dynamic_t>::__ref() = (_U &&)__rhs; - return {}; +}; + +// Don't start recursion if size 0 +template struct static_array_impl<0, T> { + MDSPAN_INLINE_FUNCTION + constexpr static T get(size_t) { return T(); } + template MDSPAN_INLINE_FUNCTION constexpr static T get() { + return T(); } -#endif }; -} // namespace detail +// Static array, provides get(), get(r) and operator[r] +template struct static_array: + public static_array_impl<0, T, Values...> { -//============================================================================== +public: + using value_type = T; -} // end namespace experimental -} // end namespace std + MDSPAN_INLINE_FUNCTION + constexpr static size_t size() { return sizeof...(Values); } +}; -#endif // !_MDSPAN_PRESERVE_STANDARD_LAYOUT -//END_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/maybe_static_value.hpp 
-//BEGIN_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/standard_layout_static_array.hpp -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2019) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ +// ------------------------------------------------------------------ +// ------------ index_sequence_scan --------------------------------- +// ------------------------------------------------------------------ -//BEGIN_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/compressed_pair.hpp -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2019) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ +// index_sequence_scan takes compile time values and provides get(r) +// and get() which return the sum of the first r-1 values. +// Recursive implementation for get +template struct index_sequence_scan_impl; +template +struct index_sequence_scan_impl { + MDSPAN_INLINE_FUNCTION + constexpr static size_t get(size_t r) { + if (r > R) + return FirstVal + index_sequence_scan_impl::get(r); + else + return 0; + } +}; -#if !defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) +template +struct index_sequence_scan_impl { +#if defined(__NVCC__) || defined(__NVCOMPILER) || \ + defined(_MDSPAN_COMPILER_INTEL) + // NVCC warns about pointless comparison with 0 for R==0 and r being const + // evaluatable and also 0. + MDSPAN_INLINE_FUNCTION + constexpr static size_t get(size_t r) { + return static_cast(R) > static_cast(r) ? FirstVal : 0; + } +#else + MDSPAN_INLINE_FUNCTION + constexpr static size_t get(size_t r) { return R > r ? 
FirstVal : 0; } #endif +}; +template <> struct index_sequence_scan_impl<0> { + MDSPAN_INLINE_FUNCTION + constexpr static size_t get(size_t) { return 0; } +}; -namespace std { -namespace experimental { -namespace detail { +// ------------------------------------------------------------------ +// ------------ possibly_empty_array ------------------------------- +// ------------------------------------------------------------------ -// For no unique address emulation, this is the case taken when neither are empty. -// For real `[[no_unique_address]]`, this case is always taken. -template struct __compressed_pair { - _MDSPAN_NO_UNIQUE_ADDRESS _T __t_val; - _MDSPAN_NO_UNIQUE_ADDRESS _U __u_val; - MDSPAN_FORCE_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 _T &__first() noexcept { return __t_val; } - MDSPAN_FORCE_INLINE_FUNCTION constexpr _T const &__first() const noexcept { - return __t_val; - } - MDSPAN_FORCE_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 _U &__second() noexcept { return __u_val; } - MDSPAN_FORCE_INLINE_FUNCTION constexpr _U const &__second() const noexcept { - return __u_val; - } +// array like class which provides get function and operator [], and +// has a specialization for the size 0 case. +// This is needed to make the maybe_static_array be truly empty, for +// all static values. 
- MDSPAN_INLINE_FUNCTION_DEFAULTED - constexpr __compressed_pair() noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - constexpr __compressed_pair(__compressed_pair const &) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - constexpr __compressed_pair(__compressed_pair &&) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - _MDSPAN_CONSTEXPR_14_DEFAULTED __compressed_pair & - operator=(__compressed_pair const &) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - _MDSPAN_CONSTEXPR_14_DEFAULTED __compressed_pair & - operator=(__compressed_pair &&) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - ~__compressed_pair() noexcept = default; - template - MDSPAN_INLINE_FUNCTION constexpr __compressed_pair(_TLike &&__t, _ULike &&__u) - : __t_val((_TLike &&) __t), __u_val((_ULike &&) __u) {} +template struct possibly_empty_array { + T vals[N]{}; + MDSPAN_INLINE_FUNCTION + constexpr T &operator[](size_t r) { return vals[r]; } + MDSPAN_INLINE_FUNCTION + constexpr const T &operator[](size_t r) const { return vals[r]; } }; -#if !defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) +template struct possibly_empty_array { + MDSPAN_INLINE_FUNCTION + constexpr T operator[](size_t) { return T(); } + MDSPAN_INLINE_FUNCTION + constexpr const T operator[](size_t) const { return T(); } +}; -// First empty. 
-template -struct __compressed_pair< - _T, _U, - enable_if_t<_MDSPAN_TRAIT(is_empty, _T) && !_MDSPAN_TRAIT(is_empty, _U)>> - : private _T { - _U __u_val; - MDSPAN_FORCE_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 _T &__first() noexcept { - return *static_cast<_T *>(this); - } - MDSPAN_FORCE_INLINE_FUNCTION constexpr _T const &__first() const noexcept { - return *static_cast<_T const *>(this); - } - MDSPAN_FORCE_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 _U &__second() noexcept { return __u_val; } - MDSPAN_FORCE_INLINE_FUNCTION constexpr _U const &__second() const noexcept { - return __u_val; - } +// ------------------------------------------------------------------ +// ------------ maybe_static_array ---------------------------------- +// ------------------------------------------------------------------ - MDSPAN_INLINE_FUNCTION_DEFAULTED - constexpr __compressed_pair() noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - constexpr __compressed_pair(__compressed_pair const &) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - constexpr __compressed_pair(__compressed_pair &&) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - _MDSPAN_CONSTEXPR_14_DEFAULTED __compressed_pair & - operator=(__compressed_pair const &) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - _MDSPAN_CONSTEXPR_14_DEFAULTED __compressed_pair & - operator=(__compressed_pair &&) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - ~__compressed_pair() noexcept = default; - template - MDSPAN_INLINE_FUNCTION constexpr __compressed_pair(_TLike &&__t, _ULike &&__u) - : _T((_TLike &&) __t), __u_val((_ULike &&) __u) {} -}; +// array like class which has a mix of static and runtime values but +// only stores the runtime values. +// The type of the static and the runtime values can be different. +// The position of a dynamic value is indicated through a tag value. +template +struct maybe_static_array { -// Second empty. 
-template -struct __compressed_pair< - _T, _U, - enable_if_t> - : private _U { - _T __t_val; - MDSPAN_FORCE_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 _T &__first() noexcept { return __t_val; } - MDSPAN_FORCE_INLINE_FUNCTION constexpr _T const &__first() const noexcept { - return __t_val; - } - MDSPAN_FORCE_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 _U &__second() noexcept { - return *static_cast<_U *>(this); - } - MDSPAN_FORCE_INLINE_FUNCTION constexpr _U const &__second() const noexcept { - return *static_cast<_U const *>(this); - } + static_assert(std::is_convertible::value, "maybe_static_array: TStatic must be convertible to TDynamic"); + static_assert(std::is_convertible::value, "maybe_static_array: TDynamic must be convertible to TStatic"); - MDSPAN_INLINE_FUNCTION_DEFAULTED - constexpr __compressed_pair() noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - constexpr __compressed_pair(__compressed_pair const &) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - constexpr __compressed_pair(__compressed_pair &&) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - _MDSPAN_CONSTEXPR_14_DEFAULTED __compressed_pair & - operator=(__compressed_pair const &) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - _MDSPAN_CONSTEXPR_14_DEFAULTED __compressed_pair & - operator=(__compressed_pair &&) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - ~__compressed_pair() noexcept = default; +private: + // Static values member + using static_vals_t = static_array; + constexpr static size_t m_size = sizeof...(Values); + constexpr static size_t m_size_dynamic = + _MDSPAN_FOLD_PLUS_RIGHT((Values == dyn_tag), 0); + + // Dynamic values member + _MDSPAN_NO_UNIQUE_ADDRESS possibly_empty_array + m_dyn_vals; + + // static mapping of indices to the position in the dynamic values array + using dyn_map_t = index_sequence_scan_impl<0, static_cast(Values == dyn_tag)...>; +public: - template - MDSPAN_INLINE_FUNCTION constexpr __compressed_pair(_TLike &&__t, _ULike &&__u) - : 
_U((_ULike &&) __u), __t_val((_TLike &&) __t) {} -}; + // two types for static and dynamic values + using value_type = TDynamic; + using static_value_type = TStatic; + // tag value indicating dynamic value + constexpr static static_value_type tag_value = dyn_tag; -// Both empty. -template -struct __compressed_pair< - _T, _U, - enable_if_t<_MDSPAN_TRAIT(is_empty, _T) && _MDSPAN_TRAIT(is_empty, _U)>> - // We need to use the __no_unique_address_emulation wrapper here to avoid - // base class ambiguities. -#ifdef _MDSPAN_COMPILER_MSVC -// MSVC doesn't allow you to access public static member functions of a type -// when you *happen* to privately inherit from that type. - : protected __no_unique_address_emulation<_T, 0>, - protected __no_unique_address_emulation<_U, 1> -#else - : private __no_unique_address_emulation<_T, 0>, - private __no_unique_address_emulation<_U, 1> -#endif -{ - using __first_base_t = __no_unique_address_emulation<_T, 0>; - using __second_base_t = __no_unique_address_emulation<_U, 1>; + constexpr maybe_static_array() = default; - MDSPAN_FORCE_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 _T &__first() noexcept { - return this->__first_base_t::__ref(); - } - MDSPAN_FORCE_INLINE_FUNCTION constexpr _T const &__first() const noexcept { - return this->__first_base_t::__ref(); - } - MDSPAN_FORCE_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 _U &__second() noexcept { - return this->__second_base_t::__ref(); - } - MDSPAN_FORCE_INLINE_FUNCTION constexpr _U const &__second() const noexcept { - return this->__second_base_t::__ref(); + // constructor for all static values + // TODO: add precondition check? + MDSPAN_TEMPLATE_REQUIRES(class... Vals, + /* requires */ ((m_size_dynamic == 0) && + (sizeof...(Vals) > 0))) + MDSPAN_INLINE_FUNCTION + constexpr maybe_static_array(Vals...) : m_dyn_vals{} {} + + // constructors from dynamic values only + MDSPAN_TEMPLATE_REQUIRES(class... 
DynVals, + /* requires */ (sizeof...(DynVals) == + m_size_dynamic && + m_size_dynamic > 0)) + MDSPAN_INLINE_FUNCTION + constexpr maybe_static_array(DynVals... vals) + : m_dyn_vals{static_cast(vals)...} {} + + + MDSPAN_TEMPLATE_REQUIRES(class T, size_t N, + /* requires */ (N == m_size_dynamic && N > 0)) + MDSPAN_INLINE_FUNCTION + constexpr maybe_static_array(const std::array &vals) { + for (size_t r = 0; r < N; r++) + m_dyn_vals[r] = static_cast(vals[r]); } - MDSPAN_INLINE_FUNCTION_DEFAULTED - constexpr __compressed_pair() noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - constexpr __compressed_pair(__compressed_pair const &) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - constexpr __compressed_pair(__compressed_pair &&) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - _MDSPAN_CONSTEXPR_14_DEFAULTED __compressed_pair & - operator=(__compressed_pair const &) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - _MDSPAN_CONSTEXPR_14_DEFAULTED __compressed_pair & - operator=(__compressed_pair &&) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - ~__compressed_pair() noexcept = default; - template - MDSPAN_INLINE_FUNCTION constexpr __compressed_pair(_TLike &&__t, _ULike &&__u) noexcept - : __first_base_t(_T((_TLike &&) __t)), - __second_base_t(_U((_ULike &&) __u)) - { } -}; + MDSPAN_TEMPLATE_REQUIRES(class T, size_t N, + /* requires */ (N == m_size_dynamic && N == 0)) + MDSPAN_INLINE_FUNCTION + constexpr maybe_static_array(const std::array &) : m_dyn_vals{} {} -#endif // !defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) +#ifdef __cpp_lib_span + MDSPAN_TEMPLATE_REQUIRES(class T, size_t N, + /* requires */ (N == m_size_dynamic && N > 0)) + MDSPAN_INLINE_FUNCTION + constexpr maybe_static_array(const std::span &vals) { + for (size_t r = 0; r < N; r++) + m_dyn_vals[r] = static_cast(vals[r]); + } -} // end namespace detail -} // end namespace experimental -} // end namespace std -//END_FILE_INCLUDE: 
/home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/compressed_pair.hpp + MDSPAN_TEMPLATE_REQUIRES(class T, size_t N, + /* requires */ (N == m_size_dynamic && N == 0)) + MDSPAN_INLINE_FUNCTION + constexpr maybe_static_array(const std::span &) : m_dyn_vals{} {} +#endif -#if !defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) + // constructors from all values + MDSPAN_TEMPLATE_REQUIRES(class... DynVals, + /* requires */ (sizeof...(DynVals) != + m_size_dynamic && + m_size_dynamic > 0)) + MDSPAN_INLINE_FUNCTION + constexpr maybe_static_array(DynVals... vals) + : m_dyn_vals{} { + static_assert((sizeof...(DynVals) == m_size), "Invalid number of values."); + TDynamic values[m_size]{static_cast(vals)...}; + for (size_t r = 0; r < m_size; r++) { + TStatic static_val = static_vals_t::get(r); + if (static_val == dyn_tag) { + m_dyn_vals[dyn_map_t::get(r)] = values[r]; + } +// Precondition check +#ifdef _MDSPAN_DEBUG + else { + assert(values[r] == static_cast(static_val)); + } +#endif + } + } + + MDSPAN_TEMPLATE_REQUIRES( + class T, size_t N, + /* requires */ (N != m_size_dynamic && m_size_dynamic > 0)) + MDSPAN_INLINE_FUNCTION + constexpr maybe_static_array(const std::array &vals) { + static_assert((N == m_size), "Invalid number of values."); +// Precondition check +#ifdef _MDSPAN_DEBUG + assert(N == m_size); +#endif + for (size_t r = 0; r < m_size; r++) { + TStatic static_val = static_vals_t::get(r); + if (static_val == dyn_tag) { + m_dyn_vals[dyn_map_t::get(r)] = static_cast(vals[r]); + } +// Precondition check +#ifdef _MDSPAN_DEBUG + else { + assert(static_cast(vals[r]) == + static_cast(static_val)); + } #endif + } + } #ifdef __cpp_lib_span + MDSPAN_TEMPLATE_REQUIRES( + class T, size_t N, + /* requires */ (N != m_size_dynamic && m_size_dynamic > 0)) + MDSPAN_INLINE_FUNCTION + constexpr maybe_static_array(const std::span &vals) { + static_assert((N == m_size) || (m_size == dynamic_extent)); +#ifdef _MDSPAN_DEBUG + assert(N == m_size); +#endif + for (size_t r = 0; 
r < m_size; r++) { + TStatic static_val = static_vals_t::get(r); + if (static_val == dyn_tag) { + m_dyn_vals[dyn_map_t::get(r)] = static_cast(vals[r]); + } +#ifdef _MDSPAN_DEBUG + else { + assert(static_cast(vals[r]) == + static_cast(static_val)); + } +#endif + } + } #endif -namespace std { -namespace experimental { -namespace detail { + // access functions + MDSPAN_INLINE_FUNCTION + constexpr static TStatic static_value(size_t r) { return static_vals_t::get(r); } -//============================================================================== + MDSPAN_INLINE_FUNCTION + constexpr TDynamic value(size_t r) const { + TStatic static_val = static_vals_t::get(r); + return static_val == dyn_tag ? m_dyn_vals[dyn_map_t::get(r)] + : static_cast(static_val); + } + MDSPAN_INLINE_FUNCTION + constexpr TDynamic operator[](size_t r) const { return value(r); } -_MDSPAN_INLINE_VARIABLE constexpr struct - __construct_psa_from_dynamic_exts_values_tag_t { -} __construct_psa_from_dynamic_exts_values_tag = {}; -_MDSPAN_INLINE_VARIABLE constexpr struct - __construct_psa_from_all_exts_values_tag_t { -} __construct_psa_from_all_exts_values_tag = {}; + // observers + MDSPAN_INLINE_FUNCTION + constexpr static size_t size() { return m_size; } + MDSPAN_INLINE_FUNCTION + constexpr static size_t size_dynamic() { return m_size_dynamic; } +}; -struct __construct_psa_from_all_exts_array_tag_t {}; -template struct __construct_psa_from_dynamic_exts_array_tag_t {}; +} // namespace detail +} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE -//============================================================================== +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { -template using __repeated_with_idxs = _T; +// ------------------------------------------------------------------ +// ------------ extents --------------------------------------------- +// ------------------------------------------------------------------ -//============================================================================== +// Class to 
describe the extents of a multi dimensional array. +// Used by mdspan, mdarray and layout mappings. +// See ISO C++ standard [mdspan.extents] -#if _MDSPAN_PRESERVE_STANDARD_LAYOUT +template class extents { +public: + // typedefs for integral types used + using index_type = IndexType; + using size_type = std::make_unsigned_t; + using rank_type = size_t; -/** - * PSA = "partially static array" - * - * @tparam _T - * @tparam _ValsSeq - * @tparam __sentinal - */ -template (dynamic_extent), - class _IdxsSeq = make_index_sequence<_ValsSeq::size()>> -struct __standard_layout_psa; - -//============================================================================== -// Static case -template -struct __standard_layout_psa< - _Tag, _T, _static_t, integer_sequence<_static_t, __value, __values_or_sentinals...>, - __sentinal, integer_sequence> -#if !defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - : private __no_unique_address_emulation<__standard_layout_psa< - _Tag, _T, _static_t, integer_sequence<_static_t, __values_or_sentinals...>, __sentinal, - integer_sequence>> -#endif -{ - - //-------------------------------------------------------------------------- + static_assert(std::is_integral::value && !std::is_same::value, + MDSPAN_IMPL_STANDARD_NAMESPACE_STRING "::extents::index_type must be a signed or unsigned integer type"); +private: + constexpr static rank_type m_rank = sizeof...(Extents); + constexpr static rank_type m_rank_dynamic = + _MDSPAN_FOLD_PLUS_RIGHT((Extents == dynamic_extent), /* + ... 
+ */ 0); - using __next_t = - __standard_layout_psa<_Tag, _T, _static_t, - integer_sequence<_static_t, __values_or_sentinals...>, - __sentinal, integer_sequence>; + // internal storage type using maybe_static_array + using vals_t = + detail::maybe_static_array; + _MDSPAN_NO_UNIQUE_ADDRESS vals_t m_vals; -#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - _MDSPAN_NO_UNIQUE_ADDRESS __next_t __next_; -#else - using __base_t = __no_unique_address_emulation<__next_t>; -#endif +public: + // [mdspan.extents.obs], observers of multidimensional index space + MDSPAN_INLINE_FUNCTION + constexpr static rank_type rank() noexcept { return m_rank; } + MDSPAN_INLINE_FUNCTION + constexpr static rank_type rank_dynamic() noexcept { return m_rank_dynamic; } - MDSPAN_FORCE_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 __next_t &__next() noexcept { -#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - return __next_; -#else - return this->__base_t::__ref(); -#endif - } - MDSPAN_FORCE_INLINE_FUNCTION constexpr __next_t const &__next() const noexcept { -#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - return __next_; -#else - return this->__base_t::__ref(); -#endif + MDSPAN_INLINE_FUNCTION + constexpr index_type extent(rank_type r) const noexcept { return m_vals.value(r); } + MDSPAN_INLINE_FUNCTION + constexpr static size_t static_extent(rank_type r) noexcept { + return vals_t::static_value(r); } - static constexpr auto __size = sizeof...(_Idxs) + 1; - static constexpr auto __size_dynamic = __next_t::__size_dynamic; - - //-------------------------------------------------------------------------- - - MDSPAN_INLINE_FUNCTION_DEFAULTED - constexpr __standard_layout_psa() noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - constexpr __standard_layout_psa(__standard_layout_psa const &) noexcept = - default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - constexpr __standard_layout_psa(__standard_layout_psa &&) noexcept = default; + // [mdspan.extents.cons], constructors 
MDSPAN_INLINE_FUNCTION_DEFAULTED - _MDSPAN_CONSTEXPR_14_DEFAULTED __standard_layout_psa & - operator=(__standard_layout_psa const &) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - _MDSPAN_CONSTEXPR_14_DEFAULTED __standard_layout_psa & - operator=(__standard_layout_psa &&) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - ~__standard_layout_psa() noexcept = default; - - //-------------------------------------------------------------------------- + constexpr extents() noexcept = default; + // Construction from just dynamic or all values. + // Precondition check is deferred to maybe_static_array constructor + MDSPAN_TEMPLATE_REQUIRES( + class... OtherIndexTypes, + /* requires */ ( + _MDSPAN_FOLD_AND(_MDSPAN_TRAIT(std::is_convertible, OtherIndexTypes, + index_type) /* && ... */) && + _MDSPAN_FOLD_AND(_MDSPAN_TRAIT(std::is_nothrow_constructible, index_type, + OtherIndexTypes) /* && ... */) && + (sizeof...(OtherIndexTypes) == m_rank || + sizeof...(OtherIndexTypes) == m_rank_dynamic))) MDSPAN_INLINE_FUNCTION - constexpr __standard_layout_psa( - __construct_psa_from_all_exts_values_tag_t, _T const & /*__val*/, - __repeated_with_idxs<_Idxs, _T> const &... __vals) noexcept -#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - : __next_{ -#else - : __base_t(__base_t{__next_t( -#endif - __construct_psa_from_all_exts_values_tag, __vals... -#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - } -#else - )}) -#endif - { } - - template - MDSPAN_INLINE_FUNCTION constexpr __standard_layout_psa( - __construct_psa_from_dynamic_exts_values_tag_t, - _Ts const &... __vals) noexcept -#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - : __next_{ -#else - : __base_t(__base_t{__next_t( -#endif - __construct_psa_from_dynamic_exts_values_tag, __vals... 
-#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - } -#else - )}) -#endif - { } - - template - MDSPAN_INLINE_FUNCTION constexpr explicit __standard_layout_psa( - array<_U, _N> const &__vals) noexcept -#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - : __next_{ -#else - : __base_t(__base_t{__next_t( -#endif - __vals -#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - } -#else - )}) -#endif - { } - - template - MDSPAN_INLINE_FUNCTION constexpr explicit __standard_layout_psa( - __construct_psa_from_all_exts_array_tag_t const & __tag, - array<_U, _NStatic> const &__vals) noexcept -#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - : __next_{ -#else - : __base_t(__base_t{__next_t( -#endif - __tag, __vals -#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - } -#else - )}) -#endif - { } + constexpr explicit extents(OtherIndexTypes... dynvals) noexcept + : m_vals(static_cast(dynvals)...) {} - template - MDSPAN_INLINE_FUNCTION constexpr explicit __standard_layout_psa( - __construct_psa_from_dynamic_exts_array_tag_t<_IDynamic> __tag, - array<_U, _NDynamic> const &__vals) noexcept -#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - : __next_{ -#else - : __base_t(__base_t{__next_t( -#endif - __tag, __vals -#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - } -#else - )}) -#endif - { } + MDSPAN_TEMPLATE_REQUIRES( + class OtherIndexType, size_t N, + /* requires */ + ( + _MDSPAN_TRAIT(std::is_convertible, const OtherIndexType&, index_type) && + _MDSPAN_TRAIT(std::is_nothrow_constructible, index_type, + const OtherIndexType&) && + (N == m_rank || N == m_rank_dynamic))) + MDSPAN_INLINE_FUNCTION + MDSPAN_CONDITIONAL_EXPLICIT(N != m_rank_dynamic) + constexpr extents(const std::array &exts) noexcept + : m_vals(std::move(exts)) {} #ifdef __cpp_lib_span - template - MDSPAN_INLINE_FUNCTION constexpr explicit __standard_layout_psa( - span<_U, _N> const &__vals) noexcept -#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - : __next_{ -#else - : 
__base_t(__base_t{__next_t( -#endif - __vals -#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - } -#else - )}) -#endif - { } - - template - MDSPAN_INLINE_FUNCTION constexpr explicit __standard_layout_psa( - __construct_psa_from_all_exts_array_tag_t const & __tag, - span<_U, _NStatic> const &__vals) noexcept -#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - : __next_{ -#else - : __base_t(__base_t{__next_t( -#endif - __tag, __vals -#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - } -#else - )}) -#endif - { } - - template - MDSPAN_INLINE_FUNCTION constexpr explicit __standard_layout_psa( - __construct_psa_from_dynamic_exts_array_tag_t<_IDynamic> __tag, - span<_U, _NDynamic> const &__vals) noexcept -#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - : __next_{ -#else - : __base_t(__base_t{__next_t( -#endif - __tag, __vals -#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - } -#else - )}) -#endif - { } -#endif - - template - MDSPAN_INLINE_FUNCTION constexpr __standard_layout_psa( - __standard_layout_psa<_UTag, _U, _static_U, _UValsSeq, __u_sentinal, _IdxsSeq> const - &__rhs) noexcept -#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - : __next_{ -#else - : __base_t(__base_t{__next_t( -#endif - __rhs.__next() -#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - } -#else - )}) + MDSPAN_TEMPLATE_REQUIRES( + class OtherIndexType, size_t N, + /* requires */ + (_MDSPAN_TRAIT(std::is_convertible, const OtherIndexType&, index_type) && + _MDSPAN_TRAIT(std::is_nothrow_constructible, index_type, const OtherIndexType&) && + (N == m_rank || N == m_rank_dynamic))) + MDSPAN_INLINE_FUNCTION + MDSPAN_CONDITIONAL_EXPLICIT(N != m_rank_dynamic) + constexpr extents(const std::span &exts) noexcept + : m_vals(std::move(exts)) {} #endif - { } - - //-------------------------------------------------------------------------- - // See https://godbolt.org/z/_KSDNX for a summary-by-example of why this is - // necessary. 
We're using inheritance here instead of an alias template - // because we have to deduce __values_or_sentinals in several places, and - // alias templates don't permit that in this context. - MDSPAN_FORCE_INLINE_FUNCTION - constexpr __standard_layout_psa const &__enable_psa_conversion() const - noexcept { - return *this; +private: + // Function to construct extents storage from other extents. + // With C++ 17 the first two variants could be collapsed using if constexpr + // in which case you don't need all the requires clauses. + // in C++ 14 mode that doesn't work due to infinite recursion + MDSPAN_TEMPLATE_REQUIRES( + size_t DynCount, size_t R, class OtherExtents, class... DynamicValues, + /* requires */ ((R < m_rank) && (static_extent(R) == dynamic_extent))) + MDSPAN_INLINE_FUNCTION + constexpr + vals_t __construct_vals_from_extents(std::integral_constant, + std::integral_constant, + const OtherExtents &exts, + DynamicValues... dynamic_values) noexcept { + return __construct_vals_from_extents( + std::integral_constant(), + std::integral_constant(), exts, dynamic_values..., + exts.extent(R)); } - template = 0> - MDSPAN_FORCE_INLINE_FUNCTION constexpr _T __get_n() const noexcept { - return __next().template __get_n<_I>(); - } - template = 1> - MDSPAN_FORCE_INLINE_FUNCTION constexpr _T __get_n() const noexcept { - return __value; - } - template = 0> - MDSPAN_FORCE_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 void - __set_n(_T const &__rhs) noexcept { - __next().__set_value(__rhs); - } - template = 1> - MDSPAN_FORCE_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 void - __set_n(_T const &) noexcept { - // Don't assert here because that would break constexpr. 
This better - // not change anything, though - } - template = __sentinal> - MDSPAN_FORCE_INLINE_FUNCTION static constexpr _static_t __get_static_n() noexcept { - return __value; - } - template __default = __sentinal> - MDSPAN_FORCE_INLINE_FUNCTION static constexpr _static_t __get_static_n() noexcept { - return __next_t::template __get_static_n<_I, __default>(); - } - MDSPAN_FORCE_INLINE_FUNCTION constexpr _T __get(size_t __n) const noexcept { - return __value * (_T(_Idx == __n)) + __next().__get(__n); + MDSPAN_TEMPLATE_REQUIRES( + size_t DynCount, size_t R, class OtherExtents, class... DynamicValues, + /* requires */ ((R < m_rank) && (static_extent(R) != dynamic_extent))) + MDSPAN_INLINE_FUNCTION + constexpr + vals_t __construct_vals_from_extents(std::integral_constant, + std::integral_constant, + const OtherExtents &exts, + DynamicValues... dynamic_values) noexcept { + return __construct_vals_from_extents( + std::integral_constant(), + std::integral_constant(), exts, dynamic_values...); } - //-------------------------------------------------------------------------- -}; - -//============================================================================== - -// Dynamic case, __next_t may or may not be empty -template -struct __standard_layout_psa< - _Tag, _T, _static_t, integer_sequence<_static_t, __sentinal, __values_or_sentinals...>, - __sentinal, integer_sequence> { - //-------------------------------------------------------------------------- - - using __next_t = - __standard_layout_psa<_Tag, _T, _static_t, - integer_sequence<_static_t, __values_or_sentinals...>, - __sentinal, integer_sequence>; - - using __value_pair_t = __compressed_pair<_T, __next_t>; - __value_pair_t __value_pair; - MDSPAN_FORCE_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 __next_t &__next() noexcept { - return __value_pair.__second(); - } - MDSPAN_FORCE_INLINE_FUNCTION constexpr __next_t const &__next() const noexcept { - return __value_pair.__second(); + MDSPAN_TEMPLATE_REQUIRES( + size_t DynCount, 
size_t R, class OtherExtents, class... DynamicValues, + /* requires */ ((R == m_rank) && (DynCount == m_rank_dynamic))) + MDSPAN_INLINE_FUNCTION + constexpr + vals_t __construct_vals_from_extents(std::integral_constant, + std::integral_constant, + const OtherExtents &, + DynamicValues... dynamic_values) noexcept { + return vals_t{static_cast(dynamic_values)...}; } - static constexpr auto __size = sizeof...(_Idxs) + 1; - static constexpr auto __size_dynamic = 1 + __next_t::__size_dynamic; - - //-------------------------------------------------------------------------- - - MDSPAN_INLINE_FUNCTION_DEFAULTED - constexpr __standard_layout_psa() noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - constexpr __standard_layout_psa(__standard_layout_psa const &) noexcept = - default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - constexpr __standard_layout_psa(__standard_layout_psa &&) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - _MDSPAN_CONSTEXPR_14_DEFAULTED __standard_layout_psa & - operator=(__standard_layout_psa const &) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - _MDSPAN_CONSTEXPR_14_DEFAULTED __standard_layout_psa & - operator=(__standard_layout_psa &&) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - ~__standard_layout_psa() noexcept = default; - - //-------------------------------------------------------------------------- +public: + // Converting constructor from other extents specializations + MDSPAN_TEMPLATE_REQUIRES( + class OtherIndexType, size_t... OtherExtents, + /* requires */ + ( + /* multi-stage check to protect from invalid pack expansion when sizes + don't match? 
*/ + decltype(detail::__check_compatible_extents( + // using: sizeof...(Extents) == sizeof...(OtherExtents) as the second argument fails with MSVC+NVCC with some obscure expansion error + // MSVC: 19.38.33133 NVCC: 12.0 + std::integral_constant::rank() == extents::rank()>{}, + std::integer_sequence{}, + std::integer_sequence{}))::value + ) + ) MDSPAN_INLINE_FUNCTION - constexpr __standard_layout_psa( - __construct_psa_from_all_exts_values_tag_t, _T const &__val, - __repeated_with_idxs<_Idxs, _T> const &... __vals) noexcept - : __value_pair(__val, - __next_t(__construct_psa_from_all_exts_values_tag, - __vals...)) {} - - template - MDSPAN_INLINE_FUNCTION constexpr __standard_layout_psa( - __construct_psa_from_dynamic_exts_values_tag_t, _T const &__val, - _Ts const &... __vals) noexcept - : __value_pair(__val, - __next_t(__construct_psa_from_dynamic_exts_values_tag, - __vals...)) {} - - template - MDSPAN_INLINE_FUNCTION constexpr explicit __standard_layout_psa( - array<_U, _N> const &__vals) noexcept - : __value_pair(::std::get<_Idx>(__vals), __vals) {} - - template - MDSPAN_INLINE_FUNCTION constexpr explicit __standard_layout_psa( - __construct_psa_from_all_exts_array_tag_t __tag, - array<_U, _NStatic> const &__vals) noexcept - : __value_pair( - ::std::get<_Idx>(__vals), - __next_t(__tag, - __vals)) {} - - template - MDSPAN_INLINE_FUNCTION constexpr explicit __standard_layout_psa( - __construct_psa_from_dynamic_exts_array_tag_t<_IDynamic>, - array<_U, _NDynamic> const &__vals) noexcept - : __value_pair( - ::std::get<_IDynamic>(__vals), - __next_t(__construct_psa_from_dynamic_exts_array_tag_t<_IDynamic + 1>{}, - __vals)) {} - -#ifdef __cpp_lib_span - template - MDSPAN_INLINE_FUNCTION constexpr explicit __standard_layout_psa( - span<_U, _N> const &__vals) noexcept - : __value_pair(__vals[_Idx], __vals) {} - - template - MDSPAN_INLINE_FUNCTION constexpr explicit __standard_layout_psa( - __construct_psa_from_all_exts_array_tag_t __tag, - span<_U, _NStatic> const &__vals) 
noexcept - : __value_pair( - __vals[_Idx], - __next_t(__tag, - __vals)) {} - - template - MDSPAN_INLINE_FUNCTION constexpr explicit __standard_layout_psa( - __construct_psa_from_dynamic_exts_array_tag_t<_IDynamic>, - span<_U, _NDynamic> const &__vals) noexcept - : __value_pair( - __vals[_IDynamic], - __next_t(__construct_psa_from_dynamic_exts_array_tag_t<_IDynamic + 1>{}, - __vals)) {} -#endif - - template - MDSPAN_INLINE_FUNCTION constexpr __standard_layout_psa( - __standard_layout_psa<_UTag, _U, _static_U, _UValsSeq, __u_sentinal, _UIdxsSeq> const - &__rhs) noexcept - : __value_pair(__rhs.template __get_n<_Idx>(), __rhs.__next()) {} - - //-------------------------------------------------------------------------- - - // See comment in the previous partial specialization for why this is - // necessary. Or just trust me that it's messy. - MDSPAN_FORCE_INLINE_FUNCTION - constexpr __standard_layout_psa const &__enable_psa_conversion() const - noexcept { - return *this; + MDSPAN_CONDITIONAL_EXPLICIT((((Extents != dynamic_extent) && + (OtherExtents == dynamic_extent)) || + ...) 
|| + (std::numeric_limits::max() < + std::numeric_limits::max())) + constexpr extents(const extents &other) noexcept + : m_vals(__construct_vals_from_extents( + std::integral_constant(), + std::integral_constant(), other)) {} + + // Comparison operator + template + MDSPAN_INLINE_FUNCTION friend constexpr bool + operator==(const extents &lhs, + const extents &rhs) noexcept { + return + rank() == extents::rank() && + detail::rankwise_equal(detail::with_rank{}, rhs, lhs, detail::extent); } - template = 0> - MDSPAN_FORCE_INLINE_FUNCTION constexpr _T __get_n() const noexcept { - return __next().template __get_n<_I>(); - } - template = 1> - MDSPAN_FORCE_INLINE_FUNCTION constexpr _T __get_n() const noexcept { - return __value_pair.__first(); - } - template = 0> - MDSPAN_FORCE_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 void - __set_n(_T const &__rhs) noexcept { - __next().__set_value(__rhs); - } - template = 1> - MDSPAN_FORCE_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 void - __set_n(_T const &__rhs) noexcept { - __value_pair.__first() = __rhs; - } - template __default = __sentinal> - MDSPAN_FORCE_INLINE_FUNCTION static constexpr _static_t __get_static_n() noexcept { - return __default; - } - template __default = __sentinal> - MDSPAN_FORCE_INLINE_FUNCTION static constexpr _static_t __get_static_n() noexcept { - return __next_t::template __get_static_n<_I, __default>(); - } - MDSPAN_FORCE_INLINE_FUNCTION constexpr _T __get(size_t __n) const noexcept { - return __value_pair.__first() * (_T(_Idx == __n)) + __next().__get(__n); +#if !(MDSPAN_HAS_CXX_20) + template + MDSPAN_INLINE_FUNCTION friend constexpr bool + operator!=(extents const &lhs, + extents const &rhs) noexcept { + return !(lhs == rhs); } - - //-------------------------------------------------------------------------- -}; - -// empty/terminal case -template -struct __standard_layout_psa<_Tag, _T, _static_t, integer_sequence<_static_t>, __sentinal, - integer_sequence> { - 
//-------------------------------------------------------------------------- - - static constexpr auto __size = 0; - static constexpr auto __size_dynamic = 0; - - //-------------------------------------------------------------------------- - - MDSPAN_INLINE_FUNCTION_DEFAULTED - constexpr __standard_layout_psa() noexcept -#if defined(__clang__) || defined(_MDSPAN_DEFAULTED_CONSTRUCTORS_INHERITANCE_WORKAROUND) - // As far as I can tell, there appears to be a bug in clang that's causing - // this to be non-constexpr when it's defaulted. - { } -#else - = default; #endif - MDSPAN_INLINE_FUNCTION_DEFAULTED - constexpr __standard_layout_psa(__standard_layout_psa const &) noexcept = - default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - constexpr __standard_layout_psa(__standard_layout_psa &&) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - _MDSPAN_CONSTEXPR_14_DEFAULTED __standard_layout_psa & - operator=(__standard_layout_psa const &) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - _MDSPAN_CONSTEXPR_14_DEFAULTED __standard_layout_psa & - operator=(__standard_layout_psa &&) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - ~__standard_layout_psa() noexcept = default; - - MDSPAN_INLINE_FUNCTION - constexpr __standard_layout_psa( - __construct_psa_from_all_exts_values_tag_t) noexcept {} +}; - template - MDSPAN_INLINE_FUNCTION constexpr __standard_layout_psa( - __construct_psa_from_dynamic_exts_values_tag_t) noexcept {} +// Recursive helper classes to implement dextents alias for extents +namespace detail { - template - MDSPAN_INLINE_FUNCTION constexpr explicit __standard_layout_psa( - array<_U, _N> const &) noexcept {} +template > +struct __make_dextents; - template - MDSPAN_INLINE_FUNCTION constexpr explicit __standard_layout_psa( - __construct_psa_from_all_exts_array_tag_t, - array<_U, _NStatic> const &) noexcept {} +template +struct __make_dextents< + IndexType, Rank, ::MDSPAN_IMPL_STANDARD_NAMESPACE::extents> +{ + using type = typename __make_dextents< 
+ IndexType, Rank - 1, + ::MDSPAN_IMPL_STANDARD_NAMESPACE::extents>::type; +}; - template - MDSPAN_INLINE_FUNCTION constexpr explicit __standard_layout_psa( - __construct_psa_from_dynamic_exts_array_tag_t<_IDynamic>, - array<_U, _NDynamic> const &) noexcept {} +template +struct __make_dextents< + IndexType, 0, ::MDSPAN_IMPL_STANDARD_NAMESPACE::extents> +{ + using type = ::MDSPAN_IMPL_STANDARD_NAMESPACE::extents; +}; -#ifdef __cpp_lib_span - template - MDSPAN_INLINE_FUNCTION constexpr explicit __standard_layout_psa( - span<_U, _N> const &) noexcept {} +} // end namespace detail - template - MDSPAN_INLINE_FUNCTION constexpr explicit __standard_layout_psa( - __construct_psa_from_all_exts_array_tag_t, - span<_U, _NStatic> const &) noexcept {} +// [mdspan.extents.dextents], alias template +template +using dextents = typename detail::__make_dextents::type; - template - MDSPAN_INLINE_FUNCTION constexpr explicit __standard_layout_psa( - __construct_psa_from_dynamic_exts_array_tag_t<_IDynamic>, - span<_U, _NDynamic> const &) noexcept {} +// Deduction guide for extents +#if defined(_MDSPAN_USE_CLASS_TEMPLATE_ARGUMENT_DEDUCTION) +template +extents(IndexTypes...) + -> extents; #endif - template - MDSPAN_INLINE_FUNCTION constexpr __standard_layout_psa( - __standard_layout_psa<_UTag, _U, _static_U, _UValsSeq, __u_sentinal, _UIdxsSeq> const&) noexcept {} +// Helper type traits for identifying a class as extents. +namespace detail { - // See comment in the previous partial specialization for why this is - // necessary. Or just trust me that it's messy. 
- MDSPAN_FORCE_INLINE_FUNCTION - constexpr __standard_layout_psa const &__enable_psa_conversion() const - noexcept { - return *this; - } +template struct __is_extents : ::std::false_type {}; - MDSPAN_FORCE_INLINE_FUNCTION constexpr _T __get(size_t /*n*/) const noexcept { - return 0; - } -}; +template +struct __is_extents<::MDSPAN_IMPL_STANDARD_NAMESPACE::extents> + : ::std::true_type {}; -// Same thing, but with a disambiguator so that same-base issues doesn't cause -// a loss of standard-layout-ness. -template -struct __partially_static_sizes_tagged - : __standard_layout_psa< - _Tag, T, _static_t, - integer_sequence<_static_t, __values_or_sentinals...>> { - using __tag_t = _Tag; - using __psa_impl_t = __standard_layout_psa< - _Tag, T, _static_t, integer_sequence<_static_t, __values_or_sentinals...>>; - using __psa_impl_t::__psa_impl_t; -#ifdef _MDSPAN_DEFAULTED_CONSTRUCTORS_INHERITANCE_WORKAROUND - MDSPAN_INLINE_FUNCTION -#endif - constexpr __partially_static_sizes_tagged() noexcept -#ifdef _MDSPAN_DEFAULTED_CONSTRUCTORS_INHERITANCE_WORKAROUND - : __psa_impl_t() { } +template +#if MDSPAN_HAS_CXX_17 +inline #else - = default; +static #endif - MDSPAN_INLINE_FUNCTION_DEFAULTED - constexpr __partially_static_sizes_tagged( - __partially_static_sizes_tagged const &) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - constexpr __partially_static_sizes_tagged( - __partially_static_sizes_tagged &&) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - _MDSPAN_CONSTEXPR_14_DEFAULTED __partially_static_sizes_tagged & - operator=(__partially_static_sizes_tagged const &) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - _MDSPAN_CONSTEXPR_14_DEFAULTED __partially_static_sizes_tagged & - operator=(__partially_static_sizes_tagged &&) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED - ~__partially_static_sizes_tagged() noexcept = default; +constexpr bool __is_extents_v = __is_extents::value; - template - MDSPAN_FORCE_INLINE_FUNCTION constexpr explicit 
__partially_static_sizes_tagged( - __partially_static_sizes_tagged<_UTag, T, _static_t, __values_or_sentinals...> const& __vals - ) noexcept : __psa_impl_t(__vals.__enable_psa_conversion()) { } -}; +template +MDSPAN_INLINE_FUNCTION +constexpr void +check_lower_bound(InputIndexType user_index, + ExtentsIndexType /* current_extent */, + std::true_type /* is_signed */) +{ + (void) user_index; // prevent unused variable warning +#ifdef _MDSPAN_DEBUG + assert(static_cast(user_index) >= 0); +#endif +} -struct __no_tag {}; -template -struct __partially_static_sizes - : __partially_static_sizes_tagged<__no_tag, T, _static_t, __values_or_sentinals...> { -private: - using __base_t = - __partially_static_sizes_tagged<__no_tag, T, _static_t, __values_or_sentinals...>; - template - MDSPAN_FORCE_INLINE_FUNCTION constexpr __partially_static_sizes( - __partially_static_sizes_tagged<_UTag, T, _static_t, __values_or_sentinals...>&& __vals - ) noexcept : __base_t(::std::move(__vals)) { } -public: - using __base_t::__base_t; +template +MDSPAN_INLINE_FUNCTION +constexpr void +check_lower_bound(InputIndexType /* user_index */, + ExtentsIndexType /* current_extent */, + std::false_type /* is_signed */) +{} -#ifdef _MDSPAN_DEFAULTED_CONSTRUCTORS_INHERITANCE_WORKAROUND - MDSPAN_INLINE_FUNCTION - constexpr __partially_static_sizes() noexcept : __base_t() { } +template +MDSPAN_INLINE_FUNCTION +constexpr void +check_upper_bound(InputIndexType user_index, + ExtentsIndexType current_extent) +{ + (void) user_index; // prevent unused variable warnings + (void) current_extent; +#ifdef _MDSPAN_DEBUG + assert(static_cast(user_index) < current_extent); #endif - template - MDSPAN_FORCE_INLINE_FUNCTION constexpr __partially_static_sizes_tagged< - _UTag, T, _static_t, __values_or_sentinals...> - __with_tag() const noexcept { - return __partially_static_sizes_tagged<_UTag, T, _static_t, __values_or_sentinals...>(*this); - } -}; +} -#endif // _MDSPAN_PRESERVE_STATIC_LAYOUT +// Returning true to use AND 
fold instead of comma +// CPP14 mode doesn't like the use of void expressions +// with the way the _MDSPAN_FOLD_AND is set up +template +MDSPAN_INLINE_FUNCTION +constexpr bool +check_one_index(InputIndex user_index, + ExtentsIndexType current_extent) +{ + check_lower_bound(user_index, current_extent, + std::integral_constant::value>{}); + check_upper_bound(user_index, current_extent); + return true; +} -} // end namespace detail -} // end namespace experimental -} // end namespace std -//END_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/standard_layout_static_array.hpp -//BEGIN_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/type_list.hpp -/* +template +MDSPAN_INLINE_FUNCTION +constexpr void +check_all_indices_helper(std::index_sequence, + const extents& exts, + Indices... indices) +{ + // Suppress warning about statement has no effect + (void) _MDSPAN_FOLD_AND( + (check_one_index(indices, exts.extent(RankIndices))) + ); +} + +template +MDSPAN_INLINE_FUNCTION +constexpr void +check_all_indices(const extents& exts, + Indices... indices) +{ + check_all_indices_helper(std::make_index_sequence(), + exts, indices...); +} + +} // namespace detail +} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE +//END_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/extents.hpp +//BEGIN_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/layout_stride.hpp //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2019) Sandia Corporation +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. 
// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. +//@HEADER + +//BEGIN_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/compressed_pair.hpp +//@HEADER +// ************************************************************************ // -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. // -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// ************************************************************************ //@HEADER -*/ - -namespace std { -namespace experimental { +#if !defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) +//BEGIN_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/no_unique_address.hpp +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER -//============================================================================== +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { namespace detail { -template struct __type_list { static constexpr auto __size = sizeof...(_Ts); }; - -// Implementation of type_list at() that's heavily optimized for small typelists -template struct __type_at; -template > struct __type_at_large_impl; - -template -struct __type_at_entry { }; - -template -struct __type_at_assign_op_ignore_rest { - template - __type_at_assign_op_ignore_rest<_Result> operator=(_T&&); - using type = _Result; -}; - -struct __type_at_assign_op_impl { - template - __type_at_assign_op_impl operator=(__type_at_entry<_I, _Idx, _T>&&); - template - __type_at_assign_op_ignore_rest<_T> operator=(__type_at_entry<_I, _I, _T>&&); -}; - -template -struct __type_at_large_impl<_I, __type_list<_Ts...>, integer_sequence> - : decltype( - _MDSPAN_FOLD_ASSIGN_LEFT(__type_at_assign_op_impl{}, /* = ... = */ __type_at_entry<_I, _Idxs, _Ts>{}) - ) -{ }; - -template -struct __type_at<_I, __type_list<_Ts...>> - : __type_at_large_impl<_I, __type_list<_Ts...>> -{ }; +//============================================================================== -template -struct __type_at<0, __type_list<_T0, _Ts...>> { - using type = _T0; +template +struct __no_unique_address_emulation { + using __stored_type = _T; + _T __v; + MDSPAN_FORCE_INLINE_FUNCTION constexpr _T const &__ref() const noexcept { + return __v; + } + MDSPAN_FORCE_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 _T &__ref() noexcept { + return __v; + } }; -template -struct __type_at<1, __type_list<_T0, _T1, _Ts...>> { - using type = _T1; -}; +// Empty case +// This doesn't work if _T is final, of course, but we're not using anything +// like that currently. 
That kind of thing could be added pretty easily though +template +struct __no_unique_address_emulation< + _T, _Disambiguator, + std::enable_if_t<_MDSPAN_TRAIT(std::is_empty, _T) && + // If the type isn't trivially destructible, its destructor + // won't be called at the right time, so don't use this + // specialization + _MDSPAN_TRAIT(std::is_trivially_destructible, _T)>> : +#ifdef _MDSPAN_COMPILER_MSVC + // MSVC doesn't allow you to access public static member functions of a type + // when you *happen* to privately inherit from that type. + protected +#else + // But we still want this to be private if possible so that we don't accidentally + // access members of _T directly rather than calling __ref() first, which wouldn't + // work if _T happens to be stateful and thus we're using the unspecialized definition + // of __no_unique_address_emulation above. + private +#endif + _T { + using __stored_type = _T; + MDSPAN_FORCE_INLINE_FUNCTION constexpr _T const &__ref() const noexcept { + return *static_cast<_T const *>(this); + } + MDSPAN_FORCE_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 _T &__ref() noexcept { + return *static_cast<_T *>(this); + } -template -struct __type_at<2, __type_list<_T0, _T1, _T2, _Ts...>> { - using type = _T2; -}; + MDSPAN_INLINE_FUNCTION_DEFAULTED + constexpr __no_unique_address_emulation() noexcept = default; + MDSPAN_INLINE_FUNCTION_DEFAULTED + constexpr __no_unique_address_emulation( + __no_unique_address_emulation const &) noexcept = default; + MDSPAN_INLINE_FUNCTION_DEFAULTED + constexpr __no_unique_address_emulation( + __no_unique_address_emulation &&) noexcept = default; + MDSPAN_INLINE_FUNCTION_DEFAULTED + _MDSPAN_CONSTEXPR_14_DEFAULTED __no_unique_address_emulation & + operator=(__no_unique_address_emulation const &) noexcept = default; + MDSPAN_INLINE_FUNCTION_DEFAULTED + _MDSPAN_CONSTEXPR_14_DEFAULTED __no_unique_address_emulation & + operator=(__no_unique_address_emulation &&) noexcept = default; + MDSPAN_INLINE_FUNCTION_DEFAULTED + 
~__no_unique_address_emulation() noexcept = default; -template -struct __type_at<3, __type_list<_T0, _T1, _T2, _T3, _Ts...>> { - using type = _T3; + // Explicitly make this not a reference so that the copy or move + // constructor still gets called. + MDSPAN_INLINE_FUNCTION + explicit constexpr __no_unique_address_emulation(_T const& __v) noexcept : _T(__v) {} + MDSPAN_INLINE_FUNCTION + explicit constexpr __no_unique_address_emulation(_T&& __v) noexcept : _T(::std::move(__v)) {} }; - -} // namespace detail - //============================================================================== -} // end namespace experimental -} // end namespace std - -//END_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/type_list.hpp - -// Needs to be after the includes above to work with the single header generator -#if !_MDSPAN_PRESERVE_STANDARD_LAYOUT +} // end namespace detail +} // end namespace MDSPAN_IMPL_STANDARD_NAMESPACE +//END_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/no_unique_address.hpp +#endif -namespace std { -namespace experimental { +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { namespace detail { -//============================================================================== - -template struct __mask_element {}; - -template -struct __mask_sequence_assign_op { - template <_T _V> - __mask_sequence_assign_op<_T, _Result..., _V> - operator=(__mask_element<_T, _V, true>&&); - template <_T _V> - __mask_sequence_assign_op<_T, _Result...> - operator=(__mask_element<_T, _V, false>&&); - using __result = integer_sequence<_T, _Result...>; -}; - -template -struct __mask_sequence; - -template -struct __mask_sequence, integer_sequence> -{ - using type = typename decltype( - _MDSPAN_FOLD_ASSIGN_LEFT( - __mask_sequence_assign_op<_T>{}, /* = ... 
= */ __mask_element<_T, _Vals, _Masks>{} - ) - )::__result; -}; - -//============================================================================== - -template -class __partially_static_array_impl; - -template < - class _T, class _static_t, - _static_t... __values_or_sentinals, _static_t __sentinal, - size_t... _Idxs, - size_t... _IdxsDynamic, - size_t... _IdxsDynamicIdxs -> -class __partially_static_array_impl< - _T, - _static_t, - integer_sequence<_static_t, __values_or_sentinals...>, - __sentinal, - integer_sequence, - integer_sequence, - integer_sequence -> - : private __maybe_static_value<_T, _static_t, __values_or_sentinals, __sentinal, - _Idxs>... { -private: - - template - using __base_n = typename __type_at<_N, - __type_list<__maybe_static_value<_T, _static_t, __values_or_sentinals, __sentinal, _Idxs>...> - >::type; - -public: - - static constexpr auto __size = sizeof...(_Idxs); - static constexpr auto __size_dynamic = - _MDSPAN_FOLD_PLUS_RIGHT(static_cast((__values_or_sentinals == __sentinal)), /* + ... + */ 0); - - //-------------------------------------------------------------------------- +// For no unique address emulation, this is the case taken when neither are empty. +// For real `[[no_unique_address]]`, this case is always taken. 
+template struct __compressed_pair { + _MDSPAN_NO_UNIQUE_ADDRESS _T1 __t1_val{}; + _MDSPAN_NO_UNIQUE_ADDRESS _T2 __t2_val{}; + MDSPAN_FORCE_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 _T1 &__first() noexcept { return __t1_val; } + MDSPAN_FORCE_INLINE_FUNCTION constexpr _T1 const &__first() const noexcept { + return __t1_val; + } + MDSPAN_FORCE_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 _T2 &__second() noexcept { return __t2_val; } + MDSPAN_FORCE_INLINE_FUNCTION constexpr _T2 const &__second() const noexcept { + return __t2_val; + } MDSPAN_INLINE_FUNCTION_DEFAULTED - constexpr __partially_static_array_impl() = default; + constexpr __compressed_pair() = default; MDSPAN_INLINE_FUNCTION_DEFAULTED - constexpr __partially_static_array_impl( - __partially_static_array_impl const &) noexcept = default; + constexpr __compressed_pair(__compressed_pair const &) = default; MDSPAN_INLINE_FUNCTION_DEFAULTED - constexpr __partially_static_array_impl( - __partially_static_array_impl &&) noexcept = default; + constexpr __compressed_pair(__compressed_pair &&) = default; MDSPAN_INLINE_FUNCTION_DEFAULTED - _MDSPAN_CONSTEXPR_14_DEFAULTED __partially_static_array_impl & - operator=(__partially_static_array_impl const &) noexcept = default; + _MDSPAN_CONSTEXPR_14_DEFAULTED __compressed_pair & + operator=(__compressed_pair const &) = default; MDSPAN_INLINE_FUNCTION_DEFAULTED - _MDSPAN_CONSTEXPR_14_DEFAULTED __partially_static_array_impl & - operator=(__partially_static_array_impl &&) noexcept = default; + _MDSPAN_CONSTEXPR_14_DEFAULTED __compressed_pair & + operator=(__compressed_pair &&) = default; MDSPAN_INLINE_FUNCTION_DEFAULTED - ~__partially_static_array_impl() noexcept = default; - - MDSPAN_INLINE_FUNCTION - constexpr __partially_static_array_impl( - __construct_psa_from_all_exts_values_tag_t, - __repeated_with_idxs<_Idxs, _T> const &... __vals) noexcept - : __base_n<_Idxs>(__base_n<_Idxs>{{__vals}})... 
{} - - MDSPAN_INLINE_FUNCTION - constexpr __partially_static_array_impl( - __construct_psa_from_dynamic_exts_values_tag_t, - __repeated_with_idxs<_IdxsDynamicIdxs, _T> const &... __vals) noexcept - : __base_n<_IdxsDynamic>(__base_n<_IdxsDynamic>{{__vals}})... {} - - MDSPAN_INLINE_FUNCTION constexpr explicit __partially_static_array_impl( - array<_T, sizeof...(_Idxs)> const& __vals) noexcept - : __partially_static_array_impl( - __construct_psa_from_all_exts_values_tag, - ::std::get<_Idxs>(__vals)...) {} - - // clang-format off - MDSPAN_FUNCTION_REQUIRES( - (MDSPAN_INLINE_FUNCTION constexpr explicit), - __partially_static_array_impl, - (array<_T, __size_dynamic> const &__vals), noexcept, - /* requires */ - (sizeof...(_Idxs) != __size_dynamic) - ): __partially_static_array_impl( - __construct_psa_from_dynamic_exts_values_tag, - ::std::get<_IdxsDynamicIdxs>(__vals)...) {} - // clang-format on - - template - MDSPAN_INLINE_FUNCTION constexpr __partially_static_array_impl( - __partially_static_array_impl< - _U, _static_u, _UValsSeq, __u_sentinal, _UIdxsSeq, - _UIdxsDynamicSeq, _UIdxsDynamicIdxsSeq> const &__rhs) noexcept - : __partially_static_array_impl( - __construct_psa_from_all_exts_values_tag, - __rhs.template __get_n<_Idxs>()...) {} - - //-------------------------------------------------------------------------- - - // See comment in the previous partial specialization for why this is - // necessary. Or just trust me that it's messy. 
- MDSPAN_FORCE_INLINE_FUNCTION - constexpr __partially_static_array_impl const &__enable_psa_conversion() const - noexcept { - return *this; - } + ~__compressed_pair() = default; + template + MDSPAN_INLINE_FUNCTION constexpr __compressed_pair(_T1Like &&__t1, _T2Like &&__t2) + //: __t1_val((_T1Like &&) __t1), __t2_val((_T2Like &&) __t2) {} + : __t1_val(static_cast<_T1Like &&>(__t1)), __t2_val(static_cast<_T2Like &&>(__t2)) {} +}; - template - MDSPAN_FORCE_INLINE_FUNCTION constexpr _T __get_n() const noexcept { - return static_cast<__base_n<_I> const*>(this)->__value(); - } +#if !defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - template - MDSPAN_FORCE_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 void __set_n(_U&& __rhs) noexcept { - static_cast<__base_n<_I>*>(this)->__set_value((_U&&)__rhs); +// First empty. +template +struct __compressed_pair< + _T1, _T2, + std::enable_if_t<_MDSPAN_TRAIT(std::is_empty, _T1) && !_MDSPAN_TRAIT(std::is_empty, _T2)>> + : private _T1 { + _T2 __t2_val{}; + MDSPAN_FORCE_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 _T1 &__first() noexcept { + return *static_cast<_T1 *>(this); } - - template - MDSPAN_FORCE_INLINE_FUNCTION static constexpr _static_t - __get_static_n() noexcept { - return __base_n<_I>::__static_value == __sentinal ? - __default : __base_n<_I>::__static_value; + MDSPAN_FORCE_INLINE_FUNCTION constexpr _T1 const &__first() const noexcept { + return *static_cast<_T1 const *>(this); } - - MDSPAN_FORCE_INLINE_FUNCTION constexpr _T - __get(size_t __n) const noexcept { - return _MDSPAN_FOLD_PLUS_RIGHT( - (_T(_Idxs == __n) * __get_n<_Idxs>()), /* + ... 
+ */ _T(0) - ); + MDSPAN_FORCE_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 _T2 &__second() noexcept { return __t2_val; } + MDSPAN_FORCE_INLINE_FUNCTION constexpr _T2 const &__second() const noexcept { + return __t2_val; } + MDSPAN_INLINE_FUNCTION_DEFAULTED + constexpr __compressed_pair() = default; + MDSPAN_INLINE_FUNCTION_DEFAULTED + constexpr __compressed_pair(__compressed_pair const &) = default; + MDSPAN_INLINE_FUNCTION_DEFAULTED + constexpr __compressed_pair(__compressed_pair &&) = default; + MDSPAN_INLINE_FUNCTION_DEFAULTED + _MDSPAN_CONSTEXPR_14_DEFAULTED __compressed_pair & + operator=(__compressed_pair const &) = default; + MDSPAN_INLINE_FUNCTION_DEFAULTED + _MDSPAN_CONSTEXPR_14_DEFAULTED __compressed_pair & + operator=(__compressed_pair &&) = default; + MDSPAN_INLINE_FUNCTION_DEFAULTED + ~__compressed_pair() = default; + template + MDSPAN_INLINE_FUNCTION constexpr __compressed_pair(_T1Like &&__t1, _T2Like &&__t2) + : _T1(static_cast<_T1Like &&> (__t1)), __t2_val(static_cast<_T2Like &&>(__t2)) {} }; -//============================================================================== +// Second empty. 
+template +struct __compressed_pair< + _T1, _T2, + std::enable_if_t> + : private _T2 { + _T1 __t1_val{}; + MDSPAN_FORCE_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 _T1 &__first() noexcept { return __t1_val; } + MDSPAN_FORCE_INLINE_FUNCTION constexpr _T1 const &__first() const noexcept { + return __t1_val; + } + MDSPAN_FORCE_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 _T2 &__second() noexcept { + return *static_cast<_T2 *>(this); + } + MDSPAN_FORCE_INLINE_FUNCTION constexpr _T2 const &__second() const noexcept { + return *static_cast<_T2 const *>(this); + } -template > -struct __partially_static_array_impl_maker; + MDSPAN_INLINE_FUNCTION_DEFAULTED + constexpr __compressed_pair() = default; + MDSPAN_INLINE_FUNCTION_DEFAULTED + constexpr __compressed_pair(__compressed_pair const &) = default; + MDSPAN_INLINE_FUNCTION_DEFAULTED + constexpr __compressed_pair(__compressed_pair &&) = default; + MDSPAN_INLINE_FUNCTION_DEFAULTED + _MDSPAN_CONSTEXPR_14_DEFAULTED __compressed_pair & + operator=(__compressed_pair const &) = default; + MDSPAN_INLINE_FUNCTION_DEFAULTED + _MDSPAN_CONSTEXPR_14_DEFAULTED __compressed_pair & + operator=(__compressed_pair &&) = default; + MDSPAN_INLINE_FUNCTION_DEFAULTED + ~__compressed_pair() = default; -template < - class _T, class _static_t, _static_t... _Vals, _static_t __sentinal, size_t... 
_Idxs -> -struct __partially_static_array_impl_maker< - _T, _static_t, integer_sequence<_static_t, _Vals...>, __sentinal, integer_sequence -> -{ - using __dynamic_idxs = typename __mask_sequence< - integer_sequence, - integer_sequence - >::type; - using __impl_base = - __partially_static_array_impl<_T, _static_t, - integer_sequence<_static_t, _Vals...>, - __sentinal, integer_sequence, - __dynamic_idxs, - make_index_sequence<__dynamic_idxs::size()> - >; + template + MDSPAN_INLINE_FUNCTION constexpr __compressed_pair(_T1Like &&__t1, _T2Like &&__t2) + : _T2(static_cast<_T2Like &&>(__t2)), __t1_val(static_cast<_T1Like &&>(__t1)) {} }; -template -class __partially_static_array_with_sentinal - : public __partially_static_array_impl_maker<_T, _static_t, _ValsSeq, __sentinal>::__impl_base +// Both empty. +template +struct __compressed_pair< + _T1, _T2, + std::enable_if_t<_MDSPAN_TRAIT(std::is_empty, _T1) && _MDSPAN_TRAIT(std::is_empty, _T2)>> + // We need to use the __no_unique_address_emulation wrapper here to avoid + // base class ambiguities. +#ifdef _MDSPAN_COMPILER_MSVC +// MSVC doesn't allow you to access public static member functions of a type +// when you *happen* to privately inherit from that type. 
+ : protected __no_unique_address_emulation<_T1, 0>, + protected __no_unique_address_emulation<_T2, 1> +#else + : private __no_unique_address_emulation<_T1, 0>, + private __no_unique_address_emulation<_T2, 1> +#endif { -private: - using __base_t = typename __partially_static_array_impl_maker<_T, _static_t, _ValsSeq, __sentinal>::__impl_base; -public: - using __base_t::__base_t; -}; + using __first_base_t = __no_unique_address_emulation<_T1, 0>; + using __second_base_t = __no_unique_address_emulation<_T2, 1>; -//============================================================================== - -template -struct __partially_static_sizes : - __partially_static_array_with_sentinal< - T, _static_t, ::std::integer_sequence<_static_t, __values_or_sentinals...>> -{ -private: - using __base_t = __partially_static_array_with_sentinal< - T, _static_t, ::std::integer_sequence<_static_t, __values_or_sentinals...>>; -public: - using __base_t::__base_t; - template - MDSPAN_FORCE_INLINE_FUNCTION constexpr __partially_static_sizes - __with_tag() const noexcept { - return *this; + MDSPAN_FORCE_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 _T1 &__first() noexcept { + return this->__first_base_t::__ref(); + } + MDSPAN_FORCE_INLINE_FUNCTION constexpr _T1 const &__first() const noexcept { + return this->__first_base_t::__ref(); } + MDSPAN_FORCE_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 _T2 &__second() noexcept { + return this->__second_base_t::__ref(); + } + MDSPAN_FORCE_INLINE_FUNCTION constexpr _T2 const &__second() const noexcept { + return this->__second_base_t::__ref(); + } + + MDSPAN_INLINE_FUNCTION_DEFAULTED + constexpr __compressed_pair() = default; + MDSPAN_INLINE_FUNCTION_DEFAULTED + constexpr __compressed_pair(__compressed_pair const &) = default; + MDSPAN_INLINE_FUNCTION_DEFAULTED + constexpr __compressed_pair(__compressed_pair &&) = default; + MDSPAN_INLINE_FUNCTION_DEFAULTED + _MDSPAN_CONSTEXPR_14_DEFAULTED __compressed_pair & + operator=(__compressed_pair const &) = default; + 
MDSPAN_INLINE_FUNCTION_DEFAULTED + _MDSPAN_CONSTEXPR_14_DEFAULTED __compressed_pair & + operator=(__compressed_pair &&) = default; + MDSPAN_INLINE_FUNCTION_DEFAULTED + ~__compressed_pair() = default; + template + MDSPAN_INLINE_FUNCTION constexpr __compressed_pair(_T1Like &&__t1, _T2Like &&__t2) noexcept + : __first_base_t(_T1(static_cast<_T1Like &&>(__t1))), + __second_base_t(_T2(static_cast<_T2Like &&>(__t2))) + { } }; -// Tags are needed for the standard layout version, but not here -template -using __partially_static_sizes_tagged = __partially_static_sizes; +#endif // !defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) } // end namespace detail -} // end namespace experimental -} // end namespace std - -#endif // !_MDSPAN_PRESERVE_STANDARD_LAYOUT -//END_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/static_array.hpp +} // end namespace MDSPAN_IMPL_STANDARD_NAMESPACE +//END_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/compressed_pair.hpp #if !defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) #endif +#include +#include +#include -namespace std { -namespace experimental { - -namespace detail { +#ifdef __cpp_lib_span +#include +#endif +#if defined(_MDSPAN_USE_CONCEPTS) && MDSPAN_HAS_CXX_20 && defined(__cpp_lib_concepts) +# include +#endif -template -struct _count_dynamic_extents; +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { -template -struct _count_dynamic_extents { - static constexpr size_t val = (E==dynamic_extent?1:0) + _count_dynamic_extents::val; +struct layout_left { + template + class mapping; }; - -template<> -struct _count_dynamic_extents<> { - static constexpr size_t val = 0; +struct layout_right { + template + class mapping; }; -template -static constexpr std::false_type _check_compatible_extents( - std::false_type, std::integer_sequence, std::integer_sequence -) noexcept { return { }; } +namespace detail { + template + constexpr bool __is_mapping_of = + std::is_same, Mapping>::value; 
-template -static std::integral_constant< - bool, - _MDSPAN_FOLD_AND( - ( - Extents == dynamic_extent - || OtherExtents == dynamic_extent - || Extents == OtherExtents - ) /* && ... */ - ) -> -_check_compatible_extents( - std::true_type, std::integer_sequence, std::integer_sequence -) noexcept { return { }; } +#if defined(_MDSPAN_USE_CONCEPTS) && MDSPAN_HAS_CXX_20 +# if !defined(__cpp_lib_concepts) + namespace internal { + namespace detail { + template + concept __same_as = std::is_same_v<_Tp, _Up>; + } // namespace detail + template + concept __same_as = detail::__same_as && detail::__same_as; + } // namespace internal +# endif -struct __extents_tag { }; + template + concept __layout_mapping_alike = requires { + requires __is_extents::value; +#if defined(__cpp_lib_concepts) + { M::is_always_strided() } -> std::same_as; + { M::is_always_exhaustive() } -> std::same_as; + { M::is_always_unique() } -> std::same_as; +#else + { M::is_always_strided() } -> internal::__same_as; + { M::is_always_exhaustive() } -> internal::__same_as; + { M::is_always_unique() } -> internal::__same_as; +#endif + std::bool_constant::value; + std::bool_constant::value; + std::bool_constant::value; + }; +#endif -} // end namespace detail +} // namespace detail -template -class extents +struct layout_stride { + template + class mapping #if !defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - : private detail::__no_unique_address_emulation< - detail::__partially_static_sizes_tagged> + : private detail::__no_unique_address_emulation< + detail::__compressed_pair< + Extents, + detail::possibly_empty_array + > + > #endif -{ -public: + { + public: + using extents_type = Extents; + using index_type = typename extents_type::index_type; + using size_type = typename extents_type::size_type; + using rank_type = typename extents_type::rank_type; + using layout_type = layout_stride; - using rank_type = size_t; - using index_type = ThisIndexType; - static_assert(std::is_integral::value && !std::is_same::value, 
- "std::extents requires a signed or unsigned integer as index_type parameter"); - using size_type = make_unsigned_t; + // This could be a `requires`, but I think it's better and clearer as a `static_assert`. + static_assert(detail::__is_extents_v, + MDSPAN_IMPL_STANDARD_NAMESPACE_STRING "::layout_stride::mapping must be instantiated with a specialization of " MDSPAN_IMPL_STANDARD_NAMESPACE_STRING "::extents."); -// internal typedefs which for technical reasons are public - using __storage_t = detail::__partially_static_sizes_tagged; -#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - _MDSPAN_NO_UNIQUE_ADDRESS __storage_t __storage_; -#else - using __base_t = detail::__no_unique_address_emulation<__storage_t>; -#endif + private: -// private members dealing with the way we internally store dynamic extents - private: + //---------------------------------------------------------------------------- + + using __strides_storage_t = detail::possibly_empty_array; + using __member_pair_t = detail::__compressed_pair; - MDSPAN_FORCE_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 - __storage_t& __storage() noexcept { #if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - return __storage_; + _MDSPAN_NO_UNIQUE_ADDRESS __member_pair_t __members; #else - return this->__base_t::__ref(); + using __base_t = detail::__no_unique_address_emulation<__member_pair_t>; #endif - } - MDSPAN_FORCE_INLINE_FUNCTION - constexpr __storage_t const& __storage() const noexcept { + + MDSPAN_FORCE_INLINE_FUNCTION constexpr __strides_storage_t const& + __strides_storage() const noexcept { #if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - return __storage_; + return __members.__second(); #else - return this->__base_t::__ref(); + return this->__base_t::__ref().__second(); #endif - } + } + MDSPAN_FORCE_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 __strides_storage_t& + __strides_storage() noexcept { +#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) + return __members.__second(); +#else + return 
this->__base_t::__ref().__second(); +#endif + } - template - MDSPAN_FORCE_INLINE_FUNCTION - static constexpr - std::size_t _static_extent_impl(size_t n, std::integer_sequence) noexcept { - return _MDSPAN_FOLD_PLUS_RIGHT(((Idxs == n) ? Extents : 0), /* + ... + */ 0); - } + template + _MDSPAN_HOST_DEVICE + constexpr index_type __get_size(::MDSPAN_IMPL_STANDARD_NAMESPACE::extents,std::integer_sequence) const { + return _MDSPAN_FOLD_TIMES_RIGHT( static_cast(extents().extent(Idx)), 1 ); + } - template - friend class extents; + //---------------------------------------------------------------------------- - template - MDSPAN_INLINE_FUNCTION - constexpr bool _eq_impl(std::experimental::extents, false_type, index_sequence) const noexcept { return false; } - template - MDSPAN_INLINE_FUNCTION - constexpr bool _eq_impl( - std::experimental::extents other, - true_type, index_sequence - ) const noexcept { - return _MDSPAN_FOLD_AND( - (__storage().template __get_n() == other.__storage().template __get_n()) /* && ... */ - ); - } + template + friend class mapping; - template - MDSPAN_INLINE_FUNCTION - constexpr bool _not_eq_impl(std::experimental::extents, false_type, index_sequence) const noexcept { return true; } - template - MDSPAN_INLINE_FUNCTION - constexpr bool _not_eq_impl( - std::experimental::extents other, - true_type, index_sequence - ) const noexcept { - return _MDSPAN_FOLD_OR( - (__storage().template __get_n() != other.__storage().template __get_n()) /* || ... 
*/ - ); - } + //---------------------------------------------------------------------------- -#if !defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - MDSPAN_INLINE_FUNCTION constexpr explicit - extents(__base_t&& __b) noexcept - : __base_t(::std::move(__b)) - { } + // Workaround for non-deducibility of the index sequence template parameter if it's given at the top level + template + struct __deduction_workaround; + + template + struct __deduction_workaround> + { + template + MDSPAN_INLINE_FUNCTION + static constexpr bool _eq_impl(mapping const& self, mapping const& other) noexcept { + using common_t = std::common_type_t; + return _MDSPAN_FOLD_AND((static_cast(self.stride(Idxs)) == static_cast(other.stride(Idxs))) /* && ... */) + && _MDSPAN_FOLD_AND((static_cast(self.extents().extent(Idxs)) == static_cast(other.extents().extent(Idxs))) /* || ... */); + } + template + MDSPAN_INLINE_FUNCTION + static constexpr bool _not_eq_impl(mapping const& self, mapping const& other) noexcept { + using common_t = std::common_type_t; + return _MDSPAN_FOLD_OR((static_cast(self.stride(Idxs)) != static_cast(other.stride(Idxs))) /* || ... */) + || _MDSPAN_FOLD_OR((static_cast(self.extents().extent(Idxs)) != static_cast(other.extents().extent(Idxs))) /* || ... */); + } + + template + MDSPAN_FORCE_INLINE_FUNCTION + static constexpr size_t _call_op_impl(mapping const& self, Integral... idxs) noexcept { + return _MDSPAN_FOLD_PLUS_RIGHT((idxs * self.stride(Idxs)), /* + ... + */ 0); + } + + MDSPAN_INLINE_FUNCTION + static constexpr size_t _req_span_size_impl(mapping const& self) noexcept { + // assumes no negative strides; not sure if I'm allowed to assume that or not + return __impl::_call_op_impl(self, (self.extents().template __extent() - 1)...) 
+ 1; + } + + template + MDSPAN_INLINE_FUNCTION + static constexpr const __strides_storage_t fill_strides(const OtherMapping& map) { + return __strides_storage_t{static_cast(map.stride(Idxs))...}; + } + + MDSPAN_INLINE_FUNCTION + static constexpr const __strides_storage_t& fill_strides(const __strides_storage_t& s) { + return s; + } + + template + MDSPAN_INLINE_FUNCTION + static constexpr const __strides_storage_t fill_strides(const std::array& s) { + return __strides_storage_t{static_cast(s[Idxs])...}; + } + + MDSPAN_TEMPLATE_REQUIRES( + class IntegralType, + // The is_convertible condition is added to make sfinae valid + // the extents_type::rank() > 0 is added to avoid use of non-standard zero length c-array + (std::is_convertible::value && (extents_type::rank() > 0)) + ) + MDSPAN_INLINE_FUNCTION + // despite the requirement some compilers still complain about zero length array during parsing + // making it length 1 now, but since the thing can't be instantiated due to requirement the actual + // instantiation of strides_storage will not fail despite mismatching length + static constexpr const __strides_storage_t fill_strides(mdspan_non_standard_tag, const IntegralType (&s)[extents_type::rank()>0?extents_type::rank():1]) { + return __strides_storage_t{static_cast(s[Idxs])...}; + } + +#ifdef __cpp_lib_span + template + MDSPAN_INLINE_FUNCTION + static constexpr const __strides_storage_t fill_strides(const std::span& s) { + return __strides_storage_t{static_cast(s[Idxs])...}; + } #endif + MDSPAN_INLINE_FUNCTION + static constexpr std::array return_strides(const __strides_storage_t& s) { + return std::array{s[Idxs]...}; + } -// public interface: -public: - /* Defined above for use in the private code - using rank_type = size_t; - using index_type = ThisIndexType; - */ + template + MDSPAN_INLINE_FUNCTION + static constexpr size_t __return_zero() { return 0; } - MDSPAN_INLINE_FUNCTION - static constexpr rank_type rank() noexcept { return sizeof...(Extents); } - 
MDSPAN_INLINE_FUNCTION - static constexpr rank_type rank_dynamic() noexcept { return _MDSPAN_FOLD_PLUS_RIGHT((rank_type(Extents == dynamic_extent)), /* + ... + */ 0); } + template + MDSPAN_INLINE_FUNCTION + static constexpr typename Mapping::index_type + __OFFSET(const Mapping& m) { return m(__return_zero()...); } + }; - //-------------------------------------------------------------------------------- - // Constructors, Destructors, and Assignment + // Can't use defaulted parameter in the __deduction_workaround template because of a bug in MSVC warning C4348. + using __impl = __deduction_workaround>; - // Default constructor - MDSPAN_INLINE_FUNCTION_DEFAULTED constexpr extents() noexcept = default; + static constexpr __strides_storage_t strides_storage(detail::with_rank<0>) { + return {}; + } + template + static constexpr __strides_storage_t strides_storage(detail::with_rank) { + __strides_storage_t s{}; + + extents_type e; + index_type stride = 1; + for(int r = static_cast(extents_type::rank() - 1); r >= 0; r--) { + s[r] = stride; + stride *= e.extent(r); + } - // Converting constructor - MDSPAN_TEMPLATE_REQUIRES( - class OtherIndexType, size_t... OtherExtents, - /* requires */ ( - /* multi-stage check to protect from invalid pack expansion when sizes don't match? */ - decltype(detail::_check_compatible_extents( - std::integral_constant{}, - std::integer_sequence{}, - std::integer_sequence{} - ))::value - ) - ) - MDSPAN_INLINE_FUNCTION - MDSPAN_CONDITIONAL_EXPLICIT( - (((Extents != dynamic_extent) && (OtherExtents == dynamic_extent)) || ...) 
|| - (std::numeric_limits::max() < std::numeric_limits::max())) - constexpr extents(const extents& __other) - noexcept + return s; + } + + //---------------------------------------------------------------------------- + +#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) + MDSPAN_INLINE_FUNCTION constexpr explicit + mapping(__member_pair_t&& __m) : __members(::std::move(__m)) {} +#else + MDSPAN_INLINE_FUNCTION constexpr explicit + mapping(__base_t&& __b) : __base_t(::std::move(__b)) {} +#endif + + public: + + //-------------------------------------------------------------------------------- + + MDSPAN_INLINE_FUNCTION_DEFAULTED constexpr mapping() noexcept #if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - : __storage_{ + : __members{ #else - : __base_t(__base_t{__storage_t{ + : __base_t(__base_t{__member_pair_t( #endif - __other.__storage().__enable_psa_conversion() + extents_type(), + __strides_storage_t(strides_storage(detail::with_rank{})) #if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - } + } #else - }}) + )}) #endif - { - /* TODO: precondition check - * other.extent(r) equals Er for each r for which Er is a static extent, and - * either - * - sizeof...(OtherExtents) is zero, or - * - other.extent(r) is a representable value of type index_type for all rank index r of other - */ - } + {} + + MDSPAN_INLINE_FUNCTION_DEFAULTED constexpr mapping(mapping const&) noexcept = default; -#ifdef __NVCC__ - MDSPAN_TEMPLATE_REQUIRES( - class... Integral, - /* requires */ ( - // TODO: check whether the other version works with newest NVCC, doesn't with 11.4 - // NVCC seems to pick up rank_dynamic from the wrong extents type??? - _MDSPAN_FOLD_AND(_MDSPAN_TRAIT(is_convertible, Integral, index_type) /* && ... */) && - _MDSPAN_FOLD_AND(_MDSPAN_TRAIT(is_nothrow_constructible, index_type, Integral) /* && ... 
*/) && - // NVCC chokes on the fold thingy here so wrote the workaround - ((sizeof...(Integral) == detail::_count_dynamic_extents::val) || - (sizeof...(Integral) == sizeof...(Extents))) - ) - ) -#else MDSPAN_TEMPLATE_REQUIRES( - class... Integral, - /* requires */ ( - _MDSPAN_FOLD_AND(_MDSPAN_TRAIT(is_convertible, Integral, index_type) /* && ... */) && - _MDSPAN_FOLD_AND(_MDSPAN_TRAIT(is_nothrow_constructible, index_type, Integral) /* && ... */) && - ((sizeof...(Integral) == rank_dynamic()) || (sizeof...(Integral) == rank())) + class IntegralTypes, + /* requires */ ( + // MSVC 19.32 does not like using index_type here, requires the typename Extents::index_type + // error C2641: cannot deduce template arguments for 'MDSPAN_IMPL_STANDARD_NAMESPACE::layout_stride::mapping' + _MDSPAN_TRAIT(std::is_convertible, const std::remove_const_t&, typename Extents::index_type) && + _MDSPAN_TRAIT(std::is_nothrow_constructible, typename Extents::index_type, const std::remove_const_t&) ) ) -#endif - MDSPAN_INLINE_FUNCTION - explicit constexpr extents(Integral... exts) noexcept + MDSPAN_INLINE_FUNCTION + constexpr + mapping( + extents_type const& e, + std::array const& s + ) noexcept #if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - : __storage_{ + : __members{ #else - : __base_t(__base_t{typename __base_t::__stored_type{ + : __base_t(__base_t{__member_pair_t( #endif - std::conditional_t(), - static_cast(exts)... + e, __strides_storage_t(__impl::fill_strides(s)) #if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - } + } #else - }}) + )}) #endif - { - /* TODO: precondition check - * If sizeof...(IndexTypes) != rank_dynamic() is true, exts_arr[r] equals Er for each r for which Er is a static extent, and - * either - * - sizeof...(exts) == 0 is true, or - * - each element of exts is nonnegative and is a representable value of type index_type. - */ - } + { + /* + * TODO: check preconditions + * - s[i] > 0 is true for all i in the range [0, rank_ ). 
+ * - REQUIRED-SPAN-SIZE(e, s) is a representable value of type index_type ([basic.fundamental]). + * - If rank_ is greater than 0, then there exists a permutation P of the integers in the + * range [0, rank_), such that s[ pi ] >= s[ pi − 1 ] * e.extent( pi − 1 ) is true for + * all i in the range [1, rank_ ), where pi is the ith element of P. + */ + } - // TODO: check whether this works with newest NVCC, doesn't with 11.4 -#ifdef __NVCC__ - // NVCC seems to pick up rank_dynamic from the wrong extents type??? - // NVCC chokes on the fold thingy here so wrote the workaround - MDSPAN_TEMPLATE_REQUIRES( - class IndexType, size_t N, - /* requires */ ( - _MDSPAN_TRAIT(is_convertible, IndexType, index_type) && - _MDSPAN_TRAIT(is_nothrow_constructible, index_type, IndexType) && - ((N == detail::_count_dynamic_extents::val) || - (N == sizeof...(Extents))) - ) - ) -#else MDSPAN_TEMPLATE_REQUIRES( - class IndexType, size_t N, - /* requires */ ( - _MDSPAN_TRAIT(is_convertible, IndexType, index_type) && - _MDSPAN_TRAIT(is_nothrow_constructible, index_type, IndexType) && - (N == rank() || N == rank_dynamic()) + class IntegralTypes, + /* requires */ ( + // MSVC 19.32 does not like using index_type here, requires the typename Extents::index_type + // error C2641: cannot deduce template arguments for 'MDSPAN_IMPL_STANDARD_NAMESPACE::layout_stride::mapping' + _MDSPAN_TRAIT(std::is_convertible, const std::remove_const_t&, typename Extents::index_type) && + _MDSPAN_TRAIT(std::is_nothrow_constructible, typename Extents::index_type, const std::remove_const_t&) && + (Extents::rank() > 0) + ) ) - ) -#endif - MDSPAN_CONDITIONAL_EXPLICIT(N != rank_dynamic()) - MDSPAN_INLINE_FUNCTION - constexpr - extents(std::array const& exts) noexcept + MDSPAN_INLINE_FUNCTION + constexpr + mapping( + mdspan_non_standard_tag, + extents_type const& e, + // despite the requirement some compilers still complain about zero length array during parsing + // making it length 1 now, but since the thing can't be 
instantiated due to requirement the actual + // instantiation of strides_storage will not fail despite mismatching length + IntegralTypes (&s)[extents_type::rank()>0?extents_type::rank():1] + ) noexcept #if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - : __storage_{ + : __members{ #else - : __base_t(__base_t{typename __base_t::__stored_type{ + : __base_t(__base_t{__member_pair_t( #endif - std::conditional_t, - detail::__construct_psa_from_all_exts_array_tag_t>(), - std::array{exts} + e, __strides_storage_t(__impl::fill_strides(mdspan_non_standard, s)) #if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - } + } #else - }}) + )}) #endif - { - /* TODO: precondition check - * If N != rank_dynamic() is true, exts[r] equals Er for each r for which Er is a static extent, and - * either - * - N is zero, or - * - exts[r] is nonnegative and is a representable value of type index_type for all rank index r - */ - } + { + /* + * TODO: check preconditions + * - s[i] > 0 is true for all i in the range [0, rank_ ). + * - REQUIRED-SPAN-SIZE(e, s) is a representable value of type index_type ([basic.fundamental]). + * - If rank_ is greater than 0, then there exists a permutation P of the integers in the + * range [0, rank_), such that s[ pi ] >= s[ pi − 1 ] * e.extent( pi − 1 ) is true for + * all i in the range [1, rank_ ), where pi is the ith element of P. + */ + } #ifdef __cpp_lib_span - // TODO: check whether the below works with newest NVCC, doesn't with 11.4 -#ifdef __NVCC__ - // NVCC seems to pick up rank_dynamic from the wrong extents type??? 
- // NVCC chokes on the fold thingy here so wrote the workaround - MDSPAN_TEMPLATE_REQUIRES( - class IndexType, size_t N, - /* requires */ ( - _MDSPAN_TRAIT(is_convertible, IndexType, index_type) && - _MDSPAN_TRAIT(is_nothrow_constructible, index_type, IndexType) && - ((N == detail::_count_dynamic_extents::val) || - (N == sizeof...(Extents))) - ) - ) -#else MDSPAN_TEMPLATE_REQUIRES( - class IndexType, size_t N, - /* requires */ ( - _MDSPAN_TRAIT(is_convertible, IndexType, index_type) && - _MDSPAN_TRAIT(is_nothrow_constructible, index_type, IndexType) && - (N == rank() || N == rank_dynamic()) + class IntegralTypes, + /* requires */ ( + // MSVC 19.32 does not like using index_type here, requires the typename Extents::index_type + // error C2641: cannot deduce template arguments for 'MDSPAN_IMPL_STANDARD_NAMESPACE::layout_stride::mapping' + _MDSPAN_TRAIT(std::is_convertible, const std::remove_const_t&, typename Extents::index_type) && + _MDSPAN_TRAIT(std::is_nothrow_constructible, typename Extents::index_type, const std::remove_const_t&) + ) ) - ) -#endif - MDSPAN_CONDITIONAL_EXPLICIT(N != rank_dynamic()) - MDSPAN_INLINE_FUNCTION - constexpr - extents(std::span exts) noexcept -#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - : __storage_{ -#else - : __base_t(__base_t{typename __base_t::__stored_type{ -#endif - std::conditional_t, - detail::__construct_psa_from_all_exts_array_tag_t>(), - exts + MDSPAN_INLINE_FUNCTION + constexpr + mapping( + extents_type const& e, + std::span const& s + ) noexcept #if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - } + : __members{ #else - }}) -#endif - { - /* TODO: precondition check - * If N != rank_dynamic() is true, exts[r] equals Er for each r for which Er is a static extent, and - * either - * - N is zero, or - * - exts[r] is nonnegative and is a representable value of type index_type for all rank index r - */ - } + : __base_t(__base_t{__member_pair_t( #endif - - // Need this constructor for some submdspan implementation 
stuff - // for the layout_stride case where I use an extents object for strides - MDSPAN_INLINE_FUNCTION - constexpr explicit - extents(__storage_t const& sto ) noexcept + e, __strides_storage_t(__impl::fill_strides(s)) #if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - : __storage_{ + } #else - : __base_t(__base_t{ + )}) #endif - sto -#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - } -#else - }) + { + /* + * TODO: check preconditions + * - s[i] > 0 is true for all i in the range [0, rank_ ). + * - REQUIRED-SPAN-SIZE(e, s) is a representable value of type index_type ([basic.fundamental]). + * - If rank_ is greater than 0, then there exists a permutation P of the integers in the + * range [0, rank_), such that s[ pi ] >= s[ pi − 1 ] * e.extent( pi − 1 ) is true for + * all i in the range [1, rank_ ), where pi is the ith element of P. + */ + } +#endif // __cpp_lib_span + +#if !(defined(_MDSPAN_USE_CONCEPTS) && MDSPAN_HAS_CXX_20) + MDSPAN_TEMPLATE_REQUIRES( + class StridedLayoutMapping, + /* requires */ ( + _MDSPAN_TRAIT(std::is_constructible, extents_type, typename StridedLayoutMapping::extents_type) && + detail::__is_mapping_of && + StridedLayoutMapping::is_always_unique() && + StridedLayoutMapping::is_always_strided() + ) + ) +#else + template + requires( + detail::__layout_mapping_alike && + _MDSPAN_TRAIT(std::is_constructible, extents_type, typename StridedLayoutMapping::extents_type) && + StridedLayoutMapping::is_always_unique() && + StridedLayoutMapping::is_always_strided() + ) #endif - { } + MDSPAN_CONDITIONAL_EXPLICIT( + !(std::is_convertible::value && + (detail::__is_mapping_of || + detail::__is_mapping_of || + detail::__is_mapping_of)) + ) // needs two () due to comma + MDSPAN_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 + mapping(StridedLayoutMapping const& other) noexcept // NOLINT(google-explicit-constructor) +#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) + : __members{ +#else + : __base_t(__base_t{__member_pair_t( +#endif + other.extents(), 
__strides_storage_t(__impl::fill_strides(other)) +#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) + } +#else + )}) +#endif + { + /* + * TODO: check preconditions + * - other.stride(i) > 0 is true for all i in the range [0, rank_ ). + * - other.required_span_size() is a representable value of type index_type ([basic.fundamental]). + * - OFFSET(other) == 0 + */ + } - //-------------------------------------------------------------------------------- + //-------------------------------------------------------------------------------- - MDSPAN_INLINE_FUNCTION - static constexpr - size_t static_extent(size_t n) noexcept { - // Can't do assert here since that breaks true constexpr ness - // assert(n{}); - } + MDSPAN_INLINE_FUNCTION_DEFAULTED _MDSPAN_CONSTEXPR_14_DEFAULTED + mapping& operator=(mapping const&) noexcept = default; - MDSPAN_INLINE_FUNCTION - constexpr - index_type extent(size_t n) const noexcept { - // Can't do assert here since that breaks true constexpr ness - // assert(n__base_t::__ref().__first(); +#endif + }; - //-------------------------------------------------------------------------------- + MDSPAN_INLINE_FUNCTION + constexpr std::array< index_type, extents_type::rank() > strides() const noexcept { + return __impl::return_strides(__strides_storage()); + } - template - MDSPAN_INLINE_FUNCTION - friend constexpr bool operator==(extents const& lhs, extents const& rhs) noexcept { - return lhs._eq_impl( - rhs, std::integral_constant{}, - make_index_sequence{} - ); - } + MDSPAN_INLINE_FUNCTION + constexpr index_type required_span_size() const noexcept { + index_type span_size = 1; + for(unsigned r = 0; r < extents_type::rank(); r++) { + // Return early if any of the extents are zero + if(extents().extent(r)==0) return 0; + span_size += ( static_cast(extents().extent(r) - 1 ) * __strides_storage()[r]); + } + return span_size; + } -#if !(MDSPAN_HAS_CXX_20) - template - MDSPAN_INLINE_FUNCTION - friend constexpr bool operator!=(extents const& lhs, extents 
const& rhs) noexcept { - return lhs._not_eq_impl( - rhs, std::integral_constant{}, - make_index_sequence{} - ); - } -#endif - // End of public interface + MDSPAN_TEMPLATE_REQUIRES( + class... Indices, + /* requires */ ( + sizeof...(Indices) == Extents::rank() && + (detail::are_valid_indices()) + ) + ) + MDSPAN_FORCE_INLINE_FUNCTION + constexpr index_type operator()(Indices... idxs) const noexcept { +#if ! defined(NDEBUG) + detail::check_all_indices(this->extents(), idxs...); +#endif // ! NDEBUG + return static_cast(__impl::_call_op_impl(*this, static_cast(idxs)...)); + } -public: // (but not really) + MDSPAN_INLINE_FUNCTION static constexpr bool is_always_unique() noexcept { return true; } + MDSPAN_INLINE_FUNCTION static constexpr bool is_always_exhaustive() noexcept { + return false; + } + MDSPAN_INLINE_FUNCTION static constexpr bool is_always_strided() noexcept { return true; } - MDSPAN_INLINE_FUNCTION static constexpr - extents __make_extents_impl(detail::__partially_static_sizes&& __bs) noexcept { - // This effectively amounts to a sideways cast that can be done in a constexpr - // context, but we have to do it to handle the case where the extents and the - // strides could accidentally end up with the same types in their hierarchies - // somehow (which would cause layout_stride::mapping to not be standard_layout) - return extents( -#if !defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - __base_t{ -#endif - ::std::move(__bs.template __with_tag()) -#if !defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - } -#endif - ); - } + MDSPAN_INLINE_FUNCTION static constexpr bool is_unique() noexcept { return true; } - template - MDSPAN_FORCE_INLINE_FUNCTION - constexpr - index_type __extent() const noexcept { - return __storage().template __get_n(); - } + private: + constexpr bool exhaustive_for_nonzero_span_size() const + { + return required_span_size() == __get_size(extents(), std::make_index_sequence()); + } - template - MDSPAN_INLINE_FUNCTION - static constexpr - 
index_type __static_extent() noexcept { - return __storage_t::template __get_static_n(); - } + constexpr bool is_exhaustive_impl(detail::with_rank<0>) const + { + return true; + } + constexpr bool is_exhaustive_impl(detail::with_rank<1>) const + { + if (required_span_size() != static_cast(0)) { + return exhaustive_for_nonzero_span_size(); + } + return stride(0) == 1; + } + template + constexpr bool is_exhaustive_impl(detail::with_rank) const + { + if (required_span_size() != static_cast(0)) { + return exhaustive_for_nonzero_span_size(); + } -}; + rank_type r_largest = 0; + for (rank_type r = 1; r < extents_type::rank(); r++) { + if (stride(r) > stride(r_largest)) { + r_largest = r; + } + } + for (rank_type r = 0; r < extents_type::rank(); r++) { + if (extents().extent(r) == 0 && r != r_largest) { + return false; + } + } + return true; + } -namespace detail { + public: + MDSPAN_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 bool is_exhaustive() const noexcept { + return is_exhaustive_impl(detail::with_rank{}); + } + MDSPAN_INLINE_FUNCTION static constexpr bool is_strided() noexcept { return true; } -template > -struct __make_dextents; -template -struct __make_dextents> { - using type = typename __make_dextents>::type; -}; + MDSPAN_INLINE_FUNCTION + constexpr index_type stride(rank_type r) const noexcept { + return __strides_storage()[r]; + } -template -struct __make_dextents> { - using type = ::std::experimental::extents; -}; +#if !(defined(_MDSPAN_USE_CONCEPTS) && MDSPAN_HAS_CXX_20) + MDSPAN_TEMPLATE_REQUIRES( + class StridedLayoutMapping, + /* requires */ ( + detail::__is_mapping_of && + (extents_type::rank() == StridedLayoutMapping::extents_type::rank()) && + StridedLayoutMapping::is_always_strided() + ) + ) +#else + template + requires( + detail::__layout_mapping_alike && + (extents_type::rank() == StridedLayoutMapping::extents_type::rank()) && + StridedLayoutMapping::is_always_strided() + ) +#endif + MDSPAN_INLINE_FUNCTION + friend constexpr bool operator==(const 
mapping& x, const StridedLayoutMapping& y) noexcept { + return (x.extents() == y.extents()) && + (__impl::__OFFSET(y) == static_cast(0)) && + detail::rankwise_equal(detail::with_rank{}, x, y, detail::stride); + } -} // end namespace detail + // This one is not technically part of the proposal. Just here to make implementation a bit more optimal hopefully + MDSPAN_TEMPLATE_REQUIRES( + class OtherExtents, + /* requires */ ( + (extents_type::rank() == OtherExtents::rank()) + ) + ) + MDSPAN_INLINE_FUNCTION + friend constexpr bool operator==(mapping const& lhs, mapping const& rhs) noexcept { + return __impl::_eq_impl(lhs, rhs); + } -template -using dextents = typename detail::__make_dextents::type; +#if !MDSPAN_HAS_CXX_20 + MDSPAN_TEMPLATE_REQUIRES( + class StridedLayoutMapping, + /* requires */ ( + detail::__is_mapping_of && + (extents_type::rank() == StridedLayoutMapping::extents_type::rank()) && + StridedLayoutMapping::is_always_strided() + ) + ) + MDSPAN_INLINE_FUNCTION + friend constexpr bool operator!=(const mapping& x, const StridedLayoutMapping& y) noexcept { + return not (x == y); + } -#if defined(_MDSPAN_USE_CLASS_TEMPLATE_ARGUMENT_DEDUCTION) -template -extents(IndexTypes...) - -> extents; + MDSPAN_TEMPLATE_REQUIRES( + class OtherExtents, + /* requires */ ( + (extents_type::rank() == OtherExtents::rank()) + ) + ) + MDSPAN_INLINE_FUNCTION + friend constexpr bool operator!=(mapping const& lhs, mapping const& rhs) noexcept { + return __impl::_not_eq_impl(lhs, rhs); + } #endif + // [mdspan.submdspan.mapping], submdspan mapping specialization + template + MDSPAN_INLINE_FUNCTION + constexpr auto submdspan_mapping_impl( + SliceSpecifiers... slices) const; + + template + friend constexpr auto submdspan_mapping( + const mapping& src, SliceSpecifiers... 
slices) { + return src.submdspan_mapping_impl(slices...); + } + }; +}; + namespace detail { -template -struct __is_extents : ::std::false_type {}; +template +constexpr void validate_strides(with_rank<0>, Layout, const Extents&, const Mapping&) +{} -template -struct __is_extents<::std::experimental::extents> : ::std::true_type {}; +template +constexpr void validate_strides(with_rank, Layout, const Extents& ext, const Mapping& other) +{ + static_assert(std::is_same::value and + (std::is_same::value or + std::is_same::value) + , "This function is only intended to validate construction of " + "a layout_left or layout_right mapping from a layout_stride mapping."); -template -static constexpr bool __is_extents_v = __is_extents::value; + constexpr auto is_left = std::is_same::value; + typename Extents::index_type stride = 1; -template -struct __extents_to_partially_static_sizes; + for (std::size_t r = 0; r < N; r++) { + const std::size_t s = is_left ? r : N - 1 - r; -template -struct __extents_to_partially_static_sizes<::std::experimental::extents> { - using type = detail::__partially_static_sizes< - typename ::std::experimental::extents::index_type, size_t, - ExtentsPack...>; -}; + MDSPAN_IMPL_PRECONDITION(common_integral_compare(stride, other.stride(s)) + and "invalid strides for layout_{left,right}"); -template -using __extents_to_partially_static_sizes_t = typename __extents_to_partially_static_sizes::type; + stride *= ext.extent(s); + } +} -} // end namespace detail -} // end namespace experimental -} // end namespace std -//END_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/extents.hpp -//BEGIN_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/layout_stride.hpp -/* +} // namespace detail +} // end namespace MDSPAN_IMPL_STANDARD_NAMESPACE +//END_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/layout_stride.hpp +#if MDSPAN_HAS_CXX_17 +//BEGIN_FILE_INCLUDE: 
/home/runner/work/mdspan/mdspan/include/experimental/__p2642_bits/layout_padded_fwd.hpp //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2019) Sandia Corporation +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ //@HEADER -*/ +#include +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { +namespace MDSPAN_IMPL_PROPOSED_NAMESPACE { -#if !defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) -#endif +template +struct layout_left_padded { + template + class mapping; +}; -#ifdef __cpp_lib_span -#endif -#if defined(_MDSPAN_USE_CONCEPTS) && MDSPAN_HAS_CXX_20 -#endif +template +struct layout_right_padded { + template + class mapping; +}; -namespace std { -namespace experimental { +namespace detail { +// The layout_padded_constants structs are only useful if rank > 1, otherwise they may wrap +template +struct layout_padded_constants; -struct layout_left { - template - class mapping; +template +struct layout_padded_constants, _ExtentsType> +{ + using rank_type = typename _ExtentsType::rank_type; + static constexpr rank_type padded_stride_idx = 1; + static constexpr rank_type extent_to_pad_idx = 0; }; -struct layout_right { - template - class mapping; + +template +struct layout_padded_constants, _ExtentsType> +{ + using rank_type = typename _ExtentsType::rank_type; + static constexpr rank_type padded_stride_idx = _ExtentsType::rank() - 2; + static constexpr rank_type extent_to_pad_idx = _ExtentsType::rank() - 1; }; -namespace detail { - template - constexpr bool __is_mapping_of = - is_same, Mapping>::value; +template 
+struct is_layout_left_padded : std::false_type {}; -#if defined(_MDSPAN_USE_CONCEPTS) && MDSPAN_HAS_CXX_20 - template - concept __layout_mapping_alike = requires { - requires __is_extents::value; - { M::is_always_strided() } -> same_as; - { M::is_always_exhaustive() } -> same_as; - { M::is_always_unique() } -> same_as; - bool_constant::value; - bool_constant::value; - bool_constant::value; - }; -#endif -} // namespace detail +template +struct is_layout_left_padded> : std::true_type {}; -struct layout_stride { - template - class mapping -#if !defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - : private detail::__no_unique_address_emulation< - detail::__compressed_pair< - Extents, - std::array - > - > -#endif - { - public: - using extents_type = Extents; - using index_type = typename extents_type::index_type; - using size_type = typename extents_type::size_type; - using rank_type = typename extents_type::rank_type; - using layout_type = layout_stride; - - // This could be a `requires`, but I think it's better and clearer as a `static_assert`. 
- static_assert(detail::__is_extents_v, "std::experimental::layout_stride::mapping must be instantiated with a specialization of std::experimental::extents."); - - - private: - - //---------------------------------------------------------------------------- - - using __strides_storage_t = array;//::std::experimental::dextents; - using __member_pair_t = detail::__compressed_pair; +template +struct is_layout_left_padded_mapping : std::false_type {}; -#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - _MDSPAN_NO_UNIQUE_ADDRESS __member_pair_t __members; -#else - using __base_t = detail::__no_unique_address_emulation<__member_pair_t>; -#endif - - MDSPAN_FORCE_INLINE_FUNCTION constexpr __strides_storage_t const& - __strides_storage() const noexcept { -#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - return __members.__second(); -#else - return this->__base_t::__ref().__second(); -#endif - } - MDSPAN_FORCE_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 __strides_storage_t& - __strides_storage() noexcept { -#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - return __members.__second(); -#else - return this->__base_t::__ref().__second(); -#endif - } +template +struct is_layout_left_padded_mapping<_Mapping, + std::enable_if_t::template mapping>::value>> + : std::true_type {}; - //---------------------------------------------------------------------------- +template +struct is_layout_right_padded : std::false_type {}; - template - friend class mapping; +template +struct is_layout_right_padded> : std::true_type {}; - //---------------------------------------------------------------------------- +template +struct is_layout_right_padded_mapping : std::false_type {}; - // Workaround for non-deducibility of the index sequence template parameter if it's given at the top level - template - struct __deduction_workaround; +template +struct is_layout_right_padded_mapping<_Mapping, + std::enable_if_t::template mapping>::value>> + : std::true_type {}; - template - struct 
__deduction_workaround> - { - template - MDSPAN_INLINE_FUNCTION - static constexpr bool _eq_impl(mapping const& self, mapping const& other) noexcept { - return _MDSPAN_FOLD_AND((self.stride(Idxs) == other.stride(Idxs)) /* && ... */); - } - template - MDSPAN_INLINE_FUNCTION - static constexpr bool _not_eq_impl(mapping const& self, mapping const& other) noexcept { - return _MDSPAN_FOLD_OR((self.stride(Idxs) != other.stride(Idxs)) /* || ... */); - } - template - MDSPAN_FORCE_INLINE_FUNCTION - static constexpr size_t _call_op_impl(mapping const& self, Integral... idxs) noexcept { - return _MDSPAN_FOLD_PLUS_RIGHT((idxs * self.stride(Idxs)), /* + ... + */ 0); - } +template +constexpr void check_padded_layout_converting_constructor_mandates(MDSPAN_IMPL_STANDARD_NAMESPACE::detail::with_rank<0>) {} - MDSPAN_INLINE_FUNCTION - static constexpr size_t _req_span_size_impl(mapping const& self) noexcept { - // assumes no negative strides; not sure if I'm allowed to assume that or not - return __impl::_call_op_impl(self, (self.extents().template __extent() - 1)...) 
+ 1; - } +template +constexpr void check_padded_layout_converting_constructor_mandates(MDSPAN_IMPL_STANDARD_NAMESPACE::detail::with_rank<1>) {} - template - MDSPAN_INLINE_FUNCTION - static constexpr const __strides_storage_t fill_strides(const OtherMapping& map) { - return __strides_storage_t{static_cast(map.stride(Idxs))...}; - } +template +constexpr void check_padded_layout_converting_constructor_mandates(MDSPAN_IMPL_STANDARD_NAMESPACE::detail::with_rank) +{ + using extents_type = typename _PaddedLayoutMappingType::extents_type; + constexpr auto padding_value = _PaddedLayoutMappingType::padding_value; + constexpr auto idx = layout_padded_constants::extent_to_pad_idx; + + constexpr auto statically_determinable = + (_LayoutExtentsType::static_extent(idx) != dynamic_extent) && + (extents_type::static_extent(idx) != dynamic_extent) && + (padding_value != dynamic_extent); + + static_assert(not statically_determinable or + (padding_value == 0 + ? _LayoutExtentsType::static_extent(idx) == 0 + : _LayoutExtentsType::static_extent(idx) % padding_value == 0), + ""); +} - MDSPAN_INLINE_FUNCTION - static constexpr const __strides_storage_t& fill_strides(const __strides_storage_t& s) { - return s; - } +template +constexpr void check_padded_layout_converting_constructor_preconditions(MDSPAN_IMPL_STANDARD_NAMESPACE::detail::with_rank<0>, + const _OtherMapping&) {} +template +constexpr void check_padded_layout_converting_constructor_preconditions(MDSPAN_IMPL_STANDARD_NAMESPACE::detail::with_rank<1>, + const _OtherMapping&) {} +template +constexpr void check_padded_layout_converting_constructor_preconditions(MDSPAN_IMPL_STANDARD_NAMESPACE::detail::with_rank, + const _OtherMapping &other_mapping) { + constexpr auto padded_stride_idx = + layout_padded_constants::padded_stride_idx; + constexpr auto extent_to_pad_idx = layout_padded_constants::extent_to_pad_idx; + MDSPAN_IMPL_PRECONDITION(other_mapping.stride(padded_stride_idx) == other_mapping.extents().extent(extent_to_pad_idx)); +} 
- template - MDSPAN_INLINE_FUNCTION - static constexpr const __strides_storage_t fill_strides(const array& s) { - return __strides_storage_t{static_cast(s[Idxs])...}; - } -#ifdef __cpp_lib_span - template - MDSPAN_INLINE_FUNCTION - static constexpr const __strides_storage_t fill_strides(const span& s) { - return __strides_storage_t{static_cast(s[Idxs])...}; - } +} +} +} +//END_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p2642_bits/layout_padded_fwd.hpp #endif - MDSPAN_INLINE_FUNCTION - static constexpr const __strides_storage_t fill_strides( - detail::__extents_to_partially_static_sizes_t< - ::std::experimental::dextents>&& s) { - return __strides_storage_t{static_cast(s.template __get_n())...}; - } +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { - template - MDSPAN_INLINE_FUNCTION - static constexpr size_t __return_zero() { return 0; } +//============================================================================== +template +class layout_right::mapping { + public: + using extents_type = Extents; + using index_type = typename extents_type::index_type; + using size_type = typename extents_type::size_type; + using rank_type = typename extents_type::rank_type; + using layout_type = layout_right; + private: - template - MDSPAN_INLINE_FUNCTION - static constexpr typename Mapping::index_type - __OFFSET(const Mapping& m) { return m(__return_zero()...); } - }; + static_assert(detail::__is_extents_v, + MDSPAN_IMPL_STANDARD_NAMESPACE_STRING "::layout_right::mapping must be instantiated with a specialization of " MDSPAN_IMPL_STANDARD_NAMESPACE_STRING "::extents."); - // Can't use defaulted parameter in the __deduction_workaround template because of a bug in MSVC warning C4348. 
- using __impl = __deduction_workaround>; + template + friend class mapping; + // i0+(i1 + E(1)*(i2 + E(2)*i3)) + template + struct __rank_count {}; - //---------------------------------------------------------------------------- + template + _MDSPAN_HOST_DEVICE + constexpr index_type __compute_offset( + index_type offset, __rank_count, const I& i, Indices... idx) const { + return __compute_offset(offset * __extents.extent(r) + i,__rank_count(), idx...); + } -#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - MDSPAN_INLINE_FUNCTION constexpr explicit - mapping(__member_pair_t&& __m) : __members(::std::move(__m)) {} -#else - MDSPAN_INLINE_FUNCTION constexpr explicit - mapping(__base_t&& __b) : __base_t(::std::move(__b)) {} -#endif + template + _MDSPAN_HOST_DEVICE + constexpr index_type __compute_offset( + __rank_count<0,extents_type::rank()>, const I& i, Indices... idx) const { + return __compute_offset(i,__rank_count<1,extents_type::rank()>(),idx...); + } - public: // but not really - MDSPAN_INLINE_FUNCTION - static constexpr mapping - __make_mapping( - detail::__extents_to_partially_static_sizes_t&& __exts, - detail::__extents_to_partially_static_sizes_t< - ::std::experimental::dextents>&& __strs - ) noexcept { - // call the private constructor we created for this purpose - return mapping( -#if !defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - __base_t{ -#endif - __member_pair_t( - extents_type::__make_extents_impl(::std::move(__exts)), - __strides_storage_t{__impl::fill_strides(::std::move(__strs))} - ) -#if !defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - } -#endif - ); + _MDSPAN_HOST_DEVICE + constexpr index_type __compute_offset(size_t offset, __rank_count) const { + return static_cast(offset); } - //---------------------------------------------------------------------------- + _MDSPAN_HOST_DEVICE + constexpr index_type __compute_offset(__rank_count<0,0>) const { return 0; } public: @@ -3887,413 +3240,90 @@ struct layout_stride { 
MDSPAN_INLINE_FUNCTION_DEFAULTED constexpr mapping() noexcept = default; MDSPAN_INLINE_FUNCTION_DEFAULTED constexpr mapping(mapping const&) noexcept = default; + _MDSPAN_HOST_DEVICE + constexpr mapping(extents_type const& __exts) noexcept + :__extents(__exts) + { } + MDSPAN_TEMPLATE_REQUIRES( - class IntegralTypes, + class OtherExtents, /* requires */ ( - // MSVC 19.32 does not like using index_type here, requires the typename Extents::index_type - // error C2641: cannot deduce template arguments for 'std::experimental::layout_stride::mapping' - _MDSPAN_TRAIT(is_convertible, const remove_const_t&, typename Extents::index_type) && - _MDSPAN_TRAIT(is_nothrow_constructible, typename Extents::index_type, const remove_const_t&) + _MDSPAN_TRAIT(std::is_constructible, extents_type, OtherExtents) ) ) - MDSPAN_INLINE_FUNCTION - constexpr - mapping( - extents_type const& e, - array const& s - ) noexcept -#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - : __members{ -#else - : __base_t(__base_t{__member_pair_t( -#endif - e, __strides_storage_t(__impl::fill_strides(s)) -#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - } -#else - )}) -#endif + MDSPAN_CONDITIONAL_EXPLICIT((!std::is_convertible::value)) // needs two () due to comma + MDSPAN_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 + mapping(mapping const& other) noexcept // NOLINT(google-explicit-constructor) + :__extents(other.extents()) { - /* - * TODO: check preconditions - * - s[i] > 0 is true for all i in the range [0, rank_ ). - * - REQUIRED-SPAN-SIZE(e, s) is a representable value of type index_type ([basic.fundamental]). - * - If rank_ is greater than 0, then there exists a permutation P of the integers in the - * range [0, rank_), such that s[ pi ] >= s[ pi − 1 ] * e.extent( pi − 1 ) is true for - * all i in the range [1, rank_ ), where pi is the ith element of P. 
- */ + /* + * TODO: check precondition + * other.required_span_size() is a representable value of type index_type + */ } -#ifdef __cpp_lib_span MDSPAN_TEMPLATE_REQUIRES( - class IntegralTypes, + class OtherExtents, /* requires */ ( - // MSVC 19.32 does not like using index_type here, requires the typename Extents::index_type - // error C2641: cannot deduce template arguments for 'std::experimental::layout_stride::mapping' - _MDSPAN_TRAIT(is_convertible, const remove_const_t&, typename Extents::index_type) && - _MDSPAN_TRAIT(is_nothrow_constructible, typename Extents::index_type, const remove_const_t&) + _MDSPAN_TRAIT(std::is_constructible, extents_type, OtherExtents) && + (extents_type::rank() <= 1) ) ) - MDSPAN_INLINE_FUNCTION - constexpr - mapping( - extents_type const& e, - span const& s - ) noexcept -#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - : __members{ -#else - : __base_t(__base_t{__member_pair_t( -#endif - e, __strides_storage_t(__impl::fill_strides(s)) -#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - } -#else - )}) -#endif + MDSPAN_CONDITIONAL_EXPLICIT((!std::is_convertible::value)) // needs two () due to comma + MDSPAN_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 + mapping(layout_left::mapping const& other) noexcept // NOLINT(google-explicit-constructor) + :__extents(other.extents()) { - /* - * TODO: check preconditions - * - s[i] > 0 is true for all i in the range [0, rank_ ). - * - REQUIRED-SPAN-SIZE(e, s) is a representable value of type index_type ([basic.fundamental]). - * - If rank_ is greater than 0, then there exists a permutation P of the integers in the - * range [0, rank_), such that s[ pi ] >= s[ pi − 1 ] * e.extent( pi − 1 ) is true for - * all i in the range [1, rank_ ), where pi is the ith element of P. 
- */ + /* + * TODO: check precondition + * other.required_span_size() is a representable value of type index_type + */ } -#endif // __cpp_lib_span -#if !(defined(_MDSPAN_USE_CONCEPTS) && MDSPAN_HAS_CXX_20) + /** + * Converting constructor from `layout_right_padded::mapping`. + * + * This overload participates in overload resolution only if _Mapping is a layout_right_padded mapping and + * extents_type is constructible from _Mapping::extents_type. + * + * \note There is currently a difference from p2642r2, where this function is specified as taking + * `layout_right_padded< padding_value >::mapping< Extents>`. However, this makes `padding_value` non-deducible. + */ +#if MDSPAN_HAS_CXX_17 MDSPAN_TEMPLATE_REQUIRES( - class StridedLayoutMapping, + class _Mapping, + /* requires */ ( + MDSPAN_IMPL_PROPOSED_NAMESPACE::detail::is_layout_right_padded_mapping<_Mapping>::value + && std::is_constructible_v)) + MDSPAN_CONDITIONAL_EXPLICIT((!std::is_convertible_v)) + mapping(const _Mapping &__other) noexcept + : __extents(__other.extents()) + { + MDSPAN_IMPL_PROPOSED_NAMESPACE::detail:: + check_padded_layout_converting_constructor_mandates< + extents_type, _Mapping>(detail::with_rank{}); + MDSPAN_IMPL_PROPOSED_NAMESPACE::detail:: + check_padded_layout_converting_constructor_preconditions< + extents_type>(detail::with_rank{}, __other); + } +#endif + + MDSPAN_TEMPLATE_REQUIRES( + class OtherExtents, /* requires */ ( - _MDSPAN_TRAIT(is_constructible, extents_type, typename StridedLayoutMapping::extents_type) && - detail::__is_mapping_of && - StridedLayoutMapping::is_always_unique() && - StridedLayoutMapping::is_always_strided() + _MDSPAN_TRAIT(std::is_constructible, extents_type, OtherExtents) ) ) -#else - template - requires( - detail::__layout_mapping_alike && - _MDSPAN_TRAIT(is_constructible, extents_type, typename StridedLayoutMapping::extents_type) && - StridedLayoutMapping::is_always_unique() && - StridedLayoutMapping::is_always_strided() - ) -#endif - 
MDSPAN_CONDITIONAL_EXPLICIT( - (!is_convertible::value) && - (detail::__is_mapping_of || - detail::__is_mapping_of || - detail::__is_mapping_of) - ) // needs two () due to comma + MDSPAN_CONDITIONAL_EXPLICIT((extents_type::rank() > 0)) MDSPAN_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 - mapping(StridedLayoutMapping const& other) noexcept // NOLINT(google-explicit-constructor) -#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - : __members{ -#else - : __base_t(__base_t{__member_pair_t( -#endif - other.extents(), __strides_storage_t(__impl::fill_strides(other)) -#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - } -#else - )}) -#endif - { - /* - * TODO: check preconditions - * - other.stride(i) > 0 is true for all i in the range [0, rank_ ). - * - other.required_span_size() is a representable value of type index_type ([basic.fundamental]). - * - OFFSET(other) == 0 - */ - } - - //-------------------------------------------------------------------------------- - - MDSPAN_INLINE_FUNCTION_DEFAULTED _MDSPAN_CONSTEXPR_14_DEFAULTED - mapping& operator=(mapping const&) noexcept = default; - - MDSPAN_INLINE_FUNCTION constexpr const extents_type& extents() const noexcept { -#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) - return __members.__first(); -#else - return this->__base_t::__ref().__first(); -#endif - }; - - MDSPAN_INLINE_FUNCTION - constexpr array< index_type, extents_type::rank() > strides() const noexcept { - return __strides_storage(); - } - - MDSPAN_INLINE_FUNCTION - constexpr index_type required_span_size() const noexcept { - index_type span_size = 1; - for(unsigned r = 0; r < extents_type::rank(); r++) { - // Return early if any of the extents are zero - if(extents().extent(r)==0) return 0; - span_size = std::max(span_size, static_cast(extents().extent(r) * __strides_storage()[r])); - } - return span_size; - } - - - MDSPAN_TEMPLATE_REQUIRES( - class... 
Indices, - /* requires */ ( - sizeof...(Indices) == Extents::rank() && - _MDSPAN_FOLD_AND(_MDSPAN_TRAIT(is_convertible, Indices, index_type) /*&& ...*/ ) && - _MDSPAN_FOLD_AND(_MDSPAN_TRAIT(is_nothrow_constructible, index_type, Indices) /*&& ...*/) - ) - ) - MDSPAN_FORCE_INLINE_FUNCTION - constexpr size_t operator()(Indices... idxs) const noexcept { - return __impl::_call_op_impl(*this, static_cast(idxs)...); - } - - MDSPAN_INLINE_FUNCTION static constexpr bool is_always_unique() noexcept { return true; } - MDSPAN_INLINE_FUNCTION static constexpr bool is_always_exhaustive() noexcept { - return false; - } - MDSPAN_INLINE_FUNCTION static constexpr bool is_always_strided() noexcept { return true; } - - MDSPAN_INLINE_FUNCTION static constexpr bool is_unique() noexcept { return true; } - MDSPAN_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 bool is_exhaustive() const noexcept { -// TODO @testing test layout_stride is_exhaustive() -// FIXME CUDA -#ifdef __CUDA_ARCH__ - return false; -#else - auto rem = array{ }; - std::iota(rem.begin(), rem.end(), size_t(0)); - auto next_idx_iter = std::find_if( - rem.begin(), rem.end(), - [&](size_t i) { return this->stride(i) == 1; } - ); - if(next_idx_iter != rem.end()) { - size_t prev_stride_times_prev_extent = - this->extents().extent(*next_idx_iter) * this->stride(*next_idx_iter); - // "remove" the index - constexpr auto removed_index_sentinel = static_cast(-1); - *next_idx_iter = removed_index_sentinel; - size_t found_count = 1; - while (found_count != Extents::rank()) { - next_idx_iter = std::find_if( - rem.begin(), rem.end(), - [&](size_t i) { - return i != removed_index_sentinel - && static_cast(this->extents().extent(i)) == prev_stride_times_prev_extent; - } - ); - if (next_idx_iter != rem.end()) { - // "remove" the index - *next_idx_iter = removed_index_sentinel; - ++found_count; - prev_stride_times_prev_extent = stride(*next_idx_iter) * this->extents().extent(*next_idx_iter); - } else { break; } - } - return found_count == 
Extents::rank(); - } - return false; -#endif - } - MDSPAN_INLINE_FUNCTION static constexpr bool is_strided() noexcept { return true; } - - - MDSPAN_INLINE_FUNCTION - constexpr index_type stride(rank_type r) const noexcept { - return __strides_storage()[r]; - } - -#if !(defined(_MDSPAN_USE_CONCEPTS) && MDSPAN_HAS_CXX_20) - MDSPAN_TEMPLATE_REQUIRES( - class StridedLayoutMapping, - /* requires */ ( - detail::__is_mapping_of && - (extents_type::rank() == StridedLayoutMapping::extents_type::rank()) && - StridedLayoutMapping::is_always_strided() - ) - ) -#else - template - requires( - detail::__layout_mapping_alike && - (extents_type::rank() == StridedLayoutMapping::extents_type::rank()) && - StridedLayoutMapping::is_always_strided() - ) -#endif - MDSPAN_INLINE_FUNCTION - friend constexpr bool operator==(const mapping& x, const StridedLayoutMapping& y) noexcept { - bool strides_match = true; - for(rank_type r = 0; r < extents_type::rank(); r++) - strides_match = strides_match && (x.stride(r) == y.stride(r)); - return (x.extents() == y.extents()) && - (__impl::__OFFSET(y)== static_cast(0)) && - strides_match; - } - - // This one is not technically part of the proposal. 
Just here to make implementation a bit more optimal hopefully - MDSPAN_TEMPLATE_REQUIRES( - class OtherExtents, - /* requires */ ( - (extents_type::rank() == OtherExtents::rank()) - ) - ) - MDSPAN_INLINE_FUNCTION - friend constexpr bool operator==(mapping const& lhs, mapping const& rhs) noexcept { - return __impl::_eq_impl(lhs, rhs); - } - -#if !MDSPAN_HAS_CXX_20 - MDSPAN_TEMPLATE_REQUIRES( - class StridedLayoutMapping, - /* requires */ ( - detail::__is_mapping_of && - (extents_type::rank() == StridedLayoutMapping::extents_type::rank()) && - StridedLayoutMapping::is_always_strided() - ) - ) - MDSPAN_INLINE_FUNCTION - friend constexpr bool operator!=(const mapping& x, const StridedLayoutMapping& y) noexcept { - return not (x == y); - } - - MDSPAN_TEMPLATE_REQUIRES( - class OtherExtents, - /* requires */ ( - (extents_type::rank() == OtherExtents::rank()) - ) - ) - MDSPAN_INLINE_FUNCTION - friend constexpr bool operator!=(mapping const& lhs, mapping const& rhs) noexcept { - return __impl::_not_eq_impl(lhs, rhs); - } -#endif - - }; -}; - -} // end namespace experimental -} // end namespace std -//END_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/layout_stride.hpp - -namespace std { -namespace experimental { - -//============================================================================== -template -class layout_right::mapping { - public: - using extents_type = Extents; - using index_type = typename extents_type::index_type; - using size_type = typename extents_type::size_type; - using rank_type = typename extents_type::rank_type; - using layout_type = layout_right; - private: - - static_assert(detail::__is_extents_v, "std::experimental::layout_right::mapping must be instantiated with a specialization of std::experimental::extents."); - - template - friend class mapping; - - // i0+(i1 + E(1)*(i2 + E(2)*i3)) - template - struct __rank_count {}; - - template - _MDSPAN_HOST_DEVICE - constexpr index_type __compute_offset( - index_type 
offset, __rank_count, const I& i, Indices... idx) const { - return __compute_offset(offset * __extents.template __extent() + i,__rank_count(), idx...); - } - - template - _MDSPAN_HOST_DEVICE - constexpr index_type __compute_offset( - __rank_count<0,extents_type::rank()>, const I& i, Indices... idx) const { - return __compute_offset(i,__rank_count<1,extents_type::rank()>(),idx...); - } - - _MDSPAN_HOST_DEVICE - constexpr index_type __compute_offset(size_t offset, __rank_count) const { - return static_cast(offset); - } - - _MDSPAN_HOST_DEVICE - constexpr index_type __compute_offset(__rank_count<0,0>) const { return 0; } - - public: - - //-------------------------------------------------------------------------------- - - MDSPAN_INLINE_FUNCTION_DEFAULTED constexpr mapping() noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED constexpr mapping(mapping const&) noexcept = default; - - _MDSPAN_HOST_DEVICE - constexpr mapping(extents_type const& __exts) noexcept - :__extents(__exts) - { } - - MDSPAN_TEMPLATE_REQUIRES( - class OtherExtents, - /* requires */ ( - _MDSPAN_TRAIT(is_constructible, extents_type, OtherExtents) - ) - ) - MDSPAN_CONDITIONAL_EXPLICIT((!is_convertible::value)) // needs two () due to comma - MDSPAN_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 - mapping(mapping const& other) noexcept // NOLINT(google-explicit-constructor) - :__extents(other.extents()) - { - /* - * TODO: check precondition - * other.required_span_size() is a representable value of type index_type - */ - } - - MDSPAN_TEMPLATE_REQUIRES( - class OtherExtents, - /* requires */ ( - _MDSPAN_TRAIT(is_constructible, extents_type, OtherExtents) && - (extents_type::rank() <= 1) - ) - ) - MDSPAN_CONDITIONAL_EXPLICIT((!is_convertible::value)) // needs two () due to comma - MDSPAN_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 - mapping(layout_left::mapping const& other) noexcept // NOLINT(google-explicit-constructor) - :__extents(other.extents()) - { - /* - * TODO: check precondition - * other.required_span_size() 
is a representable value of type index_type - */ - } - - MDSPAN_TEMPLATE_REQUIRES( - class OtherExtents, - /* requires */ ( - _MDSPAN_TRAIT(is_constructible, extents_type, OtherExtents) - ) - ) - MDSPAN_CONDITIONAL_EXPLICIT((extents_type::rank() > 0)) - MDSPAN_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 - mapping(layout_stride::mapping const& other) // NOLINT(google-explicit-constructor) + mapping(layout_stride::mapping const& other) noexcept // NOLINT(google-explicit-constructor) :__extents(other.extents()) { /* * TODO: check precondition * other.required_span_size() is a representable value of type index_type */ - #ifndef __CUDA_ARCH__ - size_t stride = 1; - for(rank_type r=__extents.rank(); r>0; r--) { - if(stride != other.stride(r-1)) - throw std::runtime_error("Assigning layout_stride to layout_right with invalid strides."); - stride *= __extents.extent(r-1); - } - #endif + detail::validate_strides(detail::with_rank{}, layout_right{}, __extents, other); } MDSPAN_INLINE_FUNCTION_DEFAULTED _MDSPAN_CONSTEXPR_14_DEFAULTED mapping& operator=(mapping const&) noexcept = default; @@ -4313,43 +3343,53 @@ class layout_right::mapping { //-------------------------------------------------------------------------------- MDSPAN_TEMPLATE_REQUIRES( - class... Indices, + class ... Indices, /* requires */ ( - (sizeof...(Indices) == extents_type::rank()) && - _MDSPAN_FOLD_AND( - (_MDSPAN_TRAIT(is_convertible, Indices, index_type) && - _MDSPAN_TRAIT(is_nothrow_constructible, index_type, Indices)) - ) + (sizeof...(Indices) == extents_type::rank()) && + (detail::are_valid_indices()) ) ) _MDSPAN_HOST_DEVICE constexpr index_type operator()(Indices... idxs) const noexcept { - return __compute_offset(__rank_count<0, extents_type::rank()>(), idxs...); +#if ! defined(NDEBUG) + detail::check_all_indices(this->extents(), idxs...); +#endif // ! 
NDEBUG + return __compute_offset(__rank_count<0, extents_type::rank()>(), static_cast(idxs)...); } MDSPAN_INLINE_FUNCTION static constexpr bool is_always_unique() noexcept { return true; } MDSPAN_INLINE_FUNCTION static constexpr bool is_always_exhaustive() noexcept { return true; } MDSPAN_INLINE_FUNCTION static constexpr bool is_always_strided() noexcept { return true; } - MDSPAN_INLINE_FUNCTION constexpr bool is_unique() const noexcept { return true; } - MDSPAN_INLINE_FUNCTION constexpr bool is_exhaustive() const noexcept { return true; } - MDSPAN_INLINE_FUNCTION constexpr bool is_strided() const noexcept { return true; } + MDSPAN_INLINE_FUNCTION static constexpr bool is_unique() noexcept { return true; } + MDSPAN_INLINE_FUNCTION static constexpr bool is_exhaustive() noexcept { return true; } + MDSPAN_INLINE_FUNCTION static constexpr bool is_strided() noexcept { return true; } MDSPAN_INLINE_FUNCTION - constexpr index_type stride(rank_type i) const noexcept { + constexpr index_type stride(rank_type i) const noexcept +#if MDSPAN_HAS_CXX_20 + requires ( Extents::rank() > 0 ) +#endif + { index_type value = 1; for(rank_type r=extents_type::rank()-1; r>i; r--) value*=__extents.extent(r); return value; } - template + MDSPAN_TEMPLATE_REQUIRES( + class OtherExtents, + /* requires */ ( Extents::rank() == OtherExtents::rank()) + ) MDSPAN_INLINE_FUNCTION friend constexpr bool operator==(mapping const& lhs, mapping const& rhs) noexcept { return lhs.extents() == rhs.extents(); } // In C++ 20 the not equal exists if equal is found -#if MDSPAN_HAS_CXX_20 - template +#if !(MDSPAN_HAS_CXX_20) + MDSPAN_TEMPLATE_REQUIRES( + class OtherExtents, + /* requires */ (Extents::rank() == OtherExtents::rank()) + ) MDSPAN_INLINE_FUNCTION friend constexpr bool operator!=(mapping const& lhs, mapping const& rhs) noexcept { return lhs.extents() != rhs.extents(); @@ -4358,27 +3398,35 @@ class layout_right::mapping { // Not really public, but currently needed to implement fully constexpr useable 
submdspan: template - constexpr index_type __get_stride(std::experimental::extents,integer_sequence) const { + constexpr index_type __get_stride(MDSPAN_IMPL_STANDARD_NAMESPACE::extents,std::integer_sequence) const { return _MDSPAN_FOLD_TIMES_RIGHT((Idx>N? __extents.template __extent():1),1); } template constexpr index_type __stride() const noexcept { - return __get_stride(__extents, make_index_sequence()); + return __get_stride(__extents, std::make_index_sequence()); } private: _MDSPAN_NO_UNIQUE_ADDRESS extents_type __extents{}; + // [mdspan.submdspan.mapping], submdspan mapping specialization + template + MDSPAN_INLINE_FUNCTION + constexpr auto submdspan_mapping_impl( + SliceSpecifiers... slices) const; + + template + friend constexpr auto submdspan_mapping( + const mapping& src, SliceSpecifiers... slices) { + return src.submdspan_mapping_impl(slices...); + } }; -} // end namespace experimental -} // end namespace std +} // end namespace MDSPAN_IMPL_STANDARD_NAMESPACE //END_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/layout_right.hpp -namespace std { -namespace experimental { - +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { template < class ElementType, class Extents, @@ -4388,32 +3436,42 @@ template < class mdspan { private: - static_assert(detail::__is_extents_v, "std::experimental::mdspan's Extents template parameter must be a specialization of std::experimental::extents."); + static_assert(detail::__is_extents_v, + MDSPAN_IMPL_STANDARD_NAMESPACE_STRING "::mdspan's Extents template parameter must be a specialization of " MDSPAN_IMPL_STANDARD_NAMESPACE_STRING "::extents."); + static_assert(std::is_same::value, + MDSPAN_IMPL_STANDARD_NAMESPACE_STRING "::mdspan's ElementType template parameter must be the same as its AccessorPolicy::element_type."); // Workaround for non-deducibility of the index sequence template parameter if it's given at the top level template struct __deduction_workaround; template - struct 
__deduction_workaround> + struct __deduction_workaround> { MDSPAN_FORCE_INLINE_FUNCTION static constexpr size_t __size(mdspan const& __self) noexcept { - return _MDSPAN_FOLD_TIMES_RIGHT((__self.__mapping_ref().extents().template __extent()), /* * ... * */ size_t(1)); + return _MDSPAN_FOLD_TIMES_RIGHT((__self.__mapping_ref().extents().extent(Idxs)), /* * ... * */ size_t(1)); } MDSPAN_FORCE_INLINE_FUNCTION static constexpr bool __empty(mdspan const& __self) noexcept { - return (__self.rank()>0) && _MDSPAN_FOLD_OR((__self.__mapping_ref().extents().template __extent()==index_type(0))); + return (__self.rank()>0) && _MDSPAN_FOLD_OR((__self.__mapping_ref().extents().extent(Idxs)==index_type(0))); } template MDSPAN_FORCE_INLINE_FUNCTION static constexpr - ReferenceType __callop(mdspan const& __self, const array& indices) noexcept { + ReferenceType __callop(mdspan const& __self, const std::array& indices) noexcept { return __self.__accessor_ref().access(__self.__ptr_ref(), __self.__mapping_ref()(indices[Idxs]...)); } - }; - -public: - +#ifdef __cpp_lib_span + template + MDSPAN_FORCE_INLINE_FUNCTION static constexpr + ReferenceType __callop(mdspan const& __self, const std::span& indices) noexcept { + return __self.__accessor_ref().access(__self.__ptr_ref(), __self.__mapping_ref()(indices[Idxs]...)); + } +#endif + }; + +public: + //-------------------------------------------------------------------------------- // Domain and codomain types @@ -4422,7 +3480,7 @@ class mdspan using accessor_type = AccessorPolicy; using mapping_type = typename layout_type::template mapping; using element_type = ElementType; - using value_type = remove_cv_t; + using value_type = std::remove_cv_t; using index_type = typename extents_type::index_type; using size_type = typename extents_type::size_type; using rank_type = typename extents_type::rank_type; @@ -4437,7 +3495,7 @@ class mdspan private: // Can't use defaulted parameter in the __deduction_workaround template because of a bug in MSVC 
warning C4348. - using __impl = __deduction_workaround>; + using __impl = __deduction_workaround>; using __map_acc_pair_t = detail::__compressed_pair; @@ -4451,10 +3509,11 @@ class mdspan #else MDSPAN_INLINE_FUNCTION_DEFAULTED constexpr mdspan() requires( - (rank_dynamic() > 0) && - _MDSPAN_TRAIT(is_default_constructible, data_handle_type) && - _MDSPAN_TRAIT(is_default_constructible, mapping_type) && - _MDSPAN_TRAIT(is_default_constructible, accessor_type) + // nvhpc has a bug where using just rank_dynamic() here doesn't work ... + (extents_type::rank_dynamic() > 0) && + _MDSPAN_TRAIT(std::is_default_constructible, data_handle_type) && + _MDSPAN_TRAIT(std::is_default_constructible, mapping_type) && + _MDSPAN_TRAIT(std::is_default_constructible, accessor_type) ) = default; #endif MDSPAN_INLINE_FUNCTION_DEFAULTED constexpr mdspan(const mdspan&) = default; @@ -4463,11 +3522,10 @@ class mdspan MDSPAN_TEMPLATE_REQUIRES( class... SizeTypes, /* requires */ ( - _MDSPAN_FOLD_AND(_MDSPAN_TRAIT(is_convertible, SizeTypes, index_type) /* && ... */) && - _MDSPAN_FOLD_AND(_MDSPAN_TRAIT(is_nothrow_constructible, index_type, SizeTypes) /* && ... 
*/) && ((sizeof...(SizeTypes) == rank()) || (sizeof...(SizeTypes) == rank_dynamic())) && - _MDSPAN_TRAIT(is_constructible, mapping_type, extents_type) && - _MDSPAN_TRAIT(is_default_constructible, accessor_type) + (detail::are_valid_indices()) && + _MDSPAN_TRAIT(std::is_constructible, mapping_type, extents_type) && + _MDSPAN_TRAIT(std::is_default_constructible, accessor_type) ) ) MDSPAN_INLINE_FUNCTION @@ -4479,16 +3537,16 @@ class mdspan MDSPAN_TEMPLATE_REQUIRES( class SizeType, size_t N, /* requires */ ( - _MDSPAN_TRAIT(is_convertible, SizeType, index_type) && - _MDSPAN_TRAIT(is_nothrow_constructible, index_type, SizeType) && + _MDSPAN_TRAIT(std::is_convertible, const SizeType&, index_type) && + _MDSPAN_TRAIT(std::is_nothrow_constructible, index_type, const SizeType&) && ((N == rank()) || (N == rank_dynamic())) && - _MDSPAN_TRAIT(is_constructible, mapping_type, extents_type) && - _MDSPAN_TRAIT(is_default_constructible, accessor_type) + _MDSPAN_TRAIT(std::is_constructible, mapping_type, extents_type) && + _MDSPAN_TRAIT(std::is_default_constructible, accessor_type) ) ) MDSPAN_CONDITIONAL_EXPLICIT(N != rank_dynamic()) MDSPAN_INLINE_FUNCTION - constexpr mdspan(data_handle_type p, const array& dynamic_extents) + constexpr mdspan(data_handle_type p, const std::array& dynamic_extents) : __members(std::move(p), __map_acc_pair_t(mapping_type(extents_type(dynamic_extents)), accessor_type())) { } @@ -4496,16 +3554,16 @@ class mdspan MDSPAN_TEMPLATE_REQUIRES( class SizeType, size_t N, /* requires */ ( - _MDSPAN_TRAIT(is_convertible, SizeType, index_type) && - _MDSPAN_TRAIT(is_nothrow_constructible, index_type, SizeType) && + _MDSPAN_TRAIT(std::is_convertible, const SizeType&, index_type) && + _MDSPAN_TRAIT(std::is_nothrow_constructible, index_type, const SizeType&) && ((N == rank()) || (N == rank_dynamic())) && - _MDSPAN_TRAIT(is_constructible, mapping_type, extents_type) && - _MDSPAN_TRAIT(is_default_constructible, accessor_type) + _MDSPAN_TRAIT(std::is_constructible, 
mapping_type, extents_type) && + _MDSPAN_TRAIT(std::is_default_constructible, accessor_type) ) ) MDSPAN_CONDITIONAL_EXPLICIT(N != rank_dynamic()) MDSPAN_INLINE_FUNCTION - constexpr mdspan(data_handle_type p, span dynamic_extents) + constexpr mdspan(data_handle_type p, std::span dynamic_extents) : __members(std::move(p), __map_acc_pair_t(mapping_type(extents_type(as_const(dynamic_extents))), accessor_type())) { } #endif @@ -4513,15 +3571,15 @@ class mdspan MDSPAN_FUNCTION_REQUIRES( (MDSPAN_INLINE_FUNCTION constexpr), mdspan, (data_handle_type p, const extents_type& exts), , - /* requires */ (_MDSPAN_TRAIT(is_default_constructible, accessor_type) && - _MDSPAN_TRAIT(is_constructible, mapping_type, extents_type)) + /* requires */ (_MDSPAN_TRAIT(std::is_default_constructible, accessor_type) && + _MDSPAN_TRAIT(std::is_constructible, mapping_type, const extents_type&)) ) : __members(std::move(p), __map_acc_pair_t(mapping_type(exts), accessor_type())) { } MDSPAN_FUNCTION_REQUIRES( (MDSPAN_INLINE_FUNCTION constexpr), mdspan, (data_handle_type p, const mapping_type& m), , - /* requires */ (_MDSPAN_TRAIT(is_default_constructible, accessor_type)) + /* requires */ (_MDSPAN_TRAIT(std::is_default_constructible, accessor_type)) ) : __members(std::move(p), __map_acc_pair_t(m, accessor_type())) { } @@ -4533,16 +3591,20 @@ class mdspan MDSPAN_TEMPLATE_REQUIRES( class OtherElementType, class OtherExtents, class OtherLayoutPolicy, class OtherAccessor, /* requires */ ( - _MDSPAN_TRAIT(is_constructible, mapping_type, typename OtherLayoutPolicy::template mapping) && - _MDSPAN_TRAIT(is_constructible, accessor_type, OtherAccessor) + _MDSPAN_TRAIT(std::is_constructible, mapping_type, const typename OtherLayoutPolicy::template mapping&) && + _MDSPAN_TRAIT(std::is_constructible, accessor_type, const OtherAccessor&) ) ) + MDSPAN_CONDITIONAL_EXPLICIT( + !_MDSPAN_TRAIT(std::is_convertible, const typename OtherLayoutPolicy::template mapping&, mapping_type) || + !_MDSPAN_TRAIT(std::is_convertible, 
const OtherAccessor&, accessor_type) + ) MDSPAN_INLINE_FUNCTION constexpr mdspan(const mdspan& other) : __members(other.__ptr_ref(), __map_acc_pair_t(other.__mapping_ref(), other.__accessor_ref())) { - static_assert(_MDSPAN_TRAIT(is_constructible, data_handle_type, typename OtherAccessor::data_handle_type),"Incompatible data_handle_type for mdspan construction"); - static_assert(_MDSPAN_TRAIT(is_constructible, extents_type, OtherExtents),"Incompatible extents for mdspan construction"); + static_assert(_MDSPAN_TRAIT(std::is_constructible, data_handle_type, typename OtherAccessor::data_handle_type),"Incompatible data_handle_type for mdspan construction"); + static_assert(_MDSPAN_TRAIT(std::is_constructible, extents_type, OtherExtents),"Incompatible extents for mdspan construction"); /* * TODO: Check precondition * For each rank index r of extents_type, static_extent(r) == dynamic_extent || static_extent(r) == other.extent(r) is true. @@ -4565,8 +3627,8 @@ class mdspan MDSPAN_TEMPLATE_REQUIRES( class... SizeTypes, /* requires */ ( - _MDSPAN_FOLD_AND(_MDSPAN_TRAIT(is_convertible, SizeTypes, index_type) /* && ... */) && - _MDSPAN_FOLD_AND(_MDSPAN_TRAIT(is_nothrow_constructible, index_type, SizeTypes) /* && ... */) && + _MDSPAN_FOLD_AND(_MDSPAN_TRAIT(std::is_convertible, SizeTypes, index_type) /* && ... */) && + _MDSPAN_FOLD_AND(_MDSPAN_TRAIT(std::is_nothrow_constructible, index_type, SizeTypes) /* && ... 
*/) && (rank() == sizeof...(SizeTypes)) ) ) @@ -4580,12 +3642,12 @@ class mdspan MDSPAN_TEMPLATE_REQUIRES( class SizeType, /* requires */ ( - _MDSPAN_TRAIT(is_convertible, SizeType, index_type) && - _MDSPAN_TRAIT(is_nothrow_constructible, index_type, SizeType) + _MDSPAN_TRAIT(std::is_convertible, const SizeType&, index_type) && + _MDSPAN_TRAIT(std::is_nothrow_constructible, index_type, const SizeType&) ) ) MDSPAN_FORCE_INLINE_FUNCTION - constexpr reference operator[](const array& indices) const + constexpr reference operator[](const std::array< SizeType, rank()>& indices) const { return __impl::template __callop(*this, indices); } @@ -4594,12 +3656,12 @@ class mdspan MDSPAN_TEMPLATE_REQUIRES( class SizeType, /* requires */ ( - _MDSPAN_TRAIT(is_convertible, SizeType, index_type) && - _MDSPAN_TRAIT(is_nothrow_constructible, index_type, SizeType) + _MDSPAN_TRAIT(std::is_convertible, const SizeType&, index_type) && + _MDSPAN_TRAIT(std::is_nothrow_constructible, index_type, const SizeType&) ) ) MDSPAN_FORCE_INLINE_FUNCTION - constexpr reference operator[](span indices) const + constexpr reference operator[](std::span indices) const { return __impl::template __callop(*this, indices); } @@ -4609,8 +3671,8 @@ class mdspan MDSPAN_TEMPLATE_REQUIRES( class Index, /* requires */ ( - _MDSPAN_TRAIT(is_convertible, Index, index_type) && - _MDSPAN_TRAIT(is_nothrow_constructible, index_type, Index) && + _MDSPAN_TRAIT(std::is_convertible, Index, index_type) && + _MDSPAN_TRAIT(std::is_nothrow_constructible, index_type, Index) && extents_type::rank() == 1 ) ) @@ -4625,9 +3687,8 @@ class mdspan MDSPAN_TEMPLATE_REQUIRES( class... SizeTypes, /* requires */ ( - _MDSPAN_FOLD_AND(_MDSPAN_TRAIT(is_convertible, SizeTypes, index_type) /* && ... */) && - _MDSPAN_FOLD_AND(_MDSPAN_TRAIT(is_nothrow_constructible, index_type, SizeTypes) /* && ... 
*/) && - extents_type::rank() == sizeof...(SizeTypes) + extents_type::rank() == sizeof...(SizeTypes) && + (detail::are_valid_indices()) ) ) MDSPAN_FORCE_INLINE_FUNCTION @@ -4639,12 +3700,12 @@ class mdspan MDSPAN_TEMPLATE_REQUIRES( class SizeType, /* requires */ ( - _MDSPAN_TRAIT(is_convertible, SizeType, index_type) && - _MDSPAN_TRAIT(is_nothrow_constructible, index_type, SizeType) + _MDSPAN_TRAIT(std::is_convertible, const SizeType&, index_type) && + _MDSPAN_TRAIT(std::is_nothrow_constructible, index_type, const SizeType&) ) ) MDSPAN_FORCE_INLINE_FUNCTION - constexpr reference operator()(const array& indices) const + constexpr reference operator()(const std::array& indices) const { return __impl::template __callop(*this, indices); } @@ -4653,19 +3714,19 @@ class mdspan MDSPAN_TEMPLATE_REQUIRES( class SizeType, /* requires */ ( - _MDSPAN_TRAIT(is_convertible, SizeType, index_type) && - _MDSPAN_TRAIT(is_nothrow_constructible, index_type, SizeType) + _MDSPAN_TRAIT(std::is_convertible, const SizeType&, index_type) && + _MDSPAN_TRAIT(std::is_nothrow_constructible, index_type, const SizeType&) ) ) MDSPAN_FORCE_INLINE_FUNCTION - constexpr reference operator()(span indices) const + constexpr reference operator()(std::span indices) const { return __impl::template __callop(*this, indices); } #endif // __cpp_lib_span #endif // MDSPAN_USE_PAREN_OPERATOR - MDSPAN_INLINE_FUNCTION constexpr size_t size() const noexcept { + MDSPAN_INLINE_FUNCTION constexpr size_type size() const noexcept { return __impl::__size(*this); }; @@ -4675,9 +3736,17 @@ class mdspan MDSPAN_INLINE_FUNCTION friend constexpr void swap(mdspan& x, mdspan& y) noexcept { + // can't call the std::swap inside on HIP + #if !defined(_MDSPAN_HAS_HIP) && !defined(_MDSPAN_HAS_CUDA) + using std::swap; swap(x.__ptr_ref(), y.__ptr_ref()); swap(x.__mapping_ref(), y.__mapping_ref()); swap(x.__accessor_ref(), y.__accessor_ref()); + #else + mdspan tmp = y; + y = x; + x = tmp; + #endif } 
//-------------------------------------------------------------------------------- @@ -4692,13 +3761,13 @@ class mdspan //-------------------------------------------------------------------------------- // [mdspan.basic.obs], mdspan observers of the mapping - MDSPAN_INLINE_FUNCTION static constexpr bool is_always_unique() noexcept { return mapping_type::is_always_unique(); }; - MDSPAN_INLINE_FUNCTION static constexpr bool is_always_exhaustive() noexcept { return mapping_type::is_always_exhaustive(); }; - MDSPAN_INLINE_FUNCTION static constexpr bool is_always_strided() noexcept { return mapping_type::is_always_strided(); }; + MDSPAN_INLINE_FUNCTION static constexpr bool is_always_unique() { return mapping_type::is_always_unique(); }; + MDSPAN_INLINE_FUNCTION static constexpr bool is_always_exhaustive() { return mapping_type::is_always_exhaustive(); }; + MDSPAN_INLINE_FUNCTION static constexpr bool is_always_strided() { return mapping_type::is_always_strided(); }; - MDSPAN_INLINE_FUNCTION constexpr bool is_unique() const noexcept { return __mapping_ref().is_unique(); }; - MDSPAN_INLINE_FUNCTION constexpr bool is_exhaustive() const noexcept { return __mapping_ref().is_exhaustive(); }; - MDSPAN_INLINE_FUNCTION constexpr bool is_strided() const noexcept { return __mapping_ref().is_strided(); }; + MDSPAN_INLINE_FUNCTION constexpr bool is_unique() const { return __mapping_ref().is_unique(); }; + MDSPAN_INLINE_FUNCTION constexpr bool is_exhaustive() const { return __mapping_ref().is_exhaustive(); }; + MDSPAN_INLINE_FUNCTION constexpr bool is_strided() const { return __mapping_ref().is_strided(); }; MDSPAN_INLINE_FUNCTION constexpr index_type stride(size_t r) const { return __mapping_ref().stride(r); }; private: @@ -4720,971 +3789,2016 @@ class mdspan #if defined(_MDSPAN_USE_CLASS_TEMPLATE_ARGUMENT_DEDUCTION) MDSPAN_TEMPLATE_REQUIRES( class ElementType, class... SizeTypes, - /* requires */ _MDSPAN_FOLD_AND(_MDSPAN_TRAIT(is_integral, SizeTypes) /* && ... 
*/) && + /* requires */ _MDSPAN_FOLD_AND(_MDSPAN_TRAIT(std::is_convertible, SizeTypes, size_t) /* && ... */) && (sizeof...(SizeTypes) > 0) ) -explicit mdspan(ElementType*, SizeTypes...) - -> mdspan>; +MDSPAN_DEDUCTION_GUIDE explicit mdspan(ElementType*, SizeTypes...) + -> mdspan>; MDSPAN_TEMPLATE_REQUIRES( class Pointer, - (_MDSPAN_TRAIT(is_pointer, std::remove_reference_t)) + (_MDSPAN_TRAIT(std::is_pointer, std::remove_reference_t)) ) -mdspan(Pointer&&) -> mdspan>, extents>; +MDSPAN_DEDUCTION_GUIDE mdspan(Pointer&&) -> mdspan>, extents>; MDSPAN_TEMPLATE_REQUIRES( class CArray, - (_MDSPAN_TRAIT(is_array, CArray) && (rank_v == 1)) + (_MDSPAN_TRAIT(std::is_array, CArray) && (std::rank_v == 1)) ) -mdspan(CArray&) -> mdspan, extents>>; +MDSPAN_DEDUCTION_GUIDE mdspan(CArray&) -> mdspan, extents>>; template -mdspan(ElementType*, const ::std::array&) - -> mdspan>; +MDSPAN_DEDUCTION_GUIDE mdspan(ElementType*, const ::std::array&) + -> mdspan>; #ifdef __cpp_lib_span template -mdspan(ElementType*, ::std::span) - -> mdspan>; +MDSPAN_DEDUCTION_GUIDE mdspan(ElementType*, ::std::span) + -> mdspan>; #endif // This one is necessary because all the constructors take `data_handle_type`s, not // `ElementType*`s, and `data_handle_type` is taken from `accessor_type::data_handle_type`, which // seems to throw off automatic deduction guides. 
template -mdspan(ElementType*, const extents&) - -> mdspan>; +MDSPAN_DEDUCTION_GUIDE mdspan(ElementType*, const extents&) + -> mdspan>; + +template +MDSPAN_DEDUCTION_GUIDE mdspan(ElementType*, const MappingType&) + -> mdspan; + +template +MDSPAN_DEDUCTION_GUIDE mdspan(const typename AccessorType::data_handle_type, const MappingType&, const AccessorType&) + -> mdspan; +#endif + +} // end namespace MDSPAN_IMPL_STANDARD_NAMESPACE +//END_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/mdspan.hpp +//BEGIN_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/layout_left.hpp +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#if MDSPAN_HAS_CXX_17 +#endif +#include + +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { + +//============================================================================== + +template +class layout_left::mapping { + public: + using extents_type = Extents; + using index_type = typename extents_type::index_type; + using size_type = typename extents_type::size_type; + using rank_type = typename extents_type::rank_type; + using layout_type = layout_left; + private: + + static_assert(detail::__is_extents_v, + MDSPAN_IMPL_STANDARD_NAMESPACE_STRING "::layout_left::mapping must be instantiated with a specialization of " MDSPAN_IMPL_STANDARD_NAMESPACE_STRING "::extents."); + + template + friend class mapping; + + // i0+(i1 + E(1)*(i2 + E(2)*i3)) + template + struct __rank_count {}; + + template + _MDSPAN_HOST_DEVICE + constexpr index_type __compute_offset( + __rank_count, const I& i, Indices... idx) const { + return __compute_offset(__rank_count(), idx...) 
* + __extents.extent(r) + i; + } + + template + _MDSPAN_HOST_DEVICE + constexpr index_type __compute_offset( + __rank_count, const I& i) const { + return i; + } + + _MDSPAN_HOST_DEVICE + constexpr index_type __compute_offset(__rank_count<0,0>) const { return 0; } + + public: + + //-------------------------------------------------------------------------------- + + MDSPAN_INLINE_FUNCTION_DEFAULTED constexpr mapping() noexcept = default; + MDSPAN_INLINE_FUNCTION_DEFAULTED constexpr mapping(mapping const&) noexcept = default; + + _MDSPAN_HOST_DEVICE + constexpr mapping(extents_type const& __exts) noexcept + :__extents(__exts) + { } + + MDSPAN_TEMPLATE_REQUIRES( + class OtherExtents, + /* requires */ ( + _MDSPAN_TRAIT(std::is_constructible, extents_type, OtherExtents) + ) + ) + MDSPAN_CONDITIONAL_EXPLICIT((!std::is_convertible::value)) // needs two () due to comma + MDSPAN_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 + mapping(mapping const& other) noexcept // NOLINT(google-explicit-constructor) + :__extents(other.extents()) + { + /* + * TODO: check precondition + * other.required_span_size() is a representable value of type index_type + */ + } + + MDSPAN_TEMPLATE_REQUIRES( + class OtherExtents, + /* requires */ ( + _MDSPAN_TRAIT(std::is_constructible, extents_type, OtherExtents) && + (extents_type::rank() <= 1) + ) + ) + MDSPAN_CONDITIONAL_EXPLICIT((!std::is_convertible::value)) // needs two () due to comma + MDSPAN_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 + mapping(layout_right::mapping const& other) noexcept // NOLINT(google-explicit-constructor) + :__extents(other.extents()) + { + /* + * TODO: check precondition + * other.required_span_size() is a representable value of type index_type + */ + } + +#if MDSPAN_HAS_CXX_17 + /** + * Converting constructor from `layout_left_padded::mapping`. + * + * This overload participates in overload resolution only if _Mapping is a layout_left_padded mapping and + * extents_type is constructible from _Mapping::extents_type. 
+ * + * \note There is currently a difference from p2642r2, where this function is specified as taking + * `layout_left_padded< padding_value >::mapping< Extents>`. However, this makes `padding_value` non-deducible. + */ + MDSPAN_TEMPLATE_REQUIRES( + class _Mapping, + /* requires */ ( + MDSPAN_IMPL_PROPOSED_NAMESPACE::detail::is_layout_left_padded_mapping<_Mapping>::value + && std::is_constructible_v + ) + ) + MDSPAN_CONDITIONAL_EXPLICIT((!std::is_convertible_v)) + mapping(const _Mapping& __other) noexcept + : __extents(__other.extents()) + { + MDSPAN_IMPL_PROPOSED_NAMESPACE::detail:: + check_padded_layout_converting_constructor_mandates< + extents_type, _Mapping>(detail::with_rank{}); + MDSPAN_IMPL_PROPOSED_NAMESPACE::detail:: + check_padded_layout_converting_constructor_preconditions< + extents_type>(detail::with_rank{}, __other); + } +#endif + + MDSPAN_TEMPLATE_REQUIRES( + class OtherExtents, + /* requires */ ( + _MDSPAN_TRAIT(std::is_constructible, extents_type, OtherExtents) + ) + ) + MDSPAN_CONDITIONAL_EXPLICIT((extents_type::rank() > 0)) + MDSPAN_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 + mapping(layout_stride::mapping const& other) noexcept // NOLINT(google-explicit-constructor) + :__extents(other.extents()) + { + /* + * TODO: check precondition + * other.required_span_size() is a representable value of type index_type + */ + detail::validate_strides(detail::with_rank{}, layout_left{}, __extents, other); + } + + MDSPAN_INLINE_FUNCTION_DEFAULTED _MDSPAN_CONSTEXPR_14_DEFAULTED mapping& operator=(mapping const&) noexcept = default; + + MDSPAN_INLINE_FUNCTION + constexpr const extents_type& extents() const noexcept { + return __extents; + } + + MDSPAN_INLINE_FUNCTION + constexpr index_type required_span_size() const noexcept { + index_type value = 1; + for(rank_type r=0; r()) + ) + ) + _MDSPAN_HOST_DEVICE + constexpr index_type operator()(Indices... idxs) const noexcept { +#if ! defined(NDEBUG) + detail::check_all_indices(this->extents(), idxs...); +#endif // ! 
NDEBUG + return __compute_offset(__rank_count<0, extents_type::rank()>(), static_cast(idxs)...); + } + + + + MDSPAN_INLINE_FUNCTION static constexpr bool is_always_unique() noexcept { return true; } + MDSPAN_INLINE_FUNCTION static constexpr bool is_always_exhaustive() noexcept { return true; } + MDSPAN_INLINE_FUNCTION static constexpr bool is_always_strided() noexcept { return true; } + + MDSPAN_INLINE_FUNCTION static constexpr bool is_unique() noexcept { return true; } + MDSPAN_INLINE_FUNCTION static constexpr bool is_exhaustive() noexcept { return true; } + MDSPAN_INLINE_FUNCTION static constexpr bool is_strided() noexcept { return true; } + + MDSPAN_INLINE_FUNCTION + constexpr index_type stride(rank_type i) const noexcept +#if MDSPAN_HAS_CXX_20 + requires ( Extents::rank() > 0 ) +#endif + { + index_type value = 1; + for(rank_type r=0; r const& rhs) noexcept { + return lhs.extents() == rhs.extents(); + } + + // In C++ 20 the not equal exists if equal is found +#if !(MDSPAN_HAS_CXX_20) + MDSPAN_TEMPLATE_REQUIRES( + class OtherExtents, + /* requires */ ( Extents::rank() == OtherExtents::rank()) + ) + MDSPAN_INLINE_FUNCTION + friend constexpr bool operator!=(mapping const& lhs, mapping const& rhs) noexcept { + return lhs.extents() != rhs.extents(); + } +#endif + + // Not really public, but currently needed to implement fully constexpr useable submdspan: + template + constexpr index_type __get_stride(MDSPAN_IMPL_STANDARD_NAMESPACE::extents,std::integer_sequence) const { + return _MDSPAN_FOLD_TIMES_RIGHT((Idx():1),1); + } + template + constexpr index_type __stride() const noexcept { + return __get_stride(__extents, std::make_index_sequence()); + } + +private: + _MDSPAN_NO_UNIQUE_ADDRESS extents_type __extents{}; + + // [mdspan.submdspan.mapping], submdspan mapping specialization + template + MDSPAN_INLINE_FUNCTION + constexpr auto submdspan_mapping_impl( + SliceSpecifiers... 
slices) const; + + template + friend constexpr auto submdspan_mapping( + const mapping& src, SliceSpecifiers... slices) { + return src.submdspan_mapping_impl(slices...); + } +}; + + +} // end namespace MDSPAN_IMPL_STANDARD_NAMESPACE + +//END_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/layout_left.hpp +#if MDSPAN_HAS_CXX_17 +//BEGIN_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p2642_bits/layout_padded.hpp +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include + +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { +namespace MDSPAN_IMPL_PROPOSED_NAMESPACE { + +namespace detail { +template +MDSPAN_INLINE_FUNCTION +constexpr _T +find_next_multiple(_T alignment, _T offset) +{ + if ( alignment == 0 ) { + return _T(0); + } else { + return ( ( offset + alignment - 1 ) / alignment) * alignment; + } +} + +template +MDSPAN_INLINE_FUNCTION constexpr size_t get_actual_static_padding_value() { + constexpr auto rank = _ExtentsType::rank(); + + if constexpr (rank <= typename _ExtentsType::rank_type(1)) { + return 0; + } else if constexpr (_PaddingValue != dynamic_extent && + _ExtentsType::static_extent(_ExtentToPadIdx) != + dynamic_extent) { + static_assert( + (_PaddingValue != 0) || + (_ExtentsType::static_extent(_ExtentToPadIdx) == 0), + "padding stride can be 0 only if " + "extents_type::static_extent(extent-to-pad) is 0 or dynamic_extent"); + return find_next_multiple(_PaddingValue, + _ExtentsType::static_extent(_ExtentToPadIdx)); + } else { + return dynamic_extent; + } + // Missing return 
statement warning from NVCC +#ifdef __NVCC__ + return 0; +#endif +} + +template +struct static_array_type_for_padded_extent +{ + static constexpr size_t padding_value = _PaddingValue; + using index_type = typename _Extents::index_type; + using extents_type = _Extents; + using type = ::MDSPAN_IMPL_STANDARD_NAMESPACE::detail::maybe_static_array< + index_type, size_t, dynamic_extent, + detail::get_actual_static_padding_value()>; +}; + +template +struct static_array_type_for_padded_extent<_PaddingValue, _Extents, + _ExtentToPadIdx, Rank, std::enable_if_t> { + using index_type = typename _Extents::index_type; + using extents_type = _Extents; + using type = + ::MDSPAN_IMPL_STANDARD_NAMESPACE::detail::maybe_static_array< + index_type, size_t, dynamic_extent, 0>; +}; + +template +struct padded_extent { + static constexpr size_t padding_value = _PaddingValue; + using index_type = typename _Extents::index_type; + using extents_type = _Extents; + using static_array_type = typename static_array_type_for_padded_extent< + padding_value, _Extents, _ExtentToPadIdx, _Extents::rank()>::type; + + static constexpr auto static_value() { return static_array_type::static_value(0); } + + MDSPAN_INLINE_FUNCTION + static constexpr static_array_type + init_padding(const _Extents &exts) { + if constexpr ((_Extents::rank() > 1) && (padding_value == dynamic_extent)) { + return {exts.extent(_ExtentToPadIdx)}; + } else { + return init_padding(exts, padding_value); + } + // Missing return statement warning from NVCC +#ifdef __NVCC__ + return {}; +#endif + } + + MDSPAN_INLINE_FUNCTION static constexpr static_array_type + init_padding([[maybe_unused]] const _Extents &exts, + [[maybe_unused]] index_type pv) { + if constexpr (_Extents::rank() > 1) { + return {find_next_multiple(pv, + exts.extent(_ExtentToPadIdx))}; + } else { + return {}; + } + // Missing return statement warning from NVCC +#ifdef __NVCC__ + return {}; +#endif + } + + template + MDSPAN_INLINE_FUNCTION static constexpr 
static_array_type + init_padding([[maybe_unused]] const _Mapping &other_mapping, + std::integral_constant) { + if constexpr (_Extents::rank() > 1) { + return {other_mapping.stride(_PaddingStrideIdx)}; + } else { + return {}; + } + // Missing return statement warning from NVCC +#ifdef __NVCC__ + return {}; +#endif + } +}; +} // namespace detail + +template +template +class layout_left_padded::mapping { +public: + static constexpr size_t padding_value = PaddingValue; + + using extents_type = Extents; + using index_type = typename extents_type::index_type; + using size_type = typename extents_type::size_type; + using rank_type = typename extents_type::rank_type; + using layout_type = layout_left_padded; + +#ifndef MDSPAN_INTERNAL_TEST +private: +#endif // MDSPAN_INTERNAL_TEST + + static constexpr rank_type padded_stride_idx = detail::layout_padded_constants::padded_stride_idx; + static constexpr rank_type extent_to_pad_idx = detail::layout_padded_constants::extent_to_pad_idx; + + static_assert((padding_value != 0) + || (extents_type::static_extent(extent_to_pad_idx) == 0) + || (extents_type::static_extent(extent_to_pad_idx) == dynamic_extent), + "out of bounds access for rank 0"); + + using padded_stride_type = detail::padded_extent< padding_value, extents_type, extent_to_pad_idx >; + + static constexpr size_t static_padding_stride = padded_stride_type::static_value(); + + typename padded_stride_type::static_array_type padded_stride = {}; + extents_type exts = {}; + + MDSPAN_INLINE_FUNCTION constexpr index_type + compute_offset(std::index_sequence<>) const { + return 0; + } + + template + MDSPAN_INLINE_FUNCTION constexpr index_type + compute_offset(std::index_sequence, IndexOffset index_offset) const { + return index_offset; + } + + template + MDSPAN_INLINE_FUNCTION constexpr index_type + compute_offset(std::index_sequence, + IndexOffsets... 
index_offsets) const { + index_type indices[] = {static_cast(index_offsets)...}; + // self-recursive fold trick from + // https://github.com/llvm/llvm-project/blob/96e1914aa2e6d8966acbfbe2f4d184201f1aa318/libcxx/include/mdspan/layout_left.h#L144 + index_type res = 0; + ((res = indices[extents_type::rank() - 1 - Ranks] + + ((extents_type::rank() - 1 - Ranks) == extent_to_pad_idx + ? padded_stride.value(0) + : exts.extent(extents_type::rank() - 1 - Ranks)) * + res), + ...); + return res; + } + +public: +#if !MDSPAN_HAS_CXX_20 + MDSPAN_INLINE_FUNCTION_DEFAULTED + constexpr mapping() + : mapping(extents_type{}) + {} +#else + MDSPAN_INLINE_FUNCTION_DEFAULTED + constexpr mapping() + requires(static_padding_stride != dynamic_extent) = default; + + MDSPAN_INLINE_FUNCTION + constexpr mapping() + requires(static_padding_stride == dynamic_extent) + : mapping(extents_type{}) + {} +#endif + + MDSPAN_INLINE_FUNCTION_DEFAULTED constexpr mapping(const mapping&) noexcept = default; + MDSPAN_INLINE_FUNCTION_DEFAULTED mapping& operator=(const mapping&) noexcept = default; + + /** + * Initializes the mapping with the given extents. + * + * \param ext the given extents + */ + MDSPAN_INLINE_FUNCTION + constexpr mapping(const extents_type& ext) + : padded_stride(padded_stride_type::init_padding(ext)), exts(ext) + {} + + /** + * Initializes the mapping with the given extents and the specified padding value. 
+ * + * This overload participates in overload resolution only if `is_convertible_v` + * is `true` and `is_nothrow_constructible_v` is `true` + * + * \param ext the given extents + * \param padding_value the padding value + */ + MDSPAN_TEMPLATE_REQUIRES( + class _Size, + /* requires */ ( + std::is_convertible_v<_Size, index_type> + && std::is_nothrow_constructible_v + ) + ) + MDSPAN_INLINE_FUNCTION + constexpr mapping(const extents_type &ext, _Size dynamic_padding_value) + : padded_stride(padded_stride_type::init_padding(ext, dynamic_padding_value)), exts(ext) + { + assert((padding_value == dynamic_extent) || (static_cast(padding_value) == static_cast(dynamic_padding_value))); + } + + /** + * Converting constructor from `layout_left::mapping`. + * + * This overload participates in overload resolution only if + * `is_constructible_v` is true. If + * `OtherExtents::rank() > 1` then one of `padding_value`, `static_extent(0)`, + * or `OtherExtents::static_extent(0)` must be `dynamic_extent`; otherwise, + * `OtherExtents::static_extent(0)` must be equal to the least multiple of + * `padding_value` greater than or equal to `extents_type::static_extent(0)` + */ + MDSPAN_TEMPLATE_REQUIRES( + class _OtherExtents, + /* requires */ (std::is_constructible_v)) + MDSPAN_CONDITIONAL_EXPLICIT( + (!std::is_convertible_v<_OtherExtents, extents_type>)) + MDSPAN_INLINE_FUNCTION + constexpr mapping(const layout_left::mapping<_OtherExtents> &other_mapping) + : padded_stride(padded_stride_type::init_padding( + other_mapping, + std::integral_constant{})), + exts(other_mapping.extents()) { + static_assert( + (_OtherExtents::rank() > 1) || + (static_padding_stride != dynamic_extent) || + (_OtherExtents::static_extent(extent_to_pad_idx) != dynamic_extent) || + (static_padding_stride == + _OtherExtents::static_extent(extent_to_pad_idx))); + } + + /** + * Converting constructor from `layout_stride::mapping`. 
+ * + * This overload participates in overload resolution only if + * `is_constructible_v` is true + */ + MDSPAN_TEMPLATE_REQUIRES( + class _OtherExtents, + /* requires */ (std::is_constructible_v)) + MDSPAN_CONDITIONAL_EXPLICIT((extents_type::rank() > 0)) + MDSPAN_INLINE_FUNCTION + constexpr mapping(const layout_stride::mapping<_OtherExtents> &other_mapping) + : padded_stride(padded_stride_type::init_padding( + other_mapping, + std::integral_constant{})), + exts(other_mapping.extents()) {} + + /** + * Converting constructor from `layout_left_padded::mapping`. + * + * This overload participates in overload resolution only if + * `is_constructible_v` is true. Either + * `padding_value` or `OtherPaddingStride` must be `std::dynamic_extent`, or + * `padding_value == OtherPaddingStride`. + */ + MDSPAN_TEMPLATE_REQUIRES( + class _Mapping, + /* requires */ (detail::is_layout_left_padded_mapping<_Mapping>::value + &&std::is_constructible_v< + extents_type, typename _Mapping::extents_type>)) + MDSPAN_CONDITIONAL_EXPLICIT((extents_type::rank() > 1 && + (padding_value == dynamic_extent || + _Mapping::padding_value == dynamic_extent))) + MDSPAN_INLINE_FUNCTION + constexpr mapping(const _Mapping &other_mapping) + : padded_stride(padded_stride_type::init_padding( + other_mapping, + std::integral_constant{})), + exts(other_mapping.extents()) { + static_assert(padding_value == dynamic_extent || + _Mapping::padding_value == dynamic_extent || + padding_value == _Mapping::padding_value); + } + + /** + * Converting constructor from `layout_right_padded::mapping`. + * + * This overload participates in overload resolution only if + * `extents_type::rank()` is 0 or 1 and `is_constructible_v` is `true`. 
+ */ + MDSPAN_TEMPLATE_REQUIRES( + class _Mapping, + /* requires */ (detail::is_layout_right_padded_mapping<_Mapping>::value + &&extents_type::rank() <= 1 && + std::is_constructible_v)) + MDSPAN_CONDITIONAL_EXPLICIT( + (!std::is_convertible_v)) + MDSPAN_INLINE_FUNCTION + constexpr mapping(const _Mapping &other_mapping) noexcept + : padded_stride(padded_stride_type::init_padding( + other_mapping.extents(), + other_mapping.extents().extent(extent_to_pad_idx))), + exts(other_mapping.extents()) {} + + MDSPAN_INLINE_FUNCTION constexpr const extents_type & + extents() const noexcept { + return exts; + } + + MDSPAN_INLINE_FUNCTION constexpr std::array + strides() const noexcept { + if constexpr (extents_type::rank() == 0) { + return {}; + } else if constexpr (extents_type::rank() == 1) { + return {1}; + } else { + index_type value = 1; + std::array s{}; + s[extent_to_pad_idx] = value; + value *= padded_stride.value(0); + for (rank_type r = extent_to_pad_idx + 1; r < extents_type::rank() - 1; + ++r) { + s[r] = value; + value *= exts.extent(r); + } + s[extents_type::rank() - 1] = value; + return s; + } + } -template -mdspan(ElementType*, const MappingType&) - -> mdspan; + MDSPAN_INLINE_FUNCTION constexpr index_type + required_span_size() const noexcept { + if constexpr (extents_type::rank() == 0) { + return 1; + } else if constexpr (extents_type::rank() == 1) { + return exts.extent(0); + } else { + index_type value = padded_stride.value(0); + for (rank_type r = 1; r < extents_type::rank(); ++r) { + value *= exts.extent(r); + } + return value; + } + } -template -mdspan(const typename AccessorType::data_handle_type, const MappingType&, const AccessorType&) - -> mdspan; -#endif + /** + * Return the mapping given the provided indices per rank. + * + * This overload participates in overload resolution only if: + * - `sizeof...(Indices) == extents_type::rank()`, + * - `(is_convertible_v && ...) is true`, and + * - (is_nothrow_constructible_v && ...) is true. 
+ */ + MDSPAN_TEMPLATE_REQUIRES( + class... _Indices, + /* requires */ (sizeof...(_Indices) == extents_type::rank() && + (::MDSPAN_IMPL_STANDARD_NAMESPACE::detail:: + are_valid_indices()))) + MDSPAN_INLINE_FUNCTION constexpr size_t + operator()(_Indices... idxs) const noexcept { +#if !defined(NDEBUG) + ::MDSPAN_IMPL_STANDARD_NAMESPACE::detail::check_all_indices(this->extents(), + idxs...); +#endif // ! NDEBUG + return compute_offset(std::index_sequence_for<_Indices...>{}, idxs...); + } + MDSPAN_INLINE_FUNCTION static constexpr bool is_always_unique() noexcept { + return true; + } + MDSPAN_INLINE_FUNCTION static constexpr bool is_always_exhaustive() noexcept { + return (extents_type::rank() <= rank_type(1)) || + (extents_type::static_extent(extent_to_pad_idx) != dynamic_extent && + extents_type::static_extent(extent_to_pad_idx) == + padded_stride_type::static_value()); + } + MDSPAN_INLINE_FUNCTION static constexpr bool is_always_strided() noexcept { + return true; + } + MDSPAN_INLINE_FUNCTION static constexpr bool is_unique() noexcept { + return true; + } + MDSPAN_INLINE_FUNCTION constexpr bool is_exhaustive() const noexcept { + return (extents_type::rank() < 2) || + (exts.extent(extent_to_pad_idx) == padded_stride.value(0)); + } + MDSPAN_INLINE_FUNCTION static constexpr bool is_strided() noexcept { + return true; + } -} // end namespace experimental -} // end namespace std -//END_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/mdspan.hpp -//BEGIN_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/layout_left.hpp -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2019) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ + MDSPAN_INLINE_FUNCTION + constexpr index_type stride(rank_type r) const noexcept { + assert(r < extents_type::rank()); + if (r == 0) + return index_type(1); + index_type value = padded_stride.value(0); + for (rank_type k = 1; k < r; k++) + value *= exts.extent(k); + return value; + } -namespace std { -namespace experimental { + /** + * Equality operator between `layout_left_padded`s + * + * This overload only participates in overload resolution if + * `OtherExtents::rank() == extents_type::rank()`. + * + * \note There is currently a difference from p2642r2, where this function is + * specified as taking `layout_left_padded< padding_value >::mapping< + * Extents>`. However, this makes `padding_value` non-deducible. + */ + MDSPAN_TEMPLATE_REQUIRES( + class _Mapping, + /* requires */ (detail::is_layout_left_padded_mapping<_Mapping>::value && + (_Mapping::extents_type::rank() == extents_type::rank()))) + MDSPAN_INLINE_FUNCTION friend constexpr bool + operator==(const mapping &left, const _Mapping &right) noexcept { + // Workaround for some compilers not short-circuiting properly with + // compile-time checks i.e. we can't access stride(_padding_stride_idx) of a + // rank 0 mapping + bool strides_equal = true; + if constexpr (extents_type::rank() > rank_type(1)) { + strides_equal = + left.stride(padded_stride_idx) == right.stride(padded_stride_idx); + } + return (left.extents() == right.extents()) && strides_equal; + } -//============================================================================== +#if !MDSPAN_HAS_CXX_20 + /** + * Inequality operator between `layout_left_padded`s + * + * This overload only participates in overload resolution if + * `OtherExtents::rank() == extents_type::rank()`. 
+ */ + MDSPAN_TEMPLATE_REQUIRES( + class _Mapping, + /* requires */ (detail::is_layout_left_padded_mapping<_Mapping>::value && + (_Mapping::extents_type::rank() == extents_type::rank()))) + MDSPAN_INLINE_FUNCTION friend constexpr bool + operator!=(const mapping &left, const _Mapping &right) noexcept { + return !(left == right); + } +#endif +}; +template template -class layout_left::mapping { - public: - using extents_type = Extents; - using index_type = typename extents_type::index_type; - using size_type = typename extents_type::size_type; - using rank_type = typename extents_type::rank_type; - using layout_type = layout_left; +class layout_right_padded::mapping { +public: + static constexpr size_t padding_value = PaddingValue; + + using extents_type = Extents; + using index_type = typename extents_type::index_type; + using size_type = typename extents_type::size_type; + using rank_type = typename extents_type::rank_type; + using layout_type = layout_right_padded; + +#ifndef MDSPAN_INTERNAL_TEST private: +#endif // MDSPAN_INTERNAL_TEST - static_assert(detail::__is_extents_v, "std::experimental::layout_left::mapping must be instantiated with a specialization of std::experimental::extents."); + static constexpr rank_type padded_stride_idx = detail::layout_padded_constants::padded_stride_idx; + static constexpr rank_type extent_to_pad_idx = detail::layout_padded_constants::extent_to_pad_idx; - template - friend class mapping; + static_assert((padding_value != 0) + || (extents_type::static_extent(extent_to_pad_idx) == 0) + || (extents_type::static_extent(extent_to_pad_idx) == dynamic_extent), + "if padding stride is 0, static_extent(extent-to-pad-rank) must also be 0 or dynamic_extent"); - // i0+(i1 + E(1)*(i2 + E(2)*i3)) - template - struct __rank_count {}; + using padded_stride_type = detail::padded_extent< padding_value, extents_type, extent_to_pad_idx >; + static constexpr size_t static_padding_stride = padded_stride_type::static_value(); - template - 
_MDSPAN_HOST_DEVICE - constexpr index_type __compute_offset( - __rank_count, const I& i, Indices... idx) const { - return __compute_offset(__rank_count(), idx...) * - __extents.template __extent() + i; - } + typename padded_stride_type::static_array_type padded_stride = {}; + extents_type exts = {}; - template - _MDSPAN_HOST_DEVICE - constexpr index_type __compute_offset( - __rank_count, const I& i) const { - return i; - } + MDSPAN_INLINE_FUNCTION constexpr index_type + compute_offset(std::index_sequence<>) const { + return 0; + } - _MDSPAN_HOST_DEVICE - constexpr index_type __compute_offset(__rank_count<0,0>) const { return 0; } + template + MDSPAN_INLINE_FUNCTION constexpr index_type + compute_offset(std::index_sequence, IndexOffset index_offset) const { + return index_offset; + } - public: + template + MDSPAN_INLINE_FUNCTION constexpr index_type + compute_offset(std::index_sequence, + IndexOffsets... index_offsets) const { + // self-recursive fold trick from + // https://github.com/llvm/llvm-project/blob/4d9771741d40cc9cfcccb6b033f43689d36b705a/libcxx/include/mdspan/layout_right.h#L141 + index_type res = 0; + ((res = static_cast(index_offsets) + + (Ranks == extent_to_pad_idx ? 
padded_stride.value(0) + : exts.extent(Ranks)) * + res), + ...); + return res; + } - //-------------------------------------------------------------------------------- +public: +#if !MDSPAN_HAS_CXX_20 + MDSPAN_INLINE_FUNCTION_DEFAULTED + constexpr mapping() + : mapping(extents_type{}) + {} +#else + MDSPAN_INLINE_FUNCTION_DEFAULTED + constexpr mapping() + requires(static_padding_stride != dynamic_extent) = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED constexpr mapping() noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED constexpr mapping(mapping const&) noexcept = default; + MDSPAN_INLINE_FUNCTION + constexpr mapping() + requires(static_padding_stride == dynamic_extent) + : mapping(extents_type{}) + {} +#endif - _MDSPAN_HOST_DEVICE - constexpr mapping(extents_type const& __exts) noexcept - :__extents(__exts) - { } + MDSPAN_INLINE_FUNCTION_DEFAULTED constexpr mapping(const mapping&) noexcept = default; + MDSPAN_INLINE_FUNCTION_DEFAULTED mapping& operator=(const mapping&) noexcept = default; - MDSPAN_TEMPLATE_REQUIRES( - class OtherExtents, + /** + * Initializes the mapping with the given extents. + * + * \param ext the given extents + */ + MDSPAN_INLINE_FUNCTION + constexpr mapping(const extents_type &ext) + : padded_stride(padded_stride_type::init_padding(ext)), exts(ext) {} + + /** + * Initializes the mapping with the given extents and the specified padding value. 
+ * + * This overload participates in overload resolution only if `is_convertible_v` + * is `true` and `is_nothrow_constructible_v` is `true` + * + * \param ext the given extents + * \param padding_value the padding value + */ + MDSPAN_TEMPLATE_REQUIRES( + class _Size, /* requires */ ( - _MDSPAN_TRAIT(is_constructible, extents_type, OtherExtents) + std::is_convertible_v<_Size, index_type> + && std::is_nothrow_constructible_v + ) ) - ) - MDSPAN_CONDITIONAL_EXPLICIT((!is_convertible::value)) // needs two () due to comma - MDSPAN_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 - mapping(mapping const& other) noexcept // NOLINT(google-explicit-constructor) - :__extents(other.extents()) - { - /* - * TODO: check precondition - * other.required_span_size() is a representable value of type index_type - */ - } + MDSPAN_INLINE_FUNCTION + constexpr mapping(const extents_type &ext, _Size dynamic_padding_value) + : padded_stride(padded_stride_type::init_padding(ext, static_cast(dynamic_padding_value))), + exts(ext) { + assert((padding_value == dynamic_extent) || + (static_cast(padding_value) == static_cast(dynamic_padding_value))); + } - MDSPAN_TEMPLATE_REQUIRES( - class OtherExtents, - /* requires */ ( - _MDSPAN_TRAIT(is_constructible, extents_type, OtherExtents) && - (extents_type::rank() <= 1) - ) - ) - MDSPAN_CONDITIONAL_EXPLICIT((!is_convertible::value)) // needs two () due to comma - MDSPAN_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 - mapping(layout_right::mapping const& other) noexcept // NOLINT(google-explicit-constructor) - :__extents(other.extents()) - { - /* - * TODO: check precondition - * other.required_span_size() is a representable value of type index_type - */ - } + /** + * Converting constructor from `layout_right::mapping`. + * + * This overload participates in overload resolution only if `is_constructible_v` is true. 
+ * If `OtherExtents::rank() > 1` then one of `padding_value`, `static_extent(0)`, or `OtherExtents::static_extent(0)` must be `dynamic_extent`; + * otherwise, `OtherExtents::static_extent(0)` must be equal to the least multiple of `padding_value` greater than or equal to `extents_type::static_extent(0)` + */ + MDSPAN_TEMPLATE_REQUIRES( + class _OtherExtents, + /* requires */ (std::is_constructible_v)) + MDSPAN_CONDITIONAL_EXPLICIT( + (!std::is_convertible_v<_OtherExtents, extents_type>)) + MDSPAN_INLINE_FUNCTION + constexpr mapping(const layout_right::mapping<_OtherExtents> &other_mapping) + : padded_stride(padded_stride_type::init_padding( + other_mapping, + std::integral_constant{})), + exts(other_mapping.extents()) { + static_assert( + (_OtherExtents::rank() > 1) || + (padded_stride_type::static_value() != dynamic_extent) || + (_OtherExtents::static_extent(extent_to_pad_idx) != dynamic_extent) || + (padded_stride_type::static_value() == + _OtherExtents::static_extent(extent_to_pad_idx))); + } - MDSPAN_TEMPLATE_REQUIRES( - class OtherExtents, - /* requires */ ( - _MDSPAN_TRAIT(is_constructible, extents_type, OtherExtents) - ) - ) - MDSPAN_CONDITIONAL_EXPLICIT((extents_type::rank() > 0)) - MDSPAN_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 - mapping(layout_stride::mapping const& other) // NOLINT(google-explicit-constructor) - :__extents(other.extents()) - { - /* - * TODO: check precondition - * other.required_span_size() is a representable value of type index_type - */ - #ifndef __CUDA_ARCH__ - size_t stride = 1; - for(rank_type r=0; r<__extents.rank(); r++) { - if(stride != other.stride(r)) - throw std::runtime_error("Assigning layout_stride to layout_left with invalid strides."); - stride *= __extents.extent(r); - } - #endif - } + /** + * Converting constructor from `layout_stride::mapping`. 
+ * + * This overload participates in overload resolution only if + * `is_constructible_v` is true + */ + MDSPAN_TEMPLATE_REQUIRES( + class _OtherExtents, + /* requires */ (std::is_constructible_v)) + MDSPAN_CONDITIONAL_EXPLICIT((extents_type::rank() > 0)) + MDSPAN_INLINE_FUNCTION + constexpr mapping(const layout_stride::mapping<_OtherExtents> &other_mapping) + : padded_stride(padded_stride_type::init_padding( + other_mapping, + std::integral_constant{})), + exts(other_mapping.extents()) {} + + /** + * Converting constructor from `layout_right_padded::mapping`. + * + * This overload participates in overload resolution only if + * `is_constructible_v` is true. Either + * `padding_value` or `OtherPaddingStride` must be `std::dynamic_extent`, or + * `padding_value == OtherPaddingStride`. + */ + MDSPAN_TEMPLATE_REQUIRES( + class _Mapping, + /* requires */ (detail::is_layout_right_padded_mapping<_Mapping>::value + &&std::is_constructible_v< + extents_type, typename _Mapping::extents_type>)) + MDSPAN_CONDITIONAL_EXPLICIT((extents_type::rank() > 1 && + (padding_value == dynamic_extent || + _Mapping::padding_value == dynamic_extent))) + MDSPAN_INLINE_FUNCTION + constexpr mapping(const _Mapping &other_mapping) + : padded_stride(padded_stride_type::init_padding( + other_mapping, + std::integral_constant{})), + exts(other_mapping.extents()) { + static_assert(padding_value == dynamic_extent || + _Mapping::padding_value == dynamic_extent || + padding_value == _Mapping::padding_value); + } - MDSPAN_INLINE_FUNCTION_DEFAULTED _MDSPAN_CONSTEXPR_14_DEFAULTED mapping& operator=(mapping const&) noexcept = default; + /** + * Converting constructor from `layout_left_padded::mapping`. + * + * This overload participates in overload resolution only if + * `extents_type::rank()` is 0 or 1 and `is_constructible_v` is `true`. 
+ */ + MDSPAN_TEMPLATE_REQUIRES( + class _Mapping, + /* requires */ (detail::is_layout_left_padded_mapping<_Mapping>::value + &&extents_type::rank() <= 1 && + std::is_constructible_v)) + MDSPAN_CONDITIONAL_EXPLICIT( + (!std::is_convertible_v)) + MDSPAN_INLINE_FUNCTION + constexpr mapping(const _Mapping &other_mapping) noexcept + : padded_stride(padded_stride_type::init_padding( + other_mapping.extents(), + other_mapping.extents().extent(extent_to_pad_idx))), + exts(other_mapping.extents()) {} + + MDSPAN_INLINE_FUNCTION constexpr const extents_type & + extents() const noexcept { + return exts; + } - MDSPAN_INLINE_FUNCTION - constexpr const extents_type& extents() const noexcept { - return __extents; + MDSPAN_INLINE_FUNCTION constexpr std::array + strides() const noexcept { + if constexpr (extents_type::rank() == 0) { + return {}; + } else if constexpr (extents_type::rank() == 1) { + return {1}; + } else { + index_type value = 1; + std::array s{}; + s[extent_to_pad_idx] = value; + value *= padded_stride.value(0); + for (rank_type r = extent_to_pad_idx - 1; r > 0; --r) { + s[r] = value; + value *= exts.extent(r); + } + s[0] = value; + return s; } + } - MDSPAN_INLINE_FUNCTION - constexpr index_type required_span_size() const noexcept { + MDSPAN_INLINE_FUNCTION constexpr index_type + required_span_size() const noexcept { + if constexpr (extents_type::rank() == 0) { + return 1; + } else if constexpr (extents_type::rank() == 1) { + return exts.extent(0); + } else { index_type value = 1; - for(rank_type r=0; r(), idxs...); - } + /** + * Return the mapping given the provided indices per rank. + * + * This overload participates in overload resolution only if: + * - `sizeof...(Indices) == extents_type::rank()`, + * - `(is_convertible_v && ...) is true`, and + * - (is_nothrow_constructible_v && ...) is true. + */ + MDSPAN_TEMPLATE_REQUIRES( + class... 
_Indices, + /* requires */ (sizeof...(_Indices) == extents_type::rank() && + (::MDSPAN_IMPL_STANDARD_NAMESPACE::detail:: + are_valid_indices()))) + MDSPAN_INLINE_FUNCTION constexpr size_t + operator()(_Indices... idxs) const noexcept { + return compute_offset(std::index_sequence_for<_Indices...>{}, idxs...); + } + MDSPAN_INLINE_FUNCTION static constexpr bool is_always_unique() noexcept { + return true; + } + MDSPAN_INLINE_FUNCTION static constexpr bool is_always_exhaustive() noexcept { + return (extents_type::rank() <= rank_type(1)) || + (extents_type::static_extent(extent_to_pad_idx) != dynamic_extent && + extents_type::static_extent(extent_to_pad_idx) == + padded_stride_type::static_value()); + } + MDSPAN_INLINE_FUNCTION static constexpr bool is_always_strided() noexcept { + return true; + } + MDSPAN_INLINE_FUNCTION static constexpr bool is_unique() noexcept { + return true; + } + MDSPAN_INLINE_FUNCTION constexpr bool is_exhaustive() const noexcept { + return (extents_type::rank() < 2) || + (exts.extent(extent_to_pad_idx) == padded_stride.value(0)); + } + MDSPAN_INLINE_FUNCTION static constexpr bool is_strided() noexcept { + return true; + } - MDSPAN_INLINE_FUNCTION static constexpr bool is_always_unique() noexcept { return true; } - MDSPAN_INLINE_FUNCTION static constexpr bool is_always_exhaustive() noexcept { return true; } - MDSPAN_INLINE_FUNCTION static constexpr bool is_always_strided() noexcept { return true; } + MDSPAN_INLINE_FUNCTION constexpr index_type + stride(rank_type r) const noexcept { + assert(r < extents_type::rank()); + if (r == extents_type::rank() - 1) + return index_type(1); - MDSPAN_INLINE_FUNCTION constexpr bool is_unique() const noexcept { return true; } - MDSPAN_INLINE_FUNCTION constexpr bool is_exhaustive() const noexcept { return true; } - MDSPAN_INLINE_FUNCTION constexpr bool is_strided() const noexcept { return true; } + index_type value = padded_stride.value(0); + for (rank_type k = extents_type::rank() - 2; k > r; k--) + value *= 
exts.extent(k); - MDSPAN_INLINE_FUNCTION - constexpr index_type stride(rank_type i) const noexcept { - index_type value = 1; - for(rank_type r=0; r - MDSPAN_INLINE_FUNCTION - friend constexpr bool operator==(mapping const& lhs, mapping const& rhs) noexcept { - return lhs.extents() == rhs.extents(); + /** + * Equality operator between `layout_right_padded`s + * + * This overload only participates in overload resolution if + * `OtherExtents::rank() == extents_type::rank()`. + * + * \note There is currently a difference from p2642r2, where this function is + * specified as taking `layout_right_padded< padding_value >::mapping< + * Extents>`. However, this makes `padding_value` non-deducible. + */ + MDSPAN_TEMPLATE_REQUIRES( + class _Mapping, + /* requires */ (detail::is_layout_right_padded_mapping<_Mapping>::value && + (_Mapping::extents_type::rank() == extents_type::rank()))) + MDSPAN_INLINE_FUNCTION friend constexpr bool + operator==(const mapping &left, const _Mapping &right) noexcept { + // Workaround for some compilers not short-circuiting properly with + // compile-time checks i.e. we can't access stride(_padding_stride_idx) of a + // rank 0 mapping + bool strides_equal = true; + if constexpr (extents_type::rank() > rank_type(1)) { + strides_equal = + left.stride(padded_stride_idx) == right.stride(padded_stride_idx); } + return (left.extents() == right.extents()) && strides_equal; + } - // In C++ 20 the not equal exists if equal is found -#if MDSPAN_HAS_CXX_20 - template - MDSPAN_INLINE_FUNCTION - friend constexpr bool operator!=(mapping const& lhs, mapping const& rhs) noexcept { - return lhs.extents() != rhs.extents(); - } +#if !MDSPAN_HAS_CXX_20 + /** + * Inequality operator between `layout_right_padded`s + * + * This overload only participates in overload resolution if + * `OtherExtents::rank() == extents_type::rank()`. 
+ */ + MDSPAN_TEMPLATE_REQUIRES( + class _Mapping, + /* requires */ (detail::is_layout_right_padded_mapping<_Mapping>::value && + (_Mapping::extents_type::rank() == extents_type::rank()))) + MDSPAN_INLINE_FUNCTION friend constexpr bool + operator!=(const mapping &left, const _Mapping &right) noexcept { + return !(left == right); + } #endif - - // Not really public, but currently needed to implement fully constexpr useable submdspan: - template - constexpr index_type __get_stride(std::experimental::extents,integer_sequence) const { - return _MDSPAN_FOLD_TIMES_RIGHT((Idx():1),1); - } - template - constexpr index_type __stride() const noexcept { - return __get_stride(__extents, make_index_sequence()); - } - -private: - _MDSPAN_NO_UNIQUE_ADDRESS extents_type __extents{}; - }; +} +} +//END_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p2642_bits/layout_padded.hpp +//BEGIN_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p2630_bits/submdspan.hpp +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER -} // end namespace experimental -} // end namespace std - -//END_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/layout_left.hpp -//BEGIN_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/submdspan.hpp -/* +//BEGIN_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p2630_bits/submdspan_extents.hpp //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2019) Sandia Corporation +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. +//@HEADER + + +#include + +//BEGIN_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p2630_bits/strided_slice.hpp + +//@HEADER +// ************************************************************************ // -// 3. 
Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. // -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// ************************************************************************ //@HEADER -*/ +#include + +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { +namespace { + template + struct __mdspan_is_integral_constant: std::false_type {}; + template + struct __mdspan_is_integral_constant>: std::true_type {}; +} -namespace std { -namespace experimental { +// Slice Specifier allowing for strides and compile time extent +template +struct strided_slice { + using offset_type = OffsetType; + using extent_type = ExtentType; + using stride_type = StrideType; -namespace detail { + _MDSPAN_NO_UNIQUE_ADDRESS OffsetType offset{}; + _MDSPAN_NO_UNIQUE_ADDRESS ExtentType extent{}; + _MDSPAN_NO_UNIQUE_ADDRESS StrideType stride{}; -template -struct __slice_wrap { - T slice; - size_t old_extent; - size_t old_stride; + static_assert(std::is_integral_v || __mdspan_is_integral_constant::value); + static_assert(std::is_integral_v || __mdspan_is_integral_constant::value); + static_assert(std::is_integral_v || __mdspan_is_integral_constant::value); }; -//-------------------------------------------------------------------------------- +} // MDSPAN_IMPL_STANDARD_NAMESPACE +//END_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p2630_bits/strided_slice.hpp +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { +namespace detail { -template -MDSPAN_INLINE_FUNCTION constexpr -__slice_wrap -__wrap_slice(size_t val, size_t ext, size_t stride) { return { val, ext, stride }; } +// Mapping from submapping ranks to srcmapping ranks +// InvMapRank is an index_sequence, which we build recursively +// to contain the mapped indices. 
+// end of recursion specialization containing the final index_sequence +template +MDSPAN_INLINE_FUNCTION +constexpr auto inv_map_rank(std::integral_constant, std::index_sequence) { + return std::index_sequence(); +} -template -MDSPAN_INLINE_FUNCTION constexpr -__slice_wrap> -__wrap_slice(size_t val, size_t ext, std::integral_constant stride) -{ -#if MDSPAN_HAS_CXX_17 - if constexpr (std::is_signed_v) { - static_assert(Value0 >= IntegerType(0), "Invalid slice specifier"); +// specialization reducing rank by one (i.e., integral slice specifier) +template +MDSPAN_INLINE_FUNCTION +constexpr auto inv_map_rank(std::integral_constant, std::index_sequence, Slice, + SliceSpecifiers... slices) { + using next_idx_seq_t = std::conditional_t, + std::index_sequence, + std::index_sequence>; + + return inv_map_rank(std::integral_constant(), next_idx_seq_t(), + slices...); +} + +// Helper for identifying strided_slice +template struct is_strided_slice : std::false_type {}; + +template +struct is_strided_slice< + strided_slice> : std::true_type {}; + +// first_of(slice): getting begin of slice specifier range +MDSPAN_TEMPLATE_REQUIRES( + class Integral, + /* requires */(std::is_convertible_v) +) +MDSPAN_INLINE_FUNCTION +constexpr Integral first_of(const Integral &i) { + return i; +} + +MDSPAN_INLINE_FUNCTION +constexpr std::integral_constant +first_of(const ::MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent_t &) { + return std::integral_constant(); +} + +MDSPAN_TEMPLATE_REQUIRES( + class Slice, + /* requires */(std::is_convertible_v>) +) +MDSPAN_INLINE_FUNCTION +constexpr auto first_of(const Slice &i) { + return std::get<0>(i); +} + +template +MDSPAN_INLINE_FUNCTION +constexpr OffsetType +first_of(const strided_slice &r) { + return r.offset; +} + +// last_of(slice): getting end of slice specifier range +// We need however not just the slice but also the extents +// of the original view and which rank from the extents. +// This is needed in the case of slice being full_extent_t. 
+MDSPAN_TEMPLATE_REQUIRES( + size_t k, class Extents, class Integral, + /* requires */(std::is_convertible_v) +) +MDSPAN_INLINE_FUNCTION +constexpr Integral + last_of(std::integral_constant, const Extents &, const Integral &i) { + return i; +} + +MDSPAN_TEMPLATE_REQUIRES( + size_t k, class Extents, class Slice, + /* requires */(std::is_convertible_v>) +) +MDSPAN_INLINE_FUNCTION +constexpr auto last_of(std::integral_constant, const Extents &, + const Slice &i) { + return std::get<1>(i); +} + +// Suppress spurious warning with NVCC about no return statement. +// This is a known issue in NVCC and NVC++ +// Depending on the CUDA and GCC version we need both the builtin +// and the diagnostic push. I tried really hard to find something shorter +// but no luck ... +#if defined __NVCC__ + #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ + #pragma nv_diagnostic push + #pragma nv_diag_suppress = implicit_return_from_non_void_function + #else + #ifdef __CUDA_ARCH__ + #pragma diagnostic push + #pragma diag_suppress implicit_return_from_non_void_function + #endif + #endif +#elif defined __NVCOMPILER + #pragma diagnostic push + #pragma diag_suppress = implicit_return_from_non_void_function +#endif +template +MDSPAN_INLINE_FUNCTION +constexpr auto last_of(std::integral_constant, const Extents &ext, + ::MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent_t) { + if constexpr (Extents::static_extent(k) == dynamic_extent) { + return ext.extent(k); + } else { + return std::integral_constant(); } -#endif // MDSPAN_HAS_CXX_17 +#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) + // Even with CUDA_ARCH protection this thing warns about calling host function + __builtin_unreachable(); +#endif +} +#if defined __NVCC__ + #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ + #pragma nv_diagnostic pop + #else + #ifdef __CUDA_ARCH__ + #pragma diagnostic pop + #endif + #endif +#elif defined __NVCOMPILER + #pragma diagnostic pop +#endif + +template +MDSPAN_INLINE_FUNCTION +constexpr OffsetType 
+last_of(std::integral_constant, const Extents &, + const strided_slice &r) { + return r.extent; +} - return { val, ext, stride }; +// get stride of slices +template +MDSPAN_INLINE_FUNCTION +constexpr auto stride_of(const T &) { + return std::integral_constant(); } -template -MDSPAN_INLINE_FUNCTION constexpr -__slice_wrap -__wrap_slice(full_extent_t val, size_t ext, size_t stride) { return { val, ext, stride }; } +template +MDSPAN_INLINE_FUNCTION +constexpr auto +stride_of(const strided_slice &r) { + return r.stride; +} -// TODO generalize this to anything that works with std::get<0> and std::get<1> -template -MDSPAN_INLINE_FUNCTION constexpr -__slice_wrap> -__wrap_slice(std::tuple const& val, size_t ext, size_t stride) -{ - return { val, ext, stride }; +// divide which can deal with integral constant preservation +template +MDSPAN_INLINE_FUNCTION +constexpr auto divide(const T0 &v0, const T1 &v1) { + return IndexT(v0) / IndexT(v1); } -template -MDSPAN_INLINE_FUNCTION constexpr - __slice_wrap, - std::integral_constant>> -__wrap_slice(std::tuple, std::integral_constant> const& val, size_t ext, size_t stride) -{ - static_assert(Value1 >= Value0, "Invalid slice tuple"); - return { val, ext, stride }; +template +MDSPAN_INLINE_FUNCTION +constexpr auto divide(const std::integral_constant &, + const std::integral_constant &) { + // cutting short division by zero + // this is used for strided_slice with zero extent/stride + return std::integral_constant(); } -//-------------------------------------------------------------------------------- +// multiply which can deal with integral constant preservation +template +MDSPAN_INLINE_FUNCTION +constexpr auto multiply(const T0 &v0, const T1 &v1) { + return IndexT(v0) * IndexT(v1); +} +template +MDSPAN_INLINE_FUNCTION +constexpr auto multiply(const std::integral_constant &, + const std::integral_constant &) { + return std::integral_constant(); +} -// a layout right remains a layout right if it is indexed by 0 or more scalars, -// 
then optionally a pair and finally 0 or more all -template < - // what we encountered until now preserves the layout right - bool result=true, - // we only encountered 0 or more scalars, no pair or all - bool encountered_only_scalar=true -> -struct preserve_layout_right_analysis : integral_constant { - using layout_type_if_preserved = layout_right; - using encounter_pair = preserve_layout_right_analysis< - // if we encounter a pair, the layout remains a layout right only if it was one before - // and that only scalars were encountered until now - result && encountered_only_scalar, - // if we encounter a pair, we didn't encounter scalars only - false - >; - using encounter_all = preserve_layout_right_analysis< - // if we encounter a all, the layout remains a layout right if it was one before - result, - // if we encounter a all, we didn't encounter scalars only - false - >; - using encounter_scalar = preserve_layout_right_analysis< - // if we encounter a scalar, the layout remains a layout right only if it was one before - // and that only scalars were encountered until now - result && encountered_only_scalar, - // if we encounter a scalar, the fact that we encountered scalars only doesn't change - encountered_only_scalar - >; +// compute new static extent from range, preserving static knowledge +template struct StaticExtentFromRange { + constexpr static size_t value = dynamic_extent; }; -// a layout left remains a layout left if it is indexed by 0 or more all, -// then optionally a pair and finally 0 or more scalars -template < - bool result=true, - bool encountered_only_all=true -> -struct preserve_layout_left_analysis : integral_constant { - using layout_type_if_preserved = layout_left; - using encounter_pair = preserve_layout_left_analysis< - // if we encounter a pair, the layout remains a layout left only if it was one before - // and that only all were encountered until now - result && encountered_only_all, - // if we encounter a pair, we didn't encounter all 
only - false - >; - using encounter_all = preserve_layout_left_analysis< - // if we encounter a all, the layout remains a layout left only if it was one before - // and that only all were encountered until now - result && encountered_only_all, - // if we encounter a all, the fact that we encountered scalars all doesn't change - encountered_only_all - >; - using encounter_scalar = preserve_layout_left_analysis< - // if we encounter a scalar, the layout remains a layout left if it was one before - result, - // if we encounter a scalar, we didn't encounter scalars only - false - >; +template +struct StaticExtentFromRange, + std::integral_constant> { + constexpr static size_t value = val1 - val0; }; -struct ignore_layout_preservation : std::integral_constant { - using layout_type_if_preserved = void; - using encounter_pair = ignore_layout_preservation; - using encounter_all = ignore_layout_preservation; - using encounter_scalar = ignore_layout_preservation; +// compute new static extent from strided_slice, preserving static +// knowledge +template struct StaticExtentFromStridedRange { + constexpr static size_t value = dynamic_extent; }; -template -struct preserve_layout_analysis - : ignore_layout_preservation { }; -template <> -struct preserve_layout_analysis - : preserve_layout_right_analysis<> { }; -template <> -struct preserve_layout_analysis - : preserve_layout_left_analysis<> { }; - -//-------------------------------------------------------------------------------- - -template < - class _IndexT, - class _PreserveLayoutAnalysis, - class _OffsetsArray=__partially_static_sizes<_IndexT, size_t>, - class _ExtsArray=__partially_static_sizes<_IndexT, size_t>, - class _StridesArray=__partially_static_sizes<_IndexT, size_t>, - class = make_index_sequence<_OffsetsArray::__size>, - class = make_index_sequence<_ExtsArray::__size>, - class = make_index_sequence<_StridesArray::__size> -> -struct __assign_op_slice_handler; - -/* clang-format: off */ -template < - class _IndexT, 
- class _PreserveLayoutAnalysis, - size_t... _Offsets, - size_t... _Exts, - size_t... _Strides, - size_t... _OffsetIdxs, - size_t... _ExtIdxs, - size_t... _StrideIdxs> -struct __assign_op_slice_handler< - _IndexT, - _PreserveLayoutAnalysis, - __partially_static_sizes<_IndexT, size_t, _Offsets...>, - __partially_static_sizes<_IndexT, size_t, _Exts...>, - __partially_static_sizes<_IndexT, size_t, _Strides...>, - integer_sequence, - integer_sequence, - integer_sequence> -{ - // TODO remove this for better compiler performance - static_assert( - _MDSPAN_FOLD_AND((_Strides == dynamic_extent || _Strides > 0) /* && ... */), - " " - ); - static_assert( - _MDSPAN_FOLD_AND((_Offsets == dynamic_extent || _Offsets >= 0) /* && ... */), - " " - ); +template +struct StaticExtentFromStridedRange, + std::integral_constant> { + constexpr static size_t value = val0 > 0 ? 1 + (val0 - 1) / val1 : 0; +}; - using __offsets_storage_t = __partially_static_sizes<_IndexT, size_t, _Offsets...>; - using __extents_storage_t = __partially_static_sizes<_IndexT, size_t, _Exts...>; - using __strides_storage_t = __partially_static_sizes<_IndexT, size_t, _Strides...>; - __offsets_storage_t __offsets; - __extents_storage_t __exts; - __strides_storage_t __strides; - -#ifdef __INTEL_COMPILER -#if __INTEL_COMPILER <= 1800 - MDSPAN_INLINE_FUNCTION constexpr __assign_op_slice_handler(__assign_op_slice_handler&& __other) noexcept - : __offsets(::std::move(__other.__offsets)), __exts(::std::move(__other.__exts)), __strides(::std::move(__other.__strides)) - { } - MDSPAN_INLINE_FUNCTION constexpr __assign_op_slice_handler( - __offsets_storage_t&& __o, - __extents_storage_t&& __e, - __strides_storage_t&& __s - ) noexcept - : __offsets(::std::move(__o)), __exts(::std::move(__e)), __strides(::std::move(__s)) - { } -#endif -#endif +// creates new extents through recursive calls to next_extent member function +// next_extent has different overloads for different types of stride specifiers +template +struct 
extents_constructor { + MDSPAN_TEMPLATE_REQUIRES( + class Slice, class... SlicesAndExtents, + /* requires */(!std::is_convertible_v && + !is_strided_slice::value) + ) + MDSPAN_INLINE_FUNCTION + constexpr static auto next_extent(const Extents &ext, const Slice &sl, + SlicesAndExtents... slices_and_extents) { + constexpr size_t new_static_extent = StaticExtentFromRange< + decltype(first_of(std::declval())), + decltype(last_of(std::integral_constant(), + std::declval(), + std::declval()))>::value; + + using next_t = + extents_constructor; + using index_t = typename Extents::index_type; + return next_t::next_extent( + ext, slices_and_extents..., + index_t(last_of(std::integral_constant(), ext, + sl)) - + index_t(first_of(sl))); + } -// Don't define this unless we need it; they have a cost to compile -#ifndef _MDSPAN_USE_RETURN_TYPE_DEDUCTION - using __extents_type = ::std::experimental::extents<_IndexT, _Exts...>; -#endif - - // For size_t slice, skip the extent and stride, but add an offset corresponding to the value - template - MDSPAN_FORCE_INLINE_FUNCTION // NOLINT (misc-unconventional-assign-operator) - _MDSPAN_CONSTEXPR_14 auto - operator=(__slice_wrap<_OldStaticExtent, _OldStaticStride, size_t>&& __slice) noexcept - -> __assign_op_slice_handler< - _IndexT, - typename _PreserveLayoutAnalysis::encounter_scalar, - __partially_static_sizes<_IndexT, size_t, _Offsets..., dynamic_extent>, - __partially_static_sizes<_IndexT, size_t, _Exts...>, - __partially_static_sizes<_IndexT, size_t, _Strides...>/* intentional space here to work around ICC bug*/> { - return { - __partially_static_sizes<_IndexT, size_t, _Offsets..., dynamic_extent>( - __construct_psa_from_all_exts_values_tag, - __offsets.template __get_n<_OffsetIdxs>()..., __slice.slice), - ::std::move(__exts), - ::std::move(__strides) - }; + MDSPAN_TEMPLATE_REQUIRES( + class Slice, class... 
SlicesAndExtents, + /* requires */ (std::is_convertible_v) + ) + MDSPAN_INLINE_FUNCTION + constexpr static auto next_extent(const Extents &ext, const Slice &, + SlicesAndExtents... slices_and_extents) { + using next_t = extents_constructor; + return next_t::next_extent(ext, slices_and_extents...); } - // Treat integral_constant slice like size_t slice, but with a compile-time offset. - // The result's extents_type can't take advantage of that, - // but it might help for specialized layouts. - template - MDSPAN_FORCE_INLINE_FUNCTION // NOLINT (misc-unconventional-assign-operator) - _MDSPAN_CONSTEXPR_14 auto - operator=(__slice_wrap<_OldStaticExtent, _OldStaticStride, std::integral_constant>&&) noexcept - -> __assign_op_slice_handler< - _IndexT, - typename _PreserveLayoutAnalysis::encounter_scalar, - __partially_static_sizes<_IndexT, size_t, _Offsets..., Value0>, - __partially_static_sizes<_IndexT, size_t, _Exts...>, - __partially_static_sizes<_IndexT, size_t, _Strides...>/* intentional space here to work around ICC bug*/> { -#if MDSPAN_HAS_CXX_17 - if constexpr (std::is_signed_v) { - static_assert(Value0 >= IntegerType(0), "Invalid slice specifier"); + template + MDSPAN_INLINE_FUNCTION + constexpr static auto + next_extent(const Extents &ext, + const strided_slice &r, + SlicesAndExtents... slices_and_extents) { + using index_t = typename Extents::index_type; + using new_static_extent_t = + StaticExtentFromStridedRange; + if constexpr (new_static_extent_t::value == dynamic_extent) { + using next_t = + extents_constructor; + return next_t::next_extent( + ext, slices_and_extents..., + r.extent > 0 ? 
1 + divide(r.extent - 1, r.stride) : 0); + } else { + constexpr size_t new_static_extent = new_static_extent_t::value; + using next_t = + extents_constructor; + return next_t::next_extent( + ext, slices_and_extents..., index_t(divide(ExtentType(), StrideType()))); } -#endif // MDSPAN_HAS_CXX_17 - return { - __partially_static_sizes<_IndexT, size_t, _Offsets..., Value0>( - __construct_psa_from_all_exts_values_tag, - __offsets.template __get_n<_OffsetIdxs>()..., size_t(Value0)), - ::std::move(__exts), - ::std::move(__strides) - }; } +}; - // For a std::full_extent, offset 0 and old extent - template - MDSPAN_FORCE_INLINE_FUNCTION // NOLINT (misc-unconventional-assign-operator) - _MDSPAN_CONSTEXPR_14 auto - operator=(__slice_wrap<_OldStaticExtent, _OldStaticStride, full_extent_t>&& __slice) noexcept - -> __assign_op_slice_handler< - _IndexT, - typename _PreserveLayoutAnalysis::encounter_all, - __partially_static_sizes<_IndexT, size_t, _Offsets..., 0>, - __partially_static_sizes<_IndexT, size_t, _Exts..., _OldStaticExtent>, - __partially_static_sizes<_IndexT, size_t, _Strides..., _OldStaticStride>/* intentional space here to work around ICC bug*/> { - return { - __partially_static_sizes<_IndexT, size_t, _Offsets..., 0>( - __construct_psa_from_all_exts_values_tag, - __offsets.template __get_n<_OffsetIdxs>()..., size_t(0)), - __partially_static_sizes<_IndexT, size_t, _Exts..., _OldStaticExtent>( - __construct_psa_from_all_exts_values_tag, - __exts.template __get_n<_ExtIdxs>()..., __slice.old_extent), - __partially_static_sizes<_IndexT, size_t, _Strides..., _OldStaticStride>( - __construct_psa_from_all_exts_values_tag, - __strides.template __get_n<_StrideIdxs>()..., __slice.old_stride) - }; - } +template +struct extents_constructor<0, Extents, NewStaticExtents...> { - // For a std::tuple, add an offset and add a new dynamic extent (strides still preserved) - template - MDSPAN_FORCE_INLINE_FUNCTION // NOLINT (misc-unconventional-assign-operator) - _MDSPAN_CONSTEXPR_14 auto 
- operator=(__slice_wrap<_OldStaticExtent, _OldStaticStride, tuple>&& __slice) noexcept - -> __assign_op_slice_handler< - _IndexT, - typename _PreserveLayoutAnalysis::encounter_pair, - __partially_static_sizes<_IndexT, size_t, _Offsets..., dynamic_extent>, - __partially_static_sizes<_IndexT, size_t, _Exts..., dynamic_extent>, - __partially_static_sizes<_IndexT, size_t, _Strides..., _OldStaticStride>/* intentional space here to work around ICC bug*/> { - return { - __partially_static_sizes<_IndexT, size_t, _Offsets..., dynamic_extent>( - __construct_psa_from_all_exts_values_tag, - __offsets.template __get_n<_OffsetIdxs>()..., ::std::get<0>(__slice.slice)), - __partially_static_sizes<_IndexT, size_t, _Exts..., dynamic_extent>( - __construct_psa_from_all_exts_values_tag, - __exts.template __get_n<_ExtIdxs>()..., ::std::get<1>(__slice.slice) - ::std::get<0>(__slice.slice)), - __partially_static_sizes<_IndexT, size_t, _Strides..., _OldStaticStride>( - __construct_psa_from_all_exts_values_tag, - __strides.template __get_n<_StrideIdxs>()..., __slice.old_stride) - }; + template + MDSPAN_INLINE_FUNCTION + constexpr static auto next_extent(const Extents &, NewExtents... new_exts) { + return extents( + new_exts...); } +}; - // For a std::tuple of two std::integral_constant, do something like - // we did above for a tuple of two size_t, but make sure the - // result's extents type make the values compile-time constants. 
- template - MDSPAN_FORCE_INLINE_FUNCTION // NOLINT (misc-unconventional-assign-operator) - _MDSPAN_CONSTEXPR_14 auto - operator=(__slice_wrap<_OldStaticExtent, _OldStaticStride, tuple, std::integral_constant>>&& __slice) noexcept - -> __assign_op_slice_handler< - _IndexT, - typename _PreserveLayoutAnalysis::encounter_pair, - __partially_static_sizes<_IndexT, size_t, _Offsets..., size_t(Value0)>, - __partially_static_sizes<_IndexT, size_t, _Exts..., size_t(Value1 - Value0)>, - __partially_static_sizes<_IndexT, size_t, _Strides..., _OldStaticStride>/* intentional space here to work around ICC bug*/> { - static_assert(Value1 >= Value0, "Invalid slice specifier"); - return { - // We're still turning the template parameters Value0 and Value1 - // into (constexpr) run-time values here. - __partially_static_sizes<_IndexT, size_t, _Offsets..., size_t(Value0) > ( - __construct_psa_from_all_exts_values_tag, - __offsets.template __get_n<_OffsetIdxs>()..., Value0), - __partially_static_sizes<_IndexT, size_t, _Exts..., size_t(Value1 - Value0) > ( - __construct_psa_from_all_exts_values_tag, - __exts.template __get_n<_ExtIdxs>()..., Value1 - Value0), - __partially_static_sizes<_IndexT, size_t, _Strides..., _OldStaticStride>( - __construct_psa_from_all_exts_values_tag, - __strides.template __get_n<_StrideIdxs>()..., __slice.old_stride) - }; - } +} // namespace detail - // TODO defer instantiation of this? - using layout_type = typename conditional< - _PreserveLayoutAnalysis::value, - typename _PreserveLayoutAnalysis::layout_type_if_preserved, - layout_stride - >::type; +// submdspan_extents creates new extents given src extents and submdspan slice +// specifiers +template +MDSPAN_INLINE_FUNCTION +constexpr auto submdspan_extents(const extents &src_exts, + SliceSpecifiers... 
slices) { - // TODO noexcept specification - template - MDSPAN_INLINE_FUNCTION - _MDSPAN_DEDUCE_RETURN_TYPE_SINGLE_LINE( - ( - _MDSPAN_CONSTEXPR_14 /* auto */ - _make_layout_mapping_impl(NewLayout) noexcept - ), - ( - /* not layout stride, so don't pass dynamic_strides */ - /* return */ typename NewLayout::template mapping<::std::experimental::extents<_IndexT, _Exts...>>( - experimental::extents<_IndexT, _Exts...>::__make_extents_impl(::std::move(__exts)) - ) /* ; */ - ) - ) + using ext_t = extents; + return detail::extents_constructor::next_extent( + src_exts, slices...); +} +} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE +//END_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p2630_bits/submdspan_extents.hpp +//BEGIN_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p2630_bits/submdspan_mapping.hpp +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER - MDSPAN_INLINE_FUNCTION - _MDSPAN_DEDUCE_RETURN_TYPE_SINGLE_LINE( - ( - _MDSPAN_CONSTEXPR_14 /* auto */ - _make_layout_mapping_impl(layout_stride) noexcept - ), - ( - /* return */ layout_stride::template mapping<::std::experimental::extents<_IndexT, _Exts...>> - ::__make_mapping(::std::move(__exts), ::std::move(__strides)) /* ; */ - ) - ) - template // mostly for deferred instantiation, but maybe we'll use this in the future - MDSPAN_INLINE_FUNCTION - _MDSPAN_DEDUCE_RETURN_TYPE_SINGLE_LINE( - ( - _MDSPAN_CONSTEXPR_14 /* auto */ - make_layout_mapping(OldLayoutMapping const&) noexcept - ), - ( - /* return */ this->_make_layout_mapping_impl(layout_type{}) /* ; */ - ) - ) +#include +#include +#include +#include // index_sequence + +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { +//****************************************** +// Return type of submdspan_mapping overloads +//****************************************** +template struct submdspan_mapping_result { + _MDSPAN_NO_UNIQUE_ADDRESS LayoutMapping mapping{}; + size_t offset; }; -//============================================================================== +namespace detail { -#if _MDSPAN_USE_RETURN_TYPE_DEDUCTION -// Forking this because the C++11 version will be *completely* unreadable -template +// We use const Slice& and not Slice&& because the various +// submdspan_mapping_impl overloads use their slices arguments +// multiple times. This makes perfect forwarding not useful, but we +// still don't want to pass those (possibly of size 64 x 3 bits) +// objects by value. +template MDSPAN_INLINE_FUNCTION -constexpr auto _submdspan_impl( - integer_sequence, - mdspan, LP, AP> const& src, - SliceSpecs&&... 
slices -) noexcept +constexpr bool +one_slice_out_of_bounds(const IndexType& extent, const Slice& slice) { - using _IndexT = ST; - auto _handled = - _MDSPAN_FOLD_ASSIGN_LEFT( - ( - detail::__assign_op_slice_handler< - _IndexT, - detail::preserve_layout_analysis - >{ - __partially_static_sizes<_IndexT, size_t>{}, - __partially_static_sizes<_IndexT, size_t>{}, - __partially_static_sizes<_IndexT, size_t>{} - } - ), - /* = ... = */ - detail::__wrap_slice< - Exts, dynamic_extent - >( - slices, src.extents().template __extent(), - src.mapping().stride(Idxs) - ) - ); - - size_t offset_size = src.mapping()(_handled.__offsets.template __get_n()...); - auto offset_ptr = src.accessor().offset(src.data_handle(), offset_size); - auto map = _handled.make_layout_mapping(src.mapping()); - auto acc_pol = typename AP::offset_policy(src.accessor()); - return mdspan< - ET, remove_const_t>, - typename decltype(_handled)::layout_type, remove_const_t> - >( - std::move(offset_ptr), std::move(map), std::move(acc_pol) + using common_t = std::common_type_t; + return static_cast(detail::first_of(slice)) == static_cast(extent); +} + +template +MDSPAN_INLINE_FUNCTION +constexpr bool +any_slice_out_of_bounds_helper(std::index_sequence, + const extents& exts, + const Slices& ... slices) +{ + return _MDSPAN_FOLD_OR( + (one_slice_out_of_bounds(exts.extent(RankIndices), slices)) ); } -#else -template -auto _submdspan_impl_helper(Src&& src, Handled&& h, std::integer_sequence) - -> mdspan< - ET, typename Handled::__extents_type, typename Handled::layout_type, typename AP::offset_policy - > +template +MDSPAN_INLINE_FUNCTION +constexpr bool +any_slice_out_of_bounds(const extents& exts, + const Slices& ... 
slices) { - return { - src.accessor().offset(src.data_handle(), src.mapping()(h.__offsets.template __get_n()...)), - h.make_layout_mapping(src.mapping()), - typename AP::offset_policy(src.accessor()) - }; + return any_slice_out_of_bounds_helper( + std::make_index_sequence(), + exts, slices...); } -template +// constructs sub strides +template MDSPAN_INLINE_FUNCTION -_MDSPAN_DEDUCE_RETURN_TYPE_SINGLE_LINE( - ( - constexpr /* auto */ _submdspan_impl( - std::integer_sequence seq, - mdspan, LP, AP> const& src, - SliceSpecs&&... slices - ) noexcept - ), - ( - /* return */ _submdspan_impl_helper( - src, - _MDSPAN_FOLD_ASSIGN_LEFT( - ( - detail::__assign_op_slice_handler< - size_t, - detail::preserve_layout_analysis - >{ - __partially_static_sizes{}, - __partially_static_sizes{}, - __partially_static_sizes{} - } - ), - /* = ... = */ - detail::__wrap_slice< - Exts, dynamic_extent - >( - slices, src.extents().template __extent(), src.mapping().stride(Idxs) - ) - ), - seq - ) /* ; */ - ) -) +constexpr auto +construct_sub_strides(const SrcMapping &src_mapping, + std::index_sequence, + const std::tuple &slices_stride_factor) { + using index_type = typename SrcMapping::index_type; + return std::array{ + (static_cast(src_mapping.stride(InvMapIdxs)) * + static_cast(std::get(slices_stride_factor)))...}; +} +} // namespace detail -#endif +//********************************** +// layout_left submdspan_mapping +//********************************* +namespace detail { -template struct _is_layout_stride : std::false_type { }; -template<> -struct _is_layout_stride< - layout_stride -> : std::true_type -{ }; +// Figure out whether to preserve layout_left +template +struct preserve_layout_left_mapping; +template +struct preserve_layout_left_mapping, SubRank, + SliceSpecifiers...> { + constexpr static bool value = + // Preserve layout for rank 0 + (SubRank == 0) || + ( + // Slice specifiers up to subrank need to be full_extent_t - except + // for the last one which could also be tuple but 
not a strided index + // range slice specifiers after subrank are integrals + ((Idx > SubRank - 1) || // these are only integral slice specifiers + (std::is_same_v) || + ((Idx == SubRank - 1) && + std::is_convertible_v>)) && + ...); +}; } // namespace detail -//============================================================================== +// Suppress spurious warning with NVCC about no return statement. +// This is a known issue in NVCC and NVC++ +// Depending on the CUDA and GCC version we need both the builtin +// and the diagnostic push. I tried really hard to find something shorter +// but no luck ... +#if defined __NVCC__ + #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ + #pragma nv_diagnostic push + #pragma nv_diag_suppress = implicit_return_from_non_void_function + #else + #ifdef __CUDA_ARCH__ + #pragma diagnostic push + #pragma diag_suppress implicit_return_from_non_void_function + #endif + #endif +#elif defined __NVCOMPILER + #pragma diagnostic push + #pragma diag_suppress = implicit_return_from_non_void_function +#endif +// Actual submdspan mapping call +template +template +MDSPAN_INLINE_FUNCTION +constexpr auto +layout_left::mapping::submdspan_mapping_impl(SliceSpecifiers... slices) const { + + // compute sub extents + using src_ext_t = Extents; + auto dst_ext = submdspan_extents(extents(), slices...); + using dst_ext_t = decltype(dst_ext); + + // figure out sub layout type + constexpr bool preserve_layout = detail::preserve_layout_left_mapping< + decltype(std::make_index_sequence()), dst_ext_t::rank(), + SliceSpecifiers...>::value; + using dst_layout_t = + std::conditional_t; + using dst_mapping_t = typename dst_layout_t::template mapping; + + // Figure out if any slice's lower bound equals the corresponding extent. + // If so, bypass evaluating the layout mapping. This fixes LWG Issue 4060. + const bool out_of_bounds = + detail::any_slice_out_of_bounds(this->extents(), slices...); + auto offset = static_cast( + out_of_bounds ? 
+ this->required_span_size() : + this->operator()(detail::first_of(slices)...) + ); -MDSPAN_TEMPLATE_REQUIRES( - class ET, class EXT, class LP, class AP, class... SliceSpecs, - /* requires */ ( - ( - _MDSPAN_TRAIT(is_same, LP, layout_left) - || _MDSPAN_TRAIT(is_same, LP, layout_right) - || detail::_is_layout_stride::value - ) && - _MDSPAN_FOLD_AND(( - _MDSPAN_TRAIT(is_convertible, SliceSpecs, size_t) - || _MDSPAN_TRAIT(is_convertible, SliceSpecs, tuple) - || _MDSPAN_TRAIT(is_convertible, SliceSpecs, full_extent_t) - ) /* && ... */) && - sizeof...(SliceSpecs) == EXT::rank() - ) -) + if constexpr (std::is_same_v) { + // layout_left case + return submdspan_mapping_result{dst_mapping_t(dst_ext), offset}; + } else { + // layout_stride case + auto inv_map = detail::inv_map_rank( + std::integral_constant(), + std::index_sequence<>(), + slices...); + return submdspan_mapping_result{ + dst_mapping_t(dst_ext, detail::construct_sub_strides( + *this, inv_map, + // HIP needs deduction guides to have markups so we need to be explicit + // NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have the issue + // But Clang-CUDA also doesn't accept the use of deduction guide so disable it for CUDA alltogether + #if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) + std::tuple{detail::stride_of(slices)...})), + #else + std::tuple{detail::stride_of(slices)...})), + #endif + offset}; + } +#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) + __builtin_unreachable(); +#endif +} +#if defined __NVCC__ + #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ + #pragma nv_diagnostic pop + #else + #ifdef __CUDA_ARCH__ + #pragma diagnostic pop + #endif + #endif +#elif defined __NVCOMPILER + #pragma diagnostic pop +#endif + +//********************************** +// layout_right submdspan_mapping +//********************************* +namespace detail { + +// Figure out whether to preserve layout_right +template +struct preserve_layout_right_mapping; + +template +struct 
preserve_layout_right_mapping, SubRank, + SliceSpecifiers...> { + constexpr static size_t SrcRank = sizeof...(SliceSpecifiers); + constexpr static bool value = + // Preserve layout for rank 0 + (SubRank == 0) || + ( + // The last subrank slice specifiers need to be full_extent_t - except + // for the srcrank-subrank one which could also be tuple but not a + // strided index range slice specifiers before srcrank-subrank are + // integrals + ((Idx < + SrcRank - SubRank) || // these are only integral slice specifiers + (std::is_same_v) || + ((Idx == SrcRank - SubRank) && + std::is_convertible_v>)) && + ...); +}; +} // namespace detail + +// Suppress spurious warning with NVCC about no return statement. +// This is a known issue in NVCC and NVC++ +// Depending on the CUDA and GCC version we need both the builtin +// and the diagnostic push. I tried really hard to find something shorter +// but no luck ... +#if defined __NVCC__ + #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ + #pragma nv_diagnostic push + #pragma nv_diag_suppress = implicit_return_from_non_void_function + #else + #ifdef __CUDA_ARCH__ + #pragma diagnostic push + #pragma diag_suppress implicit_return_from_non_void_function + #endif + #endif +#elif defined __NVCOMPILER + #pragma diagnostic push + #pragma diag_suppress = implicit_return_from_non_void_function +#endif +template +template MDSPAN_INLINE_FUNCTION -_MDSPAN_DEDUCE_RETURN_TYPE_SINGLE_LINE( - ( - constexpr submdspan( - mdspan const& src, SliceSpecs... slices - ) noexcept - ), - ( - /* return */ - detail::_submdspan_impl(std::make_index_sequence{}, src, slices...) /*;*/ - ) -) -/* clang-format: on */ +constexpr auto +layout_right::mapping::submdspan_mapping_impl( + SliceSpecifiers... 
slices) const { + // get sub extents + using src_ext_t = Extents; + auto dst_ext = submdspan_extents(extents(), slices...); + using dst_ext_t = decltype(dst_ext); + + // determine new layout type + constexpr bool preserve_layout = detail::preserve_layout_right_mapping< + decltype(std::make_index_sequence()), dst_ext_t::rank(), + SliceSpecifiers...>::value; + using dst_layout_t = + std::conditional_t; + using dst_mapping_t = typename dst_layout_t::template mapping; + + // Figure out if any slice's lower bound equals the corresponding extent. + // If so, bypass evaluating the layout mapping. This fixes LWG Issue 4060. + const bool out_of_bounds = + detail::any_slice_out_of_bounds(this->extents(), slices...); + auto offset = static_cast( + out_of_bounds ? + this->required_span_size() : + this->operator()(detail::first_of(slices)...) + ); -} // end namespace experimental -} // namespace std -//END_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p0009_bits/submdspan.hpp -//END_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/mdspan -//BEGIN_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p1684_bits/mdarray.hpp -/* + if constexpr (std::is_same_v) { + // layout_right case + return submdspan_mapping_result{dst_mapping_t(dst_ext), offset}; + } else { + // layout_stride case + auto inv_map = detail::inv_map_rank( + std::integral_constant(), + std::index_sequence<>(), + slices...); + return submdspan_mapping_result{ + dst_mapping_t(dst_ext, detail::construct_sub_strides( + *this, inv_map, + // HIP needs deduction guides to have markups so we need to be explicit + // NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have the issue + // But Clang-CUDA also doesn't accept the use of deduction guide so disable it for CUDA alltogether + #if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) + std::tuple{detail::stride_of(slices)...})), + #else + std::tuple{detail::stride_of(slices)...})), + 
#endif + offset}; + } +#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) + __builtin_unreachable(); +#endif +} +#if defined __NVCC__ + #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ + #pragma nv_diagnostic pop + #else + #ifdef __CUDA_ARCH__ + #pragma diagnostic pop + #endif + #endif +#elif defined __NVCOMPILER + #pragma diagnostic pop +#endif + +//********************************** +// layout_stride submdspan_mapping +//********************************* +template +template +MDSPAN_INLINE_FUNCTION +constexpr auto +layout_stride::mapping::submdspan_mapping_impl( + SliceSpecifiers... slices) const { + auto dst_ext = submdspan_extents(extents(), slices...); + using dst_ext_t = decltype(dst_ext); + auto inv_map = detail::inv_map_rank( + std::integral_constant(), + std::index_sequence<>(), + slices...); + using dst_mapping_t = typename layout_stride::template mapping; + + // Figure out if any slice's lower bound equals the corresponding extent. + // If so, bypass evaluating the layout mapping. This fixes LWG Issue 4060. + const bool out_of_bounds = + detail::any_slice_out_of_bounds(this->extents(), slices...); + auto offset = static_cast( + out_of_bounds ? + this->required_span_size() : + this->operator()(detail::first_of(slices)...) 
+ ); + + return submdspan_mapping_result{ + dst_mapping_t(dst_ext, detail::construct_sub_strides( + *this, inv_map, + // HIP needs deduction guides to have markups so we need to be explicit + // NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have the issue + #if defined(_MDSPAN_HAS_HIP) || (defined(__NVCC__) && (__CUDACC_VER_MAJOR__ * 100 + __CUDACC_VER_MINOR__ * 10) < 1120) + std::tuple(detail::stride_of(slices)...))), +#else + std::tuple(detail::stride_of(slices)...))), +#endif + offset}; +} + +} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE +//END_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p2630_bits/submdspan_mapping.hpp + +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { +template +MDSPAN_INLINE_FUNCTION +constexpr auto +submdspan(const mdspan &src, + SliceSpecifiers... slices) { + const auto sub_submdspan_mapping_result = submdspan_mapping(src.mapping(), slices...); + // NVCC has a problem with the deduction so lets figure out the type + using sub_mapping_t = std::remove_cv_t; + using sub_extents_t = typename sub_mapping_t::extents_type; + using sub_layout_t = typename sub_mapping_t::layout_type; + using sub_accessor_t = typename AccessorPolicy::offset_policy; + return mdspan( + src.accessor().offset(src.data_handle(), sub_submdspan_mapping_result.offset), + sub_submdspan_mapping_result.mapping, + sub_accessor_t(src.accessor())); +} +} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE +//END_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p2630_bits/submdspan.hpp +#endif +//BEGIN_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p2389_bits/dims.hpp //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2019) Sandia Corporation +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). 
// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) -// -// ************************************************************************ //@HEADER -*/ +// backward compatibility import into experimental +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { +namespace MDSPAN_IMPL_PROPOSED_NAMESPACE { -//BEGIN_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p1684_bits/../mdspan -/* +template< ::std::size_t Rank, class IndexType = std::size_t> +using dims = + :: MDSPAN_IMPL_STANDARD_NAMESPACE :: dextents; + +} // namespace MDSPAN_IMPL_PROPOSED_NAMESPACE +} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE +//END_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p2389_bits/dims.hpp + +#endif // MDSPAN_HPP_ +//END_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/mdspan/mdspan.hpp + +// backward compatibility import into experimental +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { + namespace MDSPAN_IMPL_PROPOSED_NAMESPACE { + using ::MDSPAN_IMPL_STANDARD_NAMESPACE::mdspan; + using ::MDSPAN_IMPL_STANDARD_NAMESPACE::extents; + using ::MDSPAN_IMPL_STANDARD_NAMESPACE::layout_left; + using ::MDSPAN_IMPL_STANDARD_NAMESPACE::layout_right; + using ::MDSPAN_IMPL_STANDARD_NAMESPACE::layout_stride; + using ::MDSPAN_IMPL_STANDARD_NAMESPACE::default_accessor; + } +} +//END_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/mdspan +//BEGIN_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/mdspan/mdarray.hpp //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2019) Sandia Corporation +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. 
// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. +//@HEADER + +#ifndef MDARRAY_HPP_ +#define MDARRAY_HPP_ + +#ifndef MDSPAN_IMPL_STANDARD_NAMESPACE + #define MDSPAN_IMPL_STANDARD_NAMESPACE Kokkos +#endif + +#ifndef MDSPAN_IMPL_PROPOSED_NAMESPACE + #define MDSPAN_IMPL_PROPOSED_NAMESPACE Experimental +#endif + +//BEGIN_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p1684_bits/mdarray.hpp +//@HEADER +// ************************************************************************ // -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. // -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// ************************************************************************ //@HEADER -*/ -//END_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p1684_bits/../mdspan +#include +#include -namespace std { -namespace experimental { +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { +namespace MDSPAN_IMPL_PROPOSED_NAMESPACE { namespace { template @@ -5703,14 +5817,14 @@ namespace { namespace { template - struct container_is_array : false_type { + struct container_is_array : std::false_type { template static constexpr C construct(const M& m) { return C(m.required_span_size()); } }; template - struct container_is_array> : true_type { + struct container_is_array> : std::true_type { template - static constexpr array construct(const M&) { return array(); } + static constexpr std::array construct(const M&) { return std::array(); } }; } @@ -5718,12 +5832,12 @@ template < class ElementType, class Extents, class LayoutPolicy = layout_right, - class Container = vector + class Container = std::vector > class mdarray { private: - 
static_assert(detail::__is_extents_v, "std::experimental::mdspan's Extents template parameter must be a specialization of std::experimental::extents."); - + static_assert(::MDSPAN_IMPL_STANDARD_NAMESPACE::detail::__is_extents_v, + MDSPAN_IMPL_PROPOSED_NAMESPACE_STRING "::mdspan's Extents template parameter must be a specialization of " MDSPAN_IMPL_STANDARD_NAMESPACE_STRING "::extents."); public: @@ -5735,7 +5849,9 @@ class mdarray { using container_type = Container; using mapping_type = typename layout_type::template mapping; using element_type = ElementType; - using value_type = remove_cv_t; + using mdspan_type = mdspan; + using const_mdspan_type = mdspan; + using value_type = std::remove_cv_t; using index_type = typename Extents::index_type; using size_type = typename Extents::size_type; using rank_type = typename Extents::rank_type; @@ -5764,10 +5880,10 @@ class mdarray { MDSPAN_TEMPLATE_REQUIRES( class... SizeTypes, /* requires */ ( - _MDSPAN_FOLD_AND(_MDSPAN_TRAIT(is_convertible, SizeTypes, index_type) /* && ... */) && - _MDSPAN_TRAIT(is_constructible, extents_type, SizeTypes...) && - _MDSPAN_TRAIT(is_constructible, mapping_type, extents_type) && - (_MDSPAN_TRAIT(is_constructible, container_type, size_t) || + (::MDSPAN_IMPL_STANDARD_NAMESPACE::detail::are_valid_indices()) && + _MDSPAN_TRAIT( std::is_constructible, extents_type, SizeTypes...) 
&& + _MDSPAN_TRAIT( std::is_constructible, mapping_type, extents_type) && + (_MDSPAN_TRAIT( std::is_constructible, container_type, size_t) || container_is_array::value) && (extents_type::rank()>0 || extents_type::rank_dynamic()==0) ) @@ -5780,94 +5896,62 @@ class mdarray { MDSPAN_FUNCTION_REQUIRES( (MDSPAN_INLINE_FUNCTION constexpr), mdarray, (const extents_type& exts), , - /* requires */ ((_MDSPAN_TRAIT(is_constructible, container_type, size_t) || + /* requires */ ((_MDSPAN_TRAIT( std::is_constructible, container_type, size_t) || container_is_array::value) && - _MDSPAN_TRAIT(is_constructible, mapping_type, extents_type)) + _MDSPAN_TRAIT( std::is_constructible, mapping_type, extents_type)) ) : map_(exts), ctr_(container_is_array::construct(map_)) { } MDSPAN_FUNCTION_REQUIRES( (MDSPAN_INLINE_FUNCTION constexpr), mdarray, (const mapping_type& m), , - /* requires */ (_MDSPAN_TRAIT(is_constructible, container_type, size_t) || + /* requires */ (_MDSPAN_TRAIT( std::is_constructible, container_type, size_t) || container_is_array::value) ) : map_(m), ctr_(container_is_array::construct(map_)) { } - // Constructors from container - MDSPAN_TEMPLATE_REQUIRES( - class... SizeTypes, - /* requires */ ( - _MDSPAN_FOLD_AND(_MDSPAN_TRAIT(is_convertible, SizeTypes, index_type) /* && ... */) && - _MDSPAN_TRAIT(is_constructible, extents_type, SizeTypes...) && - _MDSPAN_TRAIT(is_constructible, mapping_type, extents_type) - ) - ) - MDSPAN_INLINE_FUNCTION - explicit constexpr mdarray(const container_type& ctr, SizeTypes... 
dynamic_extents) - : map_(extents_type(dynamic_extents...)), ctr_(ctr) - { assert(ctr.size() >= static_cast(map_.required_span_size())); } - - MDSPAN_FUNCTION_REQUIRES( (MDSPAN_INLINE_FUNCTION constexpr), - mdarray, (const container_type& ctr, const extents_type& exts), , - /* requires */ (_MDSPAN_TRAIT(is_constructible, mapping_type, extents_type)) + mdarray, (const extents_type& exts, const container_type& ctr), , + /* requires */ (_MDSPAN_TRAIT( std::is_constructible, mapping_type, extents_type)) ) : map_(exts), ctr_(ctr) { assert(ctr.size() >= static_cast(map_.required_span_size())); } - constexpr mdarray(const container_type& ctr, const mapping_type& m) + constexpr mdarray(const mapping_type& m, const container_type& ctr) : map_(m), ctr_(ctr) { assert(ctr.size() >= static_cast(map_.required_span_size())); } - - // Constructors from container - MDSPAN_TEMPLATE_REQUIRES( - class... SizeTypes, - /* requires */ ( - _MDSPAN_FOLD_AND(_MDSPAN_TRAIT(is_convertible, SizeTypes, index_type) /* && ... */) && - _MDSPAN_TRAIT(is_constructible, extents_type, SizeTypes...) && - _MDSPAN_TRAIT(is_constructible, mapping_type, extents_type) - ) - ) - MDSPAN_INLINE_FUNCTION - explicit constexpr mdarray(container_type&& ctr, SizeTypes... 
dynamic_extents) - : map_(extents_type(dynamic_extents...)), ctr_(std::move(ctr)) - { assert(ctr_.size() >= static_cast(map_.required_span_size())); } - - MDSPAN_FUNCTION_REQUIRES( (MDSPAN_INLINE_FUNCTION constexpr), - mdarray, (container_type&& ctr, const extents_type& exts), , - /* requires */ (_MDSPAN_TRAIT(is_constructible, mapping_type, extents_type)) + mdarray, (const extents_type& exts, container_type&& ctr), , + /* requires */ (_MDSPAN_TRAIT( std::is_constructible, mapping_type, extents_type)) ) : map_(exts), ctr_(std::move(ctr)) { assert(ctr_.size() >= static_cast(map_.required_span_size())); } - constexpr mdarray(container_type&& ctr, const mapping_type& m) + constexpr mdarray(const mapping_type& m, container_type&& ctr) : map_(m), ctr_(std::move(ctr)) { assert(ctr_.size() >= static_cast(map_.required_span_size())); } - MDSPAN_TEMPLATE_REQUIRES( class OtherElementType, class OtherExtents, class OtherLayoutPolicy, class OtherContainer, /* requires */ ( - _MDSPAN_TRAIT(is_constructible, mapping_type, typename OtherLayoutPolicy::template mapping) && - _MDSPAN_TRAIT(is_constructible, container_type, OtherContainer) + _MDSPAN_TRAIT( std::is_constructible, mapping_type, typename OtherLayoutPolicy::template mapping) && + _MDSPAN_TRAIT( std::is_constructible, container_type, OtherContainer) ) ) MDSPAN_INLINE_FUNCTION constexpr mdarray(const mdarray& other) : map_(other.mapping()), ctr_(other.container()) { - static_assert(is_constructible::value, ""); + static_assert( std::is_constructible::value, ""); } // Constructors for container types constructible from a size and allocator MDSPAN_TEMPLATE_REQUIRES( class Alloc, - /* requires */ (_MDSPAN_TRAIT(is_constructible, container_type, size_t, Alloc) && - _MDSPAN_TRAIT(is_constructible, mapping_type, extents_type)) + /* requires */ (_MDSPAN_TRAIT( std::is_constructible, container_type, size_t, Alloc) && + _MDSPAN_TRAIT( std::is_constructible, mapping_type, extents_type)) ) MDSPAN_INLINE_FUNCTION constexpr 
mdarray(const extents_type& exts, const Alloc& a) @@ -5876,7 +5960,7 @@ class mdarray { MDSPAN_TEMPLATE_REQUIRES( class Alloc, - /* requires */ (_MDSPAN_TRAIT(is_constructible, container_type, size_t, Alloc)) + /* requires */ (_MDSPAN_TRAIT( std::is_constructible, container_type, size_t, Alloc)) ) MDSPAN_INLINE_FUNCTION constexpr mdarray(const mapping_type& map, const Alloc& a) @@ -5886,54 +5970,54 @@ class mdarray { // Constructors for container types constructible from a container and allocator MDSPAN_TEMPLATE_REQUIRES( class Alloc, - /* requires */ (_MDSPAN_TRAIT(is_constructible, container_type, container_type, Alloc) && - _MDSPAN_TRAIT(is_constructible, mapping_type, extents_type)) + /* requires */ (_MDSPAN_TRAIT( std::is_constructible, container_type, container_type, Alloc) && + _MDSPAN_TRAIT( std::is_constructible, mapping_type, extents_type)) ) MDSPAN_INLINE_FUNCTION - constexpr mdarray(const container_type& ctr, const extents_type& exts, const Alloc& a) + constexpr mdarray(const extents_type& exts, const container_type& ctr, const Alloc& a) : map_(exts), ctr_(ctr, a) { assert(ctr_.size() >= static_cast(map_.required_span_size())); } MDSPAN_TEMPLATE_REQUIRES( class Alloc, - /* requires */ (_MDSPAN_TRAIT(is_constructible, container_type, size_t, Alloc)) + /* requires */ (_MDSPAN_TRAIT( std::is_constructible, container_type, size_t, Alloc)) ) MDSPAN_INLINE_FUNCTION - constexpr mdarray(const container_type& ctr, const mapping_type& map, const Alloc& a) + constexpr mdarray(const mapping_type& map, const container_type& ctr, const Alloc& a) : map_(map), ctr_(ctr, a) { assert(ctr_.size() >= static_cast(map_.required_span_size())); } MDSPAN_TEMPLATE_REQUIRES( class Alloc, - /* requires */ (_MDSPAN_TRAIT(is_constructible, container_type, container_type, Alloc) && - _MDSPAN_TRAIT(is_constructible, mapping_type, extents_type)) + /* requires */ (_MDSPAN_TRAIT( std::is_constructible, container_type, container_type, Alloc) && + _MDSPAN_TRAIT( std::is_constructible, 
mapping_type, extents_type)) ) MDSPAN_INLINE_FUNCTION - constexpr mdarray(container_type&& ctr, const extents_type& exts, const Alloc& a) + constexpr mdarray(const extents_type& exts, container_type&& ctr, const Alloc& a) : map_(exts), ctr_(std::move(ctr), a) { assert(ctr_.size() >= static_cast(map_.required_span_size())); } MDSPAN_TEMPLATE_REQUIRES( class Alloc, - /* requires */ (_MDSPAN_TRAIT(is_constructible, container_type, size_t, Alloc)) + /* requires */ (_MDSPAN_TRAIT( std::is_constructible, container_type, size_t, Alloc)) ) MDSPAN_INLINE_FUNCTION - constexpr mdarray(container_type&& ctr, const mapping_type& map, const Alloc& a) + constexpr mdarray(const mapping_type& map, container_type&& ctr, const Alloc& a) : map_(map), ctr_(std::move(ctr), a) { assert(ctr_.size() >= map_.required_span_size()); } MDSPAN_TEMPLATE_REQUIRES( class OtherElementType, class OtherExtents, class OtherLayoutPolicy, class OtherContainer, class Alloc, /* requires */ ( - _MDSPAN_TRAIT(is_constructible, mapping_type, typename OtherLayoutPolicy::template mapping) && - _MDSPAN_TRAIT(is_constructible, container_type, OtherContainer, Alloc) + _MDSPAN_TRAIT( std::is_constructible, mapping_type, typename OtherLayoutPolicy::template mapping) && + _MDSPAN_TRAIT( std::is_constructible, container_type, OtherContainer, Alloc) ) ) MDSPAN_INLINE_FUNCTION constexpr mdarray(const mdarray& other, const Alloc& a) : map_(other.mapping()), ctr_(other.container(), a) { - static_assert(is_constructible::value, ""); + static_assert( std::is_constructible::value, ""); } MDSPAN_INLINE_FUNCTION_DEFAULTED constexpr mdarray& operator= (const mdarray&) = default; @@ -5948,7 +6032,7 @@ class mdarray { MDSPAN_TEMPLATE_REQUIRES( class... SizeTypes, /* requires */ ( - _MDSPAN_FOLD_AND(_MDSPAN_TRAIT(is_convertible, SizeTypes, index_type) /* && ... */) && + _MDSPAN_FOLD_AND(_MDSPAN_TRAIT( std::is_convertible, SizeTypes, index_type) /* && ... 
*/) && extents_type::rank() == sizeof...(SizeTypes) ) ) @@ -5961,7 +6045,7 @@ class mdarray { MDSPAN_TEMPLATE_REQUIRES( class... SizeTypes, /* requires */ ( - _MDSPAN_FOLD_AND(_MDSPAN_TRAIT(is_convertible, SizeTypes, index_type) /* && ... */) && + _MDSPAN_FOLD_AND(_MDSPAN_TRAIT( std::is_convertible, SizeTypes, index_type) /* && ... */) && extents_type::rank() == sizeof...(SizeTypes) ) ) @@ -5976,12 +6060,12 @@ class mdarray { MDSPAN_TEMPLATE_REQUIRES( class SizeType, size_t N, /* requires */ ( - _MDSPAN_TRAIT(is_convertible, SizeType, index_type) && + _MDSPAN_TRAIT( std::is_convertible, SizeType, index_type) && N == extents_type::rank() ) ) MDSPAN_FORCE_INLINE_FUNCTION - constexpr const_reference operator[](const array& indices) const noexcept + constexpr const_reference operator[](const std::array& indices) const noexcept { return __impl::template __callop(*this, indices); } @@ -5989,12 +6073,12 @@ class mdarray { MDSPAN_TEMPLATE_REQUIRES( class SizeType, size_t N, /* requires */ ( - _MDSPAN_TRAIT(is_convertible, SizeType, index_type) && + _MDSPAN_TRAIT( std::is_convertible, SizeType, index_type) && N == extents_type::rank() ) ) MDSPAN_FORCE_INLINE_FUNCTION - constexpr reference operator[](const array& indices) noexcept + constexpr reference operator[](const std::array& indices) noexcept { return __impl::template __callop(*this, indices); } @@ -6005,8 +6089,8 @@ class mdarray { MDSPAN_TEMPLATE_REQUIRES( class... SizeTypes, /* requires */ ( - _MDSPAN_FOLD_AND(_MDSPAN_TRAIT(is_convertible, SizeTypes, index_type) /* && ... */) && - extents_type::rank() == sizeof...(SizeTypes) + (::MDSPAN_IMPL_STANDARD_NAMESPACE::detail::are_valid_indices()) && + extents_type::rank() == sizeof...(SizeTypes) ) ) MDSPAN_FORCE_INLINE_FUNCTION @@ -6017,8 +6101,8 @@ class mdarray { MDSPAN_TEMPLATE_REQUIRES( class... SizeTypes, /* requires */ ( - _MDSPAN_FOLD_AND(_MDSPAN_TRAIT(is_convertible, SizeTypes, index_type) /* && ... 
*/) && - extents_type::rank() == sizeof...(SizeTypes) + (::MDSPAN_IMPL_STANDARD_NAMESPACE::detail::are_valid_indices()) && + extents_type::rank() == sizeof...(SizeTypes) ) ) MDSPAN_FORCE_INLINE_FUNCTION @@ -6031,12 +6115,12 @@ class mdarray { MDSPAN_TEMPLATE_REQUIRES( class SizeType, size_t N, /* requires */ ( - _MDSPAN_TRAIT(is_convertible, SizeType, index_type) && + _MDSPAN_TRAIT( std::is_convertible, SizeType, index_type) && N == extents_type::rank() ) ) MDSPAN_FORCE_INLINE_FUNCTION - constexpr const_reference operator()(const array& indices) const noexcept + constexpr const_reference operator()(const std::array& indices) const noexcept { return __impl::template __callop(*this, indices); } @@ -6044,12 +6128,12 @@ class mdarray { MDSPAN_TEMPLATE_REQUIRES( class SizeType, size_t N, /* requires */ ( - _MDSPAN_TRAIT(is_convertible, SizeType, index_type) && + _MDSPAN_TRAIT( std::is_convertible, SizeType, index_type) && N == extents_type::rank() ) ) MDSPAN_FORCE_INLINE_FUNCTION - constexpr reference operator()(const array& indices) noexcept + constexpr reference operator()(const std::array& indices) noexcept { return __impl::template __callop(*this, indices); } @@ -6068,7 +6152,7 @@ class mdarray { MDSPAN_INLINE_FUNCTION static constexpr rank_type rank_dynamic() noexcept { return extents_type::rank_dynamic(); } MDSPAN_INLINE_FUNCTION static constexpr size_t static_extent(size_t r) noexcept { return extents_type::static_extent(r); } - MDSPAN_INLINE_FUNCTION constexpr extents_type extents() const noexcept { return map_.extents(); }; + MDSPAN_INLINE_FUNCTION constexpr const extents_type& extents() const noexcept { return map_.extents(); }; MDSPAN_INLINE_FUNCTION constexpr index_type extent(size_t r) const noexcept { return map_.extents().extent(r); }; MDSPAN_INLINE_FUNCTION constexpr index_type size() const noexcept { // return __impl::__size(*this); @@ -6083,12 +6167,63 @@ class mdarray { MDSPAN_INLINE_FUNCTION static constexpr bool is_always_exhaustive() noexcept { 
return mapping_type::is_always_exhaustive(); }; MDSPAN_INLINE_FUNCTION static constexpr bool is_always_strided() noexcept { return mapping_type::is_always_strided(); }; - MDSPAN_INLINE_FUNCTION constexpr mapping_type mapping() const noexcept { return map_; }; + MDSPAN_INLINE_FUNCTION constexpr const mapping_type& mapping() const noexcept { return map_; }; MDSPAN_INLINE_FUNCTION constexpr bool is_unique() const noexcept { return map_.is_unique(); }; MDSPAN_INLINE_FUNCTION constexpr bool is_exhaustive() const noexcept { return map_.is_exhaustive(); }; MDSPAN_INLINE_FUNCTION constexpr bool is_strided() const noexcept { return map_.is_strided(); }; MDSPAN_INLINE_FUNCTION constexpr index_type stride(size_t r) const { return map_.stride(r); }; + // Converstion to mdspan + MDSPAN_TEMPLATE_REQUIRES( + class OtherElementType, class OtherExtents, + class OtherLayoutType, class OtherAccessorType, + /* requires */ ( + _MDSPAN_TRAIT(std::is_assignable, + mdspan, + mdspan_type) + ) + ) + constexpr operator mdspan () { + return mdspan_type(data(), map_); + } + + MDSPAN_TEMPLATE_REQUIRES( + class OtherElementType, class OtherExtents, + class OtherLayoutType, class OtherAccessorType, + /* requires */ ( + _MDSPAN_TRAIT(std::is_assignable, + mdspan, + const_mdspan_type) + ) + ) + constexpr operator mdspan () const { + return const_mdspan_type(data(), map_); + } + + MDSPAN_TEMPLATE_REQUIRES( + class OtherAccessorType = default_accessor, + /* requires */ ( + _MDSPAN_TRAIT(std::is_assignable, mdspan_type, + mdspan) + ) + ) + constexpr mdspan + to_mdspan(const OtherAccessorType& a = default_accessor()) { + return mdspan(data(), map_, a); + } + + MDSPAN_TEMPLATE_REQUIRES( + class OtherAccessorType = default_accessor, + /* requires */ ( + _MDSPAN_TRAIT(std::is_assignable, const_mdspan_type, + mdspan) + ) + ) + constexpr mdspan + to_mdspan(const OtherAccessorType& a = default_accessor()) const { + return mdspan(data(), map_, a); + } + private: mapping_type map_; container_type ctr_; @@ 
-6098,10 +6233,12 @@ class mdarray { }; -} // end namespace experimental -} // end namespace std +} // end namespace MDSPAN_IMPL_PROPOSED_NAMESPACE +} // end namespace MDSPAN_IMPL_STANDARD_NAMESPACE //END_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/__p1684_bits/mdarray.hpp +#endif // MDARRAY_HPP_ +//END_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/mdspan/mdarray.hpp //END_FILE_INCLUDE: /home/runner/work/mdspan/mdspan/include/experimental/mdarray #endif // _MDSPAN_SINGLE_HEADER_INCLUDE_GUARD_ diff --git a/gpu_utils/common/pointer_casts.hpp b/gpu_utils/common/pointer_casts.hpp index b3123df..5fa5fba 100644 --- a/gpu_utils/common/pointer_casts.hpp +++ b/gpu_utils/common/pointer_casts.hpp @@ -3,11 +3,24 @@ #include "cuda_host_dev.H" #include -#ifdef __NVIDIA_COMPILER__ + +template static constexpr inline auto make_raw_pointer(T p) { + static_assert(std::is_pointer::value, "Not a pointer type."); + return p; +} + +template static constexpr inline auto make_device_pointer(T p) { + static_assert(std::is_pointer::value, "Not a pointer type."); + return p; +} + + +/* +#ifdef __USING_GPU__ #include #endif -#ifdef __NVIDIA_COMPILER__ +#ifdef __USING_GPU__ template static constexpr inline auto make_raw_pointer(T p) { return thrust::raw_pointer_cast(p); @@ -29,3 +42,5 @@ template static constexpr inline auto make_device_pointer(T p) { return p; } #endif +*/ + diff --git a/gpu_utils/common/timer.H b/gpu_utils/common/timer.H index 330c764..7d3f1cd 100644 --- a/gpu_utils/common/timer.H +++ b/gpu_utils/common/timer.H @@ -11,7 +11,7 @@ static inline CUDA_HOSTDEV double second() { -#ifdef __NVIDIA_COMPILER__ +#ifdef __NVIDIA_BACKEND__ return static_cast(clock()); diff --git a/gpu_utils/common/variant/variant.hpp b/gpu_utils/common/variant/variant.hpp index a39ecaf..c1d8992 100644 --- a/gpu_utils/common/variant/variant.hpp +++ b/gpu_utils/common/variant/variant.hpp @@ -6,7 +6,7 @@ #include #include "cuda_host_dev.H" -#ifdef __NVIDIA_COMPILER__ +#ifdef 
__USING_GPU__ #include #endif @@ -443,7 +443,7 @@ struct variant { return *this; } - #ifdef __NVIDIA_COMPILER__ + #ifdef __USING_GPU__ //XXX WAR Thrust presenting references during copies //More WAR may be necessary. diff --git a/hipcc b/hipcc new file mode 100644 index 0000000..9c836fa --- /dev/null +++ b/hipcc @@ -0,0 +1,31 @@ +SUFFIXES += .cu + +c++WARN = + +HIPCC_FLAGS = --std=c++17 +HIPCC_FLAGS += -Wno-old-style-cast +HIPCC_FLAGS += -O3 +HIPCC_FLAGS += -DHIP_FAST_MATH +HIPCC_FLAGS += --offload-arch=gfx90a + + + +CC = hipcc $(HIPCC_FLAGS) + +ptFLAGS = -DNoRepository + +c++FLAGS = $(GFLAGS) $(c++WARN) $(c++DBUG) $(ptFLAGS) \ + $(LIB_HEADER_DIRS) -fPIC + + +Ctoo = $(WM_SCHEDULER) $(CC) $(c++FLAGS) -c $< -o $@ +cxxtoo = $(Ctoo) +cctoo = $(Ctoo) +cpptoo = $(Ctoo) +cutoo = $(Ctoo) + +LINK_LIBS = $(c++DBUG) + +LINKLIBSO = $(CC) $(c++FLAGS) -shared + +LINKEXE = $(CC) $(c++FLAGS) diff --git a/gpu_chemistry/nvcc b/nvcc similarity index 83% rename from gpu_chemistry/nvcc rename to nvcc index 7fee8d2..435a1da 100644 --- a/gpu_chemistry/nvcc +++ b/nvcc @@ -6,12 +6,11 @@ c++WARN = NVCC_FLAGS = --std=c++17 NVCC_FLAGS += --expt-relaxed-constexpr NVCC_FLAGS += --expt-extended-lambda +NVCC_FLAGS += -O3 NVCC_FLAGS += --use_fast_math NVCC_FLASG += --generate-line-info NVCC_FLAGS += -lineinfo -#NVCC_FLAGS += -fmad=false -NVCC_FLAGS += --gpu-architecture=compute_80 --gpu-code=sm_80 -#NVCC_FLAGS += --gpu-architecture=compute_86 --gpu-code=sm_86 +NVCC_FLAGS += --gpu-architecture=compute_86 --gpu-code=sm_86 CC = nvcc $(NVCC_FLAGS)