From ae4ee28b63ddb6ac22112326a299c60ab401a8a0 Mon Sep 17 00:00:00 2001 From: tirimatangi Date: Thu, 5 Nov 2020 22:03:01 +0200 Subject: [PATCH] Fix array output --- Lazy.h | 34 ++++++++++------ README.md | 22 ++++++++++- example-2.cc | 107 +++++++++++++++++++++++++++++++++++---------------- 3 files changed, 117 insertions(+), 46 deletions(-) diff --git a/Lazy.h b/Lazy.h index e55540e..08e4792 100644 --- a/Lazy.h +++ b/Lazy.h @@ -230,7 +230,7 @@ auto makeNewThread(U1 u1, U2 u2) // Overload for several input arguments. // Note that now the arguments will be copied, not moved -// because the capture list can not contain move(u)... +// because the capture list can not contain move(u)... until C++20. template auto makeNewThread(const U&... u) { @@ -498,12 +498,15 @@ struct ResultWatcher } }; -// Debug helper -template -void pretty_function(const T& t) +// Debug helper for finding out types T... +template +void pretty_function(const T&...) { - (void)t; - // std::cout << "pretty_function = " << __PRETTY_FUNCTION__ << "\n"; +#if 0 // enable if iostream works in your system + std::stringstream ss; + ss << "pretty_function = " << __PRETTY_FUNCTION__ << "\n"; + std::cout << ss.str(); +#endif } // Executes "y = func(x)" for each x in vector vecX in a lock-free thread pool. @@ -548,7 +551,7 @@ auto runForAll(const Vec& vecX, Func&& func) for(std::thread& thr : aThreadPool) if (thr.joinable()) - thr.join(); + thr.join(); } else { // Threadpool is a vector of threads living in heap. auto uNumThreads = std::min(std::size_t(std::thread::hardware_concurrency()), vecX.size()); @@ -558,7 +561,7 @@ auto runForAll(const Vec& vecX, Func&& func) for(std::thread& thr : vecThreadPool) if (thr.joinable()) - thr.join(); + thr.join(); } // Deal with possible exception @@ -614,7 +617,7 @@ auto runForAllInArray(const Arr& arrX, Func&& func, std::index_sequence) // Executes "y = func(x)" for each x in array arrX in a separate thread. // There will be as many parallel threads as there are elements in the array. // Returns an array of y's. -template +template auto runForAll(const std::array& arrX, Func&& func) { return runForAllInArray(arrX, std::forward(func), std::make_index_sequence{}); @@ -653,13 +656,22 @@ auto nested(T t, F&& f, Fs&&... fs) for (int i = 0; i < numDecimals; ++i, r *= 10); return int(r * z);}); */ -template -auto runForAll(const Vec& x, Funcs&&... funcs) +template +auto runForAll(const std::vector& x, Funcs&&... funcs) { auto nestedFuncs = [&funcs...](auto t) { return nested(t, funcs...); }; return runForAll(x, nestedFuncs); } +// Note: MaxThreads template parameter is ignored. +// There are always as many threads as there are elements in the array. +template +auto runForAll(const std::array& arrX, Funcs&&... funcs) +{ + auto nestedFuncs = [&funcs...](auto t) { return nested(t, funcs...); }; + return runForAll(arrX, nestedFuncs); +} + // Overload of the above function for initializer list input. template auto runForAll(std::initializer_list lstX, Funcs&&... funcs) diff --git a/README.md b/README.md index 6573703..e9def23 100644 --- a/README.md +++ b/README.md @@ -38,6 +38,9 @@ In this example, one of the three parallel functions may throw. } ``` +The output will be `i = 100, d = 3.16228, s = "10"`. + + There are more examples on how to use `Lazy::runParallel` in [example-1.cc](example-1.cc). For an example on how to use stop tokens to communicate between the functions, see example 1.2 in example-1.cc. @@ -66,6 +69,14 @@ Here is an example on running a sequence of 3 continuations where the inputs are } ``` +The output will be +``` +0 --> 1.22474 +10 --> 10.0747 +20 --> 14.1951 +30 --> 17.3638 +``` + Notice that if the input were an `std::array` instead of `std::vector`, the output would be an `std::array` instead of `std::vector`. There are no heap allocations if the input is an `std::array`. @@ -121,7 +132,16 @@ std::size_t indexOf(const Vec& vec, } ``` -For other methods provided by `Lazy::StopToken`, see `class StopToken` in the beginning of `Lazy.h`. + +The output will be +``` +Quarter #0 returns index -1 -> not found +Quarter #1 returns index -1 -> not found +Quarter #2 returns index 543 -> FOUND ! +Quarter #3 returns index -1 -> not found +``` + +For other methods provided by `Lazy::StopToken`, see `class StopToken` in the beginning of [Lazy.h](Lazy.h). For more examples on how to use `Lazy::runForAll`, see [example-2.cc](example-2.cc). diff --git a/example-2.cc b/example-2.cc index 2f0a0e8..33a9876 100644 --- a/example-2.cc +++ b/example-2.cc @@ -72,15 +72,20 @@ int main() { std::cout << "Hey! Your machine has " << std::thread::hardware_concurrency() << " cores!\n"; + // Make input vector and input array int iVectorLength = 10 * std::thread::hardware_concurrency(); - - // Example 2.1: Call a function concurrently once for each element of the input vector and - // store the results to the output vector. - std::cout << "\n*** Example 2.1 *** : Call a function for each value in the input vector.\n"; std::vector vecInput(iVectorLength); for (int i = 0; i < iVectorLength; ++i) vecInput[i] = 100 * i; + constexpr std::size_t szArrayLength = 100; + std::array arrInput; + for (std::size_t i = 0; i < szArrayLength; ++i) + arrInput[i] = 100 * i; + + // Example 2.1: Call a function concurrently once for each element of the input vector and + // store the results to the output vector. + std::cout << "\n*** Example 2.1 *** : Call a function for each value in the input vector.\n"; { // Set vecOutput[i] = func(vecInput[i]) for each i running in a separate thread. // The number of parallel threads will be limited to the number of cores. @@ -90,26 +95,33 @@ int main() } { // The number of parallel threads can also be given as a template parameter. Use 10 in this example. - std::vector vecOutput = Lazy::runForAll<10>(vecInput, [](auto x) { return intSqrt(x * 100) * 0.1; }); + auto vecOutput = Lazy::runForAll<10>(vecInput, [](auto x) { return intSqrt(x * 100) * 0.1; }); + + static_assert(std::is_same_v>, + "2.1.2: Output vector type does not match!"); + std::cout << "2.1.2: Input vector length = " << vecInput.size() << ", output vector length = " << vecOutput.size() << "\n"; } { // The input can also be an array. There will be as many parallel threads as // there are elements in the array. There will be no heap allocations. - std::array arrInput; - for (int i = 0; i < arrInput.size(); ++i) - arrInput[i] = 100 * i; - std::array arrOutput = Lazy::runForAll(arrInput, intSqrt); + auto arrOutput = Lazy::runForAll(arrInput, intSqrt); + static_assert(std::is_same_v>, + "2.1.3: Output array type does not match!"); std::cout << "2.1.3: Input array length = " << arrInput.size() << ", output array length = " << arrOutput.size() << "\n"; } { // Initializer lists are also supported. The output is an std::vector. auto vecOutput = Lazy::runForAll({33,22,77,99,88}, [](auto x) { return x - 0.5; }); + static_assert(std::is_same_v>, + "2.1.4: Output vector type does not match!"); std::cout << "2.1.4: input values are {33,22,77,99,88}, output vector is {" << vecOutput[0] << ", " << vecOutput[1] << ", " << vecOutput[2] << ", " << vecOutput[3] << ", " << vecOutput[4] <<"}\n"; // If you want to avoid heap allocation, you can use initialized std:array - std::array arrOutput = Lazy::runForAll(std::array{33,22,77,99,88}, [](auto x) { return x - 0.5; }); + auto arrOutput = Lazy::runForAll(std::array{33,22,77,99,88}, [](auto x) { return x - 0.5; }); + static_assert(std::is_same_v>, + "2.1.5: Output array type does not match!"); std::cout << "2.1.5: input values are {33,22,77,99,88}, output array is {" << arrOutput[0] << ", " << arrOutput[1] << ", " << arrOutput[2] << ", " << arrOutput[3] << ", " << arrOutput[4] <<"}\n"; } @@ -119,14 +131,30 @@ int main() // vecOutput[i] = f3(f2(f1((vecInput[i]))) std::cout << "\n*** Example 2.2 *** : Run a set of continuations for each value in the input vector.\n"; { - // out = sqrt((10 * in) + 1.5) + // vector out = sqrt((10 * in) + 1.5). auto vecOutput = Lazy::runForAll(vecInput, [](auto x) { return 10 * x; }, [](auto x) { return x + 1.5; }, [](auto x) { return std::sqrt(x); }); + static_assert(std::is_same_v>, + "2.2: Output vector type does not match!"); + std::cout << "2.2: Input vector length = " << vecInput.size() << ", output vector length = " << vecOutput.size() << "\n"; - std::cout << " Last input value = " << vecInput.back() << ", last output value = " << vecOutput.back() << ".\n"; + std::cout << " Last input value = " << vecInput.back() << ", last output value = " << vecOutput.back() << ".\n"; + + // array out = sqrt((10 * in) + 1.5) + // Array input uses always as many threads as there are elements in the array. + auto arrOutput = Lazy::runForAll(arrInput, + [](auto x) { return 10 * x; }, + [](auto x) { return x + 1.5; }, + [](auto x) { return std::sqrt(x); }); + + static_assert(std::is_same_v>, + "2.2: Output array type does not match!"); + + std::cout << " Input array length = " << arrInput.size() << ", output vector length = " << arrOutput.size() << "\n"; + std::cout << " Last input value = " << arrInput.back() << ", last output value = " << arrOutput.back() << ".\n"; } // Example 2.3: One or more functions in one or more threads throw. The exception can be caught normally. @@ -134,15 +162,15 @@ int main() { try { auto vecOutput = Lazy::runForAll({2,1,0,-1,2}, - [](auto x) { return 100 * x; }, - [](auto x) { if (x < 0) - throw std::runtime_error("[[Negative sqrt]]"); - return std::sqrt(x); }); + [](auto x) { return 100 * x; }, + [](auto x) { if (x < 0) + throw std::runtime_error("[[Negative sqrt]]"); + return std::sqrt(x); }); std::cout << "2.3: Output vector is {" << vecOutput[0] << ", " << vecOutput[1] << ", " << vecOutput[2] << ", " << vecOutput[3] << ", " << vecOutput[4] <<"}\n"; } catch (const std::exception& e) { - std::cout << "EXCEPTION: " << e.what() <<"\n"; + std::cout << "EXCEPTION: " << e.what() << "\n"; } } @@ -158,21 +186,21 @@ int main() for (int i = 0; i < vec.size(); ++i) vec[vec.size() - i - 1] = 10 * i; - // Make an array of {from, to}-index pairs. Can be either an array or a vector. -#if 1 - const std::size_t N = 10; // Number of parallel threads - std::vector> aPairs(N); -#else - constexpr std::size_t N = 10; // Number of parallel threads - std::array, N> aPairs; -#endif + // Make an array of {from, to}-index pairs. + // Can be either an array or a vector. Both are used for demonstration. + constexpr std::size_t N = 10; // Number of parallel finder threads + std::vector> vecPairs(N); + std::array, N> arrPairs; + double dFrom = 0, dTo = 0; for (auto i = 0; i < N; ++i) { dFrom = dTo; dTo += vec.size() / double(N); - aPairs[i] = {std::size_t(dFrom), std::min(std::size_t(dTo), vec.size())}; + vecPairs[i] = {std::size_t(dFrom), std::min(std::size_t(dTo), vec.size())}; + arrPairs[i] = {std::size_t(dFrom), std::min(std::size_t(dTo), vec.size())}; } - aPairs[N-1].second = vec.size(); + vecPairs[N-1].second = vec.size(); + arrPairs[N-1].second = vec.size(); int iFindMe = 5500; // Find this value from vector vec. // Finder function which inputs a StopToken and an index pair (from, to) @@ -185,20 +213,31 @@ int main() // Run the finder in parallel for all index pairs // A StopToken object is created automatically by the library // because finder takes one as the first argument. - auto vecIndex = Lazy::runForAll(aPairs, finder); + // The job is done twice using both an array and a vector for demonstration. + auto vecIndex = Lazy::runForAll(vecPairs, finder); + auto arrIndex = Lazy::runForAll(arrPairs, finder); + + static_assert(std::is_same_v>, + "2.4: Output array type does not match!"); + static_assert(std::is_same_v>, + "2.4: Output vector type does not match!"); std::cout << "2.4: Finder results for value "< vecIn; - for(int n = 1; n <= 5; ++n) - vecIn.push_back(n); // Use nullptr_t as the dummy return type - Lazy::runForAll(vecIn, [](auto n) { myVoidFunction(n); return nullptr;}); + Lazy::runForAll({1,2,3,4}, [](auto n) { myVoidFunction(n); return nullptr;}); } }