Skip to content

Commit

Permalink
Merge pull request #5 from tirimatangi/fix-array-output
Browse files Browse the repository at this point in the history
Fix array output
  • Loading branch information
tirimatangi authored Nov 5, 2020
2 parents a80dd64 + ae4ee28 commit 947874c
Show file tree
Hide file tree
Showing 3 changed files with 117 additions and 46 deletions.
34 changes: 23 additions & 11 deletions Lazy.h
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ auto makeNewThread(U1 u1, U2 u2)

// Overload for several input arguments.
// Note that now the arguments will be copied, not moved
// because the capture list can not contain move(u)...
// because the capture list can not contain move(u)... until C++20.
template <class... U>
auto makeNewThread(const U&... u)
{
Expand Down Expand Up @@ -498,12 +498,15 @@ struct ResultWatcher
}
};

// Debug helper
template <class T>
void pretty_function(const T& t)
// Debug helper for finding out types T...
template <class... T>
void pretty_function(const T&...)
{
(void)t;
// std::cout << "pretty_function = " << __PRETTY_FUNCTION__ << "\n";
#if 0 // enable if iostream works in your system
std::stringstream ss;
ss << "pretty_function = " << __PRETTY_FUNCTION__ << "\n";
std::cout << ss.str();
#endif
}

// Executes "y = func(x)" for each x in vector vecX in a lock-free thread pool.
Expand Down Expand Up @@ -548,7 +551,7 @@ auto runForAll(const Vec& vecX, Func&& func)

for(std::thread& thr : aThreadPool)
if (thr.joinable())
thr.join();
thr.join();
}
else { // Threadpool is a vector of threads living in heap.
auto uNumThreads = std::min(std::size_t(std::thread::hardware_concurrency()), vecX.size());
Expand All @@ -558,7 +561,7 @@ auto runForAll(const Vec& vecX, Func&& func)

for(std::thread& thr : vecThreadPool)
if (thr.joinable())
thr.join();
thr.join();
}

// Deal with possible exception
Expand Down Expand Up @@ -614,7 +617,7 @@ auto runForAllInArray(const Arr& arrX, Func&& func, std::index_sequence<I...>)
// Executes "y = func(x)" for each x in array arrX in a separate thread.
// There will be as many parallel threads as there are elements in the array.
// Returns an array of y's.
template <class U, std::size_t N, class Func>
template <int MaxThreads = 0, class U, std::size_t N, class Func>
auto runForAll(const std::array<U, N>& arrX, Func&& func)
{
return runForAllInArray(arrX, std::forward<Func>(func), std::make_index_sequence<N>{});
Expand Down Expand Up @@ -653,13 +656,22 @@ auto nested(T t, F&& f, Fs&&... fs)
for (int i = 0; i < numDecimals; ++i, r *= 10);
return int(r * z);});
*/
template <int MaxThreads = 0, class Vec, class... Funcs>
auto runForAll(const Vec& x, Funcs&&... funcs)
template <int MaxThreads = 0, class U, class... Funcs>
auto runForAll(const std::vector<U>& x, Funcs&&... funcs)
{
auto nestedFuncs = [&funcs...](auto t) { return nested(t, funcs...); };
return runForAll<MaxThreads>(x, nestedFuncs);
}

// Continuation-chain overload of runForAll for std::array input.
// Bundles all of funcs... into one callable by handing them to nested()
// (presumably applied in order, f1 first -- confirm against nested()), and
// then delegates to the single-function std::array overload of runForAll.
//
// MaxThreads is accepted so that the call syntax matches the other overloads,
// but it is not forwarded: array input always uses as many threads as there
// are elements in the array.
template <int MaxThreads = 0, class U, std::size_t N, class... Funcs>
auto runForAll(const std::array<U, N>& arrX, Funcs&&... funcs)
{
    // The functions are captured by reference; the closure is only used
    // within this call, so the references stay valid.
    auto applyChain = [&funcs...](auto element)
    {
        return nested(element, funcs...);
    };

    // Pass the closure as an lvalue, exactly one callable, so that the
    // single-function overload is the one selected.
    return runForAll(arrX, applyChain);
}

// Overload of the above function for initializer list input.
template <int MaxThreads = 0, class U, class... Funcs>
auto runForAll(std::initializer_list<U> lstX, Funcs&&... funcs)
Expand Down
22 changes: 21 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ In this example, one of the three parallel functions may throw.
}
```
The output will be `i = 100, d = 3.16228, s = "10"`.
There are more examples of how to use `Lazy::runParallel` in [example-1.cc](example-1.cc).
For an example of how to use stop tokens to communicate between the functions, see example 1.2 in example-1.cc.
Expand Down Expand Up @@ -66,6 +69,14 @@ Here is an example on running a sequence of 3 continuations where the inputs are
}
```

The output will be
```
0 --> 1.22474
10 --> 10.0747
20 --> 14.1951
30 --> 17.3638
```

Notice that if the input were an `std::array<int, 4>` instead of `std::vector<int>`,
the output would be an `std::array<double, 4>` instead of `std::vector<double>`.
There are no heap allocations if the input is an `std::array`.
Expand Down Expand Up @@ -121,7 +132,16 @@ std::size_t indexOf(const Vec& vec,
}

```
For other methods provided by `Lazy::StopToken`, see `class StopToken` in the beginning of `Lazy.h`.
The output will be
```
Quarter #0 returns index -1 -> not found
Quarter #1 returns index -1 -> not found
Quarter #2 returns index 543 -> FOUND !
Quarter #3 returns index -1 -> not found
```
For other methods provided by `Lazy::StopToken`, see `class StopToken` at the beginning of [Lazy.h](Lazy.h).
For more examples of how to use `Lazy::runForAll`, see [example-2.cc](example-2.cc).
Expand Down
107 changes: 73 additions & 34 deletions example-2.cc
Original file line number Diff line number Diff line change
Expand Up @@ -72,15 +72,20 @@ int main()
{
std::cout << "Hey! Your machine has " << std::thread::hardware_concurrency() << " cores!\n";

// Make input vector and input array
int iVectorLength = 10 * std::thread::hardware_concurrency();

// Example 2.1: Call a function concurrently once for each element of the input vector and
// store the results to the output vector.
std::cout << "\n*** Example 2.1 *** : Call a function for each value in the input vector.\n";
std::vector<int> vecInput(iVectorLength);
for (int i = 0; i < iVectorLength; ++i)
vecInput[i] = 100 * i;

constexpr std::size_t szArrayLength = 100;
std::array<int, szArrayLength> arrInput;
for (std::size_t i = 0; i < szArrayLength; ++i)
arrInput[i] = 100 * i;

// Example 2.1: Call a function concurrently once for each element of the input vector and
// store the results to the output vector.
std::cout << "\n*** Example 2.1 *** : Call a function for each value in the input vector.\n";
{
// Set vecOutput[i] = func(vecInput[i]) for each i running in a separate thread.
// The number of parallel threads will be limited to the number of cores.
Expand All @@ -90,26 +95,33 @@ int main()
}
{
// The number of parallel threads can also be given as a template parameter. Use 10 in this example.
std::vector<double> vecOutput = Lazy::runForAll<10>(vecInput, [](auto x) { return intSqrt(x * 100) * 0.1; });
auto vecOutput = Lazy::runForAll<10>(vecInput, [](auto x) { return intSqrt(x * 100) * 0.1; });

static_assert(std::is_same_v<decltype(vecOutput), std::vector<double>>,
"2.1.2: Output vector type does not match!");

std::cout << "2.1.2: Input vector length = " << vecInput.size() << ", output vector length = " << vecOutput.size() << "\n";
}
{
// The input can also be an array. There will be as many parallel threads as
// there are elements in the array. There will be no heap allocations.
std::array<int, 10> arrInput;
for (int i = 0; i < arrInput.size(); ++i)
arrInput[i] = 100 * i;
std::array arrOutput = Lazy::runForAll(arrInput, intSqrt);
auto arrOutput = Lazy::runForAll(arrInput, intSqrt);
static_assert(std::is_same_v<decltype(arrOutput), std::array<uint16_t, szArrayLength>>,
"2.1.3: Output array type does not match!");
std::cout << "2.1.3: Input array length = " << arrInput.size() << ", output array length = " << arrOutput.size() << "\n";
}
{
// Initializer lists are also supported. The output is an std::vector.
auto vecOutput = Lazy::runForAll({33,22,77,99,88}, [](auto x) { return x - 0.5; });
static_assert(std::is_same_v<decltype(vecOutput), std::vector<double>>,
"2.1.4: Output vector type does not match!");
std::cout << "2.1.4: input values are {33,22,77,99,88}, output vector is {" <<
vecOutput[0] << ", " << vecOutput[1] << ", " << vecOutput[2] << ", " << vecOutput[3] << ", " << vecOutput[4] <<"}\n";

// If you want to avoid heap allocation, you can use an initialized std::array
std::array arrOutput = Lazy::runForAll(std::array{33,22,77,99,88}, [](auto x) { return x - 0.5; });
auto arrOutput = Lazy::runForAll(std::array{33,22,77,99,88}, [](auto x) { return x - 0.5; });
static_assert(std::is_same_v<decltype(arrOutput), std::array<double, 5>>,
"2.1.5: Output array type does not match!");
std::cout << "2.1.5: input values are {33,22,77,99,88}, output array is {" <<
arrOutput[0] << ", " << arrOutput[1] << ", " << arrOutput[2] << ", " << arrOutput[3] << ", " << arrOutput[4] <<"}\n";
}
Expand All @@ -119,30 +131,46 @@ int main()
// vecOutput[i] = f3(f2(f1((vecInput[i])))
std::cout << "\n*** Example 2.2 *** : Run a set of continuations for each value in the input vector.\n";
{
// out = sqrt((10 * in) + 1.5)
// vector out = sqrt((10 * in) + 1.5).
auto vecOutput = Lazy::runForAll(vecInput,
[](auto x) { return 10 * x; },
[](auto x) { return x + 1.5; },
[](auto x) { return std::sqrt(x); });

static_assert(std::is_same_v<decltype(vecOutput), std::vector<double>>,
"2.2: Output vector type does not match!");

std::cout << "2.2: Input vector length = " << vecInput.size() << ", output vector length = " << vecOutput.size() << "\n";
std::cout << " Last input value = " << vecInput.back() << ", last output value = " << vecOutput.back() << ".\n";
std::cout << " Last input value = " << vecInput.back() << ", last output value = " << vecOutput.back() << ".\n";

// array out = sqrt((10 * in) + 1.5)
// Array input always uses as many threads as there are elements in the array.
auto arrOutput = Lazy::runForAll(arrInput,
[](auto x) { return 10 * x; },
[](auto x) { return x + 1.5; },
[](auto x) { return std::sqrt(x); });

static_assert(std::is_same_v<decltype(arrOutput), std::array<double, szArrayLength>>,
"2.2: Output array type does not match!");

std::cout << " Input array length = " << arrInput.size() << ", output vector length = " << arrOutput.size() << "\n";
std::cout << " Last input value = " << arrInput.back() << ", last output value = " << arrOutput.back() << ".\n";
}

// Example 2.3: One or more functions in one or more threads throw. The exception can be caught normally.
std::cout << "\n*** Example 2.3 *** : The function may throw for some values of the input vector.\n";
{
try {
auto vecOutput = Lazy::runForAll({2,1,0,-1,2},
[](auto x) { return 100 * x; },
[](auto x) { if (x < 0)
throw std::runtime_error("[[Negative sqrt]]");
return std::sqrt(x); });
[](auto x) { return 100 * x; },
[](auto x) { if (x < 0)
throw std::runtime_error("[[Negative sqrt]]");
return std::sqrt(x); });
std::cout << "2.3: Output vector is {" <<
vecOutput[0] << ", " << vecOutput[1] << ", " << vecOutput[2] << ", " << vecOutput[3] << ", " << vecOutput[4] <<"}\n";
}
catch (const std::exception& e) {
std::cout << "EXCEPTION: " << e.what() <<"\n";
std::cout << "EXCEPTION: " << e.what() << "\n";
}
}

Expand All @@ -158,21 +186,21 @@ int main()
for (int i = 0; i < vec.size(); ++i)
vec[vec.size() - i - 1] = 10 * i;

// Make an array of {from, to}-index pairs. Can be either an array or a vector.
#if 1
const std::size_t N = 10; // Number of parallel threads
std::vector<std::pair<std::size_t, std::size_t>> aPairs(N);
#else
constexpr std::size_t N = 10; // Number of parallel threads
std::array<std::pair<std::size_t, std::size_t>, N> aPairs;
#endif
// Make an array of {from, to}-index pairs.
// Can be either an array or a vector. Both are used for demonstration.
constexpr std::size_t N = 10; // Number of parallel finder threads
std::vector<std::pair<std::size_t, std::size_t>> vecPairs(N);
std::array<std::pair<std::size_t, std::size_t>, N> arrPairs;

double dFrom = 0, dTo = 0;
for (auto i = 0; i < N; ++i) {
dFrom = dTo;
dTo += vec.size() / double(N);
aPairs[i] = {std::size_t(dFrom), std::min(std::size_t(dTo), vec.size())};
vecPairs[i] = {std::size_t(dFrom), std::min(std::size_t(dTo), vec.size())};
arrPairs[i] = {std::size_t(dFrom), std::min(std::size_t(dTo), vec.size())};
}
aPairs[N-1].second = vec.size();
vecPairs[N-1].second = vec.size();
arrPairs[N-1].second = vec.size();

int iFindMe = 5500; // Find this value from vector vec.
// Finder function which inputs a StopToken and an index pair (from, to)
Expand All @@ -185,20 +213,31 @@ int main()
// Run the finder in parallel for all index pairs
// A StopToken object is created automatically by the library
// because finder takes one as the first argument.
auto vecIndex = Lazy::runForAll(aPairs, finder);
// The job is done twice using both an array and a vector for demonstration.
auto vecIndex = Lazy::runForAll(vecPairs, finder);
auto arrIndex = Lazy::runForAll(arrPairs, finder);

static_assert(std::is_same_v<decltype(arrIndex), std::array<std::size_t, N>>,
"2.4: Output array type does not match!");
static_assert(std::is_same_v<decltype(vecIndex), std::vector<std::size_t>>,
"2.4: Output vector type does not match!");

std::cout << "2.4: Finder results for value "<<iFindMe<<" were: (-1 == not found)\n";
for (auto i = 0; i < N; ++i)
std::cout << i << ": index range [" << aPairs[i].first << ","<< aPairs[i].second << "] : found at index = " << int(vecIndex[i]) << "\n";
for (auto i = 0; i < N; ++i) {
std::cout << i << ": index range [" << arrPairs[i].first << ","<< arrPairs[i].second << "] : found at index = " << int(vecIndex[i]);
if (int(vecIndex[i]) >= 0)
std::cout << ", value is " << vec[vecIndex[i]] << ", should be " << iFindMe << "\n";
else
std::cout << "\n";
if (arrPairs[i] != vecPairs[i]) // Should never go here
std::cout << "2.4: Array vs. vector mismatch at index " << i << " !!\n";
}
}

// Example 2.5: Call a void function by using a dummy return value. The return value can be ignored.
std::cout << "\n*** Example 2.5 *** : A void function can be called by using dummy return type.\n";
{
std::vector<int> vecIn;
for(int n = 1; n <= 5; ++n)
vecIn.push_back(n);
// Use nullptr_t as the dummy return type
Lazy::runForAll(vecIn, [](auto n) { myVoidFunction(n); return nullptr;});
Lazy::runForAll({1,2,3,4}, [](auto n) { myVoidFunction(n); return nullptr;});
}
}

0 comments on commit 947874c

Please sign in to comment.