Commit

apply upstream changes
Ralf Moeller committed Sep 4, 2024
1 parent ded0cc9 commit 18991f6
Showing 5 changed files with 104 additions and 95 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -35,3 +35,4 @@ transposeTestAutoGen
verticalBitonicSortTest
tsimd_sh.H
transpose_inplace_autogen_stat.txt
+tsimd_tarfile.list
4 changes: 4 additions & 0 deletions README.md
@@ -6,6 +6,10 @@

Template parameters are the element data type of the vectors and the vector width in bytes (e.g. 16 for SSE* and NEON, 32 for AVX/AVX2). This makes it possible to flexibly change the data type and the vector instruction set for entire portions of the code. Moreover, many implementation details at the intrinsics level are hidden by **T-SIMD**. SSE*, AVX/AVX2, AVX-512, and ARM NEON vector instruction sets are currently supported. Please note the [differences on different architectures](@ref differences-on-different-architectures).

+# License
+
+This software is distributed under a specific **license agreement**; please see the file `LICENSE` or `LICENSE.doc`.
+
# Compiler and C++ Standard Support/Requirements

**T-SIMD** requires at least C++11.
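To illustrate the README paragraph above: the following self-contained sketch is not T-SIMD's actual API (all names in it are invented for illustration), but it shows the idea of a vector class templated on the element type and on the vector width in bytes, so the same generic code compiles for 16-byte (SSE*/NEON) and 32-byte (AVX/AVX2) vectors.

#include <cstddef>

// Hypothetical stand-in for the real library (names invented): a vector of
// WIDTH_BYTES bytes holding WIDTH_BYTES / sizeof(T) elements of type T.
template <typename T, std::size_t WIDTH_BYTES>
struct DemoVec {
  static constexpr std::size_t elements = WIDTH_BYTES / sizeof(T);
  T lane[elements]; // scalar storage; a real backend would wrap __m128, __m256, or NEON registers
};

// Generic code is written once against the two template parameters ...
template <typename T, std::size_t W>
DemoVec<T, W> add(const DemoVec<T, W> &a, const DemoVec<T, W> &b)
{
  DemoVec<T, W> r;
  for (std::size_t i = 0; i < DemoVec<T, W>::elements; i++)
    r.lane[i] = a.lane[i] + b.lane[i];
  return r;
}

int main()
{
  DemoVec<float, 16> a{}, b{}; // 4 floats per vector (SSE*/NEON width)
  DemoVec<float, 32> c{}, d{}; // 8 floats per vector (AVX/AVX2 width)
  DemoVec<float, 16> r16 = add(a, b); // ... and recompiles unchanged for either width
  DemoVec<float, 32> r32 = add(c, d);
  (void)r16;
  (void)r32;
  return 0;
}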
12 changes: 8 additions & 4 deletions SIMDVecAutoTestSerial.H
@@ -170,11 +170,13 @@ struct SerialVec
// individually
if ((rand() & 0x0f) == 0) {
T value;
-      while ((value = getRandomOrSpecialValue<T>()) < T(0));
+      while ((value = getRandomOrSpecialValue<T>()) < T(0))
+        ;
for (size_t i = 0; i < elements; i++) vec[i] = value;
} else {
for (size_t i = 0; i < elements; i++)
-        while ((vec[i] = getRandomOrSpecialValue<T>()) < T(0));
+        while ((vec[i] = getRandomOrSpecialValue<T>()) < T(0))
+          ;
}
}
void randomizeRanges()
@@ -219,11 +221,13 @@ struct SerialVec
// individually
if ((rand() & 0x0f) == 0) {
T value;
-      while ((value = getRandomOrSpecialValueRanges<T>()) == T(0));
+      while ((value = getRandomOrSpecialValueRanges<T>()) == T(0))
+        ;
for (size_t i = 0; i < elements; i++) vec[i] = value;
} else {
for (size_t i = 0; i < elements; i++)
-        while ((vec[i] = getRandomOrSpecialValueRanges<T>()) == T(0));
+        while ((vec[i] = getRandomOrSpecialValueRanges<T>()) == T(0))
+          ;
}
}
void randomizeCond()
180 changes: 90 additions & 90 deletions SIMDVecBaseImplIntel16.H
@@ -1104,7 +1104,7 @@ static SIMD_INLINE Vec<Long, 16> min(const Vec<Long, 16> &a,
// from Hacker's Delight, 2-12 Comparison Predicates: (swapped lt)
const __m128i diff = _mm_sub_epi64(b, a);
#if 1 // TODO: check which is faster
-  const __m128i res = _mm_xor_si128(
+  const __m128i res = _mm_xor_si128(
diff, _mm_and_si128(_mm_xor_si128(b, a), _mm_xor_si128(diff, b)));
#else
const __m128i res = _mm_or_si128(_mm_andnot_si128(a, b),
@@ -1209,7 +1209,7 @@ static SIMD_INLINE Vec<Long, 16> max(const Vec<Long, 16> &a,
// from Hacker's Delight, 2-12 Comparison Predicates: (swapped lt)
const __m128i diff = _mm_sub_epi64(b, a);
#if 1 // TODO: check which is faster
-  const __m128i res = _mm_xor_si128(
+  const __m128i res = _mm_xor_si128(
diff, _mm_and_si128(_mm_xor_si128(b, a), _mm_xor_si128(diff, b)));
#else
const __m128i res = _mm_or_si128(_mm_andnot_si128(a, b),
@@ -2188,10 +2188,10 @@ static SIMD_INLINE void extend(const Vec<SignedByte, 16> &vIn,
const __m128i vInPos = _mm_max_epi8(vIn, _mm_setzero_si128());
#else
// from Agner Fog's VCL vectori128.h
-  const __m128i signbit = _mm_set1_epi32(0x80808080);
-  const __m128i a1 = _mm_xor_si128(vIn, signbit); // add 0x80
-  const __m128i m1 = _mm_max_epu8(a1, signbit); // unsigned max
-  const __m128i vInPos = _mm_xor_si128(m1, signbit); // sub 0x80
+  const __m128i signbit = _mm_set1_epi32(0x80808080);
+  const __m128i a1 = _mm_xor_si128(vIn, signbit); // add 0x80
+  const __m128i m1 = _mm_max_epu8(a1, signbit); // unsigned max
+  const __m128i vInPos = _mm_xor_si128(m1, signbit); // sub 0x80
#endif
vOut[0] = _mm_unpacklo_epi8(vInPos, _mm_setzero_si128());
vOut[1] = _mm_unpackhi_epi8(vInPos, _mm_setzero_si128());
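The `#else` branch in the hunk above (taken from Agner Fog's VCL, per the comment) computes the signed byte maximum with zero using only the unsigned `_mm_max_epu8`, since `_mm_max_epi8` in the `#if` branch needs SSE4.1. A scalar sketch of the same trick, for illustration only (not part of the commit):

#include <algorithm>
#include <cstdint>

// Scalar version of the trick: x ^ 0x80 maps signed bytes [-128, 127]
// monotonically onto unsigned bytes [0, 255], so an unsigned max followed
// by flipping the bit back yields the signed max. The second operand is 0,
// whose flipped form is 0x80, i.e. exactly the "signbit" constant above.
static inline int8_t max_with_zero_signed(int8_t x)
{
  const uint8_t biased = static_cast<uint8_t>(x) ^ 0x80u;              // "add 0x80"
  const uint8_t m      = std::max(biased, static_cast<uint8_t>(0x80)); // unsigned max
  return static_cast<int8_t>(m ^ 0x80u);                               // "sub 0x80"
}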
Expand All @@ -2212,16 +2212,16 @@ static SIMD_INLINE void extend(const Vec<SignedByte, 16> &vIn,
vOut[2] = _mm_cvtepi8_epi32(_mm_srli_si128(vIn, 8));
vOut[3] = _mm_cvtepi8_epi32(_mm_srli_si128(vIn, 12));
#else
-  const __m128i lo8 = _mm_unpacklo_epi8(_mm_undefined_si128(), vIn);
-  const __m128i hi8 = _mm_unpackhi_epi8(_mm_undefined_si128(), vIn);
-  const __m128i lolo16 = _mm_unpacklo_epi16(_mm_undefined_si128(), lo8);
-  const __m128i lohi16 = _mm_unpackhi_epi16(_mm_undefined_si128(), lo8);
-  const __m128i hilo16 = _mm_unpacklo_epi16(_mm_undefined_si128(), hi8);
-  const __m128i hihi16 = _mm_unpackhi_epi16(_mm_undefined_si128(), hi8);
-  vOut[0] = _mm_srai_epi32(lolo16, 24);
-  vOut[1] = _mm_srai_epi32(lohi16, 24);
-  vOut[2] = _mm_srai_epi32(hilo16, 24);
-  vOut[3] = _mm_srai_epi32(hihi16, 24);
+  const __m128i lo8 = _mm_unpacklo_epi8(_mm_undefined_si128(), vIn);
+  const __m128i hi8 = _mm_unpackhi_epi8(_mm_undefined_si128(), vIn);
+  const __m128i lolo16 = _mm_unpacklo_epi16(_mm_undefined_si128(), lo8);
+  const __m128i lohi16 = _mm_unpackhi_epi16(_mm_undefined_si128(), lo8);
+  const __m128i hilo16 = _mm_unpacklo_epi16(_mm_undefined_si128(), hi8);
+  const __m128i hihi16 = _mm_unpackhi_epi16(_mm_undefined_si128(), hi8);
+  vOut[0] = _mm_srai_epi32(lolo16, 24);
+  vOut[1] = _mm_srai_epi32(lohi16, 24);
+  vOut[2] = _mm_srai_epi32(hilo16, 24);
+  vOut[3] = _mm_srai_epi32(hihi16, 24);
#endif
}
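The fallback paths in the two `extend` hunks above, and in several further `extend` overloads below, share one idiom: unpacking against `_mm_undefined_si128()` moves each signed byte into the top bits of a wider lane, and `_mm_srai_epi32(..., 24)` then sign-extends it while discarding whatever sat in the low bits. A scalar sketch of a single lane, for illustration only (it assumes the usual arithmetic behaviour of `>>` on signed values and of unsigned-to-signed casts):

#include <cstdint>

// One lane of the fallback: the byte to be extended ends up in bits 24..31
// of a 32-bit lane (the lower 24 bits hold garbage, which is why
// _mm_undefined_si128() is acceptable as the other unpack operand), and a
// single arithmetic right shift by 24 produces the sign-extended value.
static inline int32_t sign_extend_byte(int8_t b, uint32_t garbage_low24)
{
  const uint32_t packed = (static_cast<uint32_t>(static_cast<uint8_t>(b)) << 24) |
                          (garbage_low24 & 0x00FFFFFFu); // like the two unpack steps
  return static_cast<int32_t>(packed) >> 24;             // like _mm_srai_epi32(x, 24)
}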

Expand All @@ -2234,16 +2234,16 @@ static SIMD_INLINE void extend(const Vec<SignedByte, 16> &vIn,
vOut[2] = _mm_cvtepi32_ps(_mm_cvtepi8_epi32(_mm_srli_si128(vIn, 8)));
vOut[3] = _mm_cvtepi32_ps(_mm_cvtepi8_epi32(_mm_srli_si128(vIn, 12)));
#else
-  const __m128i lo8 = _mm_unpacklo_epi8(_mm_undefined_si128(), vIn);
-  const __m128i hi8 = _mm_unpackhi_epi8(_mm_undefined_si128(), vIn);
-  const __m128i lolo16 = _mm_unpacklo_epi16(_mm_undefined_si128(), lo8);
-  const __m128i lohi16 = _mm_unpackhi_epi16(_mm_undefined_si128(), lo8);
-  const __m128i hilo16 = _mm_unpacklo_epi16(_mm_undefined_si128(), hi8);
-  const __m128i hihi16 = _mm_unpackhi_epi16(_mm_undefined_si128(), hi8);
-  vOut[0] = _mm_cvtepi32_ps(_mm_srai_epi32(lolo16, 24));
-  vOut[1] = _mm_cvtepi32_ps(_mm_srai_epi32(lohi16, 24));
-  vOut[2] = _mm_cvtepi32_ps(_mm_srai_epi32(hilo16, 24));
-  vOut[3] = _mm_cvtepi32_ps(_mm_srai_epi32(hihi16, 24));
+  const __m128i lo8 = _mm_unpacklo_epi8(_mm_undefined_si128(), vIn);
+  const __m128i hi8 = _mm_unpackhi_epi8(_mm_undefined_si128(), vIn);
+  const __m128i lolo16 = _mm_unpacklo_epi16(_mm_undefined_si128(), lo8);
+  const __m128i lohi16 = _mm_unpackhi_epi16(_mm_undefined_si128(), lo8);
+  const __m128i hilo16 = _mm_unpacklo_epi16(_mm_undefined_si128(), hi8);
+  const __m128i hihi16 = _mm_unpackhi_epi16(_mm_undefined_si128(), hi8);
+  vOut[0] = _mm_cvtepi32_ps(_mm_srai_epi32(lolo16, 24));
+  vOut[1] = _mm_cvtepi32_ps(_mm_srai_epi32(lohi16, 24));
+  vOut[2] = _mm_cvtepi32_ps(_mm_srai_epi32(hilo16, 24));
+  vOut[3] = _mm_cvtepi32_ps(_mm_srai_epi32(hihi16, 24));
#endif
}

@@ -2281,10 +2281,10 @@ static SIMD_INLINE void extend(const Vec<Short, 16> &vIn,
_mm_srai_epi32(_mm_unpacklo_epi16(_mm_undefined_si128(), vIn), 16);
const __m128i hi16 =
_mm_srai_epi32(_mm_unpackhi_epi16(_mm_undefined_si128(), vIn), 16);
-  vOut[0] = _mm_cvtepi32_pd(lo16);
-  vOut[1] = _mm_cvtepi32_pd(_mm_srli_si128(lo16, 8));
-  vOut[2] = _mm_cvtepi32_pd(hi16);
-  vOut[3] = _mm_cvtepi32_pd(_mm_srli_si128(hi16, 8));
+  vOut[0] = _mm_cvtepi32_pd(lo16);
+  vOut[1] = _mm_cvtepi32_pd(_mm_srli_si128(lo16, 8));
+  vOut[2] = _mm_cvtepi32_pd(hi16);
+  vOut[3] = _mm_cvtepi32_pd(_mm_srli_si128(hi16, 8));
#endif
}

@@ -2332,12 +2332,12 @@ static SIMD_INLINE void extend(const Vec<Word, 16> &vIn, Vec<Long, 16> vOut[4])
vOut[2] = _mm_cvtepu16_epi64(_mm_srli_si128(vIn, 8));
vOut[3] = _mm_cvtepu16_epi64(_mm_srli_si128(vIn, 12));
#else
-  const __m128i lo16 = _mm_unpacklo_epi16(vIn, _mm_setzero_si128());
-  const __m128i hi16 = _mm_unpackhi_epi16(vIn, _mm_setzero_si128());
-  vOut[0] = _mm_unpacklo_epi32(lo16, _mm_setzero_si128());
-  vOut[1] = _mm_unpackhi_epi32(lo16, _mm_setzero_si128());
-  vOut[2] = _mm_unpacklo_epi32(hi16, _mm_setzero_si128());
-  vOut[3] = _mm_unpackhi_epi32(hi16, _mm_setzero_si128());
+  const __m128i lo16 = _mm_unpacklo_epi16(vIn, _mm_setzero_si128());
+  const __m128i hi16 = _mm_unpackhi_epi16(vIn, _mm_setzero_si128());
+  vOut[0] = _mm_unpacklo_epi32(lo16, _mm_setzero_si128());
+  vOut[1] = _mm_unpackhi_epi32(lo16, _mm_setzero_si128());
+  vOut[2] = _mm_unpacklo_epi32(hi16, _mm_setzero_si128());
+  vOut[3] = _mm_unpackhi_epi32(hi16, _mm_setzero_si128());
#endif
}

Expand All @@ -2350,12 +2350,12 @@ static SIMD_INLINE void extend(const Vec<Word, 16> &vIn,
vOut[2] = _mm_cvtepi32_pd(_mm_cvtepu16_epi32(_mm_srli_si128(vIn, 8)));
vOut[3] = _mm_cvtepi32_pd(_mm_cvtepu16_epi32(_mm_srli_si128(vIn, 12)));
#else
-  const __m128i lo16 = _mm_unpacklo_epi16(vIn, _mm_setzero_si128());
-  const __m128i hi16 = _mm_unpackhi_epi16(vIn, _mm_setzero_si128());
-  vOut[0] = _mm_cvtepi32_pd(lo16);
-  vOut[1] = _mm_cvtepi32_pd(_mm_srli_si128(lo16, 8));
-  vOut[2] = _mm_cvtepi32_pd(hi16);
-  vOut[3] = _mm_cvtepi32_pd(_mm_srli_si128(hi16, 8));
+  const __m128i lo16 = _mm_unpacklo_epi16(vIn, _mm_setzero_si128());
+  const __m128i hi16 = _mm_unpackhi_epi16(vIn, _mm_setzero_si128());
+  vOut[0] = _mm_cvtepi32_pd(lo16);
+  vOut[1] = _mm_cvtepi32_pd(_mm_srli_si128(lo16, 8));
+  vOut[2] = _mm_cvtepi32_pd(hi16);
+  vOut[3] = _mm_cvtepi32_pd(_mm_srli_si128(hi16, 8));
#endif
}

@@ -2416,24 +2416,24 @@ static SIMD_INLINE void extend(const Vec<SignedByte, 16> &vIn,
vOut[6] = _mm_cvtepi32_pd(_mm_cvtepi8_epi32(_mm_srli_si128(vIn, 12)));
vOut[7] = _mm_cvtepi32_pd(_mm_cvtepi8_epi32(_mm_srli_si128(vIn, 14)));
#else
-  const __m128i lo8 = _mm_unpacklo_epi8(_mm_undefined_si128(), vIn);
-  const __m128i hi8 = _mm_unpackhi_epi8(_mm_undefined_si128(), vIn);
-  const __m128i lolo16 = _mm_unpacklo_epi16(_mm_undefined_si128(), lo8);
-  const __m128i lohi16 = _mm_unpackhi_epi16(_mm_undefined_si128(), lo8);
-  const __m128i hilo16 = _mm_unpacklo_epi16(_mm_undefined_si128(), hi8);
-  const __m128i hihi16 = _mm_unpackhi_epi16(_mm_undefined_si128(), hi8);
-  const __m128i lolo16ext = _mm_srai_epi32(lolo16, 24);
-  const __m128i lohi16ext = _mm_srai_epi32(lohi16, 24);
-  const __m128i hilo16ext = _mm_srai_epi32(hilo16, 24);
-  const __m128i hihi16ext = _mm_srai_epi32(hihi16, 24);
-  vOut[0] = _mm_cvtepi32_pd(lolo16ext);
-  vOut[1] = _mm_cvtepi32_pd(_mm_srli_si128(lolo16ext, 8));
-  vOut[2] = _mm_cvtepi32_pd(lohi16ext);
-  vOut[3] = _mm_cvtepi32_pd(_mm_srli_si128(lohi16ext, 8));
-  vOut[4] = _mm_cvtepi32_pd(hilo16ext);
-  vOut[5] = _mm_cvtepi32_pd(_mm_srli_si128(hilo16ext, 8));
-  vOut[6] = _mm_cvtepi32_pd(hihi16ext);
-  vOut[7] = _mm_cvtepi32_pd(_mm_srli_si128(hihi16ext, 8));
+  const __m128i lo8 = _mm_unpacklo_epi8(_mm_undefined_si128(), vIn);
+  const __m128i hi8 = _mm_unpackhi_epi8(_mm_undefined_si128(), vIn);
+  const __m128i lolo16 = _mm_unpacklo_epi16(_mm_undefined_si128(), lo8);
+  const __m128i lohi16 = _mm_unpackhi_epi16(_mm_undefined_si128(), lo8);
+  const __m128i hilo16 = _mm_unpacklo_epi16(_mm_undefined_si128(), hi8);
+  const __m128i hihi16 = _mm_unpackhi_epi16(_mm_undefined_si128(), hi8);
+  const __m128i lolo16ext = _mm_srai_epi32(lolo16, 24);
+  const __m128i lohi16ext = _mm_srai_epi32(lohi16, 24);
+  const __m128i hilo16ext = _mm_srai_epi32(hilo16, 24);
+  const __m128i hihi16ext = _mm_srai_epi32(hihi16, 24);
+  vOut[0] = _mm_cvtepi32_pd(lolo16ext);
+  vOut[1] = _mm_cvtepi32_pd(_mm_srli_si128(lolo16ext, 8));
+  vOut[2] = _mm_cvtepi32_pd(lohi16ext);
+  vOut[3] = _mm_cvtepi32_pd(_mm_srli_si128(lohi16ext, 8));
+  vOut[4] = _mm_cvtepi32_pd(hilo16ext);
+  vOut[5] = _mm_cvtepi32_pd(_mm_srli_si128(hilo16ext, 8));
+  vOut[6] = _mm_cvtepi32_pd(hihi16ext);
+  vOut[7] = _mm_cvtepi32_pd(_mm_srli_si128(hihi16ext, 8));
#endif
}

Expand All @@ -2451,20 +2451,20 @@ static SIMD_INLINE void extend(const Vec<Byte, 16> &vIn, Vec<Long, 16> vOut[8])
vOut[6] = _mm_cvtepu8_epi64(_mm_srli_si128(vIn, 12));
vOut[7] = _mm_cvtepu8_epi64(_mm_srli_si128(vIn, 14));
#else
-  const __m128i lo8 = _mm_unpacklo_epi8(vIn, _mm_setzero_si128());
-  const __m128i hi8 = _mm_unpackhi_epi8(vIn, _mm_setzero_si128());
-  const __m128i lolo16 = _mm_unpacklo_epi16(lo8, _mm_setzero_si128());
-  const __m128i lohi16 = _mm_unpackhi_epi16(lo8, _mm_setzero_si128());
-  const __m128i hilo16 = _mm_unpacklo_epi16(hi8, _mm_setzero_si128());
-  const __m128i hihi16 = _mm_unpackhi_epi16(hi8, _mm_setzero_si128());
-  vOut[0] = _mm_unpacklo_epi32(lolo16, _mm_setzero_si128());
-  vOut[1] = _mm_unpackhi_epi32(lolo16, _mm_setzero_si128());
-  vOut[2] = _mm_unpacklo_epi32(lohi16, _mm_setzero_si128());
-  vOut[3] = _mm_unpackhi_epi32(lohi16, _mm_setzero_si128());
-  vOut[4] = _mm_unpacklo_epi32(hilo16, _mm_setzero_si128());
-  vOut[5] = _mm_unpackhi_epi32(hilo16, _mm_setzero_si128());
-  vOut[6] = _mm_unpacklo_epi32(hihi16, _mm_setzero_si128());
-  vOut[7] = _mm_unpackhi_epi32(hihi16, _mm_setzero_si128());
+  const __m128i lo8 = _mm_unpacklo_epi8(vIn, _mm_setzero_si128());
+  const __m128i hi8 = _mm_unpackhi_epi8(vIn, _mm_setzero_si128());
+  const __m128i lolo16 = _mm_unpacklo_epi16(lo8, _mm_setzero_si128());
+  const __m128i lohi16 = _mm_unpackhi_epi16(lo8, _mm_setzero_si128());
+  const __m128i hilo16 = _mm_unpacklo_epi16(hi8, _mm_setzero_si128());
+  const __m128i hihi16 = _mm_unpackhi_epi16(hi8, _mm_setzero_si128());
+  vOut[0] = _mm_unpacklo_epi32(lolo16, _mm_setzero_si128());
+  vOut[1] = _mm_unpackhi_epi32(lolo16, _mm_setzero_si128());
+  vOut[2] = _mm_unpacklo_epi32(lohi16, _mm_setzero_si128());
+  vOut[3] = _mm_unpackhi_epi32(lohi16, _mm_setzero_si128());
+  vOut[4] = _mm_unpacklo_epi32(hilo16, _mm_setzero_si128());
+  vOut[5] = _mm_unpackhi_epi32(hilo16, _mm_setzero_si128());
+  vOut[6] = _mm_unpacklo_epi32(hihi16, _mm_setzero_si128());
+  vOut[7] = _mm_unpackhi_epi32(hihi16, _mm_setzero_si128());
#endif
}

Expand All @@ -2481,20 +2481,20 @@ static SIMD_INLINE void extend(const Vec<Byte, 16> &vIn,
vOut[6] = _mm_cvtepi32_pd(_mm_cvtepu8_epi32(_mm_srli_si128(vIn, 12)));
vOut[7] = _mm_cvtepi32_pd(_mm_cvtepu8_epi32(_mm_srli_si128(vIn, 14)));
#else
-  const __m128i lo8 = _mm_unpacklo_epi8(vIn, _mm_setzero_si128());
-  const __m128i hi8 = _mm_unpackhi_epi8(vIn, _mm_setzero_si128());
-  const __m128i lolo16 = _mm_unpacklo_epi16(lo8, _mm_setzero_si128());
-  const __m128i lohi16 = _mm_unpackhi_epi16(lo8, _mm_setzero_si128());
-  const __m128i hilo16 = _mm_unpacklo_epi16(hi8, _mm_setzero_si128());
-  const __m128i hihi16 = _mm_unpackhi_epi16(hi8, _mm_setzero_si128());
-  vOut[0] = _mm_cvtepi32_pd(lolo16);
-  vOut[1] = _mm_cvtepi32_pd(_mm_srli_si128(lolo16, 8));
-  vOut[2] = _mm_cvtepi32_pd(lohi16);
-  vOut[3] = _mm_cvtepi32_pd(_mm_srli_si128(lohi16, 8));
-  vOut[4] = _mm_cvtepi32_pd(hilo16);
-  vOut[5] = _mm_cvtepi32_pd(_mm_srli_si128(hilo16, 8));
-  vOut[6] = _mm_cvtepi32_pd(hihi16);
-  vOut[7] = _mm_cvtepi32_pd(_mm_srli_si128(hihi16, 8));
+  const __m128i lo8 = _mm_unpacklo_epi8(vIn, _mm_setzero_si128());
+  const __m128i hi8 = _mm_unpackhi_epi8(vIn, _mm_setzero_si128());
+  const __m128i lolo16 = _mm_unpacklo_epi16(lo8, _mm_setzero_si128());
+  const __m128i lohi16 = _mm_unpackhi_epi16(lo8, _mm_setzero_si128());
+  const __m128i hilo16 = _mm_unpacklo_epi16(hi8, _mm_setzero_si128());
+  const __m128i hihi16 = _mm_unpackhi_epi16(hi8, _mm_setzero_si128());
+  vOut[0] = _mm_cvtepi32_pd(lolo16);
+  vOut[1] = _mm_cvtepi32_pd(_mm_srli_si128(lolo16, 8));
+  vOut[2] = _mm_cvtepi32_pd(lohi16);
+  vOut[3] = _mm_cvtepi32_pd(_mm_srli_si128(lohi16, 8));
+  vOut[4] = _mm_cvtepi32_pd(hilo16);
+  vOut[5] = _mm_cvtepi32_pd(_mm_srli_si128(hilo16, 8));
+  vOut[6] = _mm_cvtepi32_pd(hihi16);
+  vOut[7] = _mm_cvtepi32_pd(_mm_srli_si128(hihi16, 8));
#endif
}

@@ -3572,7 +3572,7 @@ static SIMD_INLINE Vec<Long, 16> cmplt(const Vec<Long, 16> &a,
// from Hacker's Delight, 2-12 Comparison Predicates:
const __m128i diff = _mm_sub_epi64(a, b);
#if 1 // TODO: check which is faster
-  const __m128i res = _mm_xor_si128(
+  const __m128i res = _mm_xor_si128(
diff, _mm_and_si128(_mm_xor_si128(a, b), _mm_xor_si128(diff, a)));
#else
const __m128i res = _mm_or_si128(_mm_andnot_si128(b, a),
@@ -3777,7 +3777,7 @@ static SIMD_INLINE Vec<Long, 16> cmpgt(const Vec<Long, 16> &a,
// from Hacker's Delight, 2-12 Comparison Predicates: (swapped lt)
const __m128i diff = _mm_sub_epi64(b, a);
#if 1 // TODO: check which is faster
-  const __m128i res = _mm_xor_si128(
+  const __m128i res = _mm_xor_si128(
diff, _mm_and_si128(_mm_xor_si128(b, a), _mm_xor_si128(diff, b)));
#else
const __m128i res = _mm_or_si128(_mm_andnot_si128(a, b),
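The `min`, `max`, `cmplt`, and `cmpgt` hunks in this file all rely on the same Hacker's Delight 2-12 identity: without a 64-bit signed compare instruction, the sign of `a - b` is computed and corrected for the overflow case, so that its top bit equals the comparison result. A scalar sketch of the identity, for illustration only (not part of the commit):

#include <cstdint>

// The top bit of  (a - b) ^ ((a ^ b) & ((a - b) ^ a))  equals (a < b) for
// signed 64-bit a and b, even when a - b overflows:
//  * equal signs: a - b cannot overflow, and (a ^ b) masks the correction
//    term away, so the result sign is simply the sign of a - b;
//  * different signs: the correction term replaces the (possibly wrong)
//    sign of a - b by the sign of a, which alone decides the comparison.
static inline bool signed_lt64(int64_t a, int64_t b)
{
  const uint64_t ua   = static_cast<uint64_t>(a);
  const uint64_t ub   = static_cast<uint64_t>(b);
  const uint64_t diff = ua - ub; // wraps instead of overflowing
  const uint64_t res  = diff ^ ((ua ^ ub) & (diff ^ ua));
  return (res >> 63) != 0;       // sign bit == (a < b)
}

In the vector code the same expression is built from `_mm_sub_epi64`, `_mm_xor_si128`, and `_mm_and_si128`, with the roles of `a` and `b` swapped in the `min`, `max`, and `cmpgt` variants (hence the "swapped lt" comments).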
2 changes: 1 addition & 1 deletion simdmasktest.C
@@ -823,7 +823,7 @@ void benchmark()
#ifdef BENCH_MASK_RAND2
result = maskz_load(krand2, buffer);
#endif
-      sum = add(sum, result);
+    sum = add(sum, result);
}
gettimeofday(&end, nullptr);
print("%i ", sum); // TODO change format according to type T
