Skip to content

Commit 6ac552a

Browse files
committed
added a bunch more benchmarks
1 parent f70fe0c commit 6ac552a

File tree

13 files changed

+533
-98
lines changed

13 files changed

+533
-98
lines changed

benchmark/Benchmark.hpp

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -121,33 +121,43 @@ MATHTER_NOINLINE void BenchmarkCase(std::string_view name, int64_t samples, int6
121121
}
122122

123123

124+
template <class Op, class Lhs, class... Args>
125+
MATHTER_FORCEINLINE decltype(auto) InvokeWithoutMonostate(Op&& op, Lhs&& lhs, Args&&... args) {
126+
if constexpr ((... && std::is_same_v<std::decay_t<Args>, std::monostate>)) {
127+
return op(std::forward<Lhs>(lhs));
128+
}
129+
else {
130+
return op(std::forward<Lhs>(lhs), std::forward<Args>(args)...);
131+
}
132+
}
133+
124134
} // namespace impl
125135

126136

127137
template <class Op, size_t Count, class Lhs, class... Args>
128-
MATHTER_FORCEINLINE static auto DependentLoop(Op op,
138+
MATHTER_FORCEINLINE static auto DependentLoop(Op&& op,
129139
const Lhs& lhs,
130140
const std::array<Args, Count>&... args) {
131-
auto result = op(lhs, args[0]...);
141+
auto result = ::impl::InvokeWithoutMonostate(op, lhs, args[0]...);
132142
for (size_t i = 1; i < Count; ++i) {
133-
result = op(result, args[i]...);
143+
result = ::impl::InvokeWithoutMonostate(op, result, args[i]...);
134144
}
135145
return result;
136146
}
137147

138148

139149
template <class Op, size_t Lanes, size_t Count, class Lhs, class... Args>
140-
MATHTER_FORCEINLINE static auto IndependentLoop(Op op,
150+
MATHTER_FORCEINLINE static auto IndependentLoop(Op&& op,
141151
const std::array<Lhs, Lanes>& lhs,
142152
const std::array<Args, Count>&... args) {
143-
using Result = std::invoke_result_t<Op, Lhs, Args...>;
153+
using Result = decltype(::impl::InvokeWithoutMonostate(op, lhs[0], args[0]...));
144154
std::array<Result, Lanes> result;
145155
for (size_t lane = 0; lane < Lanes; ++lane) {
146-
result[lane] = op(lhs[lane], args[lane]...);
156+
result[lane] = ::impl::InvokeWithoutMonostate(op, lhs[lane], args[lane]...);
147157
}
148158
for (size_t i = Lanes; i < Count; i += Lanes) {
149159
for (size_t lane = 0; lane < Lanes; ++lane) {
150-
result[lane] = op(result[lane], args[i + lane]...);
160+
result[lane] = ::impl::InvokeWithoutMonostate(op, result[lane], args[i + lane]...);
151161
}
152162
}
153163
return result;

benchmark/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,11 @@ target_sources(Benchmark
1010
target_sources(Benchmark
1111
PRIVATE
1212
"Matrix/BenchmarkArithmetic.cpp"
13+
"Matrix/BenchmarkMath.cpp"
1314
"Scalar/BenchmarkArithmetic.cpp"
1415
"Scalar/BenchmarkMath.cpp"
1516
"Vector/BenchmarkArithmetic.cpp"
17+
"Vector/BenchmarkMath.cpp"
1618
)
1719

1820
target_sources(Benchmark

benchmark/Fixtures.hpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,22 +4,22 @@
44

55

66
template <class Op>
7-
struct GenericBinaryFixture {
8-
template <class Lhs, class Rhs, size_t Count>
9-
MATHTER_FORCEINLINE auto Latency(const Lhs& lhs, const std::array<Rhs, Count>& rhs) const {
10-
return std::tuple(DependentLoop(op, lhs, rhs), Count);
7+
struct GenericNAryFixture {
8+
template <class Lhs, class... Args, size_t Count>
9+
MATHTER_FORCEINLINE auto Latency(const Lhs& lhs, const std::array<Args, Count>&... args) const {
10+
return std::tuple(DependentLoop(op, lhs, args...), Count);
1111
}
1212

13-
template <class Lhs, class Rhs, size_t Lanes, size_t Count>
14-
MATHTER_FORCEINLINE auto Throughput(const std::array<Lhs, Lanes>& lhs, const std::array<Rhs, Count>& rhs) const {
15-
return std::tuple(IndependentLoop(op, lhs, rhs), Count);
13+
template <class Lhs, class... Args, size_t Lanes, size_t Count>
14+
MATHTER_FORCEINLINE auto Throughput(const std::array<Lhs, Lanes>& lhs, const std::array<Args, Count>&... args) const {
15+
return std::tuple(IndependentLoop(op, lhs, args...), Count);
1616
}
1717

1818
Op op;
1919
};
2020

2121
template <class Op>
22-
GenericBinaryFixture(const Op&) -> GenericBinaryFixture<Op>;
22+
GenericNAryFixture(const Op&) -> GenericNAryFixture<Op>;
2323

2424

2525
template <class Op>

benchmark/Input.hpp

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
#pragma once
2+
3+
#include <Mathter/Common/OptimizationUtil.hpp>
4+
#include <Mathter/Common/TypeTraits.hpp>
5+
#include <Mathter/Transforms.hpp>
6+
7+
#include <array>
8+
#include <complex>
9+
#include <random>
10+
11+
12+
template <class Obj, int Count>
13+
std::array<Obj, Count> MakeRandomInput(
14+
mathter::remove_complex_t<mathter::scalar_type_t<Obj>> min = 0,
15+
mathter::remove_complex_t<mathter::scalar_type_t<Obj>> max = 1) {
16+
using Scalar = mathter::scalar_type_t<Obj>;
17+
using Real = mathter::remove_complex_t<Scalar>;
18+
using namespace std::complex_literals;
19+
20+
std::mt19937_64 rne;
21+
std::uniform_real_distribution<Real> rng(Real(-0.5), Real(0.5));
22+
23+
std::array<Obj, Count> r;
24+
for (auto& v : r) {
25+
if constexpr (mathter::is_scalar_v<Obj>) {
26+
if constexpr (mathter::is_complex_v<Scalar>) {
27+
const auto real = rng(rne);
28+
const auto imag = rng(rne);
29+
v = real + Scalar(1if) * imag;
30+
}
31+
else {
32+
v = rng(rne);
33+
}
34+
}
35+
else {
36+
if constexpr (mathter::is_complex_v<Scalar>) {
37+
const Obj real = mathter::Random(rng, rne);
38+
const Obj imag = mathter::Random(rng, rne);
39+
v = real + Scalar(1if) * imag;
40+
}
41+
else {
42+
v = mathter::Random(rng, rne);
43+
}
44+
}
45+
}
46+
return r;
47+
};
48+
49+
50+
/// Builds an array in which every one of the `Count` elements is a copy of `value`.
///
/// @tparam Scalar Element type; must be default-constructible and copy-assignable.
/// @tparam Count  Number of elements in the returned array.
/// @param value The value to replicate.
/// @return An array whose elements all compare equal to `value`.
template <class Scalar, int Count>
std::array<Scalar, Count> MakeConstantInput(Scalar value) {
    std::array<Scalar, Count> r;
    r.fill(value);
    return r;
}

benchmark/Matrix/BenchmarkArithmetic.cpp

Lines changed: 73 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "../Benchmark.hpp"
22
#include "../Fixtures.hpp"
3+
#include "../Input.hpp"
34

45
#include <Mathter/Matrix.hpp>
56
#include <Mathter/Transforms/RandomBuilder.hpp>
@@ -11,43 +12,46 @@ using namespace mathter;
1112

1213
namespace {
1314

14-
template <class Mat, int Count>
15-
std::array<Mat, Count> MakeInput() {
16-
using Scalar = scalar_type_t<Mat>;
17-
using Real = remove_complex_t<Scalar>;
18-
using namespace std::complex_literals;
19-
20-
std::mt19937_64 rne;
21-
std::uniform_real_distribution<Real> rng(Real(-0.5), Real(0.5));
22-
23-
std::array<Mat, Count> r;
24-
for (auto& v : r) {
25-
if constexpr (is_complex_v<Scalar>) {
26-
const Mat real = Random(rng, rne);
27-
const Mat imag = Random(rng, rne);
28-
v = real + Scalar(1if) * imag;
15+
static constexpr auto benchmarkCaseLayout_r = eMatrixLayout::ROW_MAJOR;
16+
static constexpr auto benchmarkCaseLayout_c = eMatrixLayout::COLUMN_MAJOR;
17+
18+
static constexpr auto benchmarkCaseOrder_f = eMatrixOrder::FOLLOW_VECTOR;
19+
static constexpr auto benchmarkCaseOrder_p = eMatrixOrder::PRECEDE_VECTOR;
20+
21+
22+
struct MulVecOp {
23+
template <class Vec, class Mat>
24+
auto operator()(const Vec& v, const Mat& m) const {
25+
if constexpr (order_v<Mat> == eMatrixOrder::FOLLOW_VECTOR) {
26+
return v * m;
2927
}
3028
else {
31-
v = Random(rng, rne);
29+
return m * v;
3230
}
3331
}
34-
return r;
3532
};
3633

3734

38-
static constexpr auto benchmarkCaseLayout_r = eMatrixLayout::ROW_MAJOR;
39-
static constexpr auto benchmarkCaseLayout_c = eMatrixLayout::COLUMN_MAJOR;
35+
#define MATRIX_BINOP_BENCHMARK_CASE(TYPE, ROWS, MATCH, COLS, LAYOUT_L, LAYOUT_R, PACKED, OP, OPTEXT) \
36+
BENCHMARK_CASE(#TYPE "." #ROWS #MATCH #LAYOUT_L " " OPTEXT " " #TYPE "." #MATCH #COLS #LAYOUT_R " (P=" #PACKED ")", \
37+
"[Matrix][Arithmetic]", \
38+
50, \
39+
64, \
40+
GenericNAryFixture{ OP{} }, \
41+
MakeRandomInput<Matrix<TYPE, ROWS, MATCH, eMatrixOrder::FOLLOW_VECTOR, benchmarkCaseLayout_##LAYOUT_L, PACKED>, 1>()[0], \
42+
MakeRandomInput<Matrix<TYPE, ROWS, MATCH, eMatrixOrder::FOLLOW_VECTOR, benchmarkCaseLayout_##LAYOUT_L, PACKED>, 4>(), \
43+
MakeRandomInput<Matrix<TYPE, MATCH, COLS, eMatrixOrder::FOLLOW_VECTOR, benchmarkCaseLayout_##LAYOUT_R, PACKED>, 64>());
4044

4145

42-
#define MATRIX_BINOP_BENCHMARK_CASE(TYPE, ROWS, MATCH, COLS, LAYOUT_L, LAYOUT_R, PACKED, OP, OPTEXT) \
43-
BENCHMARK_CASE(#TYPE "." #ROWS #MATCH #LAYOUT_L " " OPTEXT " " #TYPE "." #MATCH #COLS #LAYOUT_R " (P=" #PACKED ")", \
44-
"[Matrix][Arithmetic]", \
45-
50, \
46-
64, \
47-
GenericBinaryFixture{ OP{} }, \
48-
MakeInput<Matrix<TYPE, ROWS, MATCH, eMatrixOrder::FOLLOW_VECTOR, benchmarkCaseLayout_##LAYOUT_L, PACKED>, 1>()[0], \
49-
MakeInput<Matrix<TYPE, ROWS, MATCH, eMatrixOrder::FOLLOW_VECTOR, benchmarkCaseLayout_##LAYOUT_L, PACKED>, 4>(), \
50-
MakeInput<Matrix<TYPE, MATCH, COLS, eMatrixOrder::FOLLOW_VECTOR, benchmarkCaseLayout_##LAYOUT_R, PACKED>, 64>());
46+
#define MATRIX_BINOP_VEC_CASE(TYPE, ROWS, COLS, ORDER, LAYOUT, PACKED, DIM, OP, OPTEXT) \
47+
BENCHMARK_CASE(#TYPE "." #ROWS #COLS #ORDER #LAYOUT " " OPTEXT " " #TYPE "." #DIM " (P=" #PACKED ")", \
48+
"[Matrix][Arithmetic]", \
49+
50, \
50+
64, \
51+
GenericNAryFixture{ OP{} }, \
52+
MakeRandomInput<Vector<TYPE, DIM, PACKED>, 1>()[0], \
53+
MakeRandomInput<Vector<TYPE, DIM, PACKED>, 4>(), \
54+
MakeRandomInput<Matrix<TYPE, ROWS, COLS, benchmarkCaseOrder_##ORDER, benchmarkCaseLayout_##LAYOUT, PACKED>, 64>());
5155

5256

5357
MATRIX_BINOP_BENCHMARK_CASE(float, 2, 2, 2, r, r, false, std::multiplies<>, "*");
@@ -107,4 +111,45 @@ MATRIX_BINOP_BENCHMARK_CASE(double, 3, 3, 3, c, c, true, std::divides<>, "/");
107111
MATRIX_BINOP_BENCHMARK_CASE(double, 4, 4, 4, c, c, true, std::divides<>, "/");
108112

109113

114+
MATRIX_BINOP_VEC_CASE(float, 2, 2, f, r, false, 2, MulVecOp, "*");
115+
MATRIX_BINOP_VEC_CASE(float, 3, 3, f, r, false, 3, MulVecOp, "*");
116+
MATRIX_BINOP_VEC_CASE(float, 4, 4, f, r, false, 4, MulVecOp, "*");
117+
MATRIX_BINOP_VEC_CASE(float, 4, 4, f, r, false, 3, MulVecOp, "*");
118+
119+
MATRIX_BINOP_VEC_CASE(float, 2, 2, f, c, false, 2, MulVecOp, "*");
120+
MATRIX_BINOP_VEC_CASE(float, 3, 3, f, c, false, 3, MulVecOp, "*");
121+
MATRIX_BINOP_VEC_CASE(float, 4, 4, f, c, false, 4, MulVecOp, "*");
122+
MATRIX_BINOP_VEC_CASE(float, 4, 4, f, c, false, 3, MulVecOp, "*");
123+
124+
MATRIX_BINOP_VEC_CASE(float, 2, 2, p, r, false, 2, MulVecOp, "*");
125+
MATRIX_BINOP_VEC_CASE(float, 3, 3, p, r, false, 3, MulVecOp, "*");
126+
MATRIX_BINOP_VEC_CASE(float, 4, 4, p, r, false, 4, MulVecOp, "*");
127+
MATRIX_BINOP_VEC_CASE(float, 4, 4, p, r, false, 3, MulVecOp, "*");
128+
129+
MATRIX_BINOP_VEC_CASE(float, 2, 2, p, c, false, 2, MulVecOp, "*");
130+
MATRIX_BINOP_VEC_CASE(float, 3, 3, p, c, false, 3, MulVecOp, "*");
131+
MATRIX_BINOP_VEC_CASE(float, 4, 4, p, c, false, 4, MulVecOp, "*");
132+
MATRIX_BINOP_VEC_CASE(float, 4, 4, p, c, false, 3, MulVecOp, "*");
133+
134+
MATRIX_BINOP_VEC_CASE(double, 2, 2, f, r, false, 2, MulVecOp, "*");
135+
MATRIX_BINOP_VEC_CASE(double, 3, 3, f, r, false, 3, MulVecOp, "*");
136+
MATRIX_BINOP_VEC_CASE(double, 4, 4, f, r, false, 4, MulVecOp, "*");
137+
MATRIX_BINOP_VEC_CASE(double, 4, 4, f, r, false, 3, MulVecOp, "*");
138+
139+
MATRIX_BINOP_VEC_CASE(double, 2, 2, f, c, false, 2, MulVecOp, "*");
140+
MATRIX_BINOP_VEC_CASE(double, 3, 3, f, c, false, 3, MulVecOp, "*");
141+
MATRIX_BINOP_VEC_CASE(double, 4, 4, f, c, false, 4, MulVecOp, "*");
142+
MATRIX_BINOP_VEC_CASE(double, 4, 4, f, c, false, 3, MulVecOp, "*");
143+
144+
MATRIX_BINOP_VEC_CASE(double, 2, 2, p, r, false, 2, MulVecOp, "*");
145+
MATRIX_BINOP_VEC_CASE(double, 3, 3, p, r, false, 3, MulVecOp, "*");
146+
MATRIX_BINOP_VEC_CASE(double, 4, 4, p, r, false, 4, MulVecOp, "*");
147+
MATRIX_BINOP_VEC_CASE(double, 4, 4, p, r, false, 3, MulVecOp, "*");
148+
149+
MATRIX_BINOP_VEC_CASE(double, 2, 2, p, c, false, 2, MulVecOp, "*");
150+
MATRIX_BINOP_VEC_CASE(double, 3, 3, p, c, false, 3, MulVecOp, "*");
151+
MATRIX_BINOP_VEC_CASE(double, 4, 4, p, c, false, 4, MulVecOp, "*");
152+
MATRIX_BINOP_VEC_CASE(double, 4, 4, p, c, false, 3, MulVecOp, "*");
153+
154+
110155
} // namespace

0 commit comments

Comments
 (0)