-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathbenchmark.cpp
133 lines (110 loc) · 3.07 KB
/
benchmark.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
///
/// @file benchmark.cpp
/// @brief Simple benchmark program for libpopcnt.h, repeatedly
/// counts the 1 bits inside a vector.
///
/// Usage: ./benchmark [array bytes] [iters]
///
/// Copyright (C) 2017 Kim Walisch, <kim.walisch@gmail.com>
///
/// This file is distributed under the BSD License. See the LICENSE
/// file in the top level directory.
///
#include <libpopcnt.h>
#include <iostream>
#include <iomanip>
#include <vector>
#include <cstdlib>
#include <ctime>
#include <stdint.h>
#include <string>
// Returns the current value of std::clock() converted to seconds.
// Callers take the difference of two readings to time a code section.
double get_seconds()
{
  const std::clock_t ticks = std::clock();
  return static_cast<double>(ticks) / CLOCKS_PER_SEC;
}
// Fill the vector with pseudo-random data.
// The generator is seeded with the current time, then every element
// is assigned the low 8 bits of a std::rand() result.
template <typename T>
void init(std::vector<T>& v)
{
  std::srand(static_cast<unsigned>(std::time(0)));

  typedef typename std::vector<T>::iterator Iter;
  for (Iter it = v.begin(); it != v.end(); ++it)
    *it = static_cast<uint8_t>(std::rand());
}
// Count the 1 bits inside the vector iters times using popcnt()
// and return the sum of all results. A status line is written to
// std::cout whenever the integer percentage of completed
// iterations increases.
//
// The vector is taken by const reference so its data is not copied
// inside the caller's timed region. The size handed to popcnt() is
// v.size() * sizeof(T), i.e. a size in bytes, so that element types
// wider than one byte are counted completely.
template <typename T>
uint64_t benchmark(const std::vector<T>& v, int iters)
{
  // &v[0] is undefined for an empty vector and there is nothing
  // to count anyway.
  if (v.empty())
    return 0;

  const T* data = &v[0];
  const uint64_t bytes = (uint64_t) v.size() * sizeof(T);

  uint64_t total = 0;
  int old = -1;

  for (int i = 0; i < iters; i++)
  {
    int percent = (int)(100.0 * i / iters);

    // only print when the percentage changes to keep
    // console output out of the hot loop
    if (percent > old)
    {
      std::cout << "\rStatus: " << percent << "%" << std::flush;
      old = percent;
    }

    total += popcnt(data, bytes);
  }

  return total;
}
// Check that the benchmark result is consistent: total must equal
// cnt (the 1 bits counted in a single pass) multiplied by the number
// of iterations. On mismatch an error is printed to std::cerr and
// the program exits with status 1.
//
// The check multiplies instead of computing total / iters, hence
// iters == 0 cannot trigger a division by zero and a wrong total is
// not hidden by the discarded remainder of an integer division.
void verify(uint64_t cnt, uint64_t total, int iters)
{
  // For iters <= 0 the expected total is 0.
  uint64_t runs = (iters > 0) ? (uint64_t) iters : 0;

  if (total != cnt * runs)
  {
    std::cerr << "libpopcnt verification failed!" << std::endl;
    std::exit(1);
  }
}
// Program entry point.
// Usage: ./benchmark [array bytes] [iters]
//
// Fills an array with random bytes, counts its 1 bits iters times
// using popcnt(), prints the elapsed time and the throughput in
// GB/s and finally checks the result against a per-element count
// computed with popcount64(). Returns 1 if an argument is not a
// positive integer.
int main(int argc, char* argv[])
{
  int bytes = (1 << 10) * 16;
  int iters = 10000000;

  if (argc > 1)
    bytes = std::atoi(argv[1]);
  if (argc > 2)
    iters = std::atoi(argv[2]);

  // std::atoi() returns 0 for non-numeric input. A negative size
  // would be converted to a huge unsigned value by the vector
  // constructor, and a non-positive iteration count leaves nothing
  // to measure or verify.
  if (bytes <= 0 || iters <= 0)
  {
    std::cerr << "Usage: " << argv[0] << " [array bytes] [iters]" << std::endl
              << "Both arguments must be positive integers." << std::endl;
    return 1;
  }

  uint64_t cnt = 0;
  std::vector<uint8_t> v(bytes);
  std::string algo;
  init(v);

  std::cout << "Iters: " << iters << std::endl;

  if (bytes < 1024)
    std::cout << "Array size: " << bytes << " bytes" << std::endl;
  else if (bytes < 1024 * 1024)
    std::cout << "Array size: " << std::fixed << std::setprecision(2) << bytes / 1024.0 << " KB" << std::endl;
  else
    std::cout << "Array size: " << std::fixed << std::setprecision(2) << bytes / (1024.0 * 1024.0) << " MB" << std::endl;

  // Pick the algorithm name to report, based on the build target,
  // the CPUID feature bits and the array size.
#if defined(X86_OR_X64)
#if defined(HAVE_CPUID)
  int cpuid = get_cpuid();

  if ((cpuid & bit_AVX512) && bytes >= 1024)
    algo = "AVX512";
  else if ((cpuid & bit_AVX2) && bytes >= 512)
    algo = "AVX2";
  else if (cpuid & bit_POPCNT)
    algo = "POPCNT";
#endif
#elif defined(__ARM_NEON) || \
      defined(__aarch64__)
  algo = "NEON";
#elif defined(__PPC64__)
  algo = "POPCNTD";
#endif

  if (algo.empty())
    algo = "integer popcount";

  std::cout << "Algorithm: " << algo << std::endl;

  // reference count, computed one element at a time
  for (uint64_t i = 0; i < v.size(); i++)
    cnt += popcount64(v[i]);

  double seconds = get_seconds();
  uint64_t total = benchmark(v, iters);
  seconds = get_seconds() - seconds;

  std::cout << "\rStatus: 100%" << std::endl;
  std::cout << "Seconds: " << std::fixed << std::setprecision(2) << seconds << std::endl;

  double total_bytes = (double) bytes * (double) iters;
  double GB = total_bytes / 1e9;
  double GBs = GB / seconds;

  std::cout << std::fixed << std::setprecision(1) << GBs << " GB/s" << std::endl;

  verify(cnt, total, iters);

  return 0;
}