run_utils.h
#pragma once

#include <c10/cuda/CUDAFunctions.h>
#include <c10/util/irange.h>

#include <chrono>
#include <fstream>
#include <functional>
#include <iomanip>
#include <iterator>

#include "passes/profile_ops.h"
#include "util/common.h"
#include "util/metrics.h"

namespace torch {
namespace jit {

using namespace std::chrono;
using namespace std::chrono_literals;
// Load a pickled object from `path` and convert it to type `T`.
template <class T>
inline T loadPickle(const std::string &path) {
  std::ifstream ifs(path, std::ios::binary);
  TORCH_CHECK(ifs, "Cannot open file ", path);
  std::vector<char> buf((std::istreambuf_iterator<char>(ifs)),
                        std::istreambuf_iterator<char>());
  return torch::pickle_load(buf).to<T>();
}
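//
// Example usage (illustrative sketch; the file name and element type below
// are assumptions, not fixed by this header):
//   auto dataset = loadPickle<c10::List<IValue>>("dataset.pkl");
//   size_t numSamples = dataset.size();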
// Recursively copy an IValue, moving every tensor it contains to CUDA.
// Lists and tuples are rebuilt element by element; other values pass through.
inline IValue processIValue(const IValue &val) {
  if (val.isList()) {
    auto list = val.toListRef();
    // The element type is taken from the first element, so empty lists are
    // not supported here.
    c10::impl::GenericList newList(list.front().type());
    for (auto &elem : list) newList.push_back(processIValue(elem));
    return std::move(newList);
  } else if (val.isTuple()) {
    auto &tuple = val.toTupleRef().elements();
    std::vector<IValue> newValues;
    for (auto &elem : tuple) newValues.push_back(processIValue(elem));
    return c10::ivalue::Tuple::create(std::move(newValues));
  } else if (val.isTensor()) {
    return val.toTensor().cuda();
  } else {
    return val;
  }
}
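//
// For example (illustrative), a sample shaped like (cpu_tensor, [cpu_tensor,
// 3]) comes back as (cuda_tensor, [cuda_tensor, 3]): tensors are copied to
// the GPU while scalars and other leaf values are returned unchanged.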
// Build an input Stack from sample `index` of the dataset. Each sample is a
// tuple of values; every tensor in it is moved to CUDA via processIValue.
inline Stack getFeatureSample(const c10::List<IValue> &dataset, size_t index) {
  auto tup = dataset.get(index).toTupleRef().elements();
  Stack inputs;
  // Reserve the first slot with an empty IValue (likely the module `self`
  // argument expected by the graph).
  inputs.push_back({});
  for (auto &val : tup) inputs.push_back(processIValue(val));
  return inputs;
}
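//
// Example usage (illustrative sketch; `graph` and the executor setup are
// assumptions about the caller, not part of this header):
//   Stack stack = getFeatureSample(dataset, i);
//   GraphExecutor executor(graph, "forward");
//   executor.run(stack);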
// Aggregate timing result: total wall-clock time and the number of runs.
struct EvalResult {
  nanoseconds total;
  size_t count = 0;
  nanoseconds mean() const { return total / int64_t(count); }
};

// Number of warm-up iterations before measurement starts.
static constexpr auto kWarmupRuns = 16;
// Wall-clock budget for the measured run loop.
static constexpr auto kRunDuration = 10s;
inline void evalMetrics(const std::function<void(size_t)> &task,
                        size_t numSamples) {
  // Initialize
  initializeMetrics();
  // Warm up
  for (auto i : c10::irange(kWarmupRuns)) task(i);
  at::cuda::device_synchronize();
  // Run and replay until the profiler has collected every pass it needs
  do {
    beginProfilerPass();
    for (auto i : c10::irange(numSamples)) {
      task(i);
      at::cuda::device_synchronize();
    }
    endProfilerPass();
  } while (!allPassesSubmitted());
  // Print final results
  finalizeMetrics();
}
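//
// Example usage (illustrative sketch; `executor` and `dataset` are assumed to
// be set up by the caller as in the snippets above):
//   evalMetrics(
//       [&](size_t i) {
//         Stack stack = getFeatureSample(dataset, i % dataset.size());
//         executor.run(stack);
//       },
//       dataset.size());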
inline EvalResult evaluate(const std::function<void(size_t)> &task) {
  // Warm up
  for (auto i : c10::irange(kWarmupRuns)) task(i);
  at::cuda::device_synchronize();
  // Run repeatedly until kRunDuration has elapsed
  enableProfiling();
  size_t count = 0;
  auto begin = system_clock::now();
  while (system_clock::now() - begin < kRunDuration) {
    task(count++);
    at::cuda::device_synchronize();
  }
  disableProfiling();
  return {system_clock::now() - begin, count};
}
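//
// Example usage (illustrative sketch; `run` stands for whatever workload the
// caller wants to time):
//   auto result = evaluate([&](size_t i) { run(i); });
//   std::cout << "mean latency: "
//             << duration_cast<microseconds>(result.mean()).count() << "us\n";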
} // namespace jit
} // namespace torch