forked from uTensor/uTensor
-
Notifications
You must be signed in to change notification settings - Fork 0
/
MathOps.hpp
200 lines (173 loc) · 6.2 KB
/
MathOps.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
#ifndef UTENSOR_MATH_OPS
#define UTENSOR_MATH_OPS
#include <climits>
#include "quantization_utils.hpp"
#include "tensor.hpp"
void CalculateUsedRange(Tensor<int>& input, int32_t* used_min_quan,
int32_t* used_max_quan) {
int32_t minimum = INT_MAX;
int32_t maxmum = INT_MIN;
uint32_t size = input.getSize();
int* in_ptr = input.getPointer({});
for (uint32_t i = 0; i < size; i++) {
if (minimum > in_ptr[i]) minimum = static_cast<int32_t>(in_ptr[i]);
if (maxmum < in_ptr[i]) maxmum = static_cast<int32_t>(in_ptr[i]);
}
*used_min_quan = minimum;
*used_max_quan = maxmum;
}
template <class T1, class T2>
void Requantization_Range(Tensor<T1> input, Tensor<T2> min, Tensor<T2> max,
Tensor<T2> out_min, Tensor<T2> out_max) {
const float input_min = *(min.getPointer({}));
const float input_max = *(max.getPointer({}));
int32_t used_min_quan;
int32_t used_max_quan;
CalculateUsedRange(input, &used_min_quan, &used_max_quan);
const float used_min =
std::min(0.0f, QuantizedToFloat(used_min_quan, input_min, input_max));
const float used_max = QuantizedToFloat(used_max_quan, input_min, input_max);
float* c_min = out_min.getPointer({});
*c_min = used_min;
float* c_max = out_max.getPointer({});
*c_max = used_max;
}
template <class T1, class T2, class Toutput>
void Requantize(Tensor<T1> input, Tensor<T2> in_min, Tensor<T2> in_max,
Tensor<T2> r_min, Tensor<T2> r_max, Tensor<Toutput> output,
Tensor<T2> out_min, Tensor<T2> out_max) {
const float input_min = in_min.getPointer({})[0];
const float input_max = in_max.getPointer({})[0];
const float r_output_min = r_min.getPointer({})[0];
const float r_output_max = r_max.getPointer({})[0];
T1 *input_ptr = input.getPointer({});
Toutput *out_ptr = output.getPointer({});
// RequantizeManyInNewRange<T1, Toutput>(input, input.getSize(), input_min,
// input_max, r_output_min, r_output_max,
// output);
RequantizeManyInNewRangeReference(input_ptr, input.getSize(),input_min,
input_max, r_output_min, r_output_max, out_ptr);
float* v_out_min = out_min.getPointer({});
*v_out_min = r_output_min;
float* v_out_max = out_max.getPointer({});
*v_out_max = r_output_max;
}
template <class TIn, class TOut>
void Add(Tensor<TIn> input, Tensor<TIn> input2, Tensor<TOut> &out) {
const TIn* p_in = input.getPointer({});
const TIn* p_in2 = input2.getPointer({});
//auto shape
tensorChkAlloc(out, input.getShape());
TOut* p_out = out.getPointer({});
const uint32_t size = out.getSize();
for (uint32_t i = 0; i < size; i++) {
p_out[i] = p_in[i] + p_in2[i];
}
}
template <class TIn, class Td, class TOut>
void Min(Tensor<TIn> input, Tensor<Td> dim, Tensor<TOut> out) {
const TIn* p_in = input.getPointer({});
const Td* p_in2 = dim.getPointer({});
TOut* p_out = out.getPointer({});
Td n_dim = p_in2[0];
vector<uint8_t> permute;
for (uint32_t i_dim = 0; i_dim < input.getShape().size(); i_dim++) {
permute.push_back(i_dim);
}
permute.push_back(n_dim);
permute.erase(permute.begin() + n_dim);
Shape outShape = input.getShape();
size_t reduce_size = outShape[n_dim];
outShape.erase(outShape.begin() + n_dim);
outShape.push_back(reduce_size);
size_t out_index = 0;
permuteIndexTransform trans(outShape, permute);
for (uint32_t j = 0; j < input.getSize(); j += reduce_size) {
TIn min_val = std::numeric_limits<TIn>::max();
for (size_t k = 0; k < reduce_size; k++) {
TIn val = p_in[trans[j + k]];
if (val < min_val) {
min_val = val;
}
}
p_out[out_index] = min_val;
out_index++;
}
}
template <class TIn, class Td, class TOut>
void Max(Tensor<TIn> input, Tensor<Td> dim, Tensor<TOut> out) {
const TIn* p_in = input.getPointer({});
const Td* p_in2 = dim.getPointer({});
TOut* p_out = out.getPointer({});
Td n_dim = p_in2[0];
vector<uint8_t> permute;
for (uint32_t i_dim = 0; i_dim < input.getShape().size(); i_dim++) {
permute.push_back(i_dim);
}
permute.push_back(n_dim);
permute.erase(permute.begin() + n_dim);
Shape outShape = input.getShape();
size_t reduce_size = outShape[n_dim];
outShape.erase(outShape.begin() + n_dim);
outShape.push_back(reduce_size);
size_t out_index = 0;
permuteIndexTransform trans(outShape, permute);
for (uint32_t j = 0; j < input.getSize(); j += reduce_size) {
TIn max_val = std::numeric_limits<TIn>::lowest();
for (size_t k = 0; k < reduce_size; k++) {
TIn val = p_in[trans[j + k]];
if (val > max_val) {
max_val = val;
}
}
p_out[out_index] = max_val;
out_index++;
}
}
template <class TIn, class TOut>
void ArgMax(Tensor<TIn> input, Tensor<int> dim, Tensor<TOut>& out) {
int dim_reduce = *(dim.getPointer({0}));
Shape outShape = input.getShape();
uint32_t reduce_dim_size = outShape[dim_reduce];
outShape.erase(outShape.begin() + dim_reduce);
// construct the permute vector
vector<uint8_t> permute;
for (uint8_t i = 0; i < input.getShape().size(); i++) {
permute.push_back(i);
}
permute.push_back(static_cast<uint8_t>(dim_reduce));
permute.erase(permute.begin() + dim_reduce);
// check dimensionality
if (out.getSize() != 0 && out.getShape() != outShape) {
ERR_EXIT("output shape mismatch");
}
// allocate output tensor if empty
if (out.getSize() == 0) {
out = Tensor<TOut>(outShape);
}
// construct the origin-shape for permuteIndexTransform
Shape vOutShape = outShape;
vOutShape.push_back(reduce_dim_size);
/// NT: easy way to remember...
// trans(originShape, permute)
// targetIndex = trans[OriginIndex]
// In this case, we are going backward.
permuteIndexTransform trans(vOutShape, permute);
TIn* inPtr = input.getPointer({});
TOut* outPtr = out.getPointer({});
size_t out_index = 0;
for (uint32_t i = 0; i < input.getSize(); i += reduce_dim_size) {
TOut max_j = 0;
TIn last_max = std::numeric_limits<TIn>::min();
for (uint32_t j = 0; j < reduce_dim_size; j++) {
TIn val = inPtr[trans[i + j]];
if (val > last_max) {
last_max = val;
max_j = j;
}
}
outPtr[out_index] = max_j;
out_index++;
}
}
#endif // UTENSOR_MATH_OPS