
Commit 16764fc

[Quantizer] PerTensorAffineQuantizer operations
This PR adds initial PerTensorAffineQuantizer operation implementations. This change allows users to quantize and dequantize tensors. Note that the current implementation is naive and has limited features; the optimized version will be introduced in a later PR.

**Self-evaluation:**
1. Build test: [X] Passed [ ] Failed [ ] Skipped
2. Run test: [X] Passed [ ] Failed [ ] Skipped

Signed-off-by: Donghyeon Jeong <dhyeon.jeong@samsung.com>
1 parent: b2b97c2
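In practice the new API is exercised the way the unit tests in this commit do: create a quantizer for a scheme via nntrainer::Quantization::createQuantizer, quantize an FP32 tensor to an integer type, and dequantize it back. A condensed sketch (the tensor shape, values, and function name are illustrative, not from the commit):

// Condensed usage sketch, mirroring the unit tests added in this commit.
// The tensor shape and values are illustrative only.
#include <memory>
#include <quantizer.h>
#include <tensor.h>

void quantize_roundtrip_example() {
  nntrainer::Tensor input(1, 1, 4, 4); // FP32 by default
  input.setRandNormal(0.0f, 0.2f);

  std::unique_ptr<nntrainer::Quantizer> quantizer =
    nntrainer::Quantization::createQuantizer(
      nntrainer::QScheme::PER_TENSOR_AFFINE);

  // FP32 -> QINT8, then back to FP32
  nntrainer::Tensor q = quantizer->quantize(input, nntrainer::Tdatatype::QINT8);
  nntrainer::Tensor deq = quantizer->dequantize(q, nntrainer::Tdatatype::FP32);
}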

File tree

4 files changed: +240 −6 lines changed


nntrainer/tensor/quantizer.cpp

Lines changed: 81 additions & 4 deletions
@@ -8,10 +8,24 @@
  * @bug No known bugs except for NYI items
  */

+#include <math.h>
 #include <quantizer.h>

 namespace nntrainer {

+/**
+ * @brief Helper function for clipping
+ *
+ * @tparam T data type
+ * @param val value to clip
+ * @param lower lower bound
+ * @param upper upper bound
+ * @return T clipped data
+ */
+template <typename T> T clip(const T &val, const T &lower, const T &upper) {
+  return std::max(lower, std::min(val, upper));
+}
+
 /**
  * @brief PerTensorAffineQuantizer class
  */
@@ -21,20 +35,83 @@ std::unique_ptr<Quantizer> PerTensorAffineQuantizer::create() {

 Tensor PerTensorAffineQuantizer::quantize(const Tensor &input,
                                           Tdatatype qtype) {
-  /// @todo NYI
-  return input;
+  // Currently only full precision floating point is supported
+  NNTR_THROW_IF(input.getDataType() != Tdatatype::FP32, std::invalid_argument)
+    << "[Quantizer::quantize] Tensor data type is not floating point";
+
+  NNTR_THROW_IF(qtype == Tdatatype::FP32, std::invalid_argument)
+    << "[Quantizer::quantize] Cannot quantize to full precision floating point";
+
+  // 1. Calculate quantization parameters
+  calculateQParams(input, qtype);
+
+  // 2. Create output tensor with same dimension but different data type
+  TensorDim dim = input.getDim();
+  dim.setDataType(qtype);
+  Tensor output(dim);
+
+  /// @todo this is a naive impl. need optimization
+  for (unsigned int b = 0; b < output.batch(); ++b) {
+    for (unsigned int c = 0; c < output.channel(); ++c) {
+      for (unsigned int h = 0; h < output.height(); ++h) {
+        for (unsigned int w = 0; w < output.width(); ++w) {
+          output.setValue(
+            b, c, h, w,
+            clip(std::lround(input.getValue(b, c, h, w) / scale + zero_point),
+                 quant_min, quant_max));
+        }
+      }
+    }
+  }
+
+  return output;
 }

 Tensor PerTensorAffineQuantizer::dequantize(const Tensor &input,
                                             Tdatatype dtype) {
-  /// @todo NYI
-  return input;
+  Tensor output = input.clone(dtype);
+
+  /// @todo this is a naive impl. need optimization
+  for (unsigned int b = 0; b < output.batch(); ++b) {
+    for (unsigned int c = 0; c < output.channel(); ++c) {
+      for (unsigned int h = 0; h < output.height(); ++h) {
+        for (unsigned int w = 0; w < output.width(); ++w) {
+          output.setValue(b, c, h, w,
+                          (input.getValue<int8_t>(b, c, h, w) - zero_point) *
+                            scale);
+        }
+      }
+    }
+  }
+
+  return output;
 }

 QScheme PerTensorAffineQuantizer::qscheme() const {
   return QScheme::PER_TENSOR_AFFINE;
 }

+void PerTensorAffineQuantizer::calculateQParams(const Tensor &input,
+                                                Tdatatype qtype) {
+  unsigned int N;
+
+  if (qtype == Tdatatype::QINT8) {
+    N = 8;
+  } else if (qtype == Tdatatype::QINT4) {
+    N = 4;
+  } else {
+    throw std::invalid_argument("Error: Unsupported data type.");
+  }
+
+  quant_max = std::pow(2, N - 1) - 1;
+  quant_min = -std::pow(2, N - 1);
+
+  /// @todo for quint8, zero point calculation should be added
+  float max_val = input.max_abs();
+  scale = max_val / ((quant_max - quant_min) / 2.0f);
+  scale = std::max(scale, std::numeric_limits<float>::epsilon());
+}
+
 /**
  * @brief PerChannelAffineQuantizer class
  */
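
The scheme above is symmetric per-tensor affine quantization: quantize() maps each element x to clip(lround(x / scale + zero_point), quant_min, quant_max), and dequantize() maps q back to (q - zero_point) * scale. calculateQParams() never assigns zero_point, so it presumably stays at its default of 0 for the signed types handled here (the quint8 zero point is an explicit @todo). A minimal standalone sketch of that round trip, under those assumptions and independent of the nntrainer API (all names and input values are illustrative):

// Standalone sketch of the symmetric per-tensor affine round trip (C++17).
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <limits>
#include <vector>

int main() {
  std::vector<float> x = {-0.169f, 0.315f, 0.448f, -0.352f};

  // QINT8 bounds as computed by calculateQParams(): 2^7 - 1 and -2^7
  const long quant_max = 127, quant_min = -128;

  // scale = max|x| / ((quant_max - quant_min) / 2) = max|x| / 127.5
  float max_val = 0.0f;
  for (float v : x)
    max_val = std::max(max_val, std::fabs(v));
  float scale = std::max(max_val / ((quant_max - quant_min) / 2.0f),
                         std::numeric_limits<float>::epsilon());
  const int zero_point = 0; // left unset for signed types; quint8 is a TODO

  for (float v : x) {
    // quantize: q = clip(round(x / scale + zero_point), quant_min, quant_max)
    long q =
      std::clamp(std::lround(v / scale + zero_point), quant_min, quant_max);
    // dequantize: x_hat = (q - zero_point) * scale
    float x_hat = (static_cast<int8_t>(q) - zero_point) * scale;
    std::cout << v << " -> " << q << " -> " << x_hat << '\n';
  }
  return 0;
}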

nntrainer/tensor/quantizer.h

Lines changed: 1 addition & 1 deletion
@@ -192,7 +192,7 @@ class PerTensorAffineQuantizer : public UniformQuantizer {
   /**
    * @copydoc Quantizer::calculateQParams(const Tensor &input, Tdatatype qtype)
    */
-  void calculateQParams(const Tensor &input, Tdatatype qtype) override {}
+  void calculateQParams(const Tensor &input, Tdatatype qtype) override;
 };

 /**

test/unittest/meson.build

Lines changed: 1 addition & 1 deletion
@@ -39,7 +39,7 @@ test_target = [
   ['unittest_nntrainer_internal', []],
   ['unittest_nntrainer_lazy_tensor', []],
   ['unittest_nntrainer_tensor', []],
-  ['unittest_nntrainer_tensor_nhwc', []],
+  ['unittest_nntrainer_quantizer', []],
   ['unittest_util_func', []],
   ['unittest_nntrainer_modelfile', []],
   ['unittest_nntrainer_models', [
test/unittest/unittest_nntrainer_quantizer.cpp

Lines changed: 157 additions & 0 deletions
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2020 Jijoong Moon <jijoong.moon@samsung.com>
+ *
+ * @file unittest_nntrainer_quantizer.cpp
+ * @date 03 June 2020
+ * @brief Unit test utility for quantizer.
+ * @see https://github.com/nnstreamer/nntrainer
+ * @author Jijoong Moon <jijoong.moon@samsung.com>
+ * @bug No known bugs
+ */
+#include <gtest/gtest.h>
+
+#include "nntrainer_test_util.h"
+#include "util_func.h"
+#include <fstream>
+#include <nntrainer_error.h>
+#include <quantizer.h>
+#include <tensor.h>
+
+TEST(nntrainer_Quantizer, per_tensor_affine_01_n) {
+  nntrainer::Tensor input(3, 2, 4, 5);
+  input.setRandNormal(1.235f, 0.04f);
+
+  std::unique_ptr<nntrainer::Quantizer> quantizer =
+    nntrainer::Quantization::createQuantizer(
+      nntrainer::QScheme::PER_TENSOR_AFFINE);
+
+  EXPECT_THROW(quantizer->quantize(input, nntrainer::Tdatatype::FP32),
+               std::invalid_argument);
+}
+
+TEST(nntrainer_Quantizer, per_tensor_affine_02_n) {
+  nntrainer::Tensor input(3, 3, 24, 24);
+  input.setRandNormal(3.812f, 0.15f);
+
+  std::unique_ptr<nntrainer::Quantizer> quantizer =
+    nntrainer::Quantization::createQuantizer(
+      nntrainer::QScheme::PER_TENSOR_AFFINE);
+
+  nntrainer::Tensor quantized_tensor =
+    quantizer->quantize(input, nntrainer::Tdatatype::QINT8);
+
+  EXPECT_THROW(quantizer->dequantize(input, nntrainer::Tdatatype::QINT8),
+               std::invalid_argument);
+}
+
+TEST(nntrainer_Quantizer, per_tensor_affine_03_p) {
+  float input_data[] = {-0.16924214, -0.10338581, 0.31561565,  -0.00533330,
+                        0.44809300,  -0.15348488, 0.14003623,  -0.07908171,
+                        -0.21415669, -0.35267806, 0.46354777,  -0.35009885,
+                        -0.07760239, -0.28348053, -0.37242615, 0.30941701};
+  nntrainer::Tensor input({1, 1, 4, 4}, input_data);
+
+  int8_t qdata[] = {-47, -28, 87,  -1,  123, -42, 39,   -22,
+                    -59, -97, 127, -96, -21, -78, -102, 85};
+  nntrainer::Tensor quant_answer(
+    {1, 1, 4, 4, nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}, qdata);
+
+  float output_data[] = {-0.17087643, -0.10179872, 0.31630316,  -0.00363567,
+                         0.44718724,  -0.15269808, 0.14179108,  -0.07998471,
+                         -0.21450445, -0.35265985, 0.46172991,  -0.34902418,
+                         -0.07634904, -0.28358215, -0.37083820, 0.30903184};
+  nntrainer::Tensor float_answer({1, 1, 4, 4}, output_data);
+
+  // Per tensor affine quantizer
+  std::unique_ptr<nntrainer::Quantizer> quantizer =
+    nntrainer::Quantization::createQuantizer(
+      nntrainer::QScheme::PER_TENSOR_AFFINE);
+
+  // Perform Quantization
+  nntrainer::Tensor quantized_tensor =
+    quantizer->quantize(input, nntrainer::Tdatatype::QINT8);
+  ASSERT_EQ(quantized_tensor, quant_answer);
+
+  // Perform Dequantization
+  nntrainer::Tensor output =
+    quantizer->dequantize(quantized_tensor, nntrainer::Tdatatype::FP32);
+  ASSERT_EQ(output, float_answer);
+}
+
+TEST(nntrainer_Quantizer, per_tensor_affine_04_p) {
+  float input_data[] = {
+    -0.29562217, 0.02348283,  0.04334664,  0.03752254,  0.17764580,
+    0.04449826,  0.15144463,  -0.15716791, -0.07842141, 0.34517670,
+    0.16458672,  -0.09487095, -0.28020513, 0.32698259,  -0.24903688,
+    -0.33132783, 0.13940062,  0.18400775,  -0.26359966, 0.30900121,
+    0.08309542,  -0.09066082, 0.08950174,  -0.29709017, -0.26397359,
+    -0.16240828, -0.18758762, -0.31878781, 0.06728745,  -0.04749811,
+    0.16789703,  0.02212419,  0.10671097,  -0.28938687, 0.16250020,
+    -0.09017495, 0.24699482,  -0.26789218, 0.16414545,  0.22879964,
+    -0.15821624, -0.23149055, 0.26526868,  -0.11006282, -0.20480227,
+    0.29863110,  0.24005184,  -0.09062263, 0.22294718,  0.32583672,
+    -0.10362835, 0.03243832,  0.24707781,  0.27685603,  0.03360258,
+    -0.00209959, 0.27976128,  -0.24468939, -0.19273037, -0.25921509,
+    -0.20489319, 0.33036807,  0.27226517,  -0.25207010};
+  nntrainer::Tensor input({1, 1, 8, 8}, input_data);
+
+  int8_t qdata[] = {-109, 9,    16,  14,   66,   16,   56,  -58, -29, 127, 61,
+                    -35,  -104, 121, -92,  -122, 51,   68,  -97, 114, 31,  -33,
+                    33,   -110, -98, -60,  -69,  -118, 25,  -18, 62,  8,   39,
+                    -107, 60,   -33, 91,   -99,  61,   85,  -58, -86, 98,  -41,
+                    -76,  110,  89,  -33,  82,   120,  -38, 12,  91,  102, 12,
+                    -1,   103,  -90, -71,  -96,  -76,  122, 101, -93};
+  nntrainer::Tensor quant_answer(
+    {1, 1, 8, 8, nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}, qdata);
+
+  float output_data[] = {
+    -0.29509223, 0.02436541,  0.04331629,  0.03790175,  0.17867969,
+    0.04331629,  0.15160701,  -0.15702155, -0.07851078, 0.34382305,
+    0.16514336,  -0.09475438, -0.28155589, 0.32757944,  -0.24906866,
+    -0.33028671, 0.13807067,  0.18409424,  -0.26260501, 0.30862856,
+    0.08392531,  -0.08933984, 0.08933984,  -0.29779950, -0.26531228,
+    -0.16243608, -0.18680149, -0.31945765, 0.06768170,  -0.04873083,
+    0.16785063,  0.02165814,  0.10558346,  -0.28967768, 0.16243608,
+    -0.08933984, 0.24636140,  -0.26801956, 0.16514336,  0.23011778,
+    -0.15702155, -0.23282506, 0.26531228,  -0.11099799, -0.20575237,
+    0.29779950,  0.24094686,  -0.08933984, 0.22199598,  0.32487217,
+    -0.10287619, 0.03248722,  0.24636140,  0.27614135,  0.03248722,
+    -0.00270727, 0.27884862,  -0.24365413, -0.19221604, -0.25989774,
+    -0.20575237, 0.33028671,  0.27343407,  -0.25177592};
+  nntrainer::Tensor float_answer({1, 1, 8, 8}, output_data);
+
+  // Per tensor affine quantizer
+  std::unique_ptr<nntrainer::Quantizer> quantizer =
+    nntrainer::Quantization::createQuantizer(
+      nntrainer::QScheme::PER_TENSOR_AFFINE);
+
+  // Perform Quantization
+  nntrainer::Tensor quantized_tensor =
+    quantizer->quantize(input, nntrainer::Tdatatype::QINT8);
+  ASSERT_EQ(quantized_tensor, quant_answer);
+
+  // Perform Dequantization
+  nntrainer::Tensor output =
+    quantizer->dequantize(quantized_tensor, nntrainer::Tdatatype::FP32);
+  ASSERT_EQ(output, float_answer);
+}
+
+int main(int argc, char **argv) {
+  int result = -1;
+
+  try {
+    testing::InitGoogleTest(&argc, argv);
+  } catch (...) {
+    std::cerr << "Error during InitGoogleTest" << std::endl;
+    return 0;
+  }
+
+  try {
+    result = RUN_ALL_TESTS();
+  } catch (...) {
+    std::cerr << "Error during RUN_ALL_TESTS()" << std::endl;
+  }
+
+  return result;
+}
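
As a sanity check on the expected values in per_tensor_affine_03_p: the largest magnitude in input_data is 0.46354777, so calculateQParams() gives scale = 0.46354777 / 127.5 ≈ 0.00363567 with zero_point = 0. For example, 0.31561565 / 0.00363567 ≈ 86.8, which rounds to 87 (qdata[2]), and dequantizing gives 87 * 0.00363567 ≈ 0.31630316 (output_data[2]). For in-range values the round-trip error is bounded by scale / 2 per element.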
