
Commit 79244a3

[TensorV2] Feature Scaling Functions
This pull request adds two new feature scaling functions, standardization and normalization, to the Tensor class. These functions help users preprocess input data before feeding it into models, improving model performance and accuracy.

**Changes proposed in this PR:**
* Added a normalization() function that rescales values to a range between 0 and 1
* Added a standardization() function that centers data around the mean and scales it to a standard deviation of 1

**Self-evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Donghyeon Jeong <dhyeon.jeong@samsung.com>
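For context, a minimal usage sketch of the new API follows. The tensor shape and fill values are illustrative only, and the snippet assumes TensorV2 provides a Tensor-style setValue(b, c, h, w, value); clone(), normalization_i(), standardization_i(), minValue(), and maxValue() are taken from this diff:

#include <tensor_v2.h>

// Hypothetical example of the new feature scaling API.
void feature_scaling_example() {
  nntrainer::TensorV2 t(1, 1, 2, 3); // batch=1, channel=1, height=2, width=3
  for (unsigned int h = 0; h < 2; ++h)
    for (unsigned int w = 0; w < 3; ++w)
      t.setValue(0, 0, h, w, static_cast<float>(h * 3 + w)); // values 0..5

  nntrainer::TensorV2 norm = t.clone();
  norm.normalization_i(); // in-place min-max rescale into [0, 1]

  nntrainer::TensorV2 stdized = t.clone();
  stdized.standardization_i(); // in-place per-batch centering and scaling

  // The new reduction helpers are public as well:
  float lo = norm.minValue(); // 0.0f after normalization
  float hi = norm.maxValue(); // 1.0f after normalization
}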
1 parent 5701ddf commit 79244a3

File tree: 7 files changed, +170 −0 lines

nntrainer/tensor/float_tensor.cpp

Lines changed: 14 additions & 0 deletions
@@ -681,6 +681,10 @@ TensorV2 &FloatTensor::sum(unsigned int axis, TensorV2 &output, float alpha,
   return output;
 }
 
+float FloatTensor::l2norm() const {
+  return snrm2(size(), (float *)getData(), 1);
+}
+
 TensorV2 &FloatTensor::pow(float exponent, TensorV2 &output) const {
   auto f = [exponent](float in) { return powf(in, exponent); };
   apply(f, output);
@@ -783,6 +787,16 @@ void FloatTensor::copyData(const TensorV2 &from) {
   }
 }
 
+float FloatTensor::maxValue() const {
+  const float *data = (float *)getData();
+  return *std::max_element(data, data + size());
+}
+
+float FloatTensor::minValue() const {
+  const float *data = (float *)getData();
+  return *std::min_element(data, data + size());
+}
+
 TensorV2 &FloatTensor::transpose(const std::string &direction,
                                  TensorV2 &output) const {
   unsigned int SL, SI, SJ, SK;
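For reference, snrm2 is the BLAS Euclidean-norm routine, so l2norm() evaluates sqrt(x_0^2 + ... + x_{n-1}^2) over the flat buffer, while maxValue()/minValue() are straight scans. A standalone sketch of the same semantics on a plain buffer, using only the standard library (illustrative, not part of the patch):

#include <algorithm>
#include <cmath>
#include <numeric>
#include <vector>

// Reference behavior of the three helpers added above.
float l2norm_ref(const std::vector<float> &x) {
  // snrm2(n, data, 1) == sqrt(sum of squares)
  return std::sqrt(std::inner_product(x.begin(), x.end(), x.begin(), 0.0f));
}

float max_ref(const std::vector<float> &x) {
  return *std::max_element(x.begin(), x.end()); // assumes non-empty buffer
}

float min_ref(const std::vector<float> &x) {
  return *std::min_element(x.begin(), x.end()); // assumes non-empty buffer
}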

nntrainer/tensor/float_tensor.h

Lines changed: 15 additions & 0 deletions
@@ -270,6 +270,11 @@ class FloatTensor : public TensorBase {
   TensorV2 &sum(unsigned int axis, TensorV2 &output, float alpha,
                 float beta) const override;
 
+  /**
+   * @copydoc TensorV2::l2norm
+   */
+  float l2norm() const override;
+
   /**
    * @copydoc TensorV2::pow(float exponent, TensorV2 &output)
    */
@@ -312,6 +317,16 @@ class FloatTensor : public TensorBase {
   */
   void copyData(const TensorV2 &from);
 
+  /**
+   * @copydoc Tensor::maxValue()
+   */
+  float maxValue() const override;
+
+  /**
+   * @copydoc Tensor::minValue()
+   */
+  float minValue() const override;
+
   /**
    * @copydoc TensorV2::transpose(const std::string &direction, TensorV2 &out)
    */

nntrainer/tensor/half_tensor.cpp

Lines changed: 14 additions & 0 deletions
@@ -631,6 +631,10 @@ TensorV2 &HalfTensor::sum(unsigned int axis, TensorV2 &output, float alpha,
   return output;
 }
 
+float HalfTensor::l2norm() const {
+  return snrm2(size(), (_FP16 *)getData(), 1);
+}
+
 TensorV2 &HalfTensor::pow(float exponent, TensorV2 &output) const {
   auto f = [exponent](float in) {
     return static_cast<_FP16>(powf(in, exponent));
@@ -858,6 +862,16 @@ void HalfTensor::copyData(const TensorV2 &from) {
   }
 }
 
+float HalfTensor::maxValue() const {
+  const _FP16 *data = (_FP16 *)getData();
+  return (float)*std::max_element(data, data + size());
+}
+
+float HalfTensor::minValue() const {
+  const _FP16 *data = (_FP16 *)getData();
+  return (float)*std::min_element(data, data + size());
+}
+
 TensorV2 &HalfTensor::transpose(const std::string &direction,
                                 TensorV2 &output) const {
   unsigned int SL, SI, SJ, SK;

nntrainer/tensor/half_tensor.h

Lines changed: 15 additions & 0 deletions
@@ -269,6 +269,11 @@ class HalfTensor : public TensorBase {
   TensorV2 &sum(unsigned int axis, TensorV2 &output, float alpha,
                 float beta) const override;
 
+  /**
+   * @copydoc TensorV2::l2norm
+   */
+  float l2norm() const override;
+
   /**
    * @copydoc TensorV2::pow(float exponent, TensorV2 &output)
    */
@@ -311,6 +316,16 @@ class HalfTensor : public TensorBase {
   */
   void copyData(const TensorV2 &from);
 
+  /**
+   * @copydoc Tensor::maxValue()
+   */
+  float maxValue() const override;
+
+  /**
+   * @copydoc Tensor::minValue()
+   */
+  float minValue() const override;
+
   /**
    * @copydoc TensorV2::transpose(const std::string &direction, TensorV2 &out)
    */

nntrainer/tensor/tensor_base.h

Lines changed: 15 additions & 0 deletions
@@ -288,6 +288,11 @@ class TensorBase {
   virtual TensorV2 &sum(unsigned int axis, TensorV2 &output, float alpha,
                         float beta) const = 0;
 
+  /**
+   * @copydoc TensorV2::l2norm
+   */
+  virtual float l2norm() const = 0;
+
   /**
    * @copydoc TensorV2::pow(float exponent, TensorV2 &output)
    */
@@ -364,6 +369,16 @@ class TensorBase {
   */
   virtual void copyData(const TensorV2 &from) = 0;
 
+  /**
+   * @copydoc TensorV2::maxValue()
+   */
+  virtual float maxValue() const = 0;
+
+  /**
+   * @copydoc TensorV2::minValue()
+   */
+  virtual float minValue() const = 0;
+
   /**
    * @copydoc TensorV2::transpose(const std::string &direction, TensorV2 &out)
    */

nntrainer/tensor/tensor_v2.cpp

Lines changed: 55 additions & 0 deletions
@@ -463,6 +463,57 @@ TensorV2 &TensorV2::erf(TensorV2 &output) const {
   return output;
 }
 
+float TensorV2::l2norm() const { return itensor->l2norm(); }
+
+void TensorV2::normalization_i() {
+  NNTR_THROW_IF(!getContiguous(), std::invalid_argument)
+    << getName() << " is not contiguous, cannot do normalization.";
+
+  const float min = minValue();
+  const float max = maxValue();
+
+  if (max == min) {
+    TensorV2 tmp = *this;
+    this->subtract_i(tmp);
+  } else {
+    this->subtract_i(min);
+    this->divide_i(max - min);
+  }
+}
+
+void TensorV2::standardization_i() {
+  TensorV2 mean_by_batch = this->sum_by_batch();
+  mean_by_batch.divide_i(getDim().getFeatureLen());
+
+  this->subtract_i(mean_by_batch);
+  TensorV2 std_dev_by_batch(batch(), 1, 1, 1, getFormat(), getDataType());
+  std_dev_by_batch.setZero();
+
+  /// @todo remove conditional statement
+  if (getDataType() == ml::train::TensorDim::DataType::FP32) {
+    float *std_dev = std_dev_by_batch.getData<float>();
+
+    for (unsigned int k = 0; k < batch(); ++k) {
+      TensorV2 sub_this = this->getBatchSlice(k, 1);
+      std_dev[k] = sub_this.l2norm();
+    }
+  } else if (getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
+    _FP16 *std_dev = std_dev_by_batch.getData<_FP16>();
+
+    for (unsigned int k = 0; k < batch(); ++k) {
+      TensorV2 sub_this = this->getBatchSlice(k, 1);
+      std_dev[k] = static_cast<_FP16>(sub_this.l2norm());
+    }
+#else
+    throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
+  }
+
+  std_dev_by_batch.divide_i(getDim().getFeatureLen());
+  this->divide_i(std_dev_by_batch);
+}
+
 TensorV2 TensorV2::dot(TensorV2 const &input, bool trans, bool trans_in) const {
   TensorV2 output("", this->getFormat(), this->getDataType());
   dot(input, output, trans, trans_in);
@@ -684,6 +735,10 @@ TensorV2 TensorV2::clone() const {
   return output;
 }
 
+float TensorV2::maxValue() const { return itensor->maxValue(); }
+
+float TensorV2::minValue() const { return itensor->minValue(); }
+
 TensorV2 TensorV2::transpose(const std::string &direction) const {
   TensorV2 output(getDim());
   transpose(direction, output);
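In formula terms, normalization_i() computes x' = (x - min) / (max - min) elementwise (zeroing the tensor when max == min), and standardization_i() subtracts each batch slice's mean and then divides by l2norm(centered slice) / feature_len. A minimal standalone sketch of that arithmetic on a flat buffer (the function names and single-slice scope are illustrative, not part of the patch):

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

// Mirrors normalization_i(): min-max rescale into [0, 1].
void normalize(std::vector<float> &x) {
  const auto [lo, hi] = std::minmax_element(x.begin(), x.end());
  const float min = *lo, max = *hi;
  for (float &v : x)
    v = (max == min) ? 0.0f : (v - min) / (max - min);
}

// Mirrors standardization_i() for a single batch slice of length
// feature_len: center by the mean, divide by l2norm(centered) / feature_len.
void standardize_slice(std::vector<float> &x) {
  const std::size_t feature_len = x.size();
  float mean = 0.0f;
  for (float v : x)
    mean += v;
  mean /= feature_len;

  float sq_sum = 0.0f;
  for (float &v : x) {
    v -= mean;
    sq_sum += v * v;
  }
  const float dev = std::sqrt(sq_sum) / feature_len; // as in the code above
  for (float &v : x)
    v /= dev; // note: like the original, assumes a non-constant slice
}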

nntrainer/tensor/tensor_v2.h

Lines changed: 42 additions & 0 deletions
@@ -844,6 +844,36 @@ class TensorV2 {
   */
   TensorV2 &erf(TensorV2 &output) const;
 
+  /**
+   * @brief l2norm the Tensor elements
+   * @retval Calculated l2norm
+   */
+  float l2norm() const;
+
+  /**
+   * @brief Normalize the Tensor elements
+   * @retval Calculated Tensor
+   */
+  TensorV2 &normalization(TensorV2 &output) const;
+
+  /**
+   * @brief Standardize the Tensor elements
+   * @retval Calculated Tensor
+   */
+  TensorV2 &standardization(TensorV2 &output) const;
+
+  /**
+   * @brief Normalize the Tensor elements in-place
+   * @retval Calculated Tensor
+   */
+  void normalization_i();
+
+  /**
+   * @brief Standardize the Tensor elements in-place
+   * @retval Calculated Tensor
+   */
+  void standardization_i();
+
   /**
    * @brief Dot Product of Tensor ( equal MxM )
    * @details This applies dot of the last dimension of this and second-last
@@ -1017,6 +1047,18 @@ class TensorV2 {
   */
   TensorV2 clone() const;
 
+  /**
+   * @brief return maximum value
+   * @retval Maximum value of the tensor data
+   */
+  float maxValue() const;
+
+  /**
+   * @brief return minimum value
+   * @retval Minimum value of the tensor data
+   */
+  float minValue() const;
+
   /**
    * @brief Transpose Tensor
    * @param direction to transpose ex) 0:2:1
