From 8ecfe4fbd5b88b23bde5fc9a628f9cfb5036eb67 Mon Sep 17 00:00:00 2001
From: Kirill Golikov <kirill.golikov@abbyy.com>
Date: Thu, 12 Sep 2024 12:27:36 +0200
Subject: [PATCH] [NeoMLTest] Fix CFloatHandleStackVar (#1117)

* [NeoMLTest] Fix GPU SpaceToDepthTest

Signed-off-by: Kirill Golikov <kirill.golikov@abbyy.com>

* [NeoMLTest] micro speed-up (CFloatHandleVar --> CFloatHandleStackVar)

Signed-off-by: Kirill Golikov <kirill.golikov@abbyy.com>

* [NeoMLTest] LossLayer Tests (CFloatHandleVar --> CFloatHandleStackVar)

Signed-off-by: Kirill Golikov <kirill.golikov@abbyy.com>

---------

Signed-off-by: Kirill Golikov <kirill.golikov@abbyy.com>
---
 NeoML/src/Dnn/Layers/LossLayer.cpp            | 141 +++++++++---------
 NeoML/test/src/SpaceToDepthTest.cpp           |   4 +-
 .../GPU/Vulkan/VulkanMathEngineDnnConvs.cpp   |   2 +-
 .../test/src/inference/BlobMergeByDimTest.cpp |   4 +-
 .../VectorMultichannelLookupAndCopyTest.cpp   |   4 +-
 .../test/src/learn/TransposeMatrixTest.cpp    |   6 +-
 ...torMultichannelLookupAndAddToTableTest.cpp |   2 +-
 7 files changed, 85 insertions(+), 78 deletions(-)
diff --git a/NeoML/src/Dnn/Layers/LossLayer.cpp b/NeoML/src/Dnn/Layers/LossLayer.cpp
index 1c43c429a..6f256fc66 100644
--- a/NeoML/src/Dnn/Layers/LossLayer.cpp
+++ b/NeoML/src/Dnn/Layers/LossLayer.cpp
@@ -194,14 +194,15 @@ template<class T>
 float CLossLayer::testImpl(int batchSize, CConstFloatHandle data, int vectorSize, CTypedMemoryHandle<const T> label,
 	int labelSize, CConstFloatHandle dataDelta)
 {
-	int totalSize = batchSize * vectorSize;
+	const int totalSize = batchSize * vectorSize;
+	CFloatHandleStackVar temp( MathEngine(), ( 2 * totalSize ) + ( 3 * batchSize ) + 1 );
 
-	CFloatHandleVar lossValue(MathEngine(), batchSize);	// the function value in data point
-	CFloatHandleVar lossGradient(MathEngine(), totalSize); // the function gradient in data point
-	CFloatHandleVar dataShift(MathEngine(), totalSize); // the data + dataDelta point
-	CFloatHandleVar lossValueShift(MathEngine(), batchSize); // the function value in data + dataDelta point
-	CFloatHandleVar lossValueShiftApp(MathEngine(), batchSize); // the function approximation in data + dataDelta point
-	CFloatHandleStackVar l2( MathEngine() ); // L2-measure (lossValueShiftApp - lossValueShift)
+	CFloatHandle lossValue = temp; // the function value in data point
+	CFloatHandle lossGradient = lossValue + batchSize; // the function gradient in data point
+	CFloatHandle dataShift = lossGradient + totalSize; // the data + dataDelta point
+	CFloatHandle lossValueShift = dataShift + totalSize; // the function value in data + dataDelta point
+	CFloatHandle lossValueShiftApp = lossValueShift + batchSize; // the function approximation in data + dataDelta point
+	CFloatHandle l2 = lossValueShiftApp + batchSize; // L2-measure (lossValueShiftApp - lossValueShift)
 
 	CPtr<CDnnBlob> oldWeights = weights;
 	weights = CDnnBlob::CreateVector(MathEngine(), CT_Float, batchSize);
@@ -209,24 +210,24 @@ float CLossLayer::testImpl(int batchSize, CConstFloatHandle data, int vectorSize
 
 	// Estimate
 	BatchCalculateLossAndGradient(batchSize, data, vectorSize,
-		label, labelSize, lossValue.GetHandle(), lossGradient.GetHandle());
+		label, labelSize, lossValue, lossGradient);
 
-	MathEngine().VectorAdd(data, dataDelta, dataShift.GetHandle(), totalSize);
-	BatchCalculateLossAndGradient(batchSize, dataShift.GetHandle(), vectorSize,
-		label, labelSize, lossValueShift.GetHandle(), CFloatHandle());
+	MathEngine().VectorAdd(data, dataDelta, dataShift, totalSize);
+	BatchCalculateLossAndGradient(batchSize, dataShift, vectorSize,
+		label, labelSize, lossValueShift, CFloatHandle{});
 
 	for(int i = 0; i < batchSize; ++i) {
-		MathEngine().VectorDotProduct(lossGradient.GetHandle() + i * vectorSize,
-			dataDelta + i * vectorSize, vectorSize, lossValueShiftApp.GetHandle() + i);
+		MathEngine().VectorDotProduct(lossGradient + i * vectorSize,
+			dataDelta + i * vectorSize, vectorSize, lossValueShiftApp + i);
 	}
-	MathEngine().VectorAdd(lossValueShiftApp.GetHandle(), lossValue.GetHandle(),
-		lossValueShiftApp.GetHandle(), batchSize);
-	MathEngine().VectorSub(lossValueShiftApp.GetHandle(), lossValueShift.GetHandle(),
-		lossValueShiftApp.GetHandle(), batchSize);
-	MathEngine().VectorDotProduct(lossValueShiftApp.GetHandle(), lossValueShiftApp.GetHandle(),
-		batchSize, l2.GetHandle());
+	MathEngine().VectorAdd(lossValueShiftApp, lossValue,
+		lossValueShiftApp, batchSize);
+	MathEngine().VectorSub(lossValueShiftApp, lossValueShift,
+		lossValueShiftApp, batchSize);
+	MathEngine().VectorDotProduct(lossValueShiftApp, lossValueShiftApp,
+		batchSize, l2);
 
-	float res = l2.GetHandle().GetValue() / batchSize;
+	float res = l2.GetValue() / batchSize;
 
 	weights = oldWeights; // restore the old weight values
 
@@ -248,67 +249,73 @@ float CLossLayer::Test(int batchSize, CConstFloatHandle data, int vectorSize, CC
 float CLossLayer::TestRandom(CRandom& random, int batchSize, float dataLabelMin, float dataLabelMax, float deltaAbsMax,
 	int vectorSize)
 {
-	int totalSize = batchSize * vectorSize;
+	NeoAssert( batchSize > 0 && vectorSize > 0 );
+	NeoAssert( dataLabelMin < dataLabelMax && deltaAbsMax > 0 );
 
-	CArray<float> temp;
+	const int totalSize = batchSize * vectorSize;
+	CFloatHandleStackVar temp( MathEngine(), totalSize * 3 );
 
-	CFloatHandleVar data( MathEngine(), totalSize );
-	temp.SetSize(totalSize);
-	for(int i = 0; i < totalSize; ++i) {
-		temp[i] = (float)random.Uniform(dataLabelMin, dataLabelMax);
-	}
-	MathEngine().DataExchangeTyped(data.GetHandle(), temp.GetPtr(), totalSize);
+	CFloatHandle data = temp;
+	CFloatHandle label = data + totalSize;
+	CFloatHandle delta = label + totalSize;
+	{
+		CArray<float> buf;
+		buf.SetSize( totalSize );
 
-	CFloatHandleVar label( MathEngine(), totalSize );
-	temp.SetSize(totalSize);
-	for(int i = 0; i < totalSize; ++i) {
-		temp[i] = (float)random.Uniform(dataLabelMin, dataLabelMax);
-	}
-	MathEngine().DataExchangeTyped(label.GetHandle(), temp.GetPtr(), totalSize);
+		for( int i = 0; i < totalSize; ++i ) {
+			buf[i] = static_cast<float>( random.Uniform(dataLabelMin, dataLabelMax) );
+		}
+		MathEngine().DataExchangeTyped(data, buf.GetPtr(), totalSize);
 
-	NeoAssert(deltaAbsMax > 0);
-	CFloatHandleVar delta( MathEngine(), totalSize );
-	temp.SetSize(totalSize);
-	for(int i = 0; i < totalSize; ++i) {
-		temp[i] = (float)random.Uniform(-deltaAbsMax, deltaAbsMax);
-	}
-	MathEngine().DataExchangeTyped(delta.GetHandle(), temp.GetPtr(), totalSize);
+		for( int i = 0; i < totalSize; ++i ) {
+			buf[i] = static_cast<float>( random.Uniform(dataLabelMin, dataLabelMax) );
+		}
+		MathEngine().DataExchangeTyped(label, buf.GetPtr(), totalSize);
 
-	return Test(batchSize, data.GetHandle(), vectorSize, label.GetHandle(), vectorSize, delta.GetHandle());
+		for( int i = 0; i < totalSize; ++i ) {
+			buf[i] = static_cast<float>( random.Uniform(-deltaAbsMax, deltaAbsMax) );
+		}
+		MathEngine().DataExchangeTyped(delta, buf.GetPtr(), totalSize);
+	}
+	return Test(batchSize, data, vectorSize, label, vectorSize, delta);
 }
 
-float CLossLayer::TestRandom(CRandom& random, int batchSize, float dataMin, float dataMax, int labelMax, float deltaAbsMax,
-	int vectorSize)
+float CLossLayer::TestRandom( CRandom& random, int batchSize, float dataMin, float dataMax, int labelMax, float deltaAbsMax,
+	int vectorSize )
 {
-	int totalSize = batchSize * vectorSize;
+	NeoAssert( batchSize > 0 && vectorSize > 0 );
+	NeoAssert( dataMin < dataMax && labelMax > 0 && deltaAbsMax > 0 );
 
-	CArray<float> temp;
+	const int totalSize = batchSize * vectorSize;
+	CFloatHandleStackVar temp( MathEngine(), totalSize * 2 );
 
-	CFloatHandleVar data( MathEngine(), totalSize );
-	temp.SetSize(totalSize);
-	for(int i = 0; i < totalSize; ++i) {
-		temp[i] = (float)random.Uniform(dataMin, dataMax);
-	}
-	MathEngine().DataExchangeTyped(data.GetHandle(), temp.GetPtr(), totalSize);
+	CFloatHandle data = temp;
+	CFloatHandle delta = data + totalSize;
+	{
+		CArray<float> buf;
+		buf.SetSize( totalSize );
 
-	NeoAssert(labelMax > 0);
-	CPtr<CDnnBlob> label = CDnnBlob::CreateVector(MathEngine(), CT_Int, batchSize);
-	CArray<int> tempInt;
-	tempInt.SetSize(batchSize);
-	for(int i = 0; i < batchSize; ++i) {
-		tempInt[i] = random.UniformInt(0, labelMax - 1);
-	}
-	MathEngine().DataExchangeTyped(label->GetData<int>(), tempInt.GetPtr(), batchSize);
+		for( int i = 0; i < totalSize; ++i ) {
+			buf[i] = static_cast<float>( random.Uniform(dataMin, dataMax) );
+		}
+		MathEngine().DataExchangeTyped(data, buf.GetPtr(), totalSize);
 
-	NeoAssert(deltaAbsMax > 0);
-	CFloatHandleVar delta( MathEngine(), totalSize );
-	temp.SetSize(totalSize);
-	for(int i = 0; i < totalSize; ++i) {
-		temp[i] = (float)random.Uniform(-deltaAbsMax, deltaAbsMax);
+		for( int i = 0; i < totalSize; ++i ) {
+			buf[i] = static_cast<float>( random.Uniform(-deltaAbsMax, deltaAbsMax) );
+		}
+		MathEngine().DataExchangeTyped(delta, buf.GetPtr(), totalSize);
 	}
-	MathEngine().DataExchangeTyped(delta.GetHandle(), temp.GetPtr(), totalSize);
 
-	return Test(batchSize, data.GetHandle(), vectorSize, label->GetData<int>(), 1, delta.GetHandle());
+	CIntHandleStackVar label(MathEngine(), batchSize);
+	{
+		CArray<int> bufInt;
+		bufInt.SetSize( batchSize );
+		for( int i = 0; i < batchSize; ++i ) {
+			bufInt[i] = random.UniformInt(0, labelMax - 1);
+		}
+		MathEngine().DataExchangeTyped<int>(label, bufInt.GetPtr(), batchSize);
+	}
+	return Test(batchSize, data, vectorSize, label, 1, delta);
 }
 
 } // namespace NeoML
diff --git a/NeoML/test/src/SpaceToDepthTest.cpp b/NeoML/test/src/SpaceToDepthTest.cpp
index 3a2a840c1..86248466c 100644
--- a/NeoML/test/src/SpaceToDepthTest.cpp
+++ b/NeoML/test/src/SpaceToDepthTest.cpp
@@ -158,7 +158,7 @@ static void spaceToDepthTestInt( const CTestParams& params, int seed )
 		s2dnn.RunOnce();
 
 		CPtr<CDnnBlob> result = sink->GetBlob();
-		int* buffer = result->GetBuffer<int>( 0, dataSize, /*exchange*/false );
+		int* buffer = result->GetBuffer<int>( 0, dataSize, /*exchange*/true );
 		for( int i = 0; i < dataSize; ++i ) {
 			EXPECT_EQ( convertedData[i], buffer[i] ) << i;
 		}
@@ -175,7 +175,7 @@ static void spaceToDepthTestInt( const CTestParams& params, int seed )
 		d2snn.RunOnce();
 
 		CPtr<CDnnBlob> result = sink->GetBlob();
-		int* buffer = result->GetBuffer<int>( 0, dataSize, /*exchange*/false );
+		int* buffer = result->GetBuffer<int>( 0, dataSize, /*exchange*/true );
 		for( int i = 0; i < dataSize; ++i ) {
 			EXPECT_EQ( originalData[i], buffer[i] ) << i;
 		}
diff --git a/NeoMathEngine/src/GPU/Vulkan/VulkanMathEngineDnnConvs.cpp b/NeoMathEngine/src/GPU/Vulkan/VulkanMathEngineDnnConvs.cpp
index 4cc79841e..2767dd4d8 100644
--- a/NeoMathEngine/src/GPU/Vulkan/VulkanMathEngineDnnConvs.cpp
+++ b/NeoMathEngine/src/GPU/Vulkan/VulkanMathEngineDnnConvs.cpp
@@ -139,7 +139,7 @@ void CVulkanMathEngine::BlobRleConvolution( const CRleConvolutionDesc& desc, con
 	const CVulkanRleConvolutionDesc& rleDesc = static_cast<const CVulkanRleConvolutionDesc&>( desc );
 	const CCommonConvolutionDesc* convDesc = static_cast<const CCommonConvolutionDesc*>( rleDesc.ConvDesc );
 
-	CFloatHandleVar inputConverted( mathEngine(), convDesc->Source.BlobSize() );
+	CFloatHandleStackVar inputConverted( mathEngine(), convDesc->Source.BlobSize() );
 	blobConvertFromRleCommon( rleDesc, sourceData, inputConverted );
 	BlobConvolution( *(rleDesc.ConvDesc), inputConverted, filterData, freeTermData, resultData );
 }
diff --git a/NeoMathEngine/test/src/inference/BlobMergeByDimTest.cpp b/NeoMathEngine/test/src/inference/BlobMergeByDimTest.cpp
index f07ea7ac7..bcfd7a28a 100644
--- a/NeoMathEngine/test/src/inference/BlobMergeByDimTest.cpp
+++ b/NeoMathEngine/test/src/inference/BlobMergeByDimTest.cpp
@@ -1,4 +1,4 @@
-/* Copyright © 2017-2020 ABBYY Production LLC
+/* Copyright © 2017-2024 ABBYY
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -93,7 +93,7 @@ static void blobMergeByDimTestImpl( const CTestParams& params, int seed )
 	expected.resize( resultDesc.BlobSize() );
 	blobMergeByDimNaive( mergeDim, sourceDescs.data(), sourceData, fromCount, resultDesc, expected );
 
-	CFloatHandleVar resultHandle( MathEngine(), resultDesc.BlobSize() );
+	CFloatHandleStackVar resultHandle( MathEngine(), resultDesc.BlobSize() );
 	MathEngine().BlobMergeByDim( static_cast<TBlobDim>( mergeDim ), sourceDescs.data(), fromHandles.data(), fromCount, resultDesc, resultHandle.GetHandle() );
 	
 	for(size_t i = 0; i < fromHandleVars.size(); i++) {
diff --git a/NeoMathEngine/test/src/inference/VectorMultichannelLookupAndCopyTest.cpp b/NeoMathEngine/test/src/inference/VectorMultichannelLookupAndCopyTest.cpp
index 5f6549424..9bbf34f37 100644
--- a/NeoMathEngine/test/src/inference/VectorMultichannelLookupAndCopyTest.cpp
+++ b/NeoMathEngine/test/src/inference/VectorMultichannelLookupAndCopyTest.cpp
@@ -1,4 +1,4 @@
-/* Copyright © 2017-2020 ABBYY Production LLC
+/* Copyright © 2017-2024 ABBYY
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -96,7 +96,7 @@ static void multichannelLookupAndCopyImpl( const CTestParams& params, int seed )
 		}
 	}
 
-	CMemoryHandleVar<TIndex> inputHandle( MathEngine(), inputData.size() );
+	CMemoryHandleStackVar<TIndex> inputHandle( MathEngine(), inputData.size() );
 	MathEngine().DataExchangeTyped( inputHandle.GetHandle(), inputData.data(), inputData.size() );
 
 	std::vector<TLookup> result;
diff --git a/NeoMathEngine/test/src/learn/TransposeMatrixTest.cpp b/NeoMathEngine/test/src/learn/TransposeMatrixTest.cpp
index a5c244418..27042c1db 100644
--- a/NeoMathEngine/test/src/learn/TransposeMatrixTest.cpp
+++ b/NeoMathEngine/test/src/learn/TransposeMatrixTest.cpp
@@ -1,4 +1,4 @@
-/* Copyright © 2017-2020 ABBYY Production LLC
+/* Copyright © 2017-2024 ABBYY
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -55,9 +55,9 @@ static void transposeMatrixTestImpl( const CTestParams& params, int seed )
 		}
 	}
 
-	CMemoryHandleVar<T> from( MathEngine(), matrixSize );
+	CMemoryHandleStackVar<T> from( MathEngine(), matrixSize );
 	MathEngine().DataExchangeTyped<T>( from, matrix.data(), matrixSize );
-	CMemoryHandleVar<T> result( MathEngine(), matrixSize );
+	CMemoryHandleStackVar<T> result( MathEngine(), matrixSize );
 	MathEngine().TransposeMatrix( batchSize, from, height, medium, width, channels, result, matrixSize );
 	MathEngine().DataExchangeTyped<T>( matrix.data(), result, matrixSize );
 
diff --git a/NeoMathEngine/test/src/learn/VectorMultichannelLookupAndAddToTableTest.cpp b/NeoMathEngine/test/src/learn/VectorMultichannelLookupAndAddToTableTest.cpp
index 312964d91..9135902b7 100644
--- a/NeoMathEngine/test/src/learn/VectorMultichannelLookupAndAddToTableTest.cpp
+++ b/NeoMathEngine/test/src/learn/VectorMultichannelLookupAndAddToTableTest.cpp
@@ -72,7 +72,7 @@ static void multichannelLookupAndAddToTableImpl( const CTestParams& params, int
 		resultChannelCount += lookupDimensions[i].VectorSize;
 	}
 
-	CMemoryHandleVar<T> inputHandle( MathEngine(), inputData.size() );
+	CMemoryHandleStackVar<T> inputHandle( MathEngine(), inputData.size() );
 	MathEngine().DataExchangeTyped( inputHandle.GetHandle(), inputData.data(), inputData.size() );
 	CREATE_FILL_FLOAT_ARRAY( matrix, valuesInterval.Begin, valuesInterval.End, batchSize * resultChannelCount, random )