From 8ecfe4fbd5b88b23bde5fc9a628f9cfb5036eb67 Mon Sep 17 00:00:00 2001 From: Kirill Golikov Date: Thu, 12 Sep 2024 12:27:36 +0200 Subject: [PATCH] [NeoMLTest] Fix CFloatHandleStackVar (#1117) * [NeoMLTest] Fix gpu SpaceToDepthTest Signed-off-by: Kirill Golikov * [NeoMLTest] micro speed-up (CFloatHandleVar --> CFloatHandleStackVar) Signed-off-by: Kirill Golikov * [NeoMLTest] LossLayer Tests (CFloatHandleVar --> CFloatHandleStackVar) Signed-off-by: Kirill Golikov --------- Signed-off-by: Kirill Golikov --- NeoML/src/Dnn/Layers/LossLayer.cpp | 141 +++++++++--------- NeoML/test/src/SpaceToDepthTest.cpp | 4 +- .../GPU/Vulkan/VulkanMathEngineDnnConvs.cpp | 2 +- .../test/src/inference/BlobMergeByDimTest.cpp | 4 +- .../VectorMultichannelLookupAndCopyTest.cpp | 4 +- .../test/src/learn/TransposeMatrixTest.cpp | 6 +- ...torMultichannelLookupAndAddToTableTest.cpp | 2 +- 7 files changed, 85 insertions(+), 78 deletions(-) diff --git a/NeoML/src/Dnn/Layers/LossLayer.cpp b/NeoML/src/Dnn/Layers/LossLayer.cpp index 1c43c429a..6f256fc66 100644 --- a/NeoML/src/Dnn/Layers/LossLayer.cpp +++ b/NeoML/src/Dnn/Layers/LossLayer.cpp @@ -194,14 +194,15 @@ template float CLossLayer::testImpl(int batchSize, CConstFloatHandle data, int vectorSize, CTypedMemoryHandle label, int labelSize, CConstFloatHandle dataDelta) { - int totalSize = batchSize * vectorSize; + const int totalSize = batchSize * vectorSize; + CFloatHandleStackVar temp( MathEngine(), ( 2 * totalSize ) + ( 3 * batchSize ) + 1 ); - CFloatHandleVar lossValue(MathEngine(), batchSize); // the function value in data point - CFloatHandleVar lossGradient(MathEngine(), totalSize); // the function gradient in data point - CFloatHandleVar dataShift(MathEngine(), totalSize); // the data + dataDelta point - CFloatHandleVar lossValueShift(MathEngine(), batchSize); // the function value in data + dataDelta point - CFloatHandleVar lossValueShiftApp(MathEngine(), batchSize); // the function approximation in data + dataDelta point - CFloatHandleStackVar l2( MathEngine() ); // L2-measure (lossValueShiftApp - lossValueShift) + CFloatHandle lossValue = temp; // the function value in data point + CFloatHandle lossGradient = lossValue + batchSize; // the function gradient in data point + CFloatHandle dataShift = lossGradient + totalSize; // the data + dataDelta point + CFloatHandle lossValueShift = dataShift + totalSize; // the function value in data + dataDelta point + CFloatHandle lossValueShiftApp = lossValueShift + batchSize; // the function approximation in data + dataDelta point + CFloatHandle l2 = lossValueShiftApp + batchSize; // L2-measure (lossValueShiftApp - lossValueShift) CPtr oldWeights = weights; weights = CDnnBlob::CreateVector(MathEngine(), CT_Float, batchSize); @@ -209,24 +210,24 @@ float CLossLayer::testImpl(int batchSize, CConstFloatHandle data, int vectorSize // Estimate BatchCalculateLossAndGradient(batchSize, data, vectorSize, - label, labelSize, lossValue.GetHandle(), lossGradient.GetHandle()); + label, labelSize, lossValue, lossGradient); - MathEngine().VectorAdd(data, dataDelta, dataShift.GetHandle(), totalSize); - BatchCalculateLossAndGradient(batchSize, dataShift.GetHandle(), vectorSize, - label, labelSize, lossValueShift.GetHandle(), CFloatHandle()); + MathEngine().VectorAdd(data, dataDelta, dataShift, totalSize); + BatchCalculateLossAndGradient(batchSize, dataShift, vectorSize, + label, labelSize, lossValueShift, CFloatHandle{}); for(int i = 0; i < batchSize; ++i) { - MathEngine().VectorDotProduct(lossGradient.GetHandle() + i * vectorSize, - dataDelta + i * vectorSize, vectorSize, lossValueShiftApp.GetHandle() + i); + MathEngine().VectorDotProduct(lossGradient + i * vectorSize, + dataDelta + i * vectorSize, vectorSize, lossValueShiftApp + i); } - MathEngine().VectorAdd(lossValueShiftApp.GetHandle(), lossValue.GetHandle(), - lossValueShiftApp.GetHandle(), batchSize); - MathEngine().VectorSub(lossValueShiftApp.GetHandle(), lossValueShift.GetHandle(), - lossValueShiftApp.GetHandle(), batchSize); - MathEngine().VectorDotProduct(lossValueShiftApp.GetHandle(), lossValueShiftApp.GetHandle(), - batchSize, l2.GetHandle()); + MathEngine().VectorAdd(lossValueShiftApp, lossValue, + lossValueShiftApp, batchSize); + MathEngine().VectorSub(lossValueShiftApp, lossValueShift, + lossValueShiftApp, batchSize); + MathEngine().VectorDotProduct(lossValueShiftApp, lossValueShiftApp, + batchSize, l2); - float res = l2.GetHandle().GetValue() / batchSize; + float res = l2.GetValue() / batchSize; weights = oldWeights; // restore the old weight values @@ -248,67 +249,73 @@ float CLossLayer::Test(int batchSize, CConstFloatHandle data, int vectorSize, CC float CLossLayer::TestRandom(CRandom& random, int batchSize, float dataLabelMin, float dataLabelMax, float deltaAbsMax, int vectorSize) { - int totalSize = batchSize * vectorSize; + NeoAssert( batchSize > 0 && vectorSize > 0 ); + NeoAssert( dataLabelMin < dataLabelMax && deltaAbsMax > 0 ); - CArray temp; + const int totalSize = batchSize * vectorSize; + CFloatHandleStackVar temp( MathEngine(), totalSize * 3 ); - CFloatHandleVar data( MathEngine(), totalSize ); - temp.SetSize(totalSize); - for(int i = 0; i < totalSize; ++i) { - temp[i] = (float)random.Uniform(dataLabelMin, dataLabelMax); - } - MathEngine().DataExchangeTyped(data.GetHandle(), temp.GetPtr(), totalSize); + CFloatHandle data = temp; + CFloatHandle label = data + totalSize; + CFloatHandle delta = label + totalSize; + { + CArray buf; + buf.SetSize( totalSize ); - CFloatHandleVar label( MathEngine(), totalSize ); - temp.SetSize(totalSize); - for(int i = 0; i < totalSize; ++i) { - temp[i] = (float)random.Uniform(dataLabelMin, dataLabelMax); - } - MathEngine().DataExchangeTyped(label.GetHandle(), temp.GetPtr(), totalSize); + for( int i = 0; i < totalSize; ++i ) { + buf[i] = static_cast( random.Uniform(dataLabelMin, dataLabelMax) ); + } + MathEngine().DataExchangeTyped(data, buf.GetPtr(), totalSize); - NeoAssert(deltaAbsMax > 0); - CFloatHandleVar delta( MathEngine(), totalSize ); - temp.SetSize(totalSize); - for(int i = 0; i < totalSize; ++i) { - temp[i] = (float)random.Uniform(-deltaAbsMax, deltaAbsMax); - } - MathEngine().DataExchangeTyped(delta.GetHandle(), temp.GetPtr(), totalSize); + for( int i = 0; i < totalSize; ++i ) { + buf[i] = static_cast( random.Uniform(dataLabelMin, dataLabelMax) ); + } + MathEngine().DataExchangeTyped(label, buf.GetPtr(), totalSize); - return Test(batchSize, data.GetHandle(), vectorSize, label.GetHandle(), vectorSize, delta.GetHandle()); + for( int i = 0; i < totalSize; ++i ) { + buf[i] = static_cast( random.Uniform(-deltaAbsMax, deltaAbsMax) ); + } + MathEngine().DataExchangeTyped(delta, buf.GetPtr(), totalSize); + } + return Test(batchSize, data, vectorSize, label, vectorSize, delta); } -float CLossLayer::TestRandom(CRandom& random, int batchSize, float dataMin, float dataMax, int labelMax, float deltaAbsMax, - int vectorSize) +float CLossLayer::TestRandom( CRandom& random, int batchSize, float dataMin, float dataMax, int labelMax, float deltaAbsMax, + int vectorSize ) { - int totalSize = batchSize * vectorSize; + NeoAssert( batchSize > 0 && vectorSize > 0 ); + NeoAssert( dataMin < dataMax && labelMax > 0 && deltaAbsMax > 0 ); - CArray temp; + const int totalSize = batchSize * vectorSize; + CFloatHandleStackVar temp( MathEngine(), totalSize * 2 ); - CFloatHandleVar data( MathEngine(), totalSize ); - temp.SetSize(totalSize); - for(int i = 0; i < totalSize; ++i) { - temp[i] = (float)random.Uniform(dataMin, dataMax); - } - MathEngine().DataExchangeTyped(data.GetHandle(), temp.GetPtr(), totalSize); + CFloatHandle data = temp; + CFloatHandle delta = data + totalSize; + { + CArray buf; + buf.SetSize( totalSize ); - NeoAssert(labelMax > 0); - CPtr label = CDnnBlob::CreateVector(MathEngine(), CT_Int, batchSize); - CArray tempInt; - tempInt.SetSize(batchSize); - for(int i = 0; i < batchSize; ++i) { - tempInt[i] = random.UniformInt(0, labelMax - 1); - } - MathEngine().DataExchangeTyped(label->GetData(), tempInt.GetPtr(), batchSize); + for( int i = 0; i < totalSize; ++i ) { + buf[i] = static_cast( random.Uniform(dataMin, dataMax) ); + } + MathEngine().DataExchangeTyped(data, buf.GetPtr(), totalSize); - NeoAssert(deltaAbsMax > 0); - CFloatHandleVar delta( MathEngine(), totalSize ); - temp.SetSize(totalSize); - for(int i = 0; i < totalSize; ++i) { - temp[i] = (float)random.Uniform(-deltaAbsMax, deltaAbsMax); + for( int i = 0; i < totalSize; ++i ) { + buf[i] = static_cast( random.Uniform(-deltaAbsMax, deltaAbsMax) ); + } + MathEngine().DataExchangeTyped(delta, buf.GetPtr(), totalSize); } - MathEngine().DataExchangeTyped(delta.GetHandle(), temp.GetPtr(), totalSize); - return Test(batchSize, data.GetHandle(), vectorSize, label->GetData(), 1, delta.GetHandle()); + CIntHandleStackVar label(MathEngine(), batchSize); + { + CArray bufInt; + bufInt.SetSize( batchSize ); + for( int i = 0; i < batchSize; ++i ) { + bufInt[i] = random.UniformInt(0, labelMax - 1); + } + MathEngine().DataExchangeTyped(label, bufInt.GetPtr(), batchSize); + } + return Test(batchSize, data, vectorSize, label, 1, delta); } } // namespace NeoML diff --git a/NeoML/test/src/SpaceToDepthTest.cpp b/NeoML/test/src/SpaceToDepthTest.cpp index 3a2a840c1..86248466c 100644 --- a/NeoML/test/src/SpaceToDepthTest.cpp +++ b/NeoML/test/src/SpaceToDepthTest.cpp @@ -158,7 +158,7 @@ static void spaceToDepthTestInt( const CTestParams& params, int seed ) s2dnn.RunOnce(); CPtr result = sink->GetBlob(); - int* buffer = result->GetBuffer( 0, dataSize, /*exchange*/false ); + int* buffer = result->GetBuffer( 0, dataSize, /*exchange*/true ); for( int i = 0; i < dataSize; ++i ) { EXPECT_EQ( convertedData[i], buffer[i] ) << i; } @@ -175,7 +175,7 @@ static void spaceToDepthTestInt( const CTestParams& params, int seed ) d2snn.RunOnce(); CPtr result = sink->GetBlob(); - int* buffer = result->GetBuffer( 0, dataSize, /*exchange*/false ); + int* buffer = result->GetBuffer( 0, dataSize, /*exchange*/true ); for( int i = 0; i < dataSize; ++i ) { EXPECT_EQ( originalData[i], buffer[i] ) << i; } diff --git a/NeoMathEngine/src/GPU/Vulkan/VulkanMathEngineDnnConvs.cpp b/NeoMathEngine/src/GPU/Vulkan/VulkanMathEngineDnnConvs.cpp index 4cc79841e..2767dd4d8 100644 --- a/NeoMathEngine/src/GPU/Vulkan/VulkanMathEngineDnnConvs.cpp +++ b/NeoMathEngine/src/GPU/Vulkan/VulkanMathEngineDnnConvs.cpp @@ -139,7 +139,7 @@ void CVulkanMathEngine::BlobRleConvolution( const CRleConvolutionDesc& desc, con const CVulkanRleConvolutionDesc& rleDesc = static_cast( desc ); const CCommonConvolutionDesc* convDesc = static_cast( rleDesc.ConvDesc ); - CFloatHandleVar inputConverted( mathEngine(), convDesc->Source.BlobSize() ); + CFloatHandleStackVar inputConverted( mathEngine(), convDesc->Source.BlobSize() ); blobConvertFromRleCommon( rleDesc, sourceData, inputConverted ); BlobConvolution( *(rleDesc.ConvDesc), inputConverted, filterData, freeTermData, resultData ); } diff --git a/NeoMathEngine/test/src/inference/BlobMergeByDimTest.cpp b/NeoMathEngine/test/src/inference/BlobMergeByDimTest.cpp index f07ea7ac7..bcfd7a28a 100644 --- a/NeoMathEngine/test/src/inference/BlobMergeByDimTest.cpp +++ b/NeoMathEngine/test/src/inference/BlobMergeByDimTest.cpp @@ -1,4 +1,4 @@ -/* Copyright © 2017-2020 ABBYY Production LLC +/* Copyright © 2017-2024 ABBYY Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -93,7 +93,7 @@ static void blobMergeByDimTestImpl( const CTestParams& params, int seed ) expected.resize( resultDesc.BlobSize() ); blobMergeByDimNaive( mergeDim, sourceDescs.data(), sourceData, fromCount, resultDesc, expected ); - CFloatHandleVar resultHandle( MathEngine(), resultDesc.BlobSize() ); + CFloatHandleStackVar resultHandle( MathEngine(), resultDesc.BlobSize() ); MathEngine().BlobMergeByDim( static_cast( mergeDim ), sourceDescs.data(), fromHandles.data(), fromCount, resultDesc, resultHandle.GetHandle() ); for(size_t i = 0; i < fromHandleVars.size(); i++) { diff --git a/NeoMathEngine/test/src/inference/VectorMultichannelLookupAndCopyTest.cpp b/NeoMathEngine/test/src/inference/VectorMultichannelLookupAndCopyTest.cpp index 5f6549424..9bbf34f37 100644 --- a/NeoMathEngine/test/src/inference/VectorMultichannelLookupAndCopyTest.cpp +++ b/NeoMathEngine/test/src/inference/VectorMultichannelLookupAndCopyTest.cpp @@ -1,4 +1,4 @@ -/* Copyright © 2017-2020 ABBYY Production LLC +/* Copyright © 2017-2024 ABBYY Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -96,7 +96,7 @@ static void multichannelLookupAndCopyImpl( const CTestParams& params, int seed ) } } - CMemoryHandleVar inputHandle( MathEngine(), inputData.size() ); + CMemoryHandleStackVar inputHandle( MathEngine(), inputData.size() ); MathEngine().DataExchangeTyped( inputHandle.GetHandle(), inputData.data(), inputData.size() ); std::vector result; diff --git a/NeoMathEngine/test/src/learn/TransposeMatrixTest.cpp b/NeoMathEngine/test/src/learn/TransposeMatrixTest.cpp index a5c244418..27042c1db 100644 --- a/NeoMathEngine/test/src/learn/TransposeMatrixTest.cpp +++ b/NeoMathEngine/test/src/learn/TransposeMatrixTest.cpp @@ -1,4 +1,4 @@ -/* Copyright © 2017-2020 ABBYY Production LLC +/* Copyright © 2017-2024 ABBYY Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -55,9 +55,9 @@ static void transposeMatrixTestImpl( const CTestParams& params, int seed ) } } - CMemoryHandleVar from( MathEngine(), matrixSize ); + CMemoryHandleStackVar from( MathEngine(), matrixSize ); MathEngine().DataExchangeTyped( from, matrix.data(), matrixSize ); - CMemoryHandleVar result( MathEngine(), matrixSize ); + CMemoryHandleStackVar result( MathEngine(), matrixSize ); MathEngine().TransposeMatrix( batchSize, from, height, medium, width, channels, result, matrixSize ); MathEngine().DataExchangeTyped( matrix.data(), result, matrixSize ); diff --git a/NeoMathEngine/test/src/learn/VectorMultichannelLookupAndAddToTableTest.cpp b/NeoMathEngine/test/src/learn/VectorMultichannelLookupAndAddToTableTest.cpp index 312964d91..9135902b7 100644 --- a/NeoMathEngine/test/src/learn/VectorMultichannelLookupAndAddToTableTest.cpp +++ b/NeoMathEngine/test/src/learn/VectorMultichannelLookupAndAddToTableTest.cpp @@ -72,7 +72,7 @@ static void multichannelLookupAndAddToTableImpl( const CTestParams& params, int resultChannelCount += lookupDimensions[i].VectorSize; } - CMemoryHandleVar inputHandle( MathEngine(), inputData.size() ); + CMemoryHandleStackVar inputHandle( MathEngine(), inputData.size() ); MathEngine().DataExchangeTyped( inputHandle.GetHandle(), inputData.data(), inputData.size() ); CREATE_FILL_FLOAT_ARRAY( matrix, valuesInterval.Begin, valuesInterval.End, batchSize * resultChannelCount, random )