From 3fc7467569bba00607a3f261d783c8f8bd53640a Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Fri, 27 Sep 2024 00:13:44 -0700 Subject: [PATCH 01/35] debug for knn issue --- .../kdtree_knn_classification_model.h | 3 +- ...ication_predict_dense_default_batch_impl.i | 28 +++++++++++-------- cpp/daal/src/externals/service_math_mkl.h | 20 ++++++------- .../knn/backend/cpu/infer_kernel_kd_tree.cpp | 19 +++++++------ .../knn/backend/cpu/train_kernel_kd_tree.cpp | 14 +++++----- .../knn/knn_cls_kd_tree_dense_batch.cpp | 7 +++-- 6 files changed, 50 insertions(+), 41 deletions(-) mode change 100755 => 100644 cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i diff --git a/cpp/daal/include/algorithms/k_nearest_neighbors/kdtree_knn_classification_model.h b/cpp/daal/include/algorithms/k_nearest_neighbors/kdtree_knn_classification_model.h index 5c5b72ff489..d255f8c5ae0 100644 --- a/cpp/daal/include/algorithms/k_nearest_neighbors/kdtree_knn_classification_model.h +++ b/cpp/daal/include/algorithms/k_nearest_neighbors/kdtree_knn_classification_model.h @@ -23,7 +23,7 @@ #ifndef __KDTREE_KNN_CLASSIFICATION_MODEL_H__ #define __KDTREE_KNN_CLASSIFICATION_MODEL_H__ - +#include #include "algorithms/classifier/classifier_model.h" #include "data_management/data/aos_numeric_table.h" #include "data_management/data/soa_numeric_table.h" @@ -111,6 +111,7 @@ struct DAAL_EXPORT Parameter : public daal::algorithms::classifier::Parameter resultsToCompute(resToCompute), voteWeights(vote) { + std::cout << "here param init" << std::endl; this->resultsToEvaluate = resToEvaluate; } diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i old mode 100755 new mode 100644 index 82cb20faaed..8281f15f1f4 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i @@ -122,18 +122,18 @@ Status KNNClassificationPredictKernel::compu const daal::algorithms::Parameter * par) { Status status; - + std::cout << "here debug 1" << std::endl; typedef GlobalNeighbors Neighbors; typedef Heap MaxHeap; typedef kdtree_knn_classification::internal::Stack, cpu> SearchStack; typedef daal::services::internal::MaxVal MaxVal; typedef daal::internal::MathInst Math; - + std::cout << "here debug 2" << std::endl; size_t k; size_t nClasses; VoteWeights voteWeights = voteUniform; DAAL_UINT64 resultsToEvaluate = classifier::computeClassLabels; - + std::cout << "here debug 3" << std::endl; const auto par3 = dynamic_cast(par); if (par3) { @@ -142,7 +142,7 @@ Status KNNClassificationPredictKernel::compu resultsToEvaluate = par3->resultsToEvaluate; nClasses = par3->nClasses; } - + std::cout << "here debug 4" << std::endl; if (par3 == NULL) return Status(ErrorNullParameterNotSupported); const Model * const model = static_cast(m); @@ -154,7 +154,7 @@ Status KNNClassificationPredictKernel::compu { labels = model->impl()->getLabels().get(); } - + std::cout << "here debug 5" << std::endl; const NumericTable * const modelIndices = model->impl()->getIndices().get(); size_t iSize = 1; @@ -163,11 +163,15 @@ Status KNNClassificationPredictKernel::compu iSize *= 2; } const size_t heapSize = (iSize / 16 + 1) * 16; - - const size_t xRowCount = x->getNumberOfRows(); - const algorithmFpType base = 2.0; + std::cout << "here debug 6" << std::endl; + const size_t xRowCount = x->getNumberOfRows(); + const algorithmFpType base = 2.0; + std::cout << "here debug math 1" << std::endl; const size_t expectedMaxDepth = (Math::sLog(xRowCount) / Math::sLog(base) + 1) * __KDTREE_DEPTH_MULTIPLICATION_FACTOR; - const size_t stackSize = Math::sPowx(base, Math::sCeil(Math::sLog(expectedMaxDepth) / Math::sLog(base))); + std::cout << "here debug math 2" << std::endl; + const size_t stackSize = Math::sPowx(base, Math::sCeil(Math::sLog(expectedMaxDepth) / Math::sLog(base))); + std::cout << "here debug math 3" << std::endl; + std::cout << "here debug 7" << std::endl; struct Local { MaxHeap heap; @@ -478,9 +482,9 @@ services::Status KNNClassificationPredictKernel static void vPowx(SizeType n, const float * in, float in1, float * out) { - __DAAL_MKLFN_CALL_MATH(vmsPowx, ((int)n, in, in1, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmsPowx, ((int)n, in, in1, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); } static void vCeil(SizeType n, const float * in, float * out) { - __DAAL_MKLFN_CALL_MATH(vmsCeil, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmsCeil, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); } static void vErfInv(SizeType n, const float * in, float * out) { - __DAAL_MKLFN_CALL_MATH(vmsErfInv, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmsErfInv, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); } static void vErf(SizeType n, const float * in, float * out) { - __DAAL_MKLFN_CALL_MATH(vmsErf, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmsErf, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); } static void vExp(SizeType n, const float * in, float * out) { - __DAAL_MKLFN_CALL_MATH(vmsExp, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmsExp, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); } static float vExpThreshold() { return -75.0f; } static void vTanh(SizeType n, const float * in, float * out) { - __DAAL_MKLFN_CALL_MATH(vmsTanh, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmsTanh, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); } static void vSqrt(SizeType n, const float * in, float * out) { - __DAAL_MKLFN_CALL_MATH(vmsSqrt, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmsSqrt, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); } static void vLog(SizeType n, const float * in, float * out) { - __DAAL_MKLFN_CALL_MATH(vmsLn, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmsLn, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); } static void vLog1p(SizeType n, const float * in, float * out) { - __DAAL_MKLFN_CALL_MATH(vmsLog1p, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmsLog1p, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); } static void vCdfNormInv(SizeType n, const float * in, float * out) { - __DAAL_MKLFN_CALL_MATH(vmsCdfNormInv, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmsCdfNormInv, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); } }; diff --git a/cpp/oneapi/dal/algo/knn/backend/cpu/infer_kernel_kd_tree.cpp b/cpp/oneapi/dal/algo/knn/backend/cpu/infer_kernel_kd_tree.cpp index 1c2b413ef52..60050dca27e 100644 --- a/cpp/oneapi/dal/algo/knn/backend/cpu/infer_kernel_kd_tree.cpp +++ b/cpp/oneapi/dal/algo/knn/backend/cpu/infer_kernel_kd_tree.cpp @@ -15,7 +15,7 @@ *******************************************************************************/ #include - +#include #include "oneapi/dal/algo/knn/backend/model_conversion.hpp" #include "oneapi/dal/algo/knn/backend/cpu/infer_kernel.hpp" #include "oneapi/dal/algo/knn/backend/model_impl.hpp" @@ -49,7 +49,7 @@ static infer_result call_daal_kernel(const context_cpu& ctx, throw unimplemented( dal::detail::error_messages::knn_regression_task_is_not_implemented_for_cpu()); } - + std::cout << "step 1 infer" << std::endl; const std::int64_t row_count = data.get_row_count(); const std::int64_t neighbor_count = desc.get_neighbor_count(); @@ -63,15 +63,16 @@ static infer_result call_daal_kernel(const context_cpu& ctx, const std::int64_t dummy_seed = 777; const auto data_use_in_model = daal_knn::doNotUse; + std::cout << "step 2 infer" << std::endl; daal_knn::Parameter daal_parameter( dal::detail::integral_cast(desc.get_class_count()), dal::detail::integral_cast(desc.get_neighbor_count()), dal::detail::integral_cast(dummy_seed), data_use_in_model); - + std::cout << "step 3 infer" << std::endl; const auto daal_voting_mode = convert_to_daal_kdtree_voting_mode(desc.get_voting_mode()); daal_parameter.voteWeights = daal_voting_mode; - + std::cout << "step 4 infer" << std::endl; if (desc.get_result_options().test(result_options::responses)) { arr_responses.reset(1 * row_count); daal_responses = interop::convert_to_daal_homogen_table(arr_responses, row_count, 1); @@ -79,7 +80,7 @@ static infer_result call_daal_kernel(const context_cpu& ctx, else { daal_parameter.resultsToEvaluate = daal_classifier::none; } - + std::cout << "step 5 infer" << std::endl; if (desc.get_result_options().test(result_options::indices)) { dal::detail::check_mul_overflow(neighbor_count, row_count); daal_parameter.resultsToCompute |= daal_knn::computeIndicesOfNeighbors; @@ -87,7 +88,7 @@ static infer_result call_daal_kernel(const context_cpu& ctx, daal_indices = interop::convert_to_daal_homogen_table(arr_indices, row_count, neighbor_count); } - + std::cout << "step 6 infer" << std::endl; if (desc.get_result_options().test(result_options::distances)) { dal::detail::check_mul_overflow(neighbor_count, row_count); daal_parameter.resultsToCompute |= daal_knn::computeDistances; @@ -95,11 +96,11 @@ static infer_result call_daal_kernel(const context_cpu& ctx, daal_distance = interop::convert_to_daal_homogen_table(arr_distances, row_count, neighbor_count); } - + std::cout << "step 7 infer" << std::endl; const auto daal_data = interop::convert_to_daal_table(data); - + std::cout << "step 8 infer" << std::endl; const auto model_ptr = dynamic_cast_to_knn_model>(m); - + std::cout << "step 9 infer" << std::endl; interop::status_to_exception(interop::call_daal_kernel( ctx, daal_data.get(), diff --git a/cpp/oneapi/dal/algo/knn/backend/cpu/train_kernel_kd_tree.cpp b/cpp/oneapi/dal/algo/knn/backend/cpu/train_kernel_kd_tree.cpp index 622740b1a01..72d61cff84f 100644 --- a/cpp/oneapi/dal/algo/knn/backend/cpu/train_kernel_kd_tree.cpp +++ b/cpp/oneapi/dal/algo/knn/backend/cpu/train_kernel_kd_tree.cpp @@ -16,7 +16,7 @@ #include #include - +#include #include "oneapi/dal/algo/knn/backend/model_conversion.hpp" #include "oneapi/dal/algo/knn/backend/cpu/train_kernel.hpp" #include "oneapi/dal/algo/knn/backend/model_impl.hpp" @@ -50,7 +50,7 @@ static train_result call_daal_kernel(const context_cpu& ctx, throw unimplemented( dal::detail::error_messages::knn_regression_task_is_not_implemented_for_cpu()); } - + std::cout << "step 1" << std::endl; using model_t = model; using daal_model_interop_t = model_interop; const std::int64_t column_count = data.get_column_count(); @@ -63,30 +63,30 @@ static train_result call_daal_kernel(const context_cpu& ctx, dal::detail::integral_cast(desc.get_neighbor_count()), dal::detail::integral_cast(dummy_seed), data_use_in_model); - + std::cout << "step 2" << std::endl; Status status; const auto model_ptr = daal_knn::Model::create(column_count, &status); interop::status_to_exception(status); - + std::cout << "step 3" << std::endl; auto knn_model = static_cast(model_ptr.get()); // Data or responses should not be copied, copy will be happened when // the tables are passed to old ifaces const bool copy_data_responses = data_use_in_model == daal_knn::doNotUse; knn_model->impl()->setData(daal_data, copy_data_responses); - + std::cout << "step 4" << std::endl; auto daal_responses = daal::data_management::NumericTablePtr(); if (desc.get_result_options().test(result_options::responses)) { daal_responses = interop::convert_to_daal_table(responses); knn_model->impl()->setLabels(daal_responses, copy_data_responses); } - + std::cout << "step 5" << std::endl; interop::status_to_exception(interop::call_daal_kernel( ctx, knn_model->impl()->getData().get(), knn_model->impl()->getLabels().get(), knn_model, *daal_parameter.engine.get())); - + std::cout << "step 6" << std::endl; const auto model_impl = std::make_shared>(new daal_model_interop_t(model_ptr)); return train_result().set_model(dal::detail::make_private(model_impl)); diff --git a/examples/oneapi/cpp/source/knn/knn_cls_kd_tree_dense_batch.cpp b/examples/oneapi/cpp/source/knn/knn_cls_kd_tree_dense_batch.cpp index f594a7a5bab..2d57fa432a6 100644 --- a/examples/oneapi/cpp/source/knn/knn_cls_kd_tree_dense_batch.cpp +++ b/examples/oneapi/cpp/source/knn/knn_cls_kd_tree_dense_batch.cpp @@ -36,14 +36,15 @@ int main(int argc, char const *argv[]) { const auto knn_desc = dal::knn::descriptor(5, 1); - + std::cout << "before train" << std::endl; const auto train_result = dal::train(knn_desc, x_train, y_train); - + std::cout << "after train" << std::endl; const auto x_test = dal::read(dal::csv::data_source{ test_data_file_name }); const auto y_true = dal::read(dal::csv::data_source{ test_response_file_name }); + std::cout << "before infer" << std::endl; const auto test_result = dal::infer(knn_desc, x_test, train_result.get_model()); - + std::cout << "after infer" << std::endl; std::cout << "Test results:\n" << test_result.get_responses() << std::endl; std::cout << "True responses:\n" << y_true << std::endl; From 5c842ee617527049efca2885b99c7bd570b4dbb3 Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Mon, 30 Sep 2024 00:22:25 -0700 Subject: [PATCH 02/35] more debug prints --- ...ication_predict_dense_default_batch_impl.i | 28 ++++++++++++------- cpp/daal/src/externals/service_math_mkl.h | 20 ++++++------- 2 files changed, 28 insertions(+), 20 deletions(-) diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i index 8281f15f1f4..7f726ee43e4 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i @@ -177,18 +177,21 @@ Status KNNClassificationPredictKernel::compu MaxHeap heap; SearchStack stack; }; - daal::tls localTLS([&]() -> Local * { + std::cout << "here debug 8" << std::endl; + daal::tls localTLS([=, &status]() -> Local * { Local * const ptr = service_scalable_calloc(1); if (ptr) { if (!ptr->heap.init(heapSize)) { + std::cout << "error 1" << std::endl; status.add(services::ErrorMemoryAllocationFailed); service_scalable_free(ptr); return nullptr; } if (!ptr->stack.init(stackSize)) { + std::cout << "error 2" << std::endl; status.add(services::ErrorMemoryAllocationFailed); ptr->heap.clear(); service_scalable_free(ptr); @@ -197,22 +200,26 @@ Status KNNClassificationPredictKernel::compu } else { + std::cout << "error 3" << std::endl; status.add(services::ErrorMemoryAllocationFailed); } return ptr; }); DAAL_CHECK_STATUS_OK((status.ok()), status); - - const auto maxThreads = threader_get_threads_number(); + std::cout << "here debug 9" << std::endl; + const auto maxThreads = threader_get_threads_number(); + auto nThreads = (maxThreads < 1) ? 1 : maxThreads; + std::cout << "maxthreads =" << maxThreads << std::endl; + std::cout << "nthreads =" << nThreads << std::endl; const size_t xColumnCount = x->getNumberOfColumns(); - const auto rowsPerBlock = (xRowCount + maxThreads - 1) / maxThreads; - const auto blockCount = (xRowCount + rowsPerBlock - 1) / rowsPerBlock; + const size_t rowsPerBlock = (xRowCount + nThreads - 1) / nThreads; + const size_t blockCount = (xRowCount + rowsPerBlock - 1) / rowsPerBlock; SafeStatus safeStat; - + std::cout << "here debug 11" << std::endl; services::internal::TArrayScalable soa_arrays; bool isHomogenSOA = checkHomogenSOA(data, soa_arrays); - + std::cout << "here debug 12" << std::endl; daal::threader_for(blockCount, blockCount, [&](int iBlock) { Local * const local = localTLS.local(); if (local) @@ -283,10 +290,10 @@ Status KNNClassificationPredictKernel::compu const_cast(*x).releaseBlockOfRows(xBD); } }); - + std::cout << "here debug 15" << std::endl; DAAL_CHECK_SAFE_STATUS() - - localTLS.reduce([&](Local * ptr) -> void { + std::cout << "here debug 16" << std::endl; + localTLS.reduce([=](Local * ptr) -> void { if (ptr) { ptr->stack.clear(); @@ -294,6 +301,7 @@ Status KNNClassificationPredictKernel::compu service_scalable_free(ptr); } }); + std::cout << "here debug 17" << std::endl; return status; } diff --git a/cpp/daal/src/externals/service_math_mkl.h b/cpp/daal/src/externals/service_math_mkl.h index 22bc906d50e..c5b5bb3694e 100644 --- a/cpp/daal/src/externals/service_math_mkl.h +++ b/cpp/daal/src/externals/service_math_mkl.h @@ -103,54 +103,54 @@ struct MklMath static void vPowx(SizeType n, const double * in, double in1, double * out) { - __DAAL_MKLFN_CALL_MATH(vmdPowx, ((int)n, in, in1, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmdPowx, ((int)n, in, in1, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); } static void vCeil(SizeType n, const double * in, double * out) { - __DAAL_MKLFN_CALL_MATH(vmdCeil, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmdCeil, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); } static void vErfInv(SizeType n, const double * in, double * out) { - __DAAL_MKLFN_CALL_MATH(vmdErfInv, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmdErfInv, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); } static void vErf(SizeType n, const double * in, double * out) { - __DAAL_MKLFN_CALL_MATH(vmdErf, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmdErf, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); } static void vExp(SizeType n, const double * in, double * out) { - __DAAL_MKLFN_CALL_MATH(vmdExp, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmdExp, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); } static double vExpThreshold() { return -650.0; } static void vTanh(SizeType n, const double * in, double * out) { - __DAAL_MKLFN_CALL_MATH(vmdTanh, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmdTanh, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); } static void vSqrt(SizeType n, const double * in, double * out) { - __DAAL_MKLFN_CALL_MATH(vmdSqrt, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmdSqrt, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); } static void vLog(SizeType n, const double * in, double * out) { - __DAAL_MKLFN_CALL_MATH(vmdLn, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmdLn, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); } static void vLog1p(SizeType n, const double * in, double * out) { - __DAAL_MKLFN_CALL_MATH(vmdLog1p, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmdLog1p, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); } static void vCdfNormInv(SizeType n, const double * in, double * out) { - __DAAL_MKLFN_CALL_MATH(vmdCdfNormInv, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmdCdfNormInv, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); } }; From 3f68e71d76871e932877681bfab7435d46cd1c08 Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Mon, 30 Sep 2024 03:05:52 -0700 Subject: [PATCH 03/35] fixes --- ...ication_predict_dense_default_batch_impl.i | 38 ++++++++++--------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i index 7f726ee43e4..106a38dd8c7 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i @@ -224,8 +224,6 @@ Status KNNClassificationPredictKernel::compu Local * const local = localTLS.local(); if (local) { - services::Status s; - const size_t first = iBlock * rowsPerBlock; const size_t last = min(static_cast(first + rowsPerBlock), xRowCount); @@ -238,17 +236,17 @@ Status KNNClassificationPredictKernel::compu data_management::BlockDescriptor distancesBD; if (indices) { - s = indices->getBlockOfRows(first, last - first, writeOnly, indicesBD); - DAAL_CHECK_STATUS_THR(s); + std::cout << "in parallel for 1" << std::endl; + DAAL_CHECK_STATUS_THR(indices->getBlockOfRows(first, last - first, writeOnly, indicesBD)); } if (distances) { - s = distances->getBlockOfRows(first, last - first, writeOnly, distancesBD); - DAAL_CHECK_STATUS_THR(s); + std::cout << "in parallel for 2" << std::endl; + DAAL_CHECK_STATUS_THR(distances->getBlockOfRows(first, last - first, writeOnly, distancesBD)); } - if (labels) { + std::cout << "in parallel for 3" << std::endl; const size_t yColumnCount = y->getNumberOfColumns(); data_management::BlockDescriptor yBD; y->getBlockOfRows(first, last - first, writeOnly, yBD); @@ -258,34 +256,37 @@ Status KNNClassificationPredictKernel::compu { findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, kdTreeTable, rootTreeNodeIndex, data, isHomogenSOA, soa_arrays); - s = predict(&(dy[i * yColumnCount]), local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses); - DAAL_CHECK_STATUS_THR(s) + DAAL_CHECK_STATUS_THR( + predict(&(dy[i * yColumnCount]), local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses)); } - s |= y->releaseBlockOfRows(yBD); - DAAL_CHECK_STATUS_THR(s); + DAAL_CHECK_STATUS_THR(y->releaseBlockOfRows(yBD)); } else { + std::cout << "in parallel for 4" << std::endl; for (size_t i = 0; i < last - first; ++i) { findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, kdTreeTable, rootTreeNodeIndex, data, isHomogenSOA, soa_arrays); - s = predict(nullptr, local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses); - DAAL_CHECK_STATUS_THR(s) + DAAL_CHECK_STATUS_THR(predict(nullptr, local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses)); } + std::cout << "in parallel for 5" << std::endl; } if (indices) { - s |= indices->releaseBlockOfRows(indicesBD); + std::cout << "in parallel for 6" << std::endl; + DAAL_CHECK_STATUS_THR(indices->releaseBlockOfRows(indicesBD)); + std::cout << "in parallel for 6.1" << std::endl; } - DAAL_CHECK_STATUS_THR(s); + if (distances) { - s |= distances->releaseBlockOfRows(distancesBD); + std::cout << "in parallel for 7" << std::endl; + DAAL_CHECK_STATUS_THR(distances->releaseBlockOfRows(distancesBD)); + std::cout << "in parallel for 7.1" << std::endl; } - DAAL_CHECK_STATUS_THR(s); const_cast(*x).releaseBlockOfRows(xBD); } @@ -302,7 +303,8 @@ Status KNNClassificationPredictKernel::compu } }); std::cout << "here debug 17" << std::endl; - return status; + + return safeStat.detach(); } template From 006e579fdcdf38ff18405f3bfc5f30acb0103645 Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Mon, 30 Sep 2024 03:25:42 -0700 Subject: [PATCH 04/35] fixes for knn --- ...tree_knn_classification_predict_dense_default_batch_impl.i | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i index 106a38dd8c7..5624403d923 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i @@ -221,6 +221,7 @@ Status KNNClassificationPredictKernel::compu bool isHomogenSOA = checkHomogenSOA(data, soa_arrays); std::cout << "here debug 12" << std::endl; daal::threader_for(blockCount, blockCount, [&](int iBlock) { + if (!safeStat.ok()) return; Local * const local = localTLS.local(); if (local) { @@ -291,6 +292,7 @@ Status KNNClassificationPredictKernel::compu const_cast(*x).releaseBlockOfRows(xBD); } }); + status = safeStat.detach(); std::cout << "here debug 15" << std::endl; DAAL_CHECK_SAFE_STATUS() std::cout << "here debug 16" << std::endl; @@ -304,7 +306,7 @@ Status KNNClassificationPredictKernel::compu }); std::cout << "here debug 17" << std::endl; - return safeStat.detach(); + return status; } template From d8526cef49e4bef7718fb8e4542c85763fa75165 Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Mon, 30 Sep 2024 06:13:21 -0700 Subject: [PATCH 05/35] fixes --- .../kdtree_knn_classification_model.h | 3 +- ...ication_predict_dense_default_batch_impl.i | 25 +++++++----- cpp/daal/src/externals/service_math_mkl.h | 40 +++++++++---------- .../knn/backend/cpu/infer_kernel_kd_tree.cpp | 16 +++----- .../knn/backend/cpu/train_kernel_kd_tree.cpp | 14 +++---- .../knn/knn_cls_kd_tree_dense_batch.cpp | 7 ++-- 6 files changed, 53 insertions(+), 52 deletions(-) diff --git a/cpp/daal/include/algorithms/k_nearest_neighbors/kdtree_knn_classification_model.h b/cpp/daal/include/algorithms/k_nearest_neighbors/kdtree_knn_classification_model.h index d255f8c5ae0..5c5b72ff489 100644 --- a/cpp/daal/include/algorithms/k_nearest_neighbors/kdtree_knn_classification_model.h +++ b/cpp/daal/include/algorithms/k_nearest_neighbors/kdtree_knn_classification_model.h @@ -23,7 +23,7 @@ #ifndef __KDTREE_KNN_CLASSIFICATION_MODEL_H__ #define __KDTREE_KNN_CLASSIFICATION_MODEL_H__ -#include + #include "algorithms/classifier/classifier_model.h" #include "data_management/data/aos_numeric_table.h" #include "data_management/data/soa_numeric_table.h" @@ -111,7 +111,6 @@ struct DAAL_EXPORT Parameter : public daal::algorithms::classifier::Parameter resultsToCompute(resToCompute), voteWeights(vote) { - std::cout << "here param init" << std::endl; this->resultsToEvaluate = resToEvaluate; } diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i index 5624403d923..646d028e3a7 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i @@ -39,7 +39,7 @@ #include "src/algorithms/k_nearest_neighbors/kdtree_knn_classification_model_impl.h" #include "src/algorithms/k_nearest_neighbors/kdtree_knn_impl.i" #include "src/algorithms/k_nearest_neighbors/knn_heap.h" - +#include namespace daal { namespace algorithms @@ -122,6 +122,7 @@ Status KNNClassificationPredictKernel::compu const daal::algorithms::Parameter * par) { Status status; + SafeStatus safeStat; std::cout << "here debug 1" << std::endl; typedef GlobalNeighbors Neighbors; typedef Heap MaxHeap; @@ -178,21 +179,21 @@ Status KNNClassificationPredictKernel::compu SearchStack stack; }; std::cout << "here debug 8" << std::endl; - daal::tls localTLS([=, &status]() -> Local * { + daal::tls localTLS([&]() -> Local * { Local * const ptr = service_scalable_calloc(1); if (ptr) { if (!ptr->heap.init(heapSize)) { std::cout << "error 1" << std::endl; - status.add(services::ErrorMemoryAllocationFailed); + safeStat.add(services::ErrorMemoryAllocationFailed); service_scalable_free(ptr); return nullptr; } if (!ptr->stack.init(stackSize)) { std::cout << "error 2" << std::endl; - status.add(services::ErrorMemoryAllocationFailed); + safeStat.add(services::ErrorMemoryAllocationFailed); ptr->heap.clear(); service_scalable_free(ptr); return nullptr; @@ -201,7 +202,7 @@ Status KNNClassificationPredictKernel::compu else { std::cout << "error 3" << std::endl; - status.add(services::ErrorMemoryAllocationFailed); + safeStat.add(services::ErrorMemoryAllocationFailed); } return ptr; }); @@ -213,9 +214,9 @@ Status KNNClassificationPredictKernel::compu std::cout << "maxthreads =" << maxThreads << std::endl; std::cout << "nthreads =" << nThreads << std::endl; const size_t xColumnCount = x->getNumberOfColumns(); - const size_t rowsPerBlock = (xRowCount + nThreads - 1) / nThreads; + const size_t rowsPerBlock = 128; const size_t blockCount = (xRowCount + rowsPerBlock - 1) / rowsPerBlock; - SafeStatus safeStat; + std::cout << "here debug 11" << std::endl; services::internal::TArrayScalable soa_arrays; bool isHomogenSOA = checkHomogenSOA(data, soa_arrays); @@ -223,6 +224,7 @@ Status KNNClassificationPredictKernel::compu daal::threader_for(blockCount, blockCount, [&](int iBlock) { if (!safeStat.ok()) return; Local * const local = localTLS.local(); + DAAL_CHECK_MALLOC_THR(local); if (local) { const size_t first = iBlock * rowsPerBlock; @@ -248,11 +250,15 @@ Status KNNClassificationPredictKernel::compu if (labels) { std::cout << "in parallel for 3" << std::endl; + std::cout << "in parallel for lables 1" << std::endl; const size_t yColumnCount = y->getNumberOfColumns(); + std::cout << "in parallel for lables 2" << std::endl; data_management::BlockDescriptor yBD; + std::cout << "in parallel for lables 3" << std::endl; y->getBlockOfRows(first, last - first, writeOnly, yBD); + std::cout << "in parallel for lables 4" << std::endl; auto * const dy = yBD.getBlockPtr(); - + std::cout << "in parallel for lables 5" << std::endl; for (size_t i = 0; i < last - first; ++i) { findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, kdTreeTable, rootTreeNodeIndex, data, @@ -260,8 +266,9 @@ Status KNNClassificationPredictKernel::compu DAAL_CHECK_STATUS_THR( predict(&(dy[i * yColumnCount]), local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses)); } - + std::cout << "in parallel for lables 6" << std::endl; DAAL_CHECK_STATUS_THR(y->releaseBlockOfRows(yBD)); + std::cout << "in parallel for lables 7" << std::endl; } else { diff --git a/cpp/daal/src/externals/service_math_mkl.h b/cpp/daal/src/externals/service_math_mkl.h index c5b5bb3694e..fa5ce46a5ea 100644 --- a/cpp/daal/src/externals/service_math_mkl.h +++ b/cpp/daal/src/externals/service_math_mkl.h @@ -103,54 +103,54 @@ struct MklMath static void vPowx(SizeType n, const double * in, double in1, double * out) { - __DAAL_MKLFN_CALL_MATH(vmdPowx, ((int)n, in, in1, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); + __DAAL_MKLFN_CALL_MATH(vmdPowx, ((int)n, in, in1, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } static void vCeil(SizeType n, const double * in, double * out) { - __DAAL_MKLFN_CALL_MATH(vmdCeil, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); + __DAAL_MKLFN_CALL_MATH(vmdCeil, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } static void vErfInv(SizeType n, const double * in, double * out) { - __DAAL_MKLFN_CALL_MATH(vmdErfInv, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); + __DAAL_MKLFN_CALL_MATH(vmdErfInv, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } static void vErf(SizeType n, const double * in, double * out) { - __DAAL_MKLFN_CALL_MATH(vmdErf, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); + __DAAL_MKLFN_CALL_MATH(vmdErf, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } static void vExp(SizeType n, const double * in, double * out) { - __DAAL_MKLFN_CALL_MATH(vmdExp, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); + __DAAL_MKLFN_CALL_MATH(vmdExp, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } static double vExpThreshold() { return -650.0; } static void vTanh(SizeType n, const double * in, double * out) { - __DAAL_MKLFN_CALL_MATH(vmdTanh, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); + __DAAL_MKLFN_CALL_MATH(vmdTanh, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } static void vSqrt(SizeType n, const double * in, double * out) { - __DAAL_MKLFN_CALL_MATH(vmdSqrt, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); + __DAAL_MKLFN_CALL_MATH(vmdSqrt, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } static void vLog(SizeType n, const double * in, double * out) { - __DAAL_MKLFN_CALL_MATH(vmdLn, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); + __DAAL_MKLFN_CALL_MATH(vmdLn, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } static void vLog1p(SizeType n, const double * in, double * out) { - __DAAL_MKLFN_CALL_MATH(vmdLog1p, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); + __DAAL_MKLFN_CALL_MATH(vmdLog1p, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } static void vCdfNormInv(SizeType n, const double * in, double * out) { - __DAAL_MKLFN_CALL_MATH(vmdCdfNormInv, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); + __DAAL_MKLFN_CALL_MATH(vmdCdfNormInv, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } }; @@ -215,54 +215,54 @@ struct MklMath static void vPowx(SizeType n, const float * in, float in1, float * out) { - __DAAL_MKLFN_CALL_MATH(vmsPowx, ((int)n, in, in1, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); + __DAAL_MKLFN_CALL_MATH(vmsPowx, ((int)n, in, in1, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } static void vCeil(SizeType n, const float * in, float * out) { - __DAAL_MKLFN_CALL_MATH(vmsCeil, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); + __DAAL_MKLFN_CALL_MATH(vmsCeil, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } static void vErfInv(SizeType n, const float * in, float * out) { - __DAAL_MKLFN_CALL_MATH(vmsErfInv, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); + __DAAL_MKLFN_CALL_MATH(vmsErfInv, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } static void vErf(SizeType n, const float * in, float * out) { - __DAAL_MKLFN_CALL_MATH(vmsErf, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); + __DAAL_MKLFN_CALL_MATH(vmsErf, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } static void vExp(SizeType n, const float * in, float * out) { - __DAAL_MKLFN_CALL_MATH(vmsExp, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); + __DAAL_MKLFN_CALL_MATH(vmsExp, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } static float vExpThreshold() { return -75.0f; } static void vTanh(SizeType n, const float * in, float * out) { - __DAAL_MKLFN_CALL_MATH(vmsTanh, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); + __DAAL_MKLFN_CALL_MATH(vmsTanh, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } static void vSqrt(SizeType n, const float * in, float * out) { - __DAAL_MKLFN_CALL_MATH(vmsSqrt, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); + __DAAL_MKLFN_CALL_MATH(vmsSqrt, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } static void vLog(SizeType n, const float * in, float * out) { - __DAAL_MKLFN_CALL_MATH(vmsLn, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); + __DAAL_MKLFN_CALL_MATH(vmsLn, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } static void vLog1p(SizeType n, const float * in, float * out) { - __DAAL_MKLFN_CALL_MATH(vmsLog1p, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); + __DAAL_MKLFN_CALL_MATH(vmsLog1p, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } static void vCdfNormInv(SizeType n, const float * in, float * out) { - __DAAL_MKLFN_CALL_MATH(vmsCdfNormInv, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_STDERR))); + __DAAL_MKLFN_CALL_MATH(vmsCdfNormInv, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } }; diff --git a/cpp/oneapi/dal/algo/knn/backend/cpu/infer_kernel_kd_tree.cpp b/cpp/oneapi/dal/algo/knn/backend/cpu/infer_kernel_kd_tree.cpp index 60050dca27e..c8f9c5b321f 100644 --- a/cpp/oneapi/dal/algo/knn/backend/cpu/infer_kernel_kd_tree.cpp +++ b/cpp/oneapi/dal/algo/knn/backend/cpu/infer_kernel_kd_tree.cpp @@ -15,7 +15,7 @@ *******************************************************************************/ #include -#include + #include "oneapi/dal/algo/knn/backend/model_conversion.hpp" #include "oneapi/dal/algo/knn/backend/cpu/infer_kernel.hpp" #include "oneapi/dal/algo/knn/backend/model_impl.hpp" @@ -49,7 +49,7 @@ static infer_result call_daal_kernel(const context_cpu& ctx, throw unimplemented( dal::detail::error_messages::knn_regression_task_is_not_implemented_for_cpu()); } - std::cout << "step 1 infer" << std::endl; + const std::int64_t row_count = data.get_row_count(); const std::int64_t neighbor_count = desc.get_neighbor_count(); @@ -63,16 +63,14 @@ static infer_result call_daal_kernel(const context_cpu& ctx, const std::int64_t dummy_seed = 777; const auto data_use_in_model = daal_knn::doNotUse; - std::cout << "step 2 infer" << std::endl; + daal_knn::Parameter daal_parameter( dal::detail::integral_cast(desc.get_class_count()), dal::detail::integral_cast(desc.get_neighbor_count()), dal::detail::integral_cast(dummy_seed), data_use_in_model); - std::cout << "step 3 infer" << std::endl; const auto daal_voting_mode = convert_to_daal_kdtree_voting_mode(desc.get_voting_mode()); daal_parameter.voteWeights = daal_voting_mode; - std::cout << "step 4 infer" << std::endl; if (desc.get_result_options().test(result_options::responses)) { arr_responses.reset(1 * row_count); daal_responses = interop::convert_to_daal_homogen_table(arr_responses, row_count, 1); @@ -80,7 +78,6 @@ static infer_result call_daal_kernel(const context_cpu& ctx, else { daal_parameter.resultsToEvaluate = daal_classifier::none; } - std::cout << "step 5 infer" << std::endl; if (desc.get_result_options().test(result_options::indices)) { dal::detail::check_mul_overflow(neighbor_count, row_count); daal_parameter.resultsToCompute |= daal_knn::computeIndicesOfNeighbors; @@ -88,7 +85,6 @@ static infer_result call_daal_kernel(const context_cpu& ctx, daal_indices = interop::convert_to_daal_homogen_table(arr_indices, row_count, neighbor_count); } - std::cout << "step 6 infer" << std::endl; if (desc.get_result_options().test(result_options::distances)) { dal::detail::check_mul_overflow(neighbor_count, row_count); daal_parameter.resultsToCompute |= daal_knn::computeDistances; @@ -96,11 +92,11 @@ static infer_result call_daal_kernel(const context_cpu& ctx, daal_distance = interop::convert_to_daal_homogen_table(arr_distances, row_count, neighbor_count); } - std::cout << "step 7 infer" << std::endl; + const auto daal_data = interop::convert_to_daal_table(data); - std::cout << "step 8 infer" << std::endl; + const auto model_ptr = dynamic_cast_to_knn_model>(m); - std::cout << "step 9 infer" << std::endl; + interop::status_to_exception(interop::call_daal_kernel( ctx, daal_data.get(), diff --git a/cpp/oneapi/dal/algo/knn/backend/cpu/train_kernel_kd_tree.cpp b/cpp/oneapi/dal/algo/knn/backend/cpu/train_kernel_kd_tree.cpp index 72d61cff84f..622740b1a01 100644 --- a/cpp/oneapi/dal/algo/knn/backend/cpu/train_kernel_kd_tree.cpp +++ b/cpp/oneapi/dal/algo/knn/backend/cpu/train_kernel_kd_tree.cpp @@ -16,7 +16,7 @@ #include #include -#include + #include "oneapi/dal/algo/knn/backend/model_conversion.hpp" #include "oneapi/dal/algo/knn/backend/cpu/train_kernel.hpp" #include "oneapi/dal/algo/knn/backend/model_impl.hpp" @@ -50,7 +50,7 @@ static train_result call_daal_kernel(const context_cpu& ctx, throw unimplemented( dal::detail::error_messages::knn_regression_task_is_not_implemented_for_cpu()); } - std::cout << "step 1" << std::endl; + using model_t = model; using daal_model_interop_t = model_interop; const std::int64_t column_count = data.get_column_count(); @@ -63,30 +63,30 @@ static train_result call_daal_kernel(const context_cpu& ctx, dal::detail::integral_cast(desc.get_neighbor_count()), dal::detail::integral_cast(dummy_seed), data_use_in_model); - std::cout << "step 2" << std::endl; + Status status; const auto model_ptr = daal_knn::Model::create(column_count, &status); interop::status_to_exception(status); - std::cout << "step 3" << std::endl; + auto knn_model = static_cast(model_ptr.get()); // Data or responses should not be copied, copy will be happened when // the tables are passed to old ifaces const bool copy_data_responses = data_use_in_model == daal_knn::doNotUse; knn_model->impl()->setData(daal_data, copy_data_responses); - std::cout << "step 4" << std::endl; + auto daal_responses = daal::data_management::NumericTablePtr(); if (desc.get_result_options().test(result_options::responses)) { daal_responses = interop::convert_to_daal_table(responses); knn_model->impl()->setLabels(daal_responses, copy_data_responses); } - std::cout << "step 5" << std::endl; + interop::status_to_exception(interop::call_daal_kernel( ctx, knn_model->impl()->getData().get(), knn_model->impl()->getLabels().get(), knn_model, *daal_parameter.engine.get())); - std::cout << "step 6" << std::endl; + const auto model_impl = std::make_shared>(new daal_model_interop_t(model_ptr)); return train_result().set_model(dal::detail::make_private(model_impl)); diff --git a/examples/oneapi/cpp/source/knn/knn_cls_kd_tree_dense_batch.cpp b/examples/oneapi/cpp/source/knn/knn_cls_kd_tree_dense_batch.cpp index 2d57fa432a6..f594a7a5bab 100644 --- a/examples/oneapi/cpp/source/knn/knn_cls_kd_tree_dense_batch.cpp +++ b/examples/oneapi/cpp/source/knn/knn_cls_kd_tree_dense_batch.cpp @@ -36,15 +36,14 @@ int main(int argc, char const *argv[]) { const auto knn_desc = dal::knn::descriptor(5, 1); - std::cout << "before train" << std::endl; + const auto train_result = dal::train(knn_desc, x_train, y_train); - std::cout << "after train" << std::endl; + const auto x_test = dal::read(dal::csv::data_source{ test_data_file_name }); const auto y_true = dal::read(dal::csv::data_source{ test_response_file_name }); - std::cout << "before infer" << std::endl; const auto test_result = dal::infer(knn_desc, x_test, train_result.get_model()); - std::cout << "after infer" << std::endl; + std::cout << "Test results:\n" << test_result.get_responses() << std::endl; std::cout << "True responses:\n" << y_true << std::endl; From 8acbfacae775872ad2d156056cbfde0a4a76de2a Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Mon, 30 Sep 2024 09:10:06 -0700 Subject: [PATCH 06/35] fixes for knn --- ...ication_predict_dense_default_batch_impl.i | 153 +++++++----------- ..._classification_train_dense_default_impl.i | 50 +++--- cpp/daal/src/externals/service_math_mkl.h | 40 ++--- .../knn/backend/cpu/infer_kernel_kd_tree.cpp | 5 +- 4 files changed, 102 insertions(+), 146 deletions(-) diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i index 646d028e3a7..297f6a0179e 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i @@ -39,7 +39,7 @@ #include "src/algorithms/k_nearest_neighbors/kdtree_knn_classification_model_impl.h" #include "src/algorithms/k_nearest_neighbors/kdtree_knn_impl.i" #include "src/algorithms/k_nearest_neighbors/knn_heap.h" -#include + namespace daal { namespace algorithms @@ -123,19 +123,16 @@ Status KNNClassificationPredictKernel::compu { Status status; SafeStatus safeStat; - std::cout << "here debug 1" << std::endl; typedef GlobalNeighbors Neighbors; typedef Heap MaxHeap; typedef kdtree_knn_classification::internal::Stack, cpu> SearchStack; typedef daal::services::internal::MaxVal MaxVal; typedef daal::internal::MathInst Math; - std::cout << "here debug 2" << std::endl; size_t k; size_t nClasses; VoteWeights voteWeights = voteUniform; DAAL_UINT64 resultsToEvaluate = classifier::computeClassLabels; - std::cout << "here debug 3" << std::endl; - const auto par3 = dynamic_cast(par); + const auto par3 = dynamic_cast(par); if (par3) { k = par3->k; @@ -143,7 +140,6 @@ Status KNNClassificationPredictKernel::compu resultsToEvaluate = par3->resultsToEvaluate; nClasses = par3->nClasses; } - std::cout << "here debug 4" << std::endl; if (par3 == NULL) return Status(ErrorNullParameterNotSupported); const Model * const model = static_cast(m); @@ -155,7 +151,6 @@ Status KNNClassificationPredictKernel::compu { labels = model->impl()->getLabels().get(); } - std::cout << "here debug 5" << std::endl; const NumericTable * const modelIndices = model->impl()->getIndices().get(); size_t iSize = 1; @@ -163,36 +158,28 @@ Status KNNClassificationPredictKernel::compu { iSize *= 2; } - const size_t heapSize = (iSize / 16 + 1) * 16; - std::cout << "here debug 6" << std::endl; - const size_t xRowCount = x->getNumberOfRows(); - const algorithmFpType base = 2.0; - std::cout << "here debug math 1" << std::endl; + const size_t heapSize = (iSize / 16 + 1) * 16; + const size_t xRowCount = x->getNumberOfRows(); + const algorithmFpType base = 2.0; const size_t expectedMaxDepth = (Math::sLog(xRowCount) / Math::sLog(base) + 1) * __KDTREE_DEPTH_MULTIPLICATION_FACTOR; - std::cout << "here debug math 2" << std::endl; - const size_t stackSize = Math::sPowx(base, Math::sCeil(Math::sLog(expectedMaxDepth) / Math::sLog(base))); - std::cout << "here debug math 3" << std::endl; - std::cout << "here debug 7" << std::endl; + const size_t stackSize = Math::sPowx(base, Math::sCeil(Math::sLog(expectedMaxDepth) / Math::sLog(base))); struct Local { MaxHeap heap; SearchStack stack; }; - std::cout << "here debug 8" << std::endl; daal::tls localTLS([&]() -> Local * { Local * const ptr = service_scalable_calloc(1); if (ptr) { if (!ptr->heap.init(heapSize)) { - std::cout << "error 1" << std::endl; safeStat.add(services::ErrorMemoryAllocationFailed); service_scalable_free(ptr); return nullptr; } if (!ptr->stack.init(stackSize)) { - std::cout << "error 2" << std::endl; safeStat.add(services::ErrorMemoryAllocationFailed); ptr->heap.clear(); service_scalable_free(ptr); @@ -201,108 +188,81 @@ Status KNNClassificationPredictKernel::compu } else { - std::cout << "error 3" << std::endl; safeStat.add(services::ErrorMemoryAllocationFailed); } return ptr; }); DAAL_CHECK_STATUS_OK((status.ok()), status); - std::cout << "here debug 9" << std::endl; - const auto maxThreads = threader_get_threads_number(); - auto nThreads = (maxThreads < 1) ? 1 : maxThreads; - std::cout << "maxthreads =" << maxThreads << std::endl; - std::cout << "nthreads =" << nThreads << std::endl; + const auto maxThreads = threader_get_threads_number(); + auto nThreads = (maxThreads < 1) ? 1 : maxThreads; const size_t xColumnCount = x->getNumberOfColumns(); const size_t rowsPerBlock = 128; const size_t blockCount = (xRowCount + rowsPerBlock - 1) / rowsPerBlock; - std::cout << "here debug 11" << std::endl; services::internal::TArrayScalable soa_arrays; bool isHomogenSOA = checkHomogenSOA(data, soa_arrays); - std::cout << "here debug 12" << std::endl; daal::threader_for(blockCount, blockCount, [&](int iBlock) { - if (!safeStat.ok()) return; Local * const local = localTLS.local(); DAAL_CHECK_MALLOC_THR(local); - if (local) - { - const size_t first = iBlock * rowsPerBlock; - const size_t last = min(static_cast(first + rowsPerBlock), xRowCount); - const algorithmFpType radius = MaxVal::get(); - data_management::BlockDescriptor xBD; - const_cast(*x).getBlockOfRows(first, last - first, readOnly, xBD); - const algorithmFpType * const dx = xBD.getBlockPtr(); + const size_t first = iBlock * rowsPerBlock; + const size_t last = min(static_cast(first + rowsPerBlock), xRowCount); - data_management::BlockDescriptor indicesBD; - data_management::BlockDescriptor distancesBD; - if (indices) - { - std::cout << "in parallel for 1" << std::endl; - DAAL_CHECK_STATUS_THR(indices->getBlockOfRows(first, last - first, writeOnly, indicesBD)); - } - if (distances) - { - std::cout << "in parallel for 2" << std::endl; - DAAL_CHECK_STATUS_THR(distances->getBlockOfRows(first, last - first, writeOnly, distancesBD)); - } - if (labels) - { - std::cout << "in parallel for 3" << std::endl; - std::cout << "in parallel for lables 1" << std::endl; - const size_t yColumnCount = y->getNumberOfColumns(); - std::cout << "in parallel for lables 2" << std::endl; - data_management::BlockDescriptor yBD; - std::cout << "in parallel for lables 3" << std::endl; - y->getBlockOfRows(first, last - first, writeOnly, yBD); - std::cout << "in parallel for lables 4" << std::endl; - auto * const dy = yBD.getBlockPtr(); - std::cout << "in parallel for lables 5" << std::endl; - for (size_t i = 0; i < last - first; ++i) - { - findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, kdTreeTable, rootTreeNodeIndex, data, - isHomogenSOA, soa_arrays); - DAAL_CHECK_STATUS_THR( - predict(&(dy[i * yColumnCount]), local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses)); - } - std::cout << "in parallel for lables 6" << std::endl; - DAAL_CHECK_STATUS_THR(y->releaseBlockOfRows(yBD)); - std::cout << "in parallel for lables 7" << std::endl; - } - else - { - std::cout << "in parallel for 4" << std::endl; - for (size_t i = 0; i < last - first; ++i) - { - findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, kdTreeTable, rootTreeNodeIndex, data, - isHomogenSOA, soa_arrays); - DAAL_CHECK_STATUS_THR(predict(nullptr, local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses)); - } - std::cout << "in parallel for 5" << std::endl; - } + const algorithmFpType radius = MaxVal::get(); + data_management::BlockDescriptor xBD; + const_cast(*x).getBlockOfRows(first, last - first, readOnly, xBD); + const algorithmFpType * const dx = xBD.getBlockPtr(); - if (indices) + data_management::BlockDescriptor indicesBD; + data_management::BlockDescriptor distancesBD; + if (indices) + { + DAAL_CHECK_STATUS_THR(indices->getBlockOfRows(first, last - first, writeOnly, indicesBD)); + } + if (distances) + { + DAAL_CHECK_STATUS_THR(distances->getBlockOfRows(first, last - first, writeOnly, distancesBD)); + } + if (labels) + { + const size_t yColumnCount = y->getNumberOfColumns(); + data_management::BlockDescriptor yBD; + y->getBlockOfRows(first, last - first, writeOnly, yBD); + auto * const dy = yBD.getBlockPtr(); + for (size_t i = 0; i < last - first; ++i) { - std::cout << "in parallel for 6" << std::endl; - DAAL_CHECK_STATUS_THR(indices->releaseBlockOfRows(indicesBD)); - std::cout << "in parallel for 6.1" << std::endl; + findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, kdTreeTable, rootTreeNodeIndex, data, isHomogenSOA, + soa_arrays); + DAAL_CHECK_STATUS_THR( + predict(&(dy[i * yColumnCount]), local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses)); } - - if (distances) + DAAL_CHECK_STATUS_THR(y->releaseBlockOfRows(yBD)); + } + else + { + for (size_t i = 0; i < last - first; ++i) { - std::cout << "in parallel for 7" << std::endl; - DAAL_CHECK_STATUS_THR(distances->releaseBlockOfRows(distancesBD)); - std::cout << "in parallel for 7.1" << std::endl; + findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, kdTreeTable, rootTreeNodeIndex, data, isHomogenSOA, + soa_arrays); + DAAL_CHECK_STATUS_THR(predict(nullptr, local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses)); } + } - const_cast(*x).releaseBlockOfRows(xBD); + if (indices) + { + DAAL_CHECK_STATUS_THR(indices->releaseBlockOfRows(indicesBD)); } + + if (distances) + { + DAAL_CHECK_STATUS_THR(distances->releaseBlockOfRows(distancesBD)); + } + + const_cast(*x).releaseBlockOfRows(xBD); }); status = safeStat.detach(); - std::cout << "here debug 15" << std::endl; DAAL_CHECK_SAFE_STATUS() - std::cout << "here debug 16" << std::endl; localTLS.reduce([=](Local * ptr) -> void { if (ptr) { @@ -311,7 +271,6 @@ Status KNNClassificationPredictKernel::compu service_scalable_free(ptr); } }); - std::cout << "here debug 17" << std::endl; return status; } @@ -501,9 +460,7 @@ services::Status KNNClassificationPredictKernel(static_cast(first + rowsPerBlock), xRowCount); - if (first < last) - { - BBox b; - size_t i = first; - b.upper = dx[indexes[i]]; - b.lower = dx[indexes[i]]; - PRAGMA_IVDEP - for (++i; i < last; ++i) - { - if (b.lower > dx[indexes[i]]) - { - b.lower = dx[indexes[i]]; - } - if (b.upper < dx[indexes[i]]) - { - b.upper = dx[indexes[i]]; - } - } + const size_t first = iBlock * rowsPerBlock; + const size_t last = min(static_cast(first + rowsPerBlock), xRowCount); - if (bboxLocal->upper < b.upper) + if (first < last) + { + BBox b; + size_t i = first; + b.upper = dx[indexes[i]]; + b.lower = dx[indexes[i]]; + PRAGMA_IVDEP + for (++i; i < last; ++i) + { + if (b.lower > dx[indexes[i]]) { - bboxLocal->upper = b.upper; + b.lower = dx[indexes[i]]; } - if (bboxLocal->lower > b.lower) + if (b.upper < dx[indexes[i]]) { - bboxLocal->lower = b.lower; + b.upper = dx[indexes[i]]; } } + + if (bboxLocal->upper < b.upper) + { + bboxLocal->upper = b.upper; + } + if (bboxLocal->lower > b.lower) + { + bboxLocal->lower = b.lower; + } } }); diff --git a/cpp/daal/src/externals/service_math_mkl.h b/cpp/daal/src/externals/service_math_mkl.h index fa5ce46a5ea..7a65b72e2cc 100644 --- a/cpp/daal/src/externals/service_math_mkl.h +++ b/cpp/daal/src/externals/service_math_mkl.h @@ -103,54 +103,54 @@ struct MklMath static void vPowx(SizeType n, const double * in, double in1, double * out) { - __DAAL_MKLFN_CALL_MATH(vmdPowx, ((int)n, in, in1, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmdPowx, ((MKL_INT)n, in, in1, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } static void vCeil(SizeType n, const double * in, double * out) { - __DAAL_MKLFN_CALL_MATH(vmdCeil, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmdCeil, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } static void vErfInv(SizeType n, const double * in, double * out) { - __DAAL_MKLFN_CALL_MATH(vmdErfInv, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmdErfInv, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } static void vErf(SizeType n, const double * in, double * out) { - __DAAL_MKLFN_CALL_MATH(vmdErf, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmdErf, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } static void vExp(SizeType n, const double * in, double * out) { - __DAAL_MKLFN_CALL_MATH(vmdExp, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmdExp, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } static double vExpThreshold() { return -650.0; } static void vTanh(SizeType n, const double * in, double * out) { - __DAAL_MKLFN_CALL_MATH(vmdTanh, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmdTanh, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } static void vSqrt(SizeType n, const double * in, double * out) { - __DAAL_MKLFN_CALL_MATH(vmdSqrt, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmdSqrt, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } static void vLog(SizeType n, const double * in, double * out) { - __DAAL_MKLFN_CALL_MATH(vmdLn, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmdLn, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } static void vLog1p(SizeType n, const double * in, double * out) { - __DAAL_MKLFN_CALL_MATH(vmdLog1p, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmdLog1p, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } static void vCdfNormInv(SizeType n, const double * in, double * out) { - __DAAL_MKLFN_CALL_MATH(vmdCdfNormInv, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmdCdfNormInv, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } }; @@ -215,54 +215,54 @@ struct MklMath static void vPowx(SizeType n, const float * in, float in1, float * out) { - __DAAL_MKLFN_CALL_MATH(vmsPowx, ((int)n, in, in1, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmsPowx, ((MKL_INT)n, in, in1, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } static void vCeil(SizeType n, const float * in, float * out) { - __DAAL_MKLFN_CALL_MATH(vmsCeil, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmsCeil, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } static void vErfInv(SizeType n, const float * in, float * out) { - __DAAL_MKLFN_CALL_MATH(vmsErfInv, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmsErfInv, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } static void vErf(SizeType n, const float * in, float * out) { - __DAAL_MKLFN_CALL_MATH(vmsErf, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmsErf, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } static void vExp(SizeType n, const float * in, float * out) { - __DAAL_MKLFN_CALL_MATH(vmsExp, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmsExp, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } static float vExpThreshold() { return -75.0f; } static void vTanh(SizeType n, const float * in, float * out) { - __DAAL_MKLFN_CALL_MATH(vmsTanh, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmsTanh, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } static void vSqrt(SizeType n, const float * in, float * out) { - __DAAL_MKLFN_CALL_MATH(vmsSqrt, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmsSqrt, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } static void vLog(SizeType n, const float * in, float * out) { - __DAAL_MKLFN_CALL_MATH(vmsLn, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmsLn, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } static void vLog1p(SizeType n, const float * in, float * out) { - __DAAL_MKLFN_CALL_MATH(vmsLog1p, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmsLog1p, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } static void vCdfNormInv(SizeType n, const float * in, float * out) { - __DAAL_MKLFN_CALL_MATH(vmsCdfNormInv, ((int)n, in, out, (VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + __DAAL_MKLFN_CALL_MATH(vmsCdfNormInv, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } }; diff --git a/cpp/oneapi/dal/algo/knn/backend/cpu/infer_kernel_kd_tree.cpp b/cpp/oneapi/dal/algo/knn/backend/cpu/infer_kernel_kd_tree.cpp index c8f9c5b321f..1c2b413ef52 100644 --- a/cpp/oneapi/dal/algo/knn/backend/cpu/infer_kernel_kd_tree.cpp +++ b/cpp/oneapi/dal/algo/knn/backend/cpu/infer_kernel_kd_tree.cpp @@ -63,14 +63,15 @@ static infer_result call_daal_kernel(const context_cpu& ctx, const std::int64_t dummy_seed = 777; const auto data_use_in_model = daal_knn::doNotUse; - daal_knn::Parameter daal_parameter( dal::detail::integral_cast(desc.get_class_count()), dal::detail::integral_cast(desc.get_neighbor_count()), dal::detail::integral_cast(dummy_seed), data_use_in_model); + const auto daal_voting_mode = convert_to_daal_kdtree_voting_mode(desc.get_voting_mode()); daal_parameter.voteWeights = daal_voting_mode; + if (desc.get_result_options().test(result_options::responses)) { arr_responses.reset(1 * row_count); daal_responses = interop::convert_to_daal_homogen_table(arr_responses, row_count, 1); @@ -78,6 +79,7 @@ static infer_result call_daal_kernel(const context_cpu& ctx, else { daal_parameter.resultsToEvaluate = daal_classifier::none; } + if (desc.get_result_options().test(result_options::indices)) { dal::detail::check_mul_overflow(neighbor_count, row_count); daal_parameter.resultsToCompute |= daal_knn::computeIndicesOfNeighbors; @@ -85,6 +87,7 @@ static infer_result call_daal_kernel(const context_cpu& ctx, daal_indices = interop::convert_to_daal_homogen_table(arr_indices, row_count, neighbor_count); } + if (desc.get_result_options().test(result_options::distances)) { dal::detail::check_mul_overflow(neighbor_count, row_count); daal_parameter.resultsToCompute |= daal_knn::computeDistances; From 9ec31b81f7469dc1a99597f3a793caec29d63ee4 Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Mon, 30 Sep 2024 09:34:00 -0700 Subject: [PATCH 07/35] fixes for knn --- ..._classification_predict_dense_default_batch_impl.i | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i index 297f6a0179e..891d52e6b17 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i @@ -128,11 +128,13 @@ Status KNNClassificationPredictKernel::compu typedef kdtree_knn_classification::internal::Stack, cpu> SearchStack; typedef daal::services::internal::MaxVal MaxVal; typedef daal::internal::MathInst Math; + size_t k; size_t nClasses; VoteWeights voteWeights = voteUniform; DAAL_UINT64 resultsToEvaluate = classifier::computeClassLabels; - const auto par3 = dynamic_cast(par); + + const auto par3 = dynamic_cast(par); if (par3) { k = par3->k; @@ -140,6 +142,7 @@ Status KNNClassificationPredictKernel::compu resultsToEvaluate = par3->resultsToEvaluate; nClasses = par3->nClasses; } + if (par3 == NULL) return Status(ErrorNullParameterNotSupported); const Model * const model = static_cast(m); @@ -151,6 +154,7 @@ Status KNNClassificationPredictKernel::compu { labels = model->impl()->getLabels().get(); } + const NumericTable * const modelIndices = model->impl()->getIndices().get(); size_t iSize = 1; @@ -263,7 +267,7 @@ Status KNNClassificationPredictKernel::compu }); status = safeStat.detach(); DAAL_CHECK_SAFE_STATUS() - localTLS.reduce([=](Local * ptr) -> void { + localTLS.reduce([&](Local * ptr) -> void { if (ptr) { ptr->stack.clear(); @@ -271,7 +275,6 @@ Status KNNClassificationPredictKernel::compu service_scalable_free(ptr); } }); - return status; } @@ -460,7 +463,9 @@ services::Status KNNClassificationPredictKernel Date: Mon, 30 Sep 2024 22:46:26 -0700 Subject: [PATCH 08/35] minor fixes --- ...knn_classification_predict_dense_default_batch_impl.i | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i index 891d52e6b17..1b7c318159e 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i @@ -122,7 +122,7 @@ Status KNNClassificationPredictKernel::compu const daal::algorithms::Parameter * par) { Status status; - SafeStatus safeStat; + typedef GlobalNeighbors Neighbors; typedef Heap MaxHeap; typedef kdtree_knn_classification::internal::Stack, cpu> SearchStack; @@ -172,6 +172,8 @@ Status KNNClassificationPredictKernel::compu MaxHeap heap; SearchStack stack; }; + + SafeStatus safeStat; daal::tls localTLS([&]() -> Local * { Local * const ptr = service_scalable_calloc(1); if (ptr) @@ -201,7 +203,7 @@ Status KNNClassificationPredictKernel::compu const auto maxThreads = threader_get_threads_number(); auto nThreads = (maxThreads < 1) ? 1 : maxThreads; const size_t xColumnCount = x->getNumberOfColumns(); - const size_t rowsPerBlock = 128; + const size_t rowsPerBlock = (xRowCount + maxThreads - 1) / maxThreads; const size_t blockCount = (xRowCount + rowsPerBlock - 1) / rowsPerBlock; services::internal::TArrayScalable soa_arrays; @@ -265,8 +267,9 @@ Status KNNClassificationPredictKernel::compu const_cast(*x).releaseBlockOfRows(xBD); }); + status = safeStat.detach(); - DAAL_CHECK_SAFE_STATUS() + localTLS.reduce([&](Local * ptr) -> void { if (ptr) { From 4308bc2df110fd06ccadb507e04bff7c3a0b7f4e Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Tue, 1 Oct 2024 02:15:16 -0700 Subject: [PATCH 09/35] fixes for knn --- ...lassification_predict_dense_default_batch_impl.i | 1 + ...ee_knn_classification_train_dense_default_impl.i | 13 ++++++++----- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i index 1b7c318159e..fa4b6a93581 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i @@ -269,6 +269,7 @@ Status KNNClassificationPredictKernel::compu }); status = safeStat.detach(); + if (!status) return status; localTLS.reduce([&](Local * ptr) -> void { if (ptr) diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_train_dense_default_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_train_dense_default_impl.i index ce7637889cb..fe74c6880a7 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_train_dense_default_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_train_dense_default_impl.i @@ -315,8 +315,8 @@ Status KNNClassificationTrainBatchKernel(x).getBlockOfColumnValues(j, 0, xRowCount, readOnly, columnBD); const algorithmFpType * const dx = columnBD.getBlockPtr(); - - daal::tls bboxTLS([=, &status]() -> BBox * { + SafeStatus safeStat; + daal::tls bboxTLS([&]() -> BBox * { BBox * const ptr = service_scalable_calloc(1); if (ptr) { @@ -325,16 +325,16 @@ Status KNNClassificationTrainBatchKernel(static_cast(first + rowsPerBlock), xRowCount); @@ -368,6 +368,9 @@ Status KNNClassificationTrainBatchKernel void { if (v) { From 21a35b5555200e1855782aa94916164f0cf1022b Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Tue, 1 Oct 2024 04:41:06 -0700 Subject: [PATCH 10/35] fixes for rowsperblock --- ...tree_knn_classification_predict_dense_default_batch_impl.i | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i index fa4b6a93581..d91494f9ec9 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i @@ -203,8 +203,8 @@ Status KNNClassificationPredictKernel::compu const auto maxThreads = threader_get_threads_number(); auto nThreads = (maxThreads < 1) ? 1 : maxThreads; const size_t xColumnCount = x->getNumberOfColumns(); - const size_t rowsPerBlock = (xRowCount + maxThreads - 1) / maxThreads; - const size_t blockCount = (xRowCount + rowsPerBlock - 1) / rowsPerBlock; + const auto rowsPerBlock = 128; + const auto blockCount = (xRowCount + rowsPerBlock - 1) / rowsPerBlock; services::internal::TArrayScalable soa_arrays; bool isHomogenSOA = checkHomogenSOA(data, soa_arrays); From 8a01218ccb17f59ff05f2baac1ef83609a13966c Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Tue, 1 Oct 2024 07:52:09 -0700 Subject: [PATCH 11/35] fixes --- ...ee_knn_classification_predict_dense_default_batch_impl.i | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i index d91494f9ec9..943ebba0fcc 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i @@ -24,8 +24,8 @@ #ifndef __KDTREE_KNN_CLASSIFICATION_PREDICT_DENSE_DEFAULT_BATCH_IMPL_I__ #define __KDTREE_KNN_CLASSIFICATION_PREDICT_DENSE_DEFAULT_BATCH_IMPL_I__ -#include "src/threading/threading.h" #include "services/daal_defines.h" +#include "src/threading/threading.h" #include "src/services/service_utils.h" #include "algorithms/algorithm.h" #include "services/daal_atomic_int.h" @@ -208,6 +208,8 @@ Status KNNClassificationPredictKernel::compu services::internal::TArrayScalable soa_arrays; bool isHomogenSOA = checkHomogenSOA(data, soa_arrays); + + services::Environment::getInstance()->setNumberOfThreads(1); daal::threader_for(blockCount, blockCount, [&](int iBlock) { Local * const local = localTLS.local(); DAAL_CHECK_MALLOC_THR(local); @@ -270,7 +272,7 @@ Status KNNClassificationPredictKernel::compu status = safeStat.detach(); if (!status) return status; - + services::Environment::getInstance()->setNumberOfThreads(nThreads); localTLS.reduce([&](Local * ptr) -> void { if (ptr) { From e038049af1779ce24558a0d04626ef30a30de048 Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Wed, 2 Oct 2024 02:39:16 -0700 Subject: [PATCH 12/35] add single thread service funcs --- ...ication_predict_dense_default_batch_impl.i | 10 +- cpp/daal/src/externals/service_math.h | 42 ++++ cpp/daal/src/externals/service_math_mkl.h | 224 ++++++++++++++++++ 3 files changed, 270 insertions(+), 6 deletions(-) diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i index 943ebba0fcc..fb7aa93d321 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i @@ -165,8 +165,8 @@ Status KNNClassificationPredictKernel::compu const size_t heapSize = (iSize / 16 + 1) * 16; const size_t xRowCount = x->getNumberOfRows(); const algorithmFpType base = 2.0; - const size_t expectedMaxDepth = (Math::sLog(xRowCount) / Math::sLog(base) + 1) * __KDTREE_DEPTH_MULTIPLICATION_FACTOR; - const size_t stackSize = Math::sPowx(base, Math::sCeil(Math::sLog(expectedMaxDepth) / Math::sLog(base))); + const size_t expectedMaxDepth = (Math::xsLog(xRowCount) / Math::xsLog(base) + 1) * __KDTREE_DEPTH_MULTIPLICATION_FACTOR; + const size_t stackSize = Math::xsPowx(base, Math::xsCeil(Math::xsLog(expectedMaxDepth) / Math::xsLog(base))); struct Local { MaxHeap heap; @@ -203,13 +203,12 @@ Status KNNClassificationPredictKernel::compu const auto maxThreads = threader_get_threads_number(); auto nThreads = (maxThreads < 1) ? 1 : maxThreads; const size_t xColumnCount = x->getNumberOfColumns(); - const auto rowsPerBlock = 128; + const auto rowsPerBlock = (xRowCount + nThreads - 1) / nThreads; const auto blockCount = (xRowCount + rowsPerBlock - 1) / rowsPerBlock; services::internal::TArrayScalable soa_arrays; bool isHomogenSOA = checkHomogenSOA(data, soa_arrays); - services::Environment::getInstance()->setNumberOfThreads(1); daal::threader_for(blockCount, blockCount, [&](int iBlock) { Local * const local = localTLS.local(); DAAL_CHECK_MALLOC_THR(local); @@ -272,7 +271,6 @@ Status KNNClassificationPredictKernel::compu status = safeStat.detach(); if (!status) return status; - services::Environment::getInstance()->setNumberOfThreads(nThreads); localTLS.reduce([&](Local * ptr) -> void { if (ptr) { @@ -470,7 +468,7 @@ services::Status KNNClassificationPredictKernel::sPowx(in, in1); } + static fpType xsPowx(fpType in, fpType in1) { return _impl::xsPowx(in, in1); } + static fpType sCeil(fpType in) { return _impl::sCeil(in); } + static fpType xsCeil(fpType in) { return _impl::xsCeil(in); } + static fpType sErfInv(fpType in) { return _impl::sErfInv(in); } + static fpType xsErfInv(fpType in) { return _impl::xsErfInv(in); } + static fpType sErf(fpType in) { return _impl::sErf(in); } + static fpType xsErf(fpType in) { return _impl::xsErf(in); } + static fpType sLog(fpType in) { return _impl::sLog(in); } + static fpType xsLog(fpType in) { return _impl::xsLog(in); } + static fpType sCdfNormInv(fpType in) { return _impl::sCdfNormInv(in); } + static fpType xsCdfNormInv(fpType in) { return _impl::xsCdfNormInv(in); } + static void vPowx(SizeType n, const fpType * in, fpType in1, fpType * out) { _impl::vPowx(n, in, in1, out); } + static void xvPowx(SizeType n, const fpType * in, fpType in1, fpType * out) { _impl::xvPowx(n, in, in1, out); } + static void vPowxAsLnExp(SizeType n, const fpType * in, fpType in1, fpType * out) { _impl::vLog(n, in, out); @@ -72,25 +86,53 @@ struct Math _impl::vExp(n, out, out); } + static void xvPowxAsLnExp(SizeType n, const fpType * in, fpType in1, fpType * out) + { + _impl::xvLog(n, in, out); + for (size_t i = 0; i < n; i++) + { + out[i] *= in1; + } + _impl::xvExp(n, out, out); + } + static void vCeil(SizeType n, const fpType * in, fpType * out) { _impl::vCeil(n, in, out); } + static void xvCeil(SizeType n, const fpType * in, fpType * out) { _impl::xvCeil(n, in, out); } + static void vErfInv(SizeType n, const fpType * in, fpType * out) { _impl::vErfInv(n, in, out); } + static void xvErfInv(SizeType n, const fpType * in, fpType * out) { _impl::xvErfInv(n, in, out); } + static void vErf(SizeType n, const fpType * in, fpType * out) { _impl::vErf(n, in, out); } + static void xvErf(SizeType n, const fpType * in, fpType * out) { _impl::xvErf(n, in, out); } + static void vExp(SizeType n, const fpType * in, fpType * out) { _impl::vExp(n, in, out); } + static void xvExp(SizeType n, const fpType * in, fpType * out) { _impl::xvExp(n, in, out); } + static fpType vExpThreshold() { return _impl::vExpThreshold(); } static void vTanh(SizeType n, const fpType * in, fpType * out) { _impl::vTanh(n, in, out); } + static void xvTanh(SizeType n, const fpType * in, fpType * out) { _impl::xvTanh(n, in, out); } + static void vSqrt(SizeType n, const fpType * in, fpType * out) { _impl::vSqrt(n, in, out); } + static void xvSqrt(SizeType n, const fpType * in, fpType * out) { _impl::xvSqrt(n, in, out); } + static void vLog(SizeType n, const fpType * in, fpType * out) { _impl::vLog(n, in, out); } + static void xvLog(SizeType n, const fpType * in, fpType * out) { _impl::xvLog(n, in, out); } + static void vLog1p(SizeType n, const fpType * in, fpType * out) { _impl::vLog1p(n, in, out); } + static void xvLog1p(SizeType n, const fpType * in, fpType * out) { _impl::xvLog1p(n, in, out); } + static void vCdfNormInv(SizeType n, const fpType * in, fpType * out) { _impl::vCdfNormInv(n, in, out); } + + static void xvCdfNormInv(SizeType n, const fpType * in, fpType * out) { _impl::xvCdfNormInv(n, in, out); } }; } // namespace internal diff --git a/cpp/daal/src/externals/service_math_mkl.h b/cpp/daal/src/externals/service_math_mkl.h index 7a65b72e2cc..08df5d7b11d 100644 --- a/cpp/daal/src/externals/service_math_mkl.h +++ b/cpp/daal/src/externals/service_math_mkl.h @@ -66,6 +66,13 @@ struct MklMath return r; } + static double xsPowx(double in, double in1) + { + double r; + xvPowx(1, &in, in1, &r); + return r; + } + static double sCeil(double in) { double r; @@ -73,6 +80,13 @@ struct MklMath return r; } + static double xsCeil(double in) + { + double r; + xvCeil(1, &in, &r); + return r; + } + static double sErfInv(double in) { double r; @@ -80,6 +94,13 @@ struct MklMath return r; } + static double xsErfInv(double in) + { + double r; + xvErfInv(1, &in, &r); + return r; + } + static double sErf(double in) { double r; @@ -87,6 +108,13 @@ struct MklMath return r; } + static double xsErf(double in) + { + double r; + xvErf(1, &in, &r); + return r; + } + static double sLog(double in) { double r; @@ -94,6 +122,13 @@ struct MklMath return r; } + static double xsLog(double in) + { + double r; + xvLog(1, &in, &r); + return r; + } + static double sCdfNormInv(double in) { double r; @@ -101,31 +136,73 @@ struct MklMath return r; } + static double xsCdfNormInv(double in) + { + double r; + xvCdfNormInv(1, &in, &r); + return r; + } + static void vPowx(SizeType n, const double * in, double in1, double * out) { __DAAL_MKLFN_CALL_MATH(vmdPowx, ((MKL_INT)n, in, in1, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } + static void xvPowx(SizeType n, const double * in, double in1, double * out) + { + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_MATH(vmdPowx, ((MKL_INT)n, in, in1, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + mkl_set_num_threads_local(old_nthr); + } + static void vCeil(SizeType n, const double * in, double * out) { __DAAL_MKLFN_CALL_MATH(vmdCeil, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } + static void xvCeil(SizeType n, const double * in, double * out) + { + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_MATH(vmdCeil, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + mkl_set_num_threads_local(old_nthr); + } + static void vErfInv(SizeType n, const double * in, double * out) { __DAAL_MKLFN_CALL_MATH(vmdErfInv, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } + static void xvErfInv(SizeType n, const double * in, double * out) + { + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_MATH(vmdErfInv, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + mkl_set_num_threads_local(old_nthr); + } + static void vErf(SizeType n, const double * in, double * out) { __DAAL_MKLFN_CALL_MATH(vmdErf, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } + static void xvErf(SizeType n, const double * in, double * out) + { + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_MATH(vmdErf, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + mkl_set_num_threads_local(old_nthr); + } + static void vExp(SizeType n, const double * in, double * out) { __DAAL_MKLFN_CALL_MATH(vmdExp, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } + static void xvExp(SizeType n, const double * in, double * out) + { + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_MATH(vmdExp, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + mkl_set_num_threads_local(old_nthr); + } + static double vExpThreshold() { return -650.0; } static void vTanh(SizeType n, const double * in, double * out) @@ -133,25 +210,60 @@ struct MklMath __DAAL_MKLFN_CALL_MATH(vmdTanh, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } + static void xvTanh(SizeType n, const double * in, double * out) + { + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_MATH(vmdTanh, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + mkl_set_num_threads_local(old_nthr); + } + static void vSqrt(SizeType n, const double * in, double * out) { __DAAL_MKLFN_CALL_MATH(vmdSqrt, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } + static void xvSqrt(SizeType n, const double * in, double * out) + { + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_MATH(vmdSqrt, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + mkl_set_num_threads_local(old_nthr); + } + static void vLog(SizeType n, const double * in, double * out) { __DAAL_MKLFN_CALL_MATH(vmdLn, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } + static void xvLog(SizeType n, const double * in, double * out) + { + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_MATH(vmdLn, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + mkl_set_num_threads_local(old_nthr); + } + static void vLog1p(SizeType n, const double * in, double * out) { __DAAL_MKLFN_CALL_MATH(vmdLog1p, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } + static void xvLog1p(SizeType n, const double * in, double * out) + { + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_MATH(vmdLog1p, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + mkl_set_num_threads_local(old_nthr); + } + static void vCdfNormInv(SizeType n, const double * in, double * out) { __DAAL_MKLFN_CALL_MATH(vmdCdfNormInv, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } + + static void xvCdfNormInv(SizeType n, const double * in, double * out) + { + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_MATH(vmdCdfNormInv, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + mkl_set_num_threads_local(old_nthr); + } }; /* @@ -178,6 +290,13 @@ struct MklMath return r; } + static float xsPowx(float in, float in1) + { + float r; + xvPowx(1, &in, in1, &r); + return r; + } + static float sCeil(float in) { float r; @@ -185,6 +304,13 @@ struct MklMath return r; } + static float xsCeil(float in) + { + float r; + xvCeil(1, &in, &r); + return r; + } + static float sErfInv(float in) { float r; @@ -192,6 +318,13 @@ struct MklMath return r; } + static float xsErfInv(float in) + { + float r; + xvErfInv(1, &in, &r); + return r; + } + static float sErf(float in) { float r; @@ -199,6 +332,13 @@ struct MklMath return r; } + static float xsErf(float in) + { + float r; + xvErf(1, &in, &r); + return r; + } + static float sLog(float in) { float r; @@ -206,6 +346,13 @@ struct MklMath return r; } + static float xsLog(float in) + { + float r; + xvLog(1, &in, &r); + return r; + } + static float sCdfNormInv(float in) { float r; @@ -213,31 +360,73 @@ struct MklMath return r; } + static float xsCdfNormInv(float in) + { + float r; + xvCdfNormInv(1, &in, &r); + return r; + } + static void vPowx(SizeType n, const float * in, float in1, float * out) { __DAAL_MKLFN_CALL_MATH(vmsPowx, ((MKL_INT)n, in, in1, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } + static void xvPowx(SizeType n, const float * in, float in1, float * out) + { + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_MATH(vmsPowx, ((MKL_INT)n, in, in1, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + mkl_set_num_threads_local(old_nthr); + } + static void vCeil(SizeType n, const float * in, float * out) { __DAAL_MKLFN_CALL_MATH(vmsCeil, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } + static void xvCeil(SizeType n, const float * in, float * out) + { + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_MATH(vmsCeil, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + mkl_set_num_threads_local(old_nthr); + } + static void vErfInv(SizeType n, const float * in, float * out) { __DAAL_MKLFN_CALL_MATH(vmsErfInv, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } + static void xvErfInv(SizeType n, const float * in, float * out) + { + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_MATH(vmsErfInv, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + mkl_set_num_threads_local(old_nthr); + } + static void vErf(SizeType n, const float * in, float * out) { __DAAL_MKLFN_CALL_MATH(vmsErf, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } + static void xvErf(SizeType n, const float * in, float * out) + { + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_MATH(vmsErf, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + mkl_set_num_threads_local(old_nthr); + } + static void vExp(SizeType n, const float * in, float * out) { __DAAL_MKLFN_CALL_MATH(vmsExp, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } + static void xvExp(SizeType n, const float * in, float * out) + { + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_MATH(vmsExp, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + mkl_set_num_threads_local(old_nthr); + } + static float vExpThreshold() { return -75.0f; } static void vTanh(SizeType n, const float * in, float * out) @@ -245,25 +434,60 @@ struct MklMath __DAAL_MKLFN_CALL_MATH(vmsTanh, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } + static void xvTanh(SizeType n, const float * in, float * out) + { + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_MATH(vmsTanh, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + mkl_set_num_threads_local(old_nthr); + } + static void vSqrt(SizeType n, const float * in, float * out) { __DAAL_MKLFN_CALL_MATH(vmsSqrt, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } + static void xvSqrt(SizeType n, const float * in, float * out) + { + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_MATH(vmsSqrt, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + mkl_set_num_threads_local(old_nthr); + } + static void vLog(SizeType n, const float * in, float * out) { __DAAL_MKLFN_CALL_MATH(vmsLn, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } + static void xvLog(SizeType n, const float * in, float * out) + { + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_MATH(vmsLn, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + mkl_set_num_threads_local(old_nthr); + } + static void vLog1p(SizeType n, const float * in, float * out) { __DAAL_MKLFN_CALL_MATH(vmsLog1p, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } + static void xvLog1p(SizeType n, const float * in, float * out) + { + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_MATH(vmsLog1p, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + mkl_set_num_threads_local(old_nthr); + } + static void vCdfNormInv(SizeType n, const float * in, float * out) { __DAAL_MKLFN_CALL_MATH(vmsCdfNormInv, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); } + + static void xvCdfNormInv(SizeType n, const float * in, float * out) + { + int old_nthr = mkl_set_num_threads_local(1); + __DAAL_MKLFN_CALL_MATH(vmsCdfNormInv, ((MKL_INT)n, in, out, (MKL_INT)(VML_HA | VML_FTZDAZ_ON | VML_ERRMODE_IGNORE))); + mkl_set_num_threads_local(old_nthr); + } }; } // namespace mkl From 314c7a4c140c50b94151061e0948095bfe0f2917 Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Wed, 2 Oct 2024 05:55:16 -0700 Subject: [PATCH 13/35] fixes for knn --- ...ication_predict_dense_default_batch_impl.i | 59 ++++++++++++------- 1 file changed, 37 insertions(+), 22 deletions(-) diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i index fb7aa93d321..6785750424e 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i @@ -39,7 +39,7 @@ #include "src/algorithms/k_nearest_neighbors/kdtree_knn_classification_model_impl.h" #include "src/algorithms/k_nearest_neighbors/kdtree_knn_impl.i" #include "src/algorithms/k_nearest_neighbors/knn_heap.h" - +#include namespace daal { namespace algorithms @@ -234,17 +234,22 @@ Status KNNClassificationPredictKernel::compu if (labels) { const size_t yColumnCount = y->getNumberOfColumns(); + std::cout << "here labels -1" << std::endl; data_management::BlockDescriptor yBD; + std::cout << "here labels 0" << std::endl; y->getBlockOfRows(first, last - first, writeOnly, yBD); auto * const dy = yBD.getBlockPtr(); + std::cout << "here labels 1" << std::endl; for (size_t i = 0; i < last - first; ++i) { findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, kdTreeTable, rootTreeNodeIndex, data, isHomogenSOA, soa_arrays); DAAL_CHECK_STATUS_THR( - predict(&(dy[i * yColumnCount]), local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses)); + predict(&dy[i * yColumnCount], local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses)); } - DAAL_CHECK_STATUS_THR(y->releaseBlockOfRows(yBD)); + std::cout << "here labels 2" << std::endl; + y->releaseBlockOfRows(yBD); + std::cout << "here labels 3" << std::endl; } else { @@ -265,18 +270,22 @@ Status KNNClassificationPredictKernel::compu { DAAL_CHECK_STATUS_THR(distances->releaseBlockOfRows(distancesBD)); } - const_cast(*x).releaseBlockOfRows(xBD); }); - + std::cout << "here final 1" << std::endl; status = safeStat.detach(); + std::cout << "here final 2" << std::endl; if (!status) return status; + std::cout << "here final 3" << std::endl; localTLS.reduce([&](Local * ptr) -> void { if (ptr) { ptr->stack.clear(); + std::cout << "here final 4" << std::endl; ptr->heap.clear(); + std::cout << "here final 5" << std::endl; service_scalable_free(ptr); + std::cout << "here final 6" << std::endl; } }); return status; @@ -427,22 +436,23 @@ services::Status KNNClassificationPredictKernel & distances, size_t index, const size_t nClasses) { typedef daal::internal::MathInst Math; - + std::cout << "here debug1" << std::endl; const size_t heapSize = heap.size(); if (heapSize < 1) return services::Status(); - + std::cout << "here debug2" << std::endl; if (indices.getNumberOfRows() != 0) { + std::cout << "here debug3" << std::endl; DAAL_ASSERT(modelIndices); services::Status s; data_management::BlockDescriptor modelIndicesBD; - + std::cout << "here debug4" << std::endl; const auto nIndices = indices.getNumberOfColumns(); DAAL_ASSERT(heapSize <= nIndices); int * const indicesPtr = indices.getBlockPtr() + index * nIndices; - + std::cout << "here debug5" << std::endl; for (size_t i = 0; i < heapSize; ++i) { s |= const_cast(modelIndices)->getBlockOfRows(heap[i].index, 1, readOnly, modelIndicesBD); @@ -453,51 +463,53 @@ services::Status KNNClassificationPredictKernel(modelIndices)->releaseBlockOfRows(modelIndicesBD); DAAL_ASSERT(s.ok()); } + std::cout << "here debug6" << std::endl; } if (distances.getNumberOfRows() != 0) { services::Status s; - + std::cout << "here debug7" << std::endl; const auto nDistances = distances.getNumberOfColumns(); DAAL_ASSERT(heapSize <= nDistances); - + std::cout << "here debug8" << std::endl; algorithmFpType * const distancesPtr = distances.getBlockPtr() + index * nDistances; for (size_t i = 0; i < heapSize; ++i) { distancesPtr[i] = heap[i].distance; } - + std::cout << "here debug9" << std::endl; Math::xvSqrt(heapSize, distancesPtr, distancesPtr); - + std::cout << "here debug10" << std::endl; for (size_t i = heapSize; i < nDistances; ++i) { distancesPtr[i] = -1; } + std::cout << "here debug11" << std::endl; } if (labels) { DAAL_ASSERT(predictedClass); - + std::cout << "here debug12" << std::endl; data_management::BlockDescriptor labelBD; algorithmFpType * classes = static_cast(daal::services::internal::service_malloc(heapSize)); algorithmFpType * classWeights = static_cast(daal::services::internal::service_malloc(nClasses)); DAAL_CHECK_MALLOC(classWeights); DAAL_CHECK_MALLOC(classes); - + std::cout << "here debug13" << std::endl; for (size_t i = 0; i < nClasses; ++i) { classWeights[i] = 0; } - + std::cout << "here debug14" << std::endl; for (size_t i = 0; i < heapSize; ++i) { const_cast(labels)->getBlockOfColumnValues(0, heap[i].index, 1, readOnly, labelBD); classes[i] = *(labelBD.getBlockPtr()); const_cast(labels)->releaseBlockOfColumnValues(labelBD); } - + std::cout << "here debug15" << std::endl; if (voteWeights == voteUniform) { for (size_t i = 0; i < heapSize; ++i) @@ -508,11 +520,11 @@ services::Status KNNClassificationPredictKernel::get(); bool isContainZero = false; - + std::cout << "here debug18" << std::endl; for (size_t i = 0; i < heapSize; ++i) { if (heap[i].distance <= epsilon) @@ -521,7 +533,7 @@ services::Status KNNClassificationPredictKernel(classes); + std::cout << "here debug24" << std::endl; service_free(classWeights); + std::cout << "here debug25" << std::endl; classes = nullptr; } From 9ab5888a66112356b05e045b5b92bdaa48a62476 Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Fri, 4 Oct 2024 07:30:06 -0700 Subject: [PATCH 14/35] minor fixes --- ...ication_predict_dense_default_batch_impl.i | 44 +----- cpp/daal/src/externals/service_math_ref.h | 130 ++++++++++++++++++ 2 files changed, 136 insertions(+), 38 deletions(-) diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i index 6785750424e..7b7b12d1a30 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i @@ -146,7 +146,7 @@ Status KNNClassificationPredictKernel::compu if (par3 == NULL) return Status(ErrorNullParameterNotSupported); const Model * const model = static_cast(m); - const auto & kdTreeTable = *(model->impl()->getKDTreeTable()); + const KDTreeTable & kdTreeTable = *(model->impl()->getKDTreeTable()); const auto rootTreeNodeIndex = model->impl()->getRootNodeIndex(); const NumericTable & data = *(model->impl()->getData()); const NumericTable * labels = nullptr; @@ -234,12 +234,9 @@ Status KNNClassificationPredictKernel::compu if (labels) { const size_t yColumnCount = y->getNumberOfColumns(); - std::cout << "here labels -1" << std::endl; data_management::BlockDescriptor yBD; - std::cout << "here labels 0" << std::endl; y->getBlockOfRows(first, last - first, writeOnly, yBD); auto * const dy = yBD.getBlockPtr(); - std::cout << "here labels 1" << std::endl; for (size_t i = 0; i < last - first; ++i) { findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, kdTreeTable, rootTreeNodeIndex, data, isHomogenSOA, @@ -247,9 +244,7 @@ Status KNNClassificationPredictKernel::compu DAAL_CHECK_STATUS_THR( predict(&dy[i * yColumnCount], local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses)); } - std::cout << "here labels 2" << std::endl; y->releaseBlockOfRows(yBD); - std::cout << "here labels 3" << std::endl; } else { @@ -272,20 +267,14 @@ Status KNNClassificationPredictKernel::compu } const_cast(*x).releaseBlockOfRows(xBD); }); - std::cout << "here final 1" << std::endl; status = safeStat.detach(); - std::cout << "here final 2" << std::endl; if (!status) return status; - std::cout << "here final 3" << std::endl; localTLS.reduce([&](Local * ptr) -> void { if (ptr) { ptr->stack.clear(); - std::cout << "here final 4" << std::endl; ptr->heap.clear(); - std::cout << "here final 5" << std::endl; service_scalable_free(ptr); - std::cout << "here final 6" << std::endl; } }); return status; @@ -349,6 +338,8 @@ void KNNClassificationPredictKernel::findNea GlobalNeighbors curNeighbor; size_t i; SearchNode cur, toPush; + const KDTreeNode * const nodes = static_cast(kdTreeTable.getArray()); + const KDTreeNode * node; cur.nodeIndex = rootTreeNodeIndex; cur.minDistance = 0; @@ -359,7 +350,7 @@ void KNNClassificationPredictKernel::findNea data_management::BlockDescriptor xBD[2]; for (;;) { - node = static_cast(kdTreeTable.getArray()) + cur.nodeIndex; + node = &nodes[cur.nodeIndex]; if (node->dimension == __KDTREE_NULLDIMENSION) { start = node->leftIndex; @@ -396,7 +387,7 @@ void KNNClassificationPredictKernel::findNea if (!stack.empty()) { cur = stack.pop(); - DAAL_PREFETCH_READ_T0(static_cast(kdTreeTable.getArray()) + cur.nodeIndex); + DAAL_PREFETCH_READ_T0(&nodes[cur.nodeIndex]); } else { @@ -419,7 +410,7 @@ void KNNClassificationPredictKernel::findNea else if (!stack.empty()) { cur = stack.pop(); - DAAL_PREFETCH_READ_T0(static_cast(kdTreeTable.getArray()) + cur.nodeIndex); + DAAL_PREFETCH_READ_T0(&nodes[cur.nodeIndex]); } else { @@ -436,23 +427,18 @@ services::Status KNNClassificationPredictKernel & distances, size_t index, const size_t nClasses) { typedef daal::internal::MathInst Math; - std::cout << "here debug1" << std::endl; const size_t heapSize = heap.size(); if (heapSize < 1) return services::Status(); - std::cout << "here debug2" << std::endl; if (indices.getNumberOfRows() != 0) { - std::cout << "here debug3" << std::endl; DAAL_ASSERT(modelIndices); services::Status s; data_management::BlockDescriptor modelIndicesBD; - std::cout << "here debug4" << std::endl; const auto nIndices = indices.getNumberOfColumns(); DAAL_ASSERT(heapSize <= nIndices); int * const indicesPtr = indices.getBlockPtr() + index * nIndices; - std::cout << "here debug5" << std::endl; for (size_t i = 0; i < heapSize; ++i) { s |= const_cast(modelIndices)->getBlockOfRows(heap[i].index, 1, readOnly, modelIndicesBD); @@ -463,53 +449,43 @@ services::Status KNNClassificationPredictKernel(modelIndices)->releaseBlockOfRows(modelIndicesBD); DAAL_ASSERT(s.ok()); } - std::cout << "here debug6" << std::endl; } if (distances.getNumberOfRows() != 0) { services::Status s; - std::cout << "here debug7" << std::endl; const auto nDistances = distances.getNumberOfColumns(); DAAL_ASSERT(heapSize <= nDistances); - std::cout << "here debug8" << std::endl; algorithmFpType * const distancesPtr = distances.getBlockPtr() + index * nDistances; for (size_t i = 0; i < heapSize; ++i) { distancesPtr[i] = heap[i].distance; } - std::cout << "here debug9" << std::endl; Math::xvSqrt(heapSize, distancesPtr, distancesPtr); - std::cout << "here debug10" << std::endl; for (size_t i = heapSize; i < nDistances; ++i) { distancesPtr[i] = -1; } - std::cout << "here debug11" << std::endl; } if (labels) { DAAL_ASSERT(predictedClass); - std::cout << "here debug12" << std::endl; data_management::BlockDescriptor labelBD; algorithmFpType * classes = static_cast(daal::services::internal::service_malloc(heapSize)); algorithmFpType * classWeights = static_cast(daal::services::internal::service_malloc(nClasses)); DAAL_CHECK_MALLOC(classWeights); DAAL_CHECK_MALLOC(classes); - std::cout << "here debug13" << std::endl; for (size_t i = 0; i < nClasses; ++i) { classWeights[i] = 0; } - std::cout << "here debug14" << std::endl; for (size_t i = 0; i < heapSize; ++i) { const_cast(labels)->getBlockOfColumnValues(0, heap[i].index, 1, readOnly, labelBD); classes[i] = *(labelBD.getBlockPtr()); const_cast(labels)->releaseBlockOfColumnValues(labelBD); } - std::cout << "here debug15" << std::endl; if (voteWeights == voteUniform) { for (size_t i = 0; i < heapSize; ++i) @@ -520,11 +496,9 @@ services::Status KNNClassificationPredictKernel::get(); bool isContainZero = false; - std::cout << "here debug18" << std::endl; for (size_t i = 0; i < heapSize; ++i) { if (heap[i].distance <= epsilon) @@ -533,7 +507,6 @@ services::Status KNNClassificationPredictKernel(classes); - std::cout << "here debug24" << std::endl; service_free(classWeights); - std::cout << "here debug25" << std::endl; classes = nullptr; } diff --git a/cpp/daal/src/externals/service_math_ref.h b/cpp/daal/src/externals/service_math_ref.h index 07062c1ba2f..5ac4d515dd1 100644 --- a/cpp/daal/src/externals/service_math_ref.h +++ b/cpp/daal/src/externals/service_math_ref.h @@ -58,48 +58,88 @@ struct RefMath static double sPowx(double in, double in1) { return pow(in, in1); } + static double xsPowx(double in, double in1) { return pow(in, in1); } + static double sCeil(double in) { return ceil(in); } + static double xsCeil(double in) { return ceil(in); } + // Not implemented static double sErfInv(double in) { return std::numeric_limits::quiet_NaN(); } + // Not implemented + static double xsErfInv(double in) { return std::numeric_limits::quiet_NaN(); } + static double sErf(double in) { return erf(in); } + static double xsErf(double in) { return erf(in); } + static double sLog(double in) { return log(in); } + static double xsLog(double in) { return log(in); } + // Not implemented static double sCdfNormInv(double in) { return std::numeric_limits::quiet_NaN(); } + // Not implemented + static double xsCdfNormInv(double in) { return std::numeric_limits::quiet_NaN(); } + static void vPowx(SizeType n, const double * in, double in1, double * out) { #pragma omp simd for (SizeType i = 0; i < n; ++i) out[i] = pow(in[i], in1); } + static void xvPowx(SizeType n, const double * in, double in1, double * out) + { + for (SizeType i = 0; i < n; ++i) out[i] = pow(in[i], in1); + } + static void vCeil(SizeType n, const double * in, double * out) { #pragma omp simd for (SizeType i = 0; i < n; ++i) out[i] = ceil(in[i]); } + static void xvCeil(SizeType n, const double * in, double * out) + { + for (SizeType i = 0; i < n; ++i) out[i] = ceil(in[i]); + } + // Not implemented static void vErfInv(SizeType n, const double * in, double * out) { for (SizeType i = 0; i < n; ++i) out[i] = std::numeric_limits::quiet_NaN(); } + // Not implemented + static void xvErfInv(SizeType n, const double * in, double * out) + { + for (SizeType i = 0; i < n; ++i) out[i] = std::numeric_limits::quiet_NaN(); + } + static void vErf(SizeType n, const double * in, double * out) { #pragma omp simd for (SizeType i = 0; i < n; ++i) out[i] = erf(in[i]); } + static void xvErf(SizeType n, const double * in, double * out) + { + for (SizeType i = 0; i < n; ++i) out[i] = erf(in[i]); + } + static void vExp(SizeType n, const double * in, double * out) { #pragma omp simd for (SizeType i = 0; i < n; ++i) out[i] = exp(in[i]); } + static void xvExp(SizeType n, const double * in, double * out) + { + for (SizeType i = 0; i < n; ++i) out[i] = exp(in[i]); + } + static double vExpThreshold() { return -650.0; @@ -111,29 +151,55 @@ struct RefMath for (SizeType i = 0; i < n; ++i) out[i] = tanh(in[i]); } + static void xvTanh(SizeType n, const double * in, double * out) + { + for (SizeType i = 0; i < n; ++i) out[i] = tanh(in[i]); + } + static void vSqrt(SizeType n, const double * in, double * out) { #pragma omp simd for (SizeType i = 0; i < n; ++i) out[i] = sqrt(in[i]); } + static void xvSqrt(SizeType n, const double * in, double * out) + { + for (SizeType i = 0; i < n; ++i) out[i] = sqrt(in[i]); + } + static void vLog(SizeType n, const double * in, double * out) { #pragma omp simd for (SizeType i = 0; i < n; ++i) out[i] = log(in[i]); } + static void xvLog(SizeType n, const double * in, double * out) + { + for (SizeType i = 0; i < n; ++i) out[i] = log(in[i]); + } + static void vLog1p(SizeType n, const double * in, double * out) { #pragma omp simd for (SizeType i = 0; i < n; ++i) out[i] = log1p(in[i]); } + static void xvLog1p(SizeType n, const double * in, double * out) + { + for (SizeType i = 0; i < n; ++i) out[i] = log1p(in[i]); + } + // Not implemented static void vCdfNormInv(SizeType n, const double * in, double * out) { for (SizeType i = 0; i < n; ++i) out[i] = std::numeric_limits::quiet_NaN(); } + + // Not implemented + static void xvCdfNormInv(SizeType n, const double * in, double * out) + { + for (SizeType i = 0; i < n; ++i) out[i] = std::numeric_limits::quiet_NaN(); + } }; /* @@ -155,48 +221,87 @@ struct RefMath static float sPowx(float in, float in1) { return pow(in, in1); } + static float xsPowx(float in, float in1) { return pow(in, in1); } + static float sCeil(float in) { return ceil(in); } + static float xsCeil(float in) { return ceil(in); } + // Not implemented static float sErfInv(float in) { return std::numeric_limits::quiet_NaN(); } + // Not implemented + static float xsErfInv(float in) { return std::numeric_limits::quiet_NaN(); } + static float sErf(float in) { return erf(in); } + static float xsErf(float in) { return erf(in); } + static float sLog(float in) { return log(in); } + static float xsLog(float in) { return log(in); } + // Not implemented static float sCdfNormInv(float in) { return std::numeric_limits::quiet_NaN(); } + static float xsCdfNormInv(float in) { return std::numeric_limits::quiet_NaN(); } + static void vPowx(SizeType n, const float * in, float in1, float * out) { #pragma omp simd for (SizeType i = 0; i < n; ++i) out[i] = pow(in[i], in1); } + static void xvPowx(SizeType n, const float * in, float in1, float * out) + { + for (SizeType i = 0; i < n; ++i) out[i] = pow(in[i], in1); + } + static void vCeil(SizeType n, const float * in, float * out) { #pragma omp simd for (SizeType i = 0; i < n; ++i) out[i] = ceil(in[i]); } + static void xvCeil(SizeType n, const float * in, float * out) + { + for (SizeType i = 0; i < n; ++i) out[i] = ceil(in[i]); + } + // Not implemented static void vErfInv(SizeType n, const float * in, float * out) { for (SizeType i = 0; i < n; ++i) out[i] = std::numeric_limits::quiet_NaN(); } + // Not implemented + static void xvErfInv(SizeType n, const float * in, float * out) + { + for (SizeType i = 0; i < n; ++i) out[i] = std::numeric_limits::quiet_NaN(); + } + static void vErf(SizeType n, const float * in, float * out) { #pragma omp simd for (SizeType i = 0; i < n; ++i) out[i] = erf(in[i]); } + static void xvErf(SizeType n, const float * in, float * out) + { + for (SizeType i = 0; i < n; ++i) out[i] = erf(in[i]); + } + static void vExp(SizeType n, const float * in, float * out) { #pragma omp simd for (SizeType i = 0; i < n; ++i) out[i] = exp(in[i]); } + static void xvExp(SizeType n, const float * in, float * out) + { + for (SizeType i = 0; i < n; ++i) out[i] = exp(in[i]); + } + static float vExpThreshold() { return -75.0f; @@ -208,29 +313,54 @@ struct RefMath for (SizeType i = 0; i < n; ++i) out[i] = tanh(in[i]); } + static void xvTanh(SizeType n, const float * in, float * out) + { + for (SizeType i = 0; i < n; ++i) out[i] = tanh(in[i]); + } + static void vSqrt(SizeType n, const float * in, float * out) { #pragma omp simd for (SizeType i = 0; i < n; ++i) out[i] = sqrt(in[i]); } + static void xvSqrt(SizeType n, const float * in, float * out) + { + for (SizeType i = 0; i < n; ++i) out[i] = sqrt(in[i]); + } + static void vLog(SizeType n, const float * in, float * out) { #pragma omp simd for (SizeType i = 0; i < n; ++i) out[i] = log(in[i]); } + static void xvLog(SizeType n, const float * in, float * out) + { + for (SizeType i = 0; i < n; ++i) out[i] = log(in[i]); + } + static void vLog1p(SizeType n, const float * in, float * out) { #pragma omp simd for (SizeType i = 0; i < n; ++i) out[i] = log1p(in[i]); } + static void xvLog1p(SizeType n, const float * in, float * out) + { + for (SizeType i = 0; i < n; ++i) out[i] = log1p(in[i]); + } + // Not implemented static void vCdfNormInv(SizeType n, const float * in, float * out) { for (SizeType i = 0; i < n; ++i) out[i] = std::numeric_limits::quiet_NaN(); } + // Not implemented + static void xvCdfNormInv(SizeType n, const float * in, float * out) + { + for (SizeType i = 0; i < n; ++i) out[i] = std::numeric_limits::quiet_NaN(); + } }; } // namespace ref From b3b4cb690abd2081362e521270bc325128a9d5bb Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Wed, 9 Oct 2024 02:18:05 -0700 Subject: [PATCH 15/35] fixes for clang --- ...classification_predict_dense_default_batch_impl.i | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i index 7b7b12d1a30..2215440e0d6 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i @@ -145,11 +145,11 @@ Status KNNClassificationPredictKernel::compu if (par3 == NULL) return Status(ErrorNullParameterNotSupported); - const Model * const model = static_cast(m); - const KDTreeTable & kdTreeTable = *(model->impl()->getKDTreeTable()); - const auto rootTreeNodeIndex = model->impl()->getRootNodeIndex(); - const NumericTable & data = *(model->impl()->getData()); - const NumericTable * labels = nullptr; + const Model * const model = static_cast(m); + const KDTreeTable & kdTreeTable = *(model->impl()->getKDTreeTable()); + const auto rootTreeNodeIndex = model->impl()->getRootNodeIndex(); + const NumericTable & data = *(model->impl()->getData()); + const NumericTable * labels = nullptr; if (resultsToEvaluate != 0) { labels = model->impl()->getLabels().get(); @@ -339,7 +339,7 @@ void KNNClassificationPredictKernel::findNea size_t i; SearchNode cur, toPush; const KDTreeNode * const nodes = static_cast(kdTreeTable.getArray()); - + const KDTreeNode * node; cur.nodeIndex = rootTreeNodeIndex; cur.minDistance = 0; From 1411ea761496202aee2268b8d7d7ecde5cd1ba43 Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Wed, 9 Oct 2024 07:22:48 -0700 Subject: [PATCH 16/35] restore threading --- makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/makefile b/makefile index 3ea7f0453ac..dd67ad76a13 100644 --- a/makefile +++ b/makefile @@ -832,14 +832,14 @@ THR_TBB.objs_y := $(addprefix $(THR.tmpdir_y)/,$(THR.srcs:%.cpp=%_tbb.$o)) -include $(THR.tmpdir_y)/*.d $(WORKDIR.lib)/$(thr_tbb_a): LOPT:= -$(WORKDIR.lib)/$(thr_tbb_a): $(THR_TBB.objs_a) ; $(LINK.STATIC) +$(WORKDIR.lib)/$(thr_tbb_a): $(THR_TBB.objs_a) $(daaldep.math_backend.thr) ; $(LINK.STATIC) $(THR.tmpdir_y)/%_link.def: $(THR.srcdir)/$(daaldep.$(PLAT).threxport) | $(THR.tmpdir_y)/. $(daaldep.$(_OS).threxport.create) > $@ $(WORKDIR.lib)/$(thr_tbb_y): LOPT += $(-fPIC) $(daaldep.rt.thr) $(WORKDIR.lib)/$(thr_tbb_y): LOPT += $(if $(OS_is_win),-IMPLIB:$(@:%.dll=%_dll.lib),) -$(WORKDIR.lib)/$(thr_tbb_y): $(THR_TBB.objs_y) $(if $(OS_is_win),$(THR.tmpdir_y)/dll_tbb.res,) $(THR.tmpdir_y)/$(thr_tbb_y:%.$y=%_link.def) ; $(LINK.DYNAMIC) ; $(LINK.DYNAMIC.POST) +$(WORKDIR.lib)/$(thr_tbb_y): $(THR_TBB.objs_y) $(daaldep.math_backend.thr) $(if $(OS_is_win),$(THR.tmpdir_y)/dll_tbb.res,) $(THR.tmpdir_y)/$(thr_tbb_y:%.$y=%_link.def) ; $(LINK.DYNAMIC) ; $(LINK.DYNAMIC.POST) THR.objs_a := $(THR_TBB.objs_a) THR.objs_y := $(THR_TBB.objs_y) From 22a18dee2036f3267670ee9c73db417b3dc43a19 Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Mon, 14 Oct 2024 02:03:44 -0700 Subject: [PATCH 17/35] upd macro --- ...ication_predict_dense_default_batch_impl.i | 37 +++++++++++++++---- makefile | 4 +- 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i index 2215440e0d6..70dd20e1fe0 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i @@ -24,8 +24,8 @@ #ifndef __KDTREE_KNN_CLASSIFICATION_PREDICT_DENSE_DEFAULT_BATCH_IMPL_I__ #define __KDTREE_KNN_CLASSIFICATION_PREDICT_DENSE_DEFAULT_BATCH_IMPL_I__ -#include "services/daal_defines.h" #include "src/threading/threading.h" +#include "services/daal_defines.h" #include "src/services/service_utils.h" #include "algorithms/algorithm.h" #include "services/daal_atomic_int.h" @@ -39,7 +39,7 @@ #include "src/algorithms/k_nearest_neighbors/kdtree_knn_classification_model_impl.h" #include "src/algorithms/k_nearest_neighbors/kdtree_knn_impl.i" #include "src/algorithms/k_nearest_neighbors/knn_heap.h" -#include + namespace daal { namespace algorithms @@ -162,7 +162,8 @@ Status KNNClassificationPredictKernel::compu { iSize *= 2; } - const size_t heapSize = (iSize / 16 + 1) * 16; + const size_t heapSize = (iSize / 16 + 1) * 16; + const size_t xRowCount = x->getNumberOfRows(); const algorithmFpType base = 2.0; const size_t expectedMaxDepth = (Math::xsLog(xRowCount) / Math::xsLog(base) + 1) * __KDTREE_DEPTH_MULTIPLICATION_FACTOR; @@ -172,7 +173,6 @@ Status KNNClassificationPredictKernel::compu MaxHeap heap; SearchStack stack; }; - SafeStatus safeStat; daal::tls localTLS([&]() -> Local * { Local * const ptr = service_scalable_calloc(1); @@ -200,6 +200,7 @@ Status KNNClassificationPredictKernel::compu }); DAAL_CHECK_STATUS_OK((status.ok()), status); + const auto maxThreads = threader_get_threads_number(); auto nThreads = (maxThreads < 1) ? 1 : maxThreads; const size_t xColumnCount = x->getNumberOfColumns(); @@ -231,12 +232,14 @@ Status KNNClassificationPredictKernel::compu { DAAL_CHECK_STATUS_THR(distances->getBlockOfRows(first, last - first, writeOnly, distancesBD)); } + if (labels) { const size_t yColumnCount = y->getNumberOfColumns(); data_management::BlockDescriptor yBD; y->getBlockOfRows(first, last - first, writeOnly, yBD); auto * const dy = yBD.getBlockPtr(); + for (size_t i = 0; i < last - first; ++i) { findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, kdTreeTable, rootTreeNodeIndex, data, isHomogenSOA, @@ -265,10 +268,13 @@ Status KNNClassificationPredictKernel::compu { DAAL_CHECK_STATUS_THR(distances->releaseBlockOfRows(distancesBD)); } + const_cast(*x).releaseBlockOfRows(xBD); }); + status = safeStat.detach(); if (!status) return status; + localTLS.reduce([&](Local * ptr) -> void { if (ptr) { @@ -344,7 +350,7 @@ void KNNClassificationPredictKernel::findNea cur.nodeIndex = rootTreeNodeIndex; cur.minDistance = 0; - DAAL_ALIGNAS(256) algorithmFpType distance[__KDTREE_LEAF_BUCKET_SIZE + 1]; + alignas(256) algorithmFpType distance[__KDTREE_LEAF_BUCKET_SIZE + 1]; size_t start, end; data_management::BlockDescriptor xBD[2]; @@ -387,7 +393,7 @@ void KNNClassificationPredictKernel::findNea if (!stack.empty()) { cur = stack.pop(); - DAAL_PREFETCH_READ_T0(&nodes[cur.nodeIndex]); + DAAL_PREFETCH_READ_T0(node); } else { @@ -410,7 +416,7 @@ void KNNClassificationPredictKernel::findNea else if (!stack.empty()) { cur = stack.pop(); - DAAL_PREFETCH_READ_T0(&nodes[cur.nodeIndex]); + DAAL_PREFETCH_READ_T0(node); } else { @@ -427,18 +433,22 @@ services::Status KNNClassificationPredictKernel & distances, size_t index, const size_t nClasses) { typedef daal::internal::MathInst Math; + const size_t heapSize = heap.size(); if (heapSize < 1) return services::Status(); + if (indices.getNumberOfRows() != 0) { DAAL_ASSERT(modelIndices); services::Status s; data_management::BlockDescriptor modelIndicesBD; + const auto nIndices = indices.getNumberOfColumns(); DAAL_ASSERT(heapSize <= nIndices); int * const indicesPtr = indices.getBlockPtr() + index * nIndices; + for (size_t i = 0; i < heapSize; ++i) { s |= const_cast(modelIndices)->getBlockOfRows(heap[i].index, 1, readOnly, modelIndicesBD); @@ -454,14 +464,18 @@ services::Status KNNClassificationPredictKernel labelBD; algorithmFpType * classes = static_cast(daal::services::internal::service_malloc(heapSize)); algorithmFpType * classWeights = static_cast(daal::services::internal::service_malloc(nClasses)); DAAL_CHECK_MALLOC(classWeights); DAAL_CHECK_MALLOC(classes); + for (size_t i = 0; i < nClasses; ++i) { classWeights[i] = 0; } + for (size_t i = 0; i < heapSize; ++i) { const_cast(labels)->getBlockOfColumnValues(0, heap[i].index, 1, readOnly, labelBD); classes[i] = *(labelBD.getBlockPtr()); const_cast(labels)->releaseBlockOfColumnValues(labelBD); } + if (voteWeights == voteUniform) { for (size_t i = 0; i < heapSize; ++i) @@ -496,9 +514,11 @@ services::Status KNNClassificationPredictKernel::get(); bool isContainZero = false; + for (size_t i = 0; i < heapSize; ++i) { if (heap[i].distance <= epsilon) @@ -507,6 +527,7 @@ services::Status KNNClassificationPredictKernel(classes); service_free(classWeights); classes = nullptr; diff --git a/makefile b/makefile index dd67ad76a13..3ea7f0453ac 100644 --- a/makefile +++ b/makefile @@ -832,14 +832,14 @@ THR_TBB.objs_y := $(addprefix $(THR.tmpdir_y)/,$(THR.srcs:%.cpp=%_tbb.$o)) -include $(THR.tmpdir_y)/*.d $(WORKDIR.lib)/$(thr_tbb_a): LOPT:= -$(WORKDIR.lib)/$(thr_tbb_a): $(THR_TBB.objs_a) $(daaldep.math_backend.thr) ; $(LINK.STATIC) +$(WORKDIR.lib)/$(thr_tbb_a): $(THR_TBB.objs_a) ; $(LINK.STATIC) $(THR.tmpdir_y)/%_link.def: $(THR.srcdir)/$(daaldep.$(PLAT).threxport) | $(THR.tmpdir_y)/. $(daaldep.$(_OS).threxport.create) > $@ $(WORKDIR.lib)/$(thr_tbb_y): LOPT += $(-fPIC) $(daaldep.rt.thr) $(WORKDIR.lib)/$(thr_tbb_y): LOPT += $(if $(OS_is_win),-IMPLIB:$(@:%.dll=%_dll.lib),) -$(WORKDIR.lib)/$(thr_tbb_y): $(THR_TBB.objs_y) $(daaldep.math_backend.thr) $(if $(OS_is_win),$(THR.tmpdir_y)/dll_tbb.res,) $(THR.tmpdir_y)/$(thr_tbb_y:%.$y=%_link.def) ; $(LINK.DYNAMIC) ; $(LINK.DYNAMIC.POST) +$(WORKDIR.lib)/$(thr_tbb_y): $(THR_TBB.objs_y) $(if $(OS_is_win),$(THR.tmpdir_y)/dll_tbb.res,) $(THR.tmpdir_y)/$(thr_tbb_y:%.$y=%_link.def) ; $(LINK.DYNAMIC) ; $(LINK.DYNAMIC.POST) THR.objs_a := $(THR_TBB.objs_a) THR.objs_y := $(THR_TBB.objs_y) From 38b920899890dfc200b8997b96a5ecbfb39869cb Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Mon, 14 Oct 2024 05:06:02 -0700 Subject: [PATCH 18/35] more prints on 1 rank knn --- ...ication_predict_dense_default_batch_impl.i | 34 ++++++++++++++----- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i index 70dd20e1fe0..6b5ab024bab 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i @@ -39,7 +39,7 @@ #include "src/algorithms/k_nearest_neighbors/kdtree_knn_classification_model_impl.h" #include "src/algorithms/k_nearest_neighbors/kdtree_knn_impl.i" #include "src/algorithms/k_nearest_neighbors/knn_heap.h" - +#include namespace daal { namespace algorithms @@ -209,7 +209,7 @@ Status KNNClassificationPredictKernel::compu services::internal::TArrayScalable soa_arrays; bool isHomogenSOA = checkHomogenSOA(data, soa_arrays); - + services::Environment::getInstance()->setNumberOfThreads(1); daal::threader_for(blockCount, blockCount, [&](int iBlock) { Local * const local = localTLS.local(); DAAL_CHECK_MALLOC_THR(local); @@ -274,7 +274,7 @@ Status KNNClassificationPredictKernel::compu status = safeStat.detach(); if (!status) return status; - + services::Environment::getInstance()->setNumberOfThreads(nThreads); localTLS.reduce([&](Local * ptr) -> void { if (ptr) { @@ -341,37 +341,45 @@ void KNNClassificationPredictKernel::findNea { heap.reset(); stack.reset(); + std::cout<<"here1"< curNeighbor; size_t i; SearchNode cur, toPush; const KDTreeNode * const nodes = static_cast(kdTreeTable.getArray()); - + std::cout<<"here 2"< xBD[2]; + std::cout<<"here 4"<dimension == __KDTREE_NULLDIMENSION) { + std::cout<<"here 7"<leftIndex; end = node->rightIndex; - + std::cout<<"here 8"<(start, end, distance, query, isHomogenSOA, data, xBD, soa_arrays); - + std::cout<<"here 9"<::findNea } else { + std::cout<<"here 13"<distance > curNeighbor.distance) { heap.replaceMax(curNeighbor); radius = heap.getMax()->distance; } } + std::cout<<"here 14"<::findNea { break; } + std::cout<<"here 16"<dimension]; const algorithmFpType diff = val - node->cutPoint; - + std::cout<<"here 18"<leftIndex : node->rightIndex; toPush.nodeIndex = (diff < 0) ? node->rightIndex : node->leftIndex; val -= node->cutPoint; toPush.minDistance = cur.minDistance + val * val; + std::cout<<"here 20"<::findNea { break; } + std::cout<<"here 3"< Date: Tue, 15 Oct 2024 02:45:08 -0700 Subject: [PATCH 19/35] fixes for alignas --- ...ication_predict_dense_default_batch_impl.i | 46 +++++++++---------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i index 6b5ab024bab..ff4e4243f0d 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i @@ -341,45 +341,45 @@ void KNNClassificationPredictKernel::findNea { heap.reset(); stack.reset(); - std::cout<<"here1"< curNeighbor; size_t i; SearchNode cur, toPush; const KDTreeNode * const nodes = static_cast(kdTreeTable.getArray()); - std::cout<<"here 2"< xBD[2]; - std::cout<<"here 4"<dimension == __KDTREE_NULLDIMENSION) { - std::cout<<"here 7"<leftIndex; end = node->rightIndex; - std::cout<<"here 8"<(start, end, distance, query, isHomogenSOA, data, xBD, soa_arrays); - std::cout<<"here 9"<::findNea } else { - std::cout<<"here 13"<distance > curNeighbor.distance) { heap.replaceMax(curNeighbor); radius = heap.getMax()->distance; } } - std::cout<<"here 14"<::findNea { break; } - std::cout<<"here 16"<dimension]; const algorithmFpType diff = val - node->cutPoint; - std::cout<<"here 18"<leftIndex : node->rightIndex; toPush.nodeIndex = (diff < 0) ? node->rightIndex : node->leftIndex; val -= node->cutPoint; toPush.minDistance = cur.minDistance + val * val; - std::cout<<"here 20"<::findNea { break; } - std::cout<<"here 3"< Date: Tue, 15 Oct 2024 04:31:11 -0700 Subject: [PATCH 20/35] fixes for kd tree --- ...n_classification_predict_dense_default_batch_impl.i | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i index ff4e4243f0d..53ad8ab806a 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i @@ -209,7 +209,7 @@ Status KNNClassificationPredictKernel::compu services::internal::TArrayScalable soa_arrays; bool isHomogenSOA = checkHomogenSOA(data, soa_arrays); - services::Environment::getInstance()->setNumberOfThreads(1); + daal::threader_for(blockCount, blockCount, [&](int iBlock) { Local * const local = localTLS.local(); DAAL_CHECK_MALLOC_THR(local); @@ -274,7 +274,7 @@ Status KNNClassificationPredictKernel::compu status = safeStat.detach(); if (!status) return status; - services::Environment::getInstance()->setNumberOfThreads(nThreads); + localTLS.reduce([&](Local * ptr) -> void { if (ptr) { @@ -341,17 +341,14 @@ void KNNClassificationPredictKernel::findNea { heap.reset(); stack.reset(); - std::cout << "here1" << std::endl; GlobalNeighbors curNeighbor; size_t i; SearchNode cur, toPush; const KDTreeNode * const nodes = static_cast(kdTreeTable.getArray()); - std::cout << "here 2" << std::endl; const KDTreeNode * node; cur.nodeIndex = rootTreeNodeIndex; cur.minDistance = 0; - std::cout << "here 3" << std::endl; - alignas(256) algorithmFpType distance[__KDTREE_LEAF_BUCKET_SIZE + 1]; + algorithmFpType distance[__KDTREE_LEAF_BUCKET_SIZE + 1]; size_t start, end; data_management::BlockDescriptor xBD[2]; @@ -379,7 +376,6 @@ void KNNClassificationPredictKernel::findNea curNeighbor.index = i; if (heap.size() < k) { - std::cout << "here 12" << std::endl; heap.push(curNeighbor, k); if (heap.size() == k) From 282a97b411fbea2394211f2c33ba3953a37522cc Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Tue, 15 Oct 2024 07:20:21 -0700 Subject: [PATCH 21/35] remove debug stuff --- ...ication_predict_dense_default_batch_impl.i | 22 ++----------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i index 53ad8ab806a..530bc3769e4 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i @@ -39,7 +39,7 @@ #include "src/algorithms/k_nearest_neighbors/kdtree_knn_classification_model_impl.h" #include "src/algorithms/k_nearest_neighbors/kdtree_knn_impl.i" #include "src/algorithms/k_nearest_neighbors/knn_heap.h" -#include + namespace daal { namespace algorithms @@ -352,26 +352,19 @@ void KNNClassificationPredictKernel::findNea size_t start, end; data_management::BlockDescriptor xBD[2]; - // std::cout<<"here 4"<dimension == __KDTREE_NULLDIMENSION) { - // std::cout<<"here 7"<leftIndex; end = node->rightIndex; - // std::cout<<"here 8"<(start, end, distance, query, isHomogenSOA, data, xBD, soa_arrays); - // std::cout<<"here 9"<::findNea } else { - // std::cout<<"here 13"<distance > curNeighbor.distance) { heap.replaceMax(curNeighbor); radius = heap.getMax()->distance; } } - // std::cout<<"here 14"<::findNea { break; } - // std::cout<<"here 16"<dimension]; const algorithmFpType diff = val - node->cutPoint; - // std::cout<<"here 18"<leftIndex : node->rightIndex; toPush.nodeIndex = (diff < 0) ? node->rightIndex : node->leftIndex; val -= node->cutPoint; toPush.minDistance = cur.minDistance + val * val; - // std::cout<<"here 20"<::findNea { break; } - // std::cout<<"here 3"< Date: Wed, 16 Oct 2024 03:41:01 -0700 Subject: [PATCH 22/35] fixes --- ...ication_predict_dense_default_batch_impl.i | 132 ++++++++++-------- 1 file changed, 73 insertions(+), 59 deletions(-) diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i index 530bc3769e4..5b2bdc35e45 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i @@ -40,6 +40,10 @@ #include "src/algorithms/k_nearest_neighbors/kdtree_knn_impl.i" #include "src/algorithms/k_nearest_neighbors/knn_heap.h" +#if defined(DAAL_INTEL_CPP_COMPILER) + #include +#endif + namespace daal { namespace algorithms @@ -208,7 +212,7 @@ Status KNNClassificationPredictKernel::compu const auto blockCount = (xRowCount + rowsPerBlock - 1) / rowsPerBlock; services::internal::TArrayScalable soa_arrays; - bool isHomogenSOA = checkHomogenSOA(data, soa_arrays); + bool isHomogenSOA = false; daal::threader_for(blockCount, blockCount, [&](int iBlock) { Local * const local = localTLS.local(); @@ -217,59 +221,62 @@ Status KNNClassificationPredictKernel::compu const size_t first = iBlock * rowsPerBlock; const size_t last = min(static_cast(first + rowsPerBlock), xRowCount); - const algorithmFpType radius = MaxVal::get(); - data_management::BlockDescriptor xBD; - const_cast(*x).getBlockOfRows(first, last - first, readOnly, xBD); - const algorithmFpType * const dx = xBD.getBlockPtr(); - - data_management::BlockDescriptor indicesBD; - data_management::BlockDescriptor distancesBD; - if (indices) - { - DAAL_CHECK_STATUS_THR(indices->getBlockOfRows(first, last - first, writeOnly, indicesBD)); - } - if (distances) + if (first < last) { - DAAL_CHECK_STATUS_THR(distances->getBlockOfRows(first, last - first, writeOnly, distancesBD)); - } + const algorithmFpType radius = MaxVal::get(); + data_management::BlockDescriptor xBD; + const_cast(*x).getBlockOfRows(first, last - first, readOnly, xBD); + const algorithmFpType * const dx = xBD.getBlockPtr(); + + data_management::BlockDescriptor indicesBD; + data_management::BlockDescriptor distancesBD; + if (indices) + { + DAAL_CHECK_STATUS_THR(indices->getBlockOfRows(first, last - first, writeOnly, indicesBD)); + } + if (distances) + { + DAAL_CHECK_STATUS_THR(distances->getBlockOfRows(first, last - first, writeOnly, distancesBD)); + } - if (labels) - { - const size_t yColumnCount = y->getNumberOfColumns(); - data_management::BlockDescriptor yBD; - y->getBlockOfRows(first, last - first, writeOnly, yBD); - auto * const dy = yBD.getBlockPtr(); + if (labels) + { + const size_t yColumnCount = y->getNumberOfColumns(); + data_management::BlockDescriptor yBD; + y->getBlockOfRows(first, last - first, writeOnly, yBD); + auto * const dy = yBD.getBlockPtr(); - for (size_t i = 0; i < last - first; ++i) + for (size_t i = 0; i < last - first; ++i) + { + findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, kdTreeTable, rootTreeNodeIndex, data, + isHomogenSOA, soa_arrays); + DAAL_CHECK_STATUS_THR( + predict(&dy[i * yColumnCount], local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses)); + } + y->releaseBlockOfRows(yBD); + } + else { - findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, kdTreeTable, rootTreeNodeIndex, data, isHomogenSOA, - soa_arrays); - DAAL_CHECK_STATUS_THR( - predict(&dy[i * yColumnCount], local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses)); + for (size_t i = 0; i < last - first; ++i) + { + findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, kdTreeTable, rootTreeNodeIndex, data, + isHomogenSOA, soa_arrays); + DAAL_CHECK_STATUS_THR(predict(nullptr, local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses)); + } } - y->releaseBlockOfRows(yBD); - } - else - { - for (size_t i = 0; i < last - first; ++i) + + if (indices) { - findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, kdTreeTable, rootTreeNodeIndex, data, isHomogenSOA, - soa_arrays); - DAAL_CHECK_STATUS_THR(predict(nullptr, local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses)); + DAAL_CHECK_STATUS_THR(indices->releaseBlockOfRows(indicesBD)); } - } - if (indices) - { - DAAL_CHECK_STATUS_THR(indices->releaseBlockOfRows(indicesBD)); - } + if (distances) + { + DAAL_CHECK_STATUS_THR(distances->releaseBlockOfRows(distancesBD)); + } - if (distances) - { - DAAL_CHECK_STATUS_THR(distances->releaseBlockOfRows(distancesBD)); + const_cast(*x).releaseBlockOfRows(xBD); } - - const_cast(*x).releaseBlockOfRows(xBD); }); status = safeStat.detach(); @@ -291,45 +298,52 @@ DAAL_FORCEINLINE void computeDistance(size_t start, size_t end, algorithmFpType const NumericTable & data, data_management::BlockDescriptor xBD[2], services::internal::TArrayScalable & soa_arrays) { + // Initialize the distance array to zero for the range [start, end) for (size_t i = start; i < end; ++i) { distance[i - start] = 0; } - size_t curBDIdx = 0; - size_t nextBDIdx = 1; + size_t curBDIdx = 0; // Current block descriptor index + size_t nextBDIdx = 1; // Next block descriptor index - const size_t xColumnCount = data.getNumberOfColumns(); + const size_t xColumnCount = data.getNumberOfColumns(); // Total number of columns in the data - const algorithmFpType * nx = nullptr; - const algorithmFpType * dx = getNtData(isHomogenSOA, 0, start, end - start, data, xBD[curBDIdx], soa_arrays); + const algorithmFpType * dx = + getNtData(isHomogenSOA, 0, start, end - start, data, xBD[curBDIdx], soa_arrays); // Retrieve data for the first column - size_t j; - for (j = 1; j < xColumnCount; ++j) + // Iterate over each column to compute squared distances + for (size_t j = 1; j < xColumnCount; ++j) { - nx = getNtData(isHomogenSOA, j, start, end - start, data, xBD[nextBDIdx], soa_arrays); + const algorithmFpType * nx = + getNtData(isHomogenSOA, j, start, end - start, data, xBD[nextBDIdx], soa_arrays); // Retrieve data for the next column + // Prefetch the next column data to optimize memory access DAAL_PREFETCH_READ_T0(nx); - DAAL_PREFETCH_READ_T0(nx + 16); + DAAL_PREFETCH_READ_T0(nx + 16); // Adjust prefetch based on expected access patterns + // Compute distance contributions from the current column for (size_t i = 0; i < end - start; ++i) { distance[i] += (query[j - 1] - dx[i]) * (query[j - 1] - dx[i]); } + // Release the current block of data to avoid memory leaks releaseNtData(isHomogenSOA, data, xBD[curBDIdx]); + // Swap block descriptors and pointers for the next iteration services::internal::swap(curBDIdx, nextBDIdx); services::internal::swap(dx, nx); } - { - for (size_t i = 0; i < end - start; ++i) - { - distance[i] += (query[j - 1] - dx[i]) * (query[j - 1] - dx[i]); - } - releaseNtData(isHomogenSOA, data, xBD[curBDIdx]); + // Handle the last column after the loop + for (size_t i = 0; i < end - start; ++i) + { + distance[i] += (query[xColumnCount - 1] - dx[i]) * (query[xColumnCount - 1] - dx[i]); } + + // Release the final block of data + releaseNtData(isHomogenSOA, data, xBD[curBDIdx]); } template From 432f7466709f40a0969cd84bb9f442066d3561ec Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Wed, 16 Oct 2024 07:59:10 -0700 Subject: [PATCH 23/35] tmp rm predict --- ...ication_predict_dense_default_batch_impl.i | 588 +++++++++--------- 1 file changed, 294 insertions(+), 294 deletions(-) diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i index 5b2bdc35e45..097ee10f46e 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i @@ -127,169 +127,169 @@ Status KNNClassificationPredictKernel::compu { Status status; - typedef GlobalNeighbors Neighbors; - typedef Heap MaxHeap; - typedef kdtree_knn_classification::internal::Stack, cpu> SearchStack; - typedef daal::services::internal::MaxVal MaxVal; - typedef daal::internal::MathInst Math; - - size_t k; - size_t nClasses; - VoteWeights voteWeights = voteUniform; - DAAL_UINT64 resultsToEvaluate = classifier::computeClassLabels; - - const auto par3 = dynamic_cast(par); - if (par3) - { - k = par3->k; - voteWeights = par3->voteWeights; - resultsToEvaluate = par3->resultsToEvaluate; - nClasses = par3->nClasses; - } - - if (par3 == NULL) return Status(ErrorNullParameterNotSupported); - - const Model * const model = static_cast(m); - const KDTreeTable & kdTreeTable = *(model->impl()->getKDTreeTable()); - const auto rootTreeNodeIndex = model->impl()->getRootNodeIndex(); - const NumericTable & data = *(model->impl()->getData()); - const NumericTable * labels = nullptr; - if (resultsToEvaluate != 0) - { - labels = model->impl()->getLabels().get(); - } - - const NumericTable * const modelIndices = model->impl()->getIndices().get(); - - size_t iSize = 1; - while (iSize < k) - { - iSize *= 2; - } - const size_t heapSize = (iSize / 16 + 1) * 16; - - const size_t xRowCount = x->getNumberOfRows(); - const algorithmFpType base = 2.0; - const size_t expectedMaxDepth = (Math::xsLog(xRowCount) / Math::xsLog(base) + 1) * __KDTREE_DEPTH_MULTIPLICATION_FACTOR; - const size_t stackSize = Math::xsPowx(base, Math::xsCeil(Math::xsLog(expectedMaxDepth) / Math::xsLog(base))); - struct Local - { - MaxHeap heap; - SearchStack stack; - }; - SafeStatus safeStat; - daal::tls localTLS([&]() -> Local * { - Local * const ptr = service_scalable_calloc(1); - if (ptr) - { - if (!ptr->heap.init(heapSize)) - { - safeStat.add(services::ErrorMemoryAllocationFailed); - service_scalable_free(ptr); - return nullptr; - } - if (!ptr->stack.init(stackSize)) - { - safeStat.add(services::ErrorMemoryAllocationFailed); - ptr->heap.clear(); - service_scalable_free(ptr); - return nullptr; - } - } - else - { - safeStat.add(services::ErrorMemoryAllocationFailed); - } - return ptr; - }); - - DAAL_CHECK_STATUS_OK((status.ok()), status); - - const auto maxThreads = threader_get_threads_number(); - auto nThreads = (maxThreads < 1) ? 1 : maxThreads; - const size_t xColumnCount = x->getNumberOfColumns(); - const auto rowsPerBlock = (xRowCount + nThreads - 1) / nThreads; - const auto blockCount = (xRowCount + rowsPerBlock - 1) / rowsPerBlock; - - services::internal::TArrayScalable soa_arrays; - bool isHomogenSOA = false; - - daal::threader_for(blockCount, blockCount, [&](int iBlock) { - Local * const local = localTLS.local(); - DAAL_CHECK_MALLOC_THR(local); - - const size_t first = iBlock * rowsPerBlock; - const size_t last = min(static_cast(first + rowsPerBlock), xRowCount); - - if (first < last) - { - const algorithmFpType radius = MaxVal::get(); - data_management::BlockDescriptor xBD; - const_cast(*x).getBlockOfRows(first, last - first, readOnly, xBD); - const algorithmFpType * const dx = xBD.getBlockPtr(); - - data_management::BlockDescriptor indicesBD; - data_management::BlockDescriptor distancesBD; - if (indices) - { - DAAL_CHECK_STATUS_THR(indices->getBlockOfRows(first, last - first, writeOnly, indicesBD)); - } - if (distances) - { - DAAL_CHECK_STATUS_THR(distances->getBlockOfRows(first, last - first, writeOnly, distancesBD)); - } - - if (labels) - { - const size_t yColumnCount = y->getNumberOfColumns(); - data_management::BlockDescriptor yBD; - y->getBlockOfRows(first, last - first, writeOnly, yBD); - auto * const dy = yBD.getBlockPtr(); - - for (size_t i = 0; i < last - first; ++i) - { - findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, kdTreeTable, rootTreeNodeIndex, data, - isHomogenSOA, soa_arrays); - DAAL_CHECK_STATUS_THR( - predict(&dy[i * yColumnCount], local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses)); - } - y->releaseBlockOfRows(yBD); - } - else - { - for (size_t i = 0; i < last - first; ++i) - { - findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, kdTreeTable, rootTreeNodeIndex, data, - isHomogenSOA, soa_arrays); - DAAL_CHECK_STATUS_THR(predict(nullptr, local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses)); - } - } - - if (indices) - { - DAAL_CHECK_STATUS_THR(indices->releaseBlockOfRows(indicesBD)); - } - - if (distances) - { - DAAL_CHECK_STATUS_THR(distances->releaseBlockOfRows(distancesBD)); - } - - const_cast(*x).releaseBlockOfRows(xBD); - } - }); - - status = safeStat.detach(); - if (!status) return status; - - localTLS.reduce([&](Local * ptr) -> void { - if (ptr) - { - ptr->stack.clear(); - ptr->heap.clear(); - service_scalable_free(ptr); - } - }); + // typedef GlobalNeighbors Neighbors; + // typedef Heap MaxHeap; + // typedef kdtree_knn_classification::internal::Stack, cpu> SearchStack; + // typedef daal::services::internal::MaxVal MaxVal; + // typedef daal::internal::MathInst Math; + + // size_t k; + // size_t nClasses; + // VoteWeights voteWeights = voteUniform; + // DAAL_UINT64 resultsToEvaluate = classifier::computeClassLabels; + + // const auto par3 = dynamic_cast(par); + // if (par3) + // { + // k = par3->k; + // voteWeights = par3->voteWeights; + // resultsToEvaluate = par3->resultsToEvaluate; + // nClasses = par3->nClasses; + // } + + // if (par3 == NULL) return Status(ErrorNullParameterNotSupported); + + // const Model * const model = static_cast(m); + // const KDTreeTable & kdTreeTable = *(model->impl()->getKDTreeTable()); + // const auto rootTreeNodeIndex = model->impl()->getRootNodeIndex(); + // const NumericTable & data = *(model->impl()->getData()); + // const NumericTable * labels = nullptr; + // if (resultsToEvaluate != 0) + // { + // labels = model->impl()->getLabels().get(); + // } + + // const NumericTable * const modelIndices = model->impl()->getIndices().get(); + + // size_t iSize = 1; + // while (iSize < k) + // { + // iSize *= 2; + // } + // const size_t heapSize = (iSize / 16 + 1) * 16; + + // const size_t xRowCount = x->getNumberOfRows(); + // const algorithmFpType base = 2.0; + // const size_t expectedMaxDepth = (Math::xsLog(xRowCount) / Math::xsLog(base) + 1) * __KDTREE_DEPTH_MULTIPLICATION_FACTOR; + // const size_t stackSize = Math::xsPowx(base, Math::xsCeil(Math::xsLog(expectedMaxDepth) / Math::xsLog(base))); + // struct Local + // { + // MaxHeap heap; + // SearchStack stack; + // }; + // SafeStatus safeStat; + // daal::tls localTLS([&]() -> Local * { + // Local * const ptr = service_scalable_calloc(1); + // if (ptr) + // { + // if (!ptr->heap.init(heapSize)) + // { + // safeStat.add(services::ErrorMemoryAllocationFailed); + // service_scalable_free(ptr); + // return nullptr; + // } + // if (!ptr->stack.init(stackSize)) + // { + // safeStat.add(services::ErrorMemoryAllocationFailed); + // ptr->heap.clear(); + // service_scalable_free(ptr); + // return nullptr; + // } + // } + // else + // { + // safeStat.add(services::ErrorMemoryAllocationFailed); + // } + // return ptr; + // }); + + // DAAL_CHECK_STATUS_OK((status.ok()), status); + + // const auto maxThreads = threader_get_threads_number(); + // auto nThreads = (maxThreads < 1) ? 1 : maxThreads; + // const size_t xColumnCount = x->getNumberOfColumns(); + // const auto rowsPerBlock = (xRowCount + nThreads - 1) / nThreads; + // const auto blockCount = (xRowCount + rowsPerBlock - 1) / rowsPerBlock; + + // services::internal::TArrayScalable soa_arrays; + // bool isHomogenSOA = false; + + // daal::threader_for(blockCount, blockCount, [&](int iBlock) { + // Local * const local = localTLS.local(); + // DAAL_CHECK_MALLOC_THR(local); + + // const size_t first = iBlock * rowsPerBlock; + // const size_t last = min(static_cast(first + rowsPerBlock), xRowCount); + + // if (first < last) + // { + // const algorithmFpType radius = MaxVal::get(); + // data_management::BlockDescriptor xBD; + // const_cast(*x).getBlockOfRows(first, last - first, readOnly, xBD); + // const algorithmFpType * const dx = xBD.getBlockPtr(); + + // data_management::BlockDescriptor indicesBD; + // data_management::BlockDescriptor distancesBD; + // if (indices) + // { + // DAAL_CHECK_STATUS_THR(indices->getBlockOfRows(first, last - first, writeOnly, indicesBD)); + // } + // if (distances) + // { + // DAAL_CHECK_STATUS_THR(distances->getBlockOfRows(first, last - first, writeOnly, distancesBD)); + // } + + // if (labels) + // { + // const size_t yColumnCount = y->getNumberOfColumns(); + // data_management::BlockDescriptor yBD; + // y->getBlockOfRows(first, last - first, writeOnly, yBD); + // auto * const dy = yBD.getBlockPtr(); + + // for (size_t i = 0; i < last - first; ++i) + // { + // findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, kdTreeTable, rootTreeNodeIndex, data, + // isHomogenSOA, soa_arrays); + // DAAL_CHECK_STATUS_THR( + // predict(&dy[i * yColumnCount], local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses)); + // } + // y->releaseBlockOfRows(yBD); + // } + // else + // { + // for (size_t i = 0; i < last - first; ++i) + // { + // findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, kdTreeTable, rootTreeNodeIndex, data, + // isHomogenSOA, soa_arrays); + // DAAL_CHECK_STATUS_THR(predict(nullptr, local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses)); + // } + // } + + // if (indices) + // { + // DAAL_CHECK_STATUS_THR(indices->releaseBlockOfRows(indicesBD)); + // } + + // if (distances) + // { + // DAAL_CHECK_STATUS_THR(distances->releaseBlockOfRows(distancesBD)); + // } + + // const_cast(*x).releaseBlockOfRows(xBD); + // } + // }); + + // status = safeStat.detach(); + // if (!status) return status; + + // localTLS.reduce([&](Local * ptr) -> void { + // if (ptr) + // { + // ptr->stack.clear(); + // ptr->heap.clear(); + // service_scalable_free(ptr); + // } + // }); return status; } @@ -442,137 +442,137 @@ services::Status KNNClassificationPredictKernel & indices, data_management::BlockDescriptor & distances, size_t index, const size_t nClasses) { - typedef daal::internal::MathInst Math; - - const size_t heapSize = heap.size(); - if (heapSize < 1) return services::Status(); - - if (indices.getNumberOfRows() != 0) - { - DAAL_ASSERT(modelIndices); - - services::Status s; - data_management::BlockDescriptor modelIndicesBD; - - const auto nIndices = indices.getNumberOfColumns(); - DAAL_ASSERT(heapSize <= nIndices); - - int * const indicesPtr = indices.getBlockPtr() + index * nIndices; - - for (size_t i = 0; i < heapSize; ++i) - { - s |= const_cast(modelIndices)->getBlockOfRows(heap[i].index, 1, readOnly, modelIndicesBD); - DAAL_ASSERT(s.ok()); - - indicesPtr[i] = *(modelIndicesBD.getBlockPtr()); - - s |= const_cast(modelIndices)->releaseBlockOfRows(modelIndicesBD); - DAAL_ASSERT(s.ok()); - } - } - - if (distances.getNumberOfRows() != 0) - { - services::Status s; - - const auto nDistances = distances.getNumberOfColumns(); - DAAL_ASSERT(heapSize <= nDistances); - - algorithmFpType * const distancesPtr = distances.getBlockPtr() + index * nDistances; - for (size_t i = 0; i < heapSize; ++i) - { - distancesPtr[i] = heap[i].distance; - } - - Math::xvSqrt(heapSize, distancesPtr, distancesPtr); - - for (size_t i = heapSize; i < nDistances; ++i) - { - distancesPtr[i] = -1; - } - } - - if (labels) - { - DAAL_ASSERT(predictedClass); - - data_management::BlockDescriptor labelBD; - algorithmFpType * classes = static_cast(daal::services::internal::service_malloc(heapSize)); - algorithmFpType * classWeights = static_cast(daal::services::internal::service_malloc(nClasses)); - DAAL_CHECK_MALLOC(classWeights); - DAAL_CHECK_MALLOC(classes); - - for (size_t i = 0; i < nClasses; ++i) - { - classWeights[i] = 0; - } - - for (size_t i = 0; i < heapSize; ++i) - { - const_cast(labels)->getBlockOfColumnValues(0, heap[i].index, 1, readOnly, labelBD); - classes[i] = *(labelBD.getBlockPtr()); - const_cast(labels)->releaseBlockOfColumnValues(labelBD); - } - - if (voteWeights == voteUniform) - { - for (size_t i = 0; i < heapSize; ++i) - { - classWeights[(size_t)(classes[i])] += 1; - } - } - else - { - DAAL_ASSERT(voteWeights == voteDistance); - - const algorithmFpType epsilon = daal::services::internal::EpsilonVal::get(); - - bool isContainZero = false; - - for (size_t i = 0; i < heapSize; ++i) - { - if (heap[i].distance <= epsilon) - { - isContainZero = true; - break; - } - } - - if (isContainZero) - { - for (size_t i = 0; i < heapSize; ++i) - { - if (heap[i].distance <= epsilon) - { - classWeights[(size_t)(classes[i])] += 1; - } - } - } - else - { - for (size_t i = 0; i < heapSize; ++i) - { - classWeights[(size_t)(classes[i])] += Math::sSqrt(1 / heap[i].distance); - } - } - } - - algorithmFpType maxWeightClass = 0; - algorithmFpType maxWeight = 0; - for (size_t i = 0; i < nClasses; ++i) - { - if (classWeights[i] > maxWeight) - { - maxWeight = classWeights[i]; - maxWeightClass = i; - } - } - *predictedClass = maxWeightClass; - - service_free(classes); - service_free(classWeights); - classes = nullptr; - } + // typedef daal::internal::MathInst Math; + + // const size_t heapSize = heap.size(); + // if (heapSize < 1) return services::Status(); + + // if (indices.getNumberOfRows() != 0) + // { + // DAAL_ASSERT(modelIndices); + + // services::Status s; + // data_management::BlockDescriptor modelIndicesBD; + + // const auto nIndices = indices.getNumberOfColumns(); + // DAAL_ASSERT(heapSize <= nIndices); + + // int * const indicesPtr = indices.getBlockPtr() + index * nIndices; + + // for (size_t i = 0; i < heapSize; ++i) + // { + // s |= const_cast(modelIndices)->getBlockOfRows(heap[i].index, 1, readOnly, modelIndicesBD); + // DAAL_ASSERT(s.ok()); + + // indicesPtr[i] = *(modelIndicesBD.getBlockPtr()); + + // s |= const_cast(modelIndices)->releaseBlockOfRows(modelIndicesBD); + // DAAL_ASSERT(s.ok()); + // } + // } + + // if (distances.getNumberOfRows() != 0) + // { + // services::Status s; + + // const auto nDistances = distances.getNumberOfColumns(); + // DAAL_ASSERT(heapSize <= nDistances); + + // algorithmFpType * const distancesPtr = distances.getBlockPtr() + index * nDistances; + // for (size_t i = 0; i < heapSize; ++i) + // { + // distancesPtr[i] = heap[i].distance; + // } + + // Math::xvSqrt(heapSize, distancesPtr, distancesPtr); + + // for (size_t i = heapSize; i < nDistances; ++i) + // { + // distancesPtr[i] = -1; + // } + // } + + // if (labels) + // { + // DAAL_ASSERT(predictedClass); + + // data_management::BlockDescriptor labelBD; + // algorithmFpType * classes = static_cast(daal::services::internal::service_malloc(heapSize)); + // algorithmFpType * classWeights = static_cast(daal::services::internal::service_malloc(nClasses)); + // DAAL_CHECK_MALLOC(classWeights); + // DAAL_CHECK_MALLOC(classes); + + // for (size_t i = 0; i < nClasses; ++i) + // { + // classWeights[i] = 0; + // } + + // for (size_t i = 0; i < heapSize; ++i) + // { + // const_cast(labels)->getBlockOfColumnValues(0, heap[i].index, 1, readOnly, labelBD); + // classes[i] = *(labelBD.getBlockPtr()); + // const_cast(labels)->releaseBlockOfColumnValues(labelBD); + // } + + // if (voteWeights == voteUniform) + // { + // for (size_t i = 0; i < heapSize; ++i) + // { + // classWeights[(size_t)(classes[i])] += 1; + // } + // } + // else + // { + // DAAL_ASSERT(voteWeights == voteDistance); + + // const algorithmFpType epsilon = daal::services::internal::EpsilonVal::get(); + + // bool isContainZero = false; + + // for (size_t i = 0; i < heapSize; ++i) + // { + // if (heap[i].distance <= epsilon) + // { + // isContainZero = true; + // break; + // } + // } + + // if (isContainZero) + // { + // for (size_t i = 0; i < heapSize; ++i) + // { + // if (heap[i].distance <= epsilon) + // { + // classWeights[(size_t)(classes[i])] += 1; + // } + // } + // } + // else + // { + // for (size_t i = 0; i < heapSize; ++i) + // { + // classWeights[(size_t)(classes[i])] += Math::sSqrt(1 / heap[i].distance); + // } + // } + // } + + // algorithmFpType maxWeightClass = 0; + // algorithmFpType maxWeight = 0; + // for (size_t i = 0; i < nClasses; ++i) + // { + // if (classWeights[i] > maxWeight) + // { + // maxWeight = classWeights[i]; + // maxWeightClass = i; + // } + // } + // *predictedClass = maxWeightClass; + + // service_free(classes); + // service_free(classWeights); + // classes = nullptr; + // } return services::Status(); } From d77dbc3257842e14acc6f78e5d7ff7359a833eca Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Wed, 16 Oct 2024 11:51:52 -0700 Subject: [PATCH 24/35] partial restore --- ...ication_predict_dense_default_batch_impl.i | 290 +++++++++--------- 1 file changed, 145 insertions(+), 145 deletions(-) diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i index 097ee10f46e..614bb4b06f2 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i @@ -127,169 +127,169 @@ Status KNNClassificationPredictKernel::compu { Status status; - // typedef GlobalNeighbors Neighbors; - // typedef Heap MaxHeap; - // typedef kdtree_knn_classification::internal::Stack, cpu> SearchStack; - // typedef daal::services::internal::MaxVal MaxVal; - // typedef daal::internal::MathInst Math; - - // size_t k; - // size_t nClasses; - // VoteWeights voteWeights = voteUniform; - // DAAL_UINT64 resultsToEvaluate = classifier::computeClassLabels; - - // const auto par3 = dynamic_cast(par); - // if (par3) - // { - // k = par3->k; - // voteWeights = par3->voteWeights; - // resultsToEvaluate = par3->resultsToEvaluate; - // nClasses = par3->nClasses; - // } + typedef GlobalNeighbors Neighbors; + typedef Heap MaxHeap; + typedef kdtree_knn_classification::internal::Stack, cpu> SearchStack; + typedef daal::services::internal::MaxVal MaxVal; + typedef daal::internal::MathInst Math; + + size_t k; + size_t nClasses; + VoteWeights voteWeights = voteUniform; + DAAL_UINT64 resultsToEvaluate = classifier::computeClassLabels; + + const auto par3 = dynamic_cast(par); + if (par3) + { + k = par3->k; + voteWeights = par3->voteWeights; + resultsToEvaluate = par3->resultsToEvaluate; + nClasses = par3->nClasses; + } - // if (par3 == NULL) return Status(ErrorNullParameterNotSupported); + if (par3 == NULL) return Status(ErrorNullParameterNotSupported); - // const Model * const model = static_cast(m); - // const KDTreeTable & kdTreeTable = *(model->impl()->getKDTreeTable()); - // const auto rootTreeNodeIndex = model->impl()->getRootNodeIndex(); - // const NumericTable & data = *(model->impl()->getData()); - // const NumericTable * labels = nullptr; - // if (resultsToEvaluate != 0) - // { - // labels = model->impl()->getLabels().get(); - // } + const Model * const model = static_cast(m); + const KDTreeTable & kdTreeTable = *(model->impl()->getKDTreeTable()); + const auto rootTreeNodeIndex = model->impl()->getRootNodeIndex(); + const NumericTable & data = *(model->impl()->getData()); + const NumericTable * labels = nullptr; + if (resultsToEvaluate != 0) + { + labels = model->impl()->getLabels().get(); + } - // const NumericTable * const modelIndices = model->impl()->getIndices().get(); + const NumericTable * const modelIndices = model->impl()->getIndices().get(); - // size_t iSize = 1; - // while (iSize < k) - // { - // iSize *= 2; - // } - // const size_t heapSize = (iSize / 16 + 1) * 16; + size_t iSize = 1; + while (iSize < k) + { + iSize *= 2; + } + const size_t heapSize = (iSize / 16 + 1) * 16; - // const size_t xRowCount = x->getNumberOfRows(); - // const algorithmFpType base = 2.0; - // const size_t expectedMaxDepth = (Math::xsLog(xRowCount) / Math::xsLog(base) + 1) * __KDTREE_DEPTH_MULTIPLICATION_FACTOR; - // const size_t stackSize = Math::xsPowx(base, Math::xsCeil(Math::xsLog(expectedMaxDepth) / Math::xsLog(base))); - // struct Local - // { - // MaxHeap heap; - // SearchStack stack; - // }; - // SafeStatus safeStat; - // daal::tls localTLS([&]() -> Local * { - // Local * const ptr = service_scalable_calloc(1); - // if (ptr) - // { - // if (!ptr->heap.init(heapSize)) - // { - // safeStat.add(services::ErrorMemoryAllocationFailed); - // service_scalable_free(ptr); - // return nullptr; - // } - // if (!ptr->stack.init(stackSize)) - // { - // safeStat.add(services::ErrorMemoryAllocationFailed); - // ptr->heap.clear(); - // service_scalable_free(ptr); - // return nullptr; - // } - // } - // else - // { - // safeStat.add(services::ErrorMemoryAllocationFailed); - // } - // return ptr; - // }); + const size_t xRowCount = x->getNumberOfRows(); + const algorithmFpType base = 2.0; + const size_t expectedMaxDepth = (Math::xsLog(xRowCount) / Math::xsLog(base) + 1) * __KDTREE_DEPTH_MULTIPLICATION_FACTOR; + const size_t stackSize = Math::xsPowx(base, Math::xsCeil(Math::xsLog(expectedMaxDepth) / Math::xsLog(base))); + struct Local + { + MaxHeap heap; + SearchStack stack; + }; + SafeStatus safeStat; + daal::tls localTLS([&]() -> Local * { + Local * const ptr = service_scalable_calloc(1); + if (ptr) + { + if (!ptr->heap.init(heapSize)) + { + safeStat.add(services::ErrorMemoryAllocationFailed); + service_scalable_free(ptr); + return nullptr; + } + if (!ptr->stack.init(stackSize)) + { + safeStat.add(services::ErrorMemoryAllocationFailed); + ptr->heap.clear(); + service_scalable_free(ptr); + return nullptr; + } + } + else + { + safeStat.add(services::ErrorMemoryAllocationFailed); + } + return ptr; + }); - // DAAL_CHECK_STATUS_OK((status.ok()), status); + DAAL_CHECK_STATUS_OK((status.ok()), status); - // const auto maxThreads = threader_get_threads_number(); - // auto nThreads = (maxThreads < 1) ? 1 : maxThreads; - // const size_t xColumnCount = x->getNumberOfColumns(); - // const auto rowsPerBlock = (xRowCount + nThreads - 1) / nThreads; - // const auto blockCount = (xRowCount + rowsPerBlock - 1) / rowsPerBlock; + const auto maxThreads = threader_get_threads_number(); + auto nThreads = (maxThreads < 1) ? 1 : maxThreads; + const size_t xColumnCount = x->getNumberOfColumns(); + const auto rowsPerBlock = (xRowCount + nThreads - 1) / nThreads; + const auto blockCount = (xRowCount + rowsPerBlock - 1) / rowsPerBlock; - // services::internal::TArrayScalable soa_arrays; - // bool isHomogenSOA = false; + services::internal::TArrayScalable soa_arrays; + bool isHomogenSOA = false; - // daal::threader_for(blockCount, blockCount, [&](int iBlock) { - // Local * const local = localTLS.local(); - // DAAL_CHECK_MALLOC_THR(local); + daal::threader_for(blockCount, blockCount, [&](int iBlock) { + Local * const local = localTLS.local(); + DAAL_CHECK_MALLOC_THR(local); - // const size_t first = iBlock * rowsPerBlock; - // const size_t last = min(static_cast(first + rowsPerBlock), xRowCount); + const size_t first = iBlock * rowsPerBlock; + const size_t last = min(static_cast(first + rowsPerBlock), xRowCount); - // if (first < last) - // { - // const algorithmFpType radius = MaxVal::get(); - // data_management::BlockDescriptor xBD; - // const_cast(*x).getBlockOfRows(first, last - first, readOnly, xBD); - // const algorithmFpType * const dx = xBD.getBlockPtr(); - - // data_management::BlockDescriptor indicesBD; - // data_management::BlockDescriptor distancesBD; - // if (indices) - // { - // DAAL_CHECK_STATUS_THR(indices->getBlockOfRows(first, last - first, writeOnly, indicesBD)); - // } - // if (distances) - // { - // DAAL_CHECK_STATUS_THR(distances->getBlockOfRows(first, last - first, writeOnly, distancesBD)); - // } + if (false) + { + const algorithmFpType radius = MaxVal::get(); + data_management::BlockDescriptor xBD; + const_cast(*x).getBlockOfRows(first, last - first, readOnly, xBD); + const algorithmFpType * const dx = xBD.getBlockPtr(); + + data_management::BlockDescriptor indicesBD; + data_management::BlockDescriptor distancesBD; + if (indices) + { + DAAL_CHECK_STATUS_THR(indices->getBlockOfRows(first, last - first, writeOnly, indicesBD)); + } + if (distances) + { + DAAL_CHECK_STATUS_THR(distances->getBlockOfRows(first, last - first, writeOnly, distancesBD)); + } - // if (labels) - // { - // const size_t yColumnCount = y->getNumberOfColumns(); - // data_management::BlockDescriptor yBD; - // y->getBlockOfRows(first, last - first, writeOnly, yBD); - // auto * const dy = yBD.getBlockPtr(); + if (labels) + { + const size_t yColumnCount = y->getNumberOfColumns(); + data_management::BlockDescriptor yBD; + y->getBlockOfRows(first, last - first, writeOnly, yBD); + auto * const dy = yBD.getBlockPtr(); - // for (size_t i = 0; i < last - first; ++i) - // { - // findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, kdTreeTable, rootTreeNodeIndex, data, - // isHomogenSOA, soa_arrays); - // DAAL_CHECK_STATUS_THR( - // predict(&dy[i * yColumnCount], local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses)); - // } - // y->releaseBlockOfRows(yBD); - // } - // else - // { - // for (size_t i = 0; i < last - first; ++i) - // { - // findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, kdTreeTable, rootTreeNodeIndex, data, - // isHomogenSOA, soa_arrays); - // DAAL_CHECK_STATUS_THR(predict(nullptr, local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses)); - // } - // } + for (size_t i = 0; i < last - first; ++i) + { + findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, kdTreeTable, rootTreeNodeIndex, data, + isHomogenSOA, soa_arrays); + DAAL_CHECK_STATUS_THR( + predict(&dy[i * yColumnCount], local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses)); + } + y->releaseBlockOfRows(yBD); + } + else + { + for (size_t i = 0; i < last - first; ++i) + { + findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, kdTreeTable, rootTreeNodeIndex, data, + isHomogenSOA, soa_arrays); + DAAL_CHECK_STATUS_THR(predict(nullptr, local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses)); + } + } - // if (indices) - // { - // DAAL_CHECK_STATUS_THR(indices->releaseBlockOfRows(indicesBD)); - // } + if (indices) + { + DAAL_CHECK_STATUS_THR(indices->releaseBlockOfRows(indicesBD)); + } - // if (distances) - // { - // DAAL_CHECK_STATUS_THR(distances->releaseBlockOfRows(distancesBD)); - // } + if (distances) + { + DAAL_CHECK_STATUS_THR(distances->releaseBlockOfRows(distancesBD)); + } - // const_cast(*x).releaseBlockOfRows(xBD); - // } - // }); + const_cast(*x).releaseBlockOfRows(xBD); + } + }); - // status = safeStat.detach(); - // if (!status) return status; + status = safeStat.detach(); + if (!status) return status; - // localTLS.reduce([&](Local * ptr) -> void { - // if (ptr) - // { - // ptr->stack.clear(); - // ptr->heap.clear(); - // service_scalable_free(ptr); - // } - // }); + localTLS.reduce([&](Local * ptr) -> void { + if (ptr) + { + ptr->stack.clear(); + ptr->heap.clear(); + service_scalable_free(ptr); + } + }); return status; } From 09f5c1cf5a1ba4942858cb3335a0e35e798029e8 Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Wed, 16 Oct 2024 14:10:36 -0700 Subject: [PATCH 25/35] fixes --- ...ication_predict_dense_default_batch_impl.i | 85 +++++++++---------- 1 file changed, 42 insertions(+), 43 deletions(-) diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i index 614bb4b06f2..cd53469512e 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i @@ -243,7 +243,7 @@ Status KNNClassificationPredictKernel::compu { const size_t yColumnCount = y->getNumberOfColumns(); data_management::BlockDescriptor yBD; - y->getBlockOfRows(first, last - first, writeOnly, yBD); + DAAL_CHECK_STATUS_THR(y->getBlockOfRows(first, last - first, writeOnly, yBD)); auto * const dy = yBD.getBlockPtr(); for (size_t i = 0; i < last - first; ++i) @@ -253,7 +253,7 @@ Status KNNClassificationPredictKernel::compu DAAL_CHECK_STATUS_THR( predict(&dy[i * yColumnCount], local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses)); } - y->releaseBlockOfRows(yBD); + DAAL_CHECK_STATUS_THR(y->releaseBlockOfRows(yBD)); } else { @@ -442,55 +442,55 @@ services::Status KNNClassificationPredictKernel & indices, data_management::BlockDescriptor & distances, size_t index, const size_t nClasses) { - // typedef daal::internal::MathInst Math; - - // const size_t heapSize = heap.size(); - // if (heapSize < 1) return services::Status(); + typedef daal::internal::MathInst Math; - // if (indices.getNumberOfRows() != 0) - // { - // DAAL_ASSERT(modelIndices); + const size_t heapSize = heap.size(); + if (heapSize < 1) return services::Status(); + SafeStatus safeStat; + if (indices.getNumberOfRows() != 0) + { + DAAL_ASSERT(modelIndices); - // services::Status s; - // data_management::BlockDescriptor modelIndicesBD; + services::Status s; + data_management::BlockDescriptor modelIndicesBD; - // const auto nIndices = indices.getNumberOfColumns(); - // DAAL_ASSERT(heapSize <= nIndices); + const auto nIndices = indices.getNumberOfColumns(); + DAAL_ASSERT(heapSize <= nIndices); - // int * const indicesPtr = indices.getBlockPtr() + index * nIndices; + int * const indicesPtr = indices.getBlockPtr() + index * nIndices; - // for (size_t i = 0; i < heapSize; ++i) - // { - // s |= const_cast(modelIndices)->getBlockOfRows(heap[i].index, 1, readOnly, modelIndicesBD); - // DAAL_ASSERT(s.ok()); + for (size_t i = 0; i < heapSize; ++i) + { + s |= const_cast(modelIndices)->getBlockOfRows(heap[i].index, 1, readOnly, modelIndicesBD); + DAAL_ASSERT(s.ok()); - // indicesPtr[i] = *(modelIndicesBD.getBlockPtr()); + indicesPtr[i] = *(modelIndicesBD.getBlockPtr()); - // s |= const_cast(modelIndices)->releaseBlockOfRows(modelIndicesBD); - // DAAL_ASSERT(s.ok()); - // } - // } + s |= const_cast(modelIndices)->releaseBlockOfRows(modelIndicesBD); + DAAL_ASSERT(s.ok()); + } + } - // if (distances.getNumberOfRows() != 0) - // { - // services::Status s; + if (distances.getNumberOfRows() != 0) + { + services::Status s; - // const auto nDistances = distances.getNumberOfColumns(); - // DAAL_ASSERT(heapSize <= nDistances); + const auto nDistances = distances.getNumberOfColumns(); + DAAL_ASSERT(heapSize <= nDistances); - // algorithmFpType * const distancesPtr = distances.getBlockPtr() + index * nDistances; - // for (size_t i = 0; i < heapSize; ++i) - // { - // distancesPtr[i] = heap[i].distance; - // } + algorithmFpType * const distancesPtr = distances.getBlockPtr() + index * nDistances; + for (size_t i = 0; i < heapSize; ++i) + { + distancesPtr[i] = heap[i].distance; + } - // Math::xvSqrt(heapSize, distancesPtr, distancesPtr); + Math::xvSqrt(heapSize, distancesPtr, distancesPtr); - // for (size_t i = heapSize; i < nDistances; ++i) - // { - // distancesPtr[i] = -1; - // } - // } + for (size_t i = heapSize; i < nDistances; ++i) + { + distancesPtr[i] = -1; + } + } // if (labels) // { @@ -509,9 +509,9 @@ services::Status KNNClassificationPredictKernel(labels)->getBlockOfColumnValues(0, heap[i].index, 1, readOnly, labelBD); - // classes[i] = *(labelBD.getBlockPtr()); - // const_cast(labels)->releaseBlockOfColumnValues(labelBD); + // // const_cast(labels)->getBlockOfColumnValues(0, heap[i].index, 1, readOnly, labelBD); + // // classes[i] = *(labelBD.getBlockPtr()); + // // const_cast(labels)->releaseBlockOfColumnValues(labelBD); // } // if (voteWeights == voteUniform) @@ -571,7 +571,6 @@ services::Status KNNClassificationPredictKernel(classes); // service_free(classWeights); - // classes = nullptr; // } return services::Status(); From dcb9452c233680eedbb717f2bad6b76d67541694 Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Thu, 17 Oct 2024 00:42:58 -0700 Subject: [PATCH 26/35] fixes for allocation --- ...ication_predict_dense_default_batch_impl.i | 165 +++++++++--------- 1 file changed, 83 insertions(+), 82 deletions(-) diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i index cd53469512e..646218f314a 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i @@ -212,7 +212,7 @@ Status KNNClassificationPredictKernel::compu const auto blockCount = (xRowCount + rowsPerBlock - 1) / rowsPerBlock; services::internal::TArrayScalable soa_arrays; - bool isHomogenSOA = false; + bool isHomogenSOA = checkHomogenSOA(data, soa_arrays); daal::threader_for(blockCount, blockCount, [&](int iBlock) { Local * const local = localTLS.local(); @@ -221,7 +221,7 @@ Status KNNClassificationPredictKernel::compu const size_t first = iBlock * rowsPerBlock; const size_t last = min(static_cast(first + rowsPerBlock), xRowCount); - if (false) + if (local) { const algorithmFpType radius = MaxVal::get(); data_management::BlockDescriptor xBD; @@ -492,86 +492,87 @@ services::Status KNNClassificationPredictKernel labelBD; - // algorithmFpType * classes = static_cast(daal::services::internal::service_malloc(heapSize)); - // algorithmFpType * classWeights = static_cast(daal::services::internal::service_malloc(nClasses)); - // DAAL_CHECK_MALLOC(classWeights); - // DAAL_CHECK_MALLOC(classes); - - // for (size_t i = 0; i < nClasses; ++i) - // { - // classWeights[i] = 0; - // } - - // for (size_t i = 0; i < heapSize; ++i) - // { - // // const_cast(labels)->getBlockOfColumnValues(0, heap[i].index, 1, readOnly, labelBD); - // // classes[i] = *(labelBD.getBlockPtr()); - // // const_cast(labels)->releaseBlockOfColumnValues(labelBD); - // } - - // if (voteWeights == voteUniform) - // { - // for (size_t i = 0; i < heapSize; ++i) - // { - // classWeights[(size_t)(classes[i])] += 1; - // } - // } - // else - // { - // DAAL_ASSERT(voteWeights == voteDistance); - - // const algorithmFpType epsilon = daal::services::internal::EpsilonVal::get(); - - // bool isContainZero = false; - - // for (size_t i = 0; i < heapSize; ++i) - // { - // if (heap[i].distance <= epsilon) - // { - // isContainZero = true; - // break; - // } - // } - - // if (isContainZero) - // { - // for (size_t i = 0; i < heapSize; ++i) - // { - // if (heap[i].distance <= epsilon) - // { - // classWeights[(size_t)(classes[i])] += 1; - // } - // } - // } - // else - // { - // for (size_t i = 0; i < heapSize; ++i) - // { - // classWeights[(size_t)(classes[i])] += Math::sSqrt(1 / heap[i].distance); - // } - // } - // } - - // algorithmFpType maxWeightClass = 0; - // algorithmFpType maxWeight = 0; - // for (size_t i = 0; i < nClasses; ++i) - // { - // if (classWeights[i] > maxWeight) - // { - // maxWeight = classWeights[i]; - // maxWeightClass = i; - // } - // } - // *predictedClass = maxWeightClass; - - // service_free(classes); - // service_free(classWeights); - // } + if (labels) + { + DAAL_ASSERT(predictedClass); + + data_management::BlockDescriptor labelBD; + algorithmFpType * classes = + static_cast(daal::services::internal::service_malloc(heapSize * sizeof(algorithmFpType))); + DAAL_CHECK_MALLOC(classes) + algorithmFpType * classWeights = + static_cast(daal::services::internal::service_malloc(nClasses * sizeof(algorithmFpType))); + DAAL_CHECK_MALLOC(classWeights) + + for (size_t i = 0; i < nClasses; ++i) + { + classWeights[i] = 0; + } + + for (size_t i = 0; i < heapSize; ++i) + { + const_cast(labels)->getBlockOfColumnValues(0, heap[i].index, 1, readOnly, labelBD); + classes[i] = *(labelBD.getBlockPtr()); + const_cast(labels)->releaseBlockOfColumnValues(labelBD); + } + + if (voteWeights == voteUniform) + { + for (size_t i = 0; i < heapSize; ++i) + { + classWeights[(size_t)(classes[i])] += 1; + } + } + else + { + DAAL_ASSERT(voteWeights == voteDistance); + + const algorithmFpType epsilon = daal::services::internal::EpsilonVal::get(); + + bool isContainZero = false; + + for (size_t i = 0; i < heapSize; ++i) + { + if (heap[i].distance <= epsilon) + { + isContainZero = true; + break; + } + } + + if (isContainZero) + { + for (size_t i = 0; i < heapSize; ++i) + { + if (heap[i].distance <= epsilon) + { + classWeights[(size_t)(classes[i])] += 1; + } + } + } + else + { + for (size_t i = 0; i < heapSize; ++i) + { + classWeights[(size_t)(classes[i])] += Math::sSqrt(1 / heap[i].distance); + } + } + } + + algorithmFpType maxWeightClass = 0; + algorithmFpType maxWeight = 0; + for (size_t i = 0; i < nClasses; ++i) + { + if (classWeights[i] > maxWeight) + { + maxWeight = classWeights[i]; + maxWeightClass = i; + } + } + *predictedClass = maxWeightClass; + daal_free(classes); + daal_free(classWeights); + } return services::Status(); } From 0d03884aed205c0961d14c246aed3f8ad13972ee Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Thu, 17 Oct 2024 00:59:25 -0700 Subject: [PATCH 27/35] just find neighbours --- ...ee_knn_classification_predict_dense_default_batch_impl.i | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i index 646218f314a..c41afdd10e6 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i @@ -250,8 +250,8 @@ Status KNNClassificationPredictKernel::compu { findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, kdTreeTable, rootTreeNodeIndex, data, isHomogenSOA, soa_arrays); - DAAL_CHECK_STATUS_THR( - predict(&dy[i * yColumnCount], local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses)); + // DAAL_CHECK_STATUS_THR( + // predict(&dy[i * yColumnCount], local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses)); } DAAL_CHECK_STATUS_THR(y->releaseBlockOfRows(yBD)); } @@ -261,7 +261,7 @@ Status KNNClassificationPredictKernel::compu { findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, kdTreeTable, rootTreeNodeIndex, data, isHomogenSOA, soa_arrays); - DAAL_CHECK_STATUS_THR(predict(nullptr, local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses)); + //DAAL_CHECK_STATUS_THR(predict(nullptr, local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses)); } } From 5d95c2ead29ee8c5533682a8a2da0d649264bc71 Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Thu, 17 Oct 2024 04:10:41 -0700 Subject: [PATCH 28/35] jus tpredict --- ...sification_predict_dense_default_batch_impl.i | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i index c41afdd10e6..3d752f34150 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i @@ -248,10 +248,10 @@ Status KNNClassificationPredictKernel::compu for (size_t i = 0; i < last - first; ++i) { - findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, kdTreeTable, rootTreeNodeIndex, data, - isHomogenSOA, soa_arrays); - // DAAL_CHECK_STATUS_THR( - // predict(&dy[i * yColumnCount], local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses)); + // findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, kdTreeTable, rootTreeNodeIndex, data, + // isHomogenSOA, soa_arrays); + DAAL_CHECK_STATUS_THR( + predict(&dy[i * yColumnCount], local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses)); } DAAL_CHECK_STATUS_THR(y->releaseBlockOfRows(yBD)); } @@ -259,9 +259,9 @@ Status KNNClassificationPredictKernel::compu { for (size_t i = 0; i < last - first; ++i) { - findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, kdTreeTable, rootTreeNodeIndex, data, - isHomogenSOA, soa_arrays); - //DAAL_CHECK_STATUS_THR(predict(nullptr, local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses)); + // findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, kdTreeTable, rootTreeNodeIndex, data, + // isHomogenSOA, soa_arrays); + DAAL_CHECK_STATUS_THR(predict(nullptr, local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses)); } } @@ -404,7 +404,6 @@ void KNNClassificationPredictKernel::findNea if (!stack.empty()) { cur = stack.pop(); - DAAL_PREFETCH_READ_T0(node); } else { @@ -426,7 +425,6 @@ void KNNClassificationPredictKernel::findNea else if (!stack.empty()) { cur = stack.pop(); - DAAL_PREFETCH_READ_T0(node); } else { From cee7fad07a83d581e0eba5e116b1ef3ac31918f9 Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Thu, 17 Oct 2024 06:49:27 -0700 Subject: [PATCH 29/35] restore neighbours --- ...e_knn_classification_predict_dense_default_batch.h | 9 +++++---- ..._classification_predict_dense_default_batch_impl.i | 11 ++++++----- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch.h b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch.h index 499754808fd..20752a8408a 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch.h +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch.h @@ -67,10 +67,11 @@ class KNNClassificationPredictKernel : publi const daal::algorithms::Parameter * par); protected: - void findNearestNeighbors(const algorithmFpType * query, Heap, cpu> & heap, - kdtree_knn_classification::internal::Stack, cpu> & stack, size_t k, algorithmFpType radius, - const KDTreeTable & kdTreeTable, size_t rootTreeNodeIndex, const NumericTable & data, const bool isHomogenSOA, - services::internal::TArrayScalable & soa_arrays); + services::Status findNearestNeighbors(const algorithmFpType * query, Heap, cpu> & heap, + kdtree_knn_classification::internal::Stack, cpu> & stack, size_t k, + algorithmFpType radius, const KDTreeTable & kdTreeTable, size_t rootTreeNodeIndex, + const NumericTable & data, const bool isHomogenSOA, + services::internal::TArrayScalable & soa_arrays); services::Status predict(algorithmFpType * predictedClass, const Heap, cpu> & heap, const NumericTable * labels, size_t k, VoteWeights voteWeights, const NumericTable * modelIndices, diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i index 3d752f34150..08c13b8f735 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i @@ -248,8 +248,8 @@ Status KNNClassificationPredictKernel::compu for (size_t i = 0; i < last - first; ++i) { - // findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, kdTreeTable, rootTreeNodeIndex, data, - // isHomogenSOA, soa_arrays); + DAAL_CHECK_STATUS_THR(findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, kdTreeTable, + rootTreeNodeIndex, data, isHomogenSOA, soa_arrays)); DAAL_CHECK_STATUS_THR( predict(&dy[i * yColumnCount], local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses)); } @@ -259,8 +259,8 @@ Status KNNClassificationPredictKernel::compu { for (size_t i = 0; i < last - first; ++i) { - // findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, kdTreeTable, rootTreeNodeIndex, data, - // isHomogenSOA, soa_arrays); + DAAL_CHECK_STATUS_THR(findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, kdTreeTable, + rootTreeNodeIndex, data, isHomogenSOA, soa_arrays)); DAAL_CHECK_STATUS_THR(predict(nullptr, local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses)); } } @@ -347,7 +347,7 @@ DAAL_FORCEINLINE void computeDistance(size_t start, size_t end, algorithmFpType } template -void KNNClassificationPredictKernel::findNearestNeighbors( +services::Status KNNClassificationPredictKernel::findNearestNeighbors( const algorithmFpType * query, Heap, cpu> & heap, kdtree_knn_classification::internal::Stack, cpu> & stack, size_t k, algorithmFpType radius, const KDTreeTable & kdTreeTable, size_t rootTreeNodeIndex, const NumericTable & data, const bool isHomogenSOA, @@ -432,6 +432,7 @@ void KNNClassificationPredictKernel::findNea } } } + return services::Status(); } template From 4ace1f5d1d63f2ffee692a86b8abe2cc3ff2bd5b Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Thu, 17 Oct 2024 09:41:57 -0700 Subject: [PATCH 30/35] partially restore fnn --- ...ication_predict_dense_default_batch_impl.i | 50 +++++++++---------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i index 08c13b8f735..e3e24f22cf4 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i @@ -375,31 +375,31 @@ services::Status KNNClassificationPredictKernelleftIndex; end = node->rightIndex; computeDistance(start, end, distance, query, isHomogenSOA, data, xBD, soa_arrays); - for (i = start; i < end; ++i) - { - if (distance[i - start] <= radius) - { - curNeighbor.distance = distance[i - start]; - curNeighbor.index = i; - if (heap.size() < k) - { - heap.push(curNeighbor, k); - - if (heap.size() == k) - { - radius = heap.getMax()->distance; - } - } - else - { - if (heap.getMax()->distance > curNeighbor.distance) - { - heap.replaceMax(curNeighbor); - radius = heap.getMax()->distance; - } - } - } - } + // for (i = start; i < end; ++i) + // { + // if (distance[i - start] <= radius) + // { + // curNeighbor.distance = distance[i - start]; + // curNeighbor.index = i; + // if (heap.size() < k) + // { + // heap.push(curNeighbor, k); + + // if (heap.size() == k) + // { + // radius = heap.getMax()->distance; + // } + // } + // else + // { + // if (heap.getMax()->distance > curNeighbor.distance) + // { + // heap.replaceMax(curNeighbor); + // radius = heap.getMax()->distance; + // } + // } + // } + // } if (!stack.empty()) { From 5555ae778dd2dfbbeaaa013b0e14aef0de286f63 Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Fri, 18 Oct 2024 01:26:07 -0700 Subject: [PATCH 31/35] fixes --- ...n_classification_predict_dense_default_batch.h | 2 +- ...ssification_predict_dense_default_batch_impl.i | 15 ++++++++++----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch.h b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch.h index 20752a8408a..3c70e6c68dd 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch.h +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch.h @@ -69,7 +69,7 @@ class KNNClassificationPredictKernel : publi protected: services::Status findNearestNeighbors(const algorithmFpType * query, Heap, cpu> & heap, kdtree_knn_classification::internal::Stack, cpu> & stack, size_t k, - algorithmFpType radius, const KDTreeTable & kdTreeTable, size_t rootTreeNodeIndex, + algorithmFpType radius, const KDTreeNode * nodes, size_t rootTreeNodeIndex, const NumericTable & data, const bool isHomogenSOA, services::internal::TArrayScalable & soa_arrays); diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i index e3e24f22cf4..73d347206fd 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i @@ -39,6 +39,7 @@ #include "src/algorithms/k_nearest_neighbors/kdtree_knn_classification_model_impl.h" #include "src/algorithms/k_nearest_neighbors/kdtree_knn_impl.i" #include "src/algorithms/k_nearest_neighbors/knn_heap.h" +#include #if defined(DAAL_INTEL_CPP_COMPILER) #include @@ -151,6 +152,7 @@ Status KNNClassificationPredictKernel::compu const Model * const model = static_cast(m); const KDTreeTable & kdTreeTable = *(model->impl()->getKDTreeTable()); + const KDTreeNode * const nodes = static_cast(kdTreeTable.getArray()); const auto rootTreeNodeIndex = model->impl()->getRootNodeIndex(); const NumericTable & data = *(model->impl()->getData()); const NumericTable * labels = nullptr; @@ -248,7 +250,7 @@ Status KNNClassificationPredictKernel::compu for (size_t i = 0; i < last - first; ++i) { - DAAL_CHECK_STATUS_THR(findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, kdTreeTable, + DAAL_CHECK_STATUS_THR(findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, nodes, rootTreeNodeIndex, data, isHomogenSOA, soa_arrays)); DAAL_CHECK_STATUS_THR( predict(&dy[i * yColumnCount], local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses)); @@ -259,7 +261,7 @@ Status KNNClassificationPredictKernel::compu { for (size_t i = 0; i < last - first; ++i) { - DAAL_CHECK_STATUS_THR(findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, kdTreeTable, + DAAL_CHECK_STATUS_THR(findNearestNeighbors(&dx[i * xColumnCount], local->heap, local->stack, k, radius, nodes, rootTreeNodeIndex, data, isHomogenSOA, soa_arrays)); DAAL_CHECK_STATUS_THR(predict(nullptr, local->heap, labels, k, voteWeights, modelIndices, indicesBD, distancesBD, i, nClasses)); } @@ -350,7 +352,7 @@ template services::Status KNNClassificationPredictKernel::findNearestNeighbors( const algorithmFpType * query, Heap, cpu> & heap, kdtree_knn_classification::internal::Stack, cpu> & stack, size_t k, algorithmFpType radius, - const KDTreeTable & kdTreeTable, size_t rootTreeNodeIndex, const NumericTable & data, const bool isHomogenSOA, + const KDTreeNode * nodes, size_t rootTreeNodeIndex, const NumericTable & data, const bool isHomogenSOA, services::internal::TArrayScalable & soa_arrays) { heap.reset(); @@ -358,7 +360,6 @@ services::Status KNNClassificationPredictKernel curNeighbor; size_t i; SearchNode cur, toPush; - const KDTreeNode * const nodes = static_cast(kdTreeTable.getArray()); const KDTreeNode * node; cur.nodeIndex = rootTreeNodeIndex; cur.minDistance = 0; @@ -372,6 +373,7 @@ services::Status KNNClassificationPredictKerneldimension == __KDTREE_NULLDIMENSION) { + std::cout<<"here __KDTREE_NULLDIMENSION"<leftIndex; end = node->rightIndex; computeDistance(start, end, distance, query, isHomogenSOA, data, xBD, soa_arrays); @@ -404,6 +406,7 @@ services::Status KNNClassificationPredictKerneldimension]; const algorithmFpType diff = val - node->cutPoint; - if (cur.minDistance <= radius) + if (false) { cur.nodeIndex = (diff < 0) ? node->leftIndex : node->rightIndex; toPush.nodeIndex = (diff < 0) ? node->rightIndex : node->leftIndex; @@ -425,6 +429,7 @@ services::Status KNNClassificationPredictKernel Date: Fri, 18 Oct 2024 04:54:57 -0700 Subject: [PATCH 32/35] fixes for knn --- ...ication_predict_dense_default_batch_impl.i | 79 ++++++++++++------- 1 file changed, 49 insertions(+), 30 deletions(-) diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i index 73d347206fd..813c70106eb 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i @@ -153,6 +153,27 @@ Status KNNClassificationPredictKernel::compu const Model * const model = static_cast(m); const KDTreeTable & kdTreeTable = *(model->impl()->getKDTreeTable()); const KDTreeNode * const nodes = static_cast(kdTreeTable.getArray()); + const size_t xRowCount = x->getNumberOfRows(); + + + + const algorithmFpType base = 2.0; + const algorithmFpType baseInPower = Math::sPowx(base, Math::sCeil(Math::sLog(base * xRowCount - 1) / Math::sLog(base))); + DAAL_ASSERT(baseInPower > 0) + const size_t maxKDTreeNodeCount = ((size_t)baseInPower * __KDTREE_MAX_NODE_COUNT_MULTIPLICATION_FACTOR) / __KDTREE_LEAF_BUCKET_SIZE + 1; + for(int index = 0; index < maxKDTreeNodeCount; index++){ + const KDTreeNode& node = nodes[index]; + + + std::cout << "Node Index: " << index + << ", Dimension: " << node.dimension + << ", Cut Point: " << node.cutPoint + << ", Left Index: " << node.leftIndex + << ", Right Index: " << node.rightIndex << std::endl; + } + + + const auto rootTreeNodeIndex = model->impl()->getRootNodeIndex(); const NumericTable & data = *(model->impl()->getData()); const NumericTable * labels = nullptr; @@ -170,8 +191,8 @@ Status KNNClassificationPredictKernel::compu } const size_t heapSize = (iSize / 16 + 1) * 16; - const size_t xRowCount = x->getNumberOfRows(); - const algorithmFpType base = 2.0; + // const size_t xRowCount = x->getNumberOfRows(); + // const algorithmFpType base = 2.0; const size_t expectedMaxDepth = (Math::xsLog(xRowCount) / Math::xsLog(base) + 1) * __KDTREE_DEPTH_MULTIPLICATION_FACTOR; const size_t stackSize = Math::xsPowx(base, Math::xsCeil(Math::xsLog(expectedMaxDepth) / Math::xsLog(base))); struct Local @@ -371,37 +392,36 @@ services::Status KNNClassificationPredictKerneldimension == __KDTREE_NULLDIMENSION) + if (node->dimension >1000) { - std::cout<<"here __KDTREE_NULLDIMENSION"<leftIndex; end = node->rightIndex; computeDistance(start, end, distance, query, isHomogenSOA, data, xBD, soa_arrays); - // for (i = start; i < end; ++i) - // { - // if (distance[i - start] <= radius) - // { - // curNeighbor.distance = distance[i - start]; - // curNeighbor.index = i; - // if (heap.size() < k) - // { - // heap.push(curNeighbor, k); - - // if (heap.size() == k) - // { - // radius = heap.getMax()->distance; - // } - // } - // else - // { - // if (heap.getMax()->distance > curNeighbor.distance) - // { - // heap.replaceMax(curNeighbor); - // radius = heap.getMax()->distance; - // } - // } - // } - // } + for (i = start; i < end; ++i) + { + if (distance[i - start] <= radius) + { + curNeighbor.distance = distance[i - start]; + curNeighbor.index = i; + if (heap.size() < k) + { + heap.push(curNeighbor, k); + + if (heap.size() == k) + { + radius = heap.getMax()->distance; + } + } + else + { + if (heap.getMax()->distance > curNeighbor.distance) + { + heap.replaceMax(curNeighbor); + radius = heap.getMax()->distance; + } + } + } + } if (!stack.empty()) { @@ -415,7 +435,6 @@ services::Status KNNClassificationPredictKerneldimension]; const algorithmFpType diff = val - node->cutPoint; if (false) From e9218e347c388bcd5597ac51ac2d1b95105c022e Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Fri, 18 Oct 2024 04:58:44 -0700 Subject: [PATCH 33/35] minor fix --- ...tree_knn_classification_predict_dense_default_batch_impl.i | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i index 813c70106eb..17e1038b664 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i @@ -397,7 +397,7 @@ services::Status KNNClassificationPredictKernelleftIndex; end = node->rightIndex; computeDistance(start, end, distance, query, isHomogenSOA, data, xBD, soa_arrays); - for (i = start; i < end; ++i) + for (i = start; i < end; i++) { if (distance[i - start] <= radius) { @@ -437,7 +437,7 @@ services::Status KNNClassificationPredictKerneldimension]; const algorithmFpType diff = val - node->cutPoint; - if (false) + if (cur.minDistance <= radius) { cur.nodeIndex = (diff < 0) ? node->leftIndex : node->rightIndex; toPush.nodeIndex = (diff < 0) ? node->rightIndex : node->leftIndex; From b07b0279dd526d082ae1a415265ad4111d2c5fb2 Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Mon, 21 Oct 2024 04:58:19 -0700 Subject: [PATCH 34/35] train threads 1 --- .../kdtree_knn_classification_train_dense_default_impl.i | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_train_dense_default_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_train_dense_default_impl.i index fe74c6880a7..1e0ff09ca99 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_train_dense_default_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_train_dense_default_impl.i @@ -160,6 +160,8 @@ Status KNNClassificationTrainBatchKernel q; BBox * bboxQ = nullptr; + auto oldThreads = services::Environment::getInstance()->getNumberOfThreads(); + services::Environment::getInstance()->setNumberOfThreads(1); DAAL_CHECK_STATUS(status, buildFirstPartOfKDTree(q, bboxQ, *x, *r, indexes, engine)); DAAL_CHECK_STATUS(status, buildSecondPartOfKDTree(q, bboxQ, *x, *r, indexes, engine)); DAAL_CHECK_STATUS(status, rearrangePoints(*x, indexes)); @@ -167,7 +169,7 @@ Status KNNClassificationTrainBatchKernelsetNumberOfThreads(oldThreads); daal_free(bboxQ); bboxQ = nullptr; return status; From 94aab865908e738c7ba1bb0ea9986591cf5eeac1 Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Tue, 22 Oct 2024 04:24:24 -0700 Subject: [PATCH 35/35] fixes for threading --- ..._classification_train_dense_default_impl.i | 127 +++++++++++++----- 1 file changed, 91 insertions(+), 36 deletions(-) diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_train_dense_default_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_train_dense_default_impl.i index 1e0ff09ca99..b2f50feb4b7 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_train_dense_default_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_train_dense_default_impl.i @@ -61,39 +61,72 @@ using namespace kdtree_knn_classification::internal; template class Queue { + static const size_t defaultSize = 4; public: - Queue() : _data(nullptr) {} + Queue() : _data(nullptr), _first(0), _last(0), _count(0), _size(0), _capacity(0) {} ~Queue() { - services::daal_free(_data); - _data = nullptr; + clear(); } + Queue(const Queue &) = delete; + Queue & operator=(const Queue &) = delete; + bool init(size_t size) { clear(); + if (size == 0) // Check for valid size + { + return false; + } + _first = _count = 0; _last = _sizeMinus1 = (_size = size) - 1; - return ((_data = static_cast(service_malloc(size * sizeof(T)))) != nullptr); + _data = static_cast(service_malloc(size)); + + if (!_data) // Check if memory allocation was successful + { + return false; + } + + _capacity = _size; // Initialize capacity + return true; } void clear() { - daal_free(_data); - _data = nullptr; + if (_data) + { + daal::services::internal::service_free(_data); // Free allocated memory if it exists + _data = nullptr; + } + _first = _last = _count = _size = _sizeMinus1 = _capacity = 0; // Reset state } + void reset() { _first = _last = _count = 0; } + DAAL_FORCEINLINE void push(const T & value) { - _data[_last = (_last + 1) & _sizeMinus1] = value; + if (_count >= _capacity) // Check if capacity is exceeded + { + services::Status status = grow(); // Grow if necessary + //DAAL_CHECK_STATUS_VAR(status); + } + + _data[_last = (_last + 1) & _sizeMinus1] = value; // Add element to queue ++_count; } DAAL_FORCEINLINE T pop() { - const T value = _data[_first++]; - _first *= (_first != _size); + // if (empty()) // Check if queue is empty + // { + // throw std::underflow_error("Queue underflow: no elements to pop."); + // } + + const T value = _data[_first++]; // Retrieve element + _first *= (_first != _size); // Reset first index if it reaches the end --_count; return value; } @@ -102,13 +135,36 @@ public: size_t size() const { return _count; } + private: + services::Status grow() + { + int result = 0; + _capacity = (_capacity == 0 ? defaultSize : _capacity * 2); // Double capacity or set to default + + T * const newData = daal::services::internal::service_malloc(_capacity); + DAAL_CHECK_MALLOC(newData); + + if (_data != nullptr) + { + result = services::internal::daal_memcpy_s(newData, _last * sizeof(T), _data, _last * sizeof(T)); + daal::services::internal::service_free(_data); // Free old data + _data = nullptr; + } + + _data = newData; // Assign new expanded memory + _size = _capacity; // Adjust size to new capacity + _sizeMinus1 = _capacity - 1; // Update size minus 1 for wrapping + return (!result) ? services::Status() : services::Status(services::ErrorMemoryCopyFailedInternal); + } + T * _data; - size_t _first; - size_t _last; - size_t _count; - size_t _size; - size_t _sizeMinus1; + size_t _first; // Index of the first element + size_t _last; // Index of the last element + size_t _count; // Current number of elements + size_t _size; // Current size of the queue + size_t _sizeMinus1; // Helper for wrap-around logic + size_t _capacity; // Maximum capacity of the queue }; struct BuildNode @@ -161,15 +217,15 @@ Status KNNClassificationTrainBatchKernel q; BBox * bboxQ = nullptr; auto oldThreads = services::Environment::getInstance()->getNumberOfThreads(); - services::Environment::getInstance()->setNumberOfThreads(1); DAAL_CHECK_STATUS(status, buildFirstPartOfKDTree(q, bboxQ, *x, *r, indexes, engine)); + services::Environment::getInstance()->setNumberOfThreads(1); DAAL_CHECK_STATUS(status, buildSecondPartOfKDTree(q, bboxQ, *x, *r, indexes, engine)); + services::Environment::getInstance()->setNumberOfThreads(oldThreads); DAAL_CHECK_STATUS(status, rearrangePoints(*x, indexes)); if (y) { DAAL_CHECK_STATUS(status, rearrangePoints(*y, indexes)); } - services::Environment::getInstance()->setNumberOfThreads(oldThreads); daal_free(bboxQ); bboxQ = nullptr; return status; @@ -185,10 +241,9 @@ Status KNNClassificationTrainBatchKernel Math; typedef BoundingBox BBox; - const auto maxThreads = threader_get_threads_number(); const algorithmFpType base = 2.0; const size_t queueSize = - 2 * Math::sPowx(base, Math::sCeil(Math::sLog(__KDTREE_FIRST_PART_LEAF_NODES_PER_THREAD * maxThreads) / Math::sLog(base))); + 2 * Math::sPowx(base, Math::sCeil(Math::sLog(__KDTREE_FIRST_PART_LEAF_NODES_PER_THREAD) / Math::sLog(base))); const size_t firstPartLeafNodeCount = queueSize / 2; q.init(queueSize); const size_t xColumnCount = x.getNumberOfColumns(); @@ -198,7 +253,7 @@ Status KNNClassificationTrainBatchKernel(service_malloc(bboxSize * sizeof(BBox), sizeof(BBox))); + bboxQ = static_cast(service_malloc(bboxSize)); DAAL_CHECK_MALLOC(bboxQ) r.impl()->setLastNodeIndex(0); @@ -223,7 +278,7 @@ Status KNNClassificationTrainBatchKernel(service_malloc(subSampleCount * sizeof(algorithmFpType))); + algorithmFpType * subSamples = static_cast(service_malloc(subSampleCount)); DAAL_CHECK_MALLOC(subSamples) while (maxNodeCountForCurrentDepth < firstPartLeafNodeCount) @@ -716,8 +771,8 @@ size_t KNNClassificationTrainBatchKernel(service_malloc(idxMultiplier * (blockCount + 1) * sizeof(size_t))); - size_t * rightSegmentStartPerBlock = static_cast(service_malloc(idxMultiplier * blockCount * sizeof(size_t))); + size_t * leftSegmentStartPerBlock = static_cast(service_malloc(idxMultiplier * (blockCount + 1))); + size_t * rightSegmentStartPerBlock = static_cast(service_malloc(idxMultiplier * blockCount)); if (!leftSegmentStartPerBlock || !rightSegmentStartPerBlock) { @@ -849,7 +904,7 @@ Status KNNClassificationTrainBatchKernel(service_malloc(xRowCount * sizeof(algorithmFpType))))); + (buffer = static_cast(service_malloc(xRowCount)))); if (!awx) { status.add(services::ErrorMemoryAllocationFailed); @@ -931,10 +986,10 @@ Status KNNClassificationTrainBatchKernel(service_malloc(q.size() * sizeof(BuildNode))); + BuildNode * bnQ = static_cast(service_malloc(q.size())); DAAL_CHECK_MALLOC(bnQ) size_t posQ = 0; while (q.size() > 0) @@ -972,7 +1027,7 @@ Status KNNClassificationTrainBatchKernel(service_malloc((maxThreads + 1) * sizeof(*firstNodeIndex))); + size_t * firstNodeIndex = static_cast(service_malloc((maxThreads + 1))); DAAL_CHECK_MALLOC(firstNodeIndex) size_t nodeIndex = lastNodeIndex; for (size_t i = 0; i < maxThreads; ++i) @@ -991,7 +1046,7 @@ Status KNNClassificationTrainBatchKernelbboxes = service_scalable_calloc(ptr->bboxesCapacity * xColumnCount)) != nullptr) && ((ptr->inSortValues = service_scalable_calloc(__KDTREE_INDEX_VALUE_PAIRS_PER_THREAD)) != nullptr) && ((ptr->outSortValues = service_scalable_calloc(__KDTREE_INDEX_VALUE_PAIRS_PER_THREAD)) != nullptr) - && ((ptr->fixupQueue = static_cast(service_malloc(ptr->fixupQueueCapacity * sizeof(size_t)))) != nullptr) + && ((ptr->fixupQueue = static_cast(service_malloc(ptr->fixupQueueCapacity))) != nullptr) && ptr->buildStack.init(stackSize))) { status.add(services::ErrorMemoryAllocationFailed); @@ -1090,7 +1145,7 @@ Status KNNClassificationTrainBatchKernelfixupQueueIndex >= local->fixupQueueCapacity) { const size_t newCapacity = local->fixupQueueCapacity * 2; - size_t * const newQueue = static_cast(service_malloc(newCapacity * sizeof(size_t))); + size_t * const newQueue = static_cast(service_malloc(newCapacity)); DAAL_CHECK_THR(newQueue, services::ErrorMemoryAllocationFailed); result |= daal::services::internal::daal_memcpy_s(newQueue, newCapacity * sizeof(size_t), local->fixupQueue, local->fixupQueueIndex * sizeof(size_t)); @@ -1129,13 +1184,13 @@ Status KNNClassificationTrainBatchKernelextraKDTreeNodesCapacity > 0 ? local->extraKDTreeNodesCapacity * 2 : static_cast(1024), extraIndex + 1); KDTreeNode * const newNodes = - static_cast(service_malloc(newCapacity * sizeof(KDTreeNode))); + static_cast(service_malloc(newCapacity)); DAAL_CHECK_THR(newNodes, services::ErrorMemoryAllocationFailed); - result |= daal::services::internal::daal_memcpy_s(newNodes, newCapacity * sizeof(KDTreeNode), + result |= daal::services::internal::daal_memcpy_s(newNodes, newCapacity, local->extraKDTreeNodes, - local->extraKDTreeNodesCapacity * sizeof(KDTreeNode)); + local->extraKDTreeNodesCapacity); KDTreeNode * oldNodes = local->extraKDTreeNodes; local->extraKDTreeNodes = newNodes; local->extraKDTreeNodesCapacity = newCapacity; @@ -1147,7 +1202,7 @@ Status KNNClassificationTrainBatchKernelextraKDTreeNodesCapacity = max(extraIndex + 1, static_cast(1024)); local->extraKDTreeNodes = static_cast( - service_malloc(local->extraKDTreeNodesCapacity * sizeof(KDTreeNode))); + service_malloc(local->extraKDTreeNodesCapacity)); DAAL_CHECK_THR(local->extraKDTreeNodes, services::ErrorMemoryAllocationFailed); } @@ -1358,7 +1413,7 @@ algorithmFpType KNNClassificationTrainBatchKernel(service_malloc(sampleCount * sizeof(*samples))); + algorithmFpType * samples = static_cast(service_malloc(sampleCount)); if (!samples) { status = services::ErrorMemoryAllocationFailed; @@ -1383,7 +1438,7 @@ algorithmFpType KNNClassificationTrainBatchKernel(sampleCount, samples); - size_t * hist = static_cast(service_malloc(sampleCount * sizeof(*hist))); + size_t * hist = static_cast(service_malloc(sampleCount)); if (!hist) { status = services::ErrorMemoryAllocationFailed; @@ -1396,7 +1451,7 @@ algorithmFpType KNNClassificationTrainBatchKernel(service_malloc(subSampleCount * sizeof(*subSamples))); + algorithmFpType * subSamples = static_cast(service_malloc(subSampleCount)); if (!subSamples) { status = services::ErrorMemoryAllocationFailed;