From ed3661d9f651fdfd5d1589e5882d143f280b17e9 Mon Sep 17 00:00:00 2001 From: Vincenzo Eduardo Padulano Date: Sun, 8 Feb 2026 16:42:15 +0100 Subject: [PATCH] [df] Deprecate confusing signatures of HistoN[Sparse]D The signatures of HistoN[Sparse]D operations allow passing the column that will provide the weights as the last element in the list of input columns, practically allowing for a size which is one more than the number of dimensions of the histogram to be filled. This was done to align these signatures with the logic of THnBase::Fill that follows the same schema. But these signatures are also different than all other Histo* signatures that accept weight columns as separate arguments. Thus, they are aligned to the other signatures by adding a new optional argument for the name of the column that will provide the weights. The previous usage of the operations is deprecated with a warning. Add tests. --- tree/dataframe/inc/ROOT/RDF/RInterface.hxx | 132 ++++++++++++++---- tree/dataframe/test/dataframe_histomodels.cxx | 88 ++++++++++++ 2 files changed, 196 insertions(+), 24 deletions(-) diff --git a/tree/dataframe/inc/ROOT/RDF/RInterface.hxx b/tree/dataframe/inc/ROOT/RDF/RInterface.hxx index fb5422d2a8778..46afaa1df4ca6 100644 --- a/tree/dataframe/inc/ROOT/RDF/RInterface.hxx +++ b/tree/dataframe/inc/ROOT/RDF/RInterface.hxx @@ -2216,7 +2216,7 @@ public: /// \param[in] model The returned histogram will be constructed using this as a model. /// \param[in] columnList /// A list containing the names of the columns that will be passed when calling `Fill`. - /// (N columns for unweighted filling, or N+1 columns for weighted filling) + /// \param[in] wName The name of the column that will provide the weights. /// \return the N-dimensional histogram wrapped in a RResultPtr. /// /// This action is *lazy*: upon invocation of this method the calculation is @@ -2230,18 +2230,38 @@ public: /// ~~~ /// template // need FirstColumn to disambiguate overloads - RResultPtr<::THnD> HistoND(const THnDModel &model, const ColumnNames_t &columnList) + RResultPtr<::THnD> HistoND(const THnDModel &model, const ColumnNames_t &columnList, std::string_view wName = "") { std::shared_ptr<::THnD> h(nullptr); { ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); h = model.GetHistogram(); + const auto hDims = h->GetNdimensions(); + decltype(hDims) nCols = columnList.size(); + + if (!wName.empty() && nCols == hDims + 1) + throw std::invalid_argument("The weight column was passed as an argument and at the same time the list of " + "input columns contains one column more than the number of dimensions of the " + "histogram. Call as 'HistoND(model, cols, weightCol)'."); - if (int(columnList.size()) == (h->GetNdimensions() + 1)) { + if (nCols == hDims + 1) + Warning("HistoND", "Passing the column with the weights as the last column in the list is deprecated. " + "Instead, pass it as a separate argument, e.g. 'HistoND(model, cols, weightCol)'."); + + if (!wName.empty() || nCols == hDims + 1) h->Sumw2(); - } else if (int(columnList.size()) != h->GetNdimensions()) { - throw std::runtime_error("Wrong number of columns for the specified number of histogram axes."); - } + + if (!(nCols == hDims + 1) && nCols != hDims) + throw std::invalid_argument("Wrong number of columns for the specified number of histogram axes."); + } + + if (!wName.empty()) { + // The action helper will invoke THnBase::Fill overload that performs weighted filling in case the number of + // passed arguments is one more the number of dimensions of the histogram. + ColumnNames_t userColumns = columnList; + userColumns.push_back(std::string{wName}); + return CreateAction(userColumns, h, h, + fProxiedPtr); } return CreateAction(columnList, h, h, fProxiedPtr); @@ -2251,7 +2271,7 @@ public: /// \brief Fill and return an N-dimensional histogram (*lazy action*). /// \param[in] model The returned histogram will be constructed using this as a model. /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill` - /// (N columns for unweighted filling, or N+1 columns for weighted filling) + /// \param[in] wName The name of the column that will provide the weights. /// \return the N-dimensional histogram wrapped in a RResultPtr. /// /// This action is *lazy*: upon invocation of this method the calculation is @@ -2264,18 +2284,38 @@ public: /// {"col0", "col1", "col2", "col3"}); /// ~~~ /// - RResultPtr<::THnD> HistoND(const THnDModel &model, const ColumnNames_t &columnList) + RResultPtr<::THnD> HistoND(const THnDModel &model, const ColumnNames_t &columnList, std::string_view wName = "") { std::shared_ptr<::THnD> h(nullptr); { ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); h = model.GetHistogram(); + const auto hDims = h->GetNdimensions(); + decltype(hDims) nCols = columnList.size(); + + if (!wName.empty() && nCols == hDims + 1) + throw std::invalid_argument("The weight column was passed as an argument and at the same time the list of " + "input columns contains one column more than the number of dimensions of the " + "histogram. Call as 'HistoND(model, cols, weightCol)'."); - if (int(columnList.size()) == (h->GetNdimensions() + 1)) { + if (nCols == hDims + 1) + Warning("HistoND", "Passing the column with the weights as the last column in the list is deprecated. " + "Instead, pass it as a separate argument, e.g. 'HistoND(model, cols, weightCol)'."); + + if (!wName.empty() || nCols == hDims + 1) h->Sumw2(); - } else if (int(columnList.size()) != h->GetNdimensions()) { - throw std::runtime_error("Wrong number of columns for the specified number of histogram axes."); - } + + if (!(nCols == hDims + 1) && nCols != hDims) + throw std::invalid_argument("Wrong number of columns for the specified number of histogram axes."); + } + + if (!wName.empty()) { + // The action helper will invoke THnBase::Fill overload that performs weighted filling in case the number of + // passed arguments is one more the number of dimensions of the histogram. + ColumnNames_t userColumns = columnList; + userColumns.push_back(std::string{wName}); + return CreateAction(userColumns, h, h, fProxiedPtr, + userColumns.size()); } return CreateAction(columnList, h, h, fProxiedPtr, columnList.size()); @@ -2290,7 +2330,7 @@ public: /// \param[in] model The returned histogram will be constructed using this as a model. /// \param[in] columnList /// A list containing the names of the columns that will be passed when calling `Fill`. - /// (N columns for unweighted filling, or N+1 columns for weighted filling) + /// \param[in] wName The name of the column that will provide the weights. /// \return the N-dimensional histogram wrapped in a RResultPtr. /// /// This action is *lazy*: upon invocation of this method the calculation is @@ -2304,18 +2344,40 @@ public: /// ~~~ /// template // need FirstColumn to disambiguate overloads - RResultPtr<::THnSparseD> HistoNSparseD(const THnSparseDModel &model, const ColumnNames_t &columnList) + RResultPtr<::THnSparseD> + HistoNSparseD(const THnSparseDModel &model, const ColumnNames_t &columnList, std::string_view wName = "") { std::shared_ptr<::THnSparseD> h(nullptr); { ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); h = model.GetHistogram(); + const auto hDims = h->GetNdimensions(); + decltype(hDims) nCols = columnList.size(); + + if (!wName.empty() && nCols == hDims + 1) + throw std::invalid_argument("The weight column was passed as an argument and at the same time the list of " + "input columns contains one column more than the number of dimensions of the " + "histogram. Call as 'HistoNSparseD(model, cols, weightCol)'."); - if (int(columnList.size()) == (h->GetNdimensions() + 1)) { + if (nCols == hDims + 1) + Warning("HistoNSparseD", + "Passing the column with the weights as the last column in the list is deprecated. " + "Instead, pass it as a separate argument, e.g. 'HistoNSparseD(model, cols, weightCol)'."); + + if (!wName.empty() || nCols == hDims + 1) h->Sumw2(); - } else if (int(columnList.size()) != h->GetNdimensions()) { - throw std::runtime_error("Wrong number of columns for the specified number of histogram axes."); - } + + if (!(nCols == hDims + 1) && nCols != hDims) + throw std::invalid_argument("Wrong number of columns for the specified number of histogram axes."); + } + + if (!wName.empty()) { + // The action helper will invoke THnBase::Fill overload that performs weighted filling in case the number of + // passed arguments is one more the number of dimensions of the histogram. + ColumnNames_t userColumns = columnList; + userColumns.push_back(std::string{wName}); + return CreateAction(userColumns, h, h, + fProxiedPtr); } return CreateAction(columnList, h, h, fProxiedPtr); @@ -2325,7 +2387,7 @@ public: /// \brief Fill and return a sparse N-dimensional histogram (*lazy action*). /// \param[in] model The returned histogram will be constructed using this as a model. /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill` - /// (N columns for unweighted filling, or N+1 columns for weighted filling) + /// \param[in] wName The name of the column that will provide the weights. /// \return the N-dimensional histogram wrapped in a RResultPtr. /// /// This action is *lazy*: upon invocation of this method the calculation is @@ -2338,18 +2400,40 @@ public: /// {"col0", "col1", "col2", "col3"}); /// ~~~ /// - RResultPtr<::THnSparseD> HistoNSparseD(const THnSparseDModel &model, const ColumnNames_t &columnList) + RResultPtr<::THnSparseD> + HistoNSparseD(const THnSparseDModel &model, const ColumnNames_t &columnList, std::string_view wName = "") { std::shared_ptr<::THnSparseD> h(nullptr); { ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); h = model.GetHistogram(); + const auto hDims = h->GetNdimensions(); + decltype(hDims) nCols = columnList.size(); + + if (!wName.empty() && nCols == hDims + 1) + throw std::invalid_argument("The weight column was passed as an argument and at the same time the list of " + "input columns contains one column more than the number of dimensions of the " + "histogram. Call as 'HistoNSparseD(model, cols, weightCol)'."); - if (int(columnList.size()) == (h->GetNdimensions() + 1)) { + if (nCols == hDims + 1) + Warning("HistoNSparseD", + "Passing the column with the weights as the last column in the list is deprecated. " + "Instead, pass it as a separate argument, e.g. 'HistoNSparseD(model, cols, weightCol)'."); + + if (!wName.empty() || nCols == hDims + 1) h->Sumw2(); - } else if (int(columnList.size()) != h->GetNdimensions()) { - throw std::runtime_error("Wrong number of columns for the specified number of histogram axes."); - } + + if (!(nCols == hDims + 1) && nCols != hDims) + throw std::invalid_argument("Wrong number of columns for the specified number of histogram axes."); + } + + if (!wName.empty()) { + // The action helper will invoke THnBase::Fill overload that performs weighted filling in case the number of + // passed arguments is one more the number of dimensions of the histogram. + ColumnNames_t userColumns = columnList; + userColumns.push_back(std::string{wName}); + return CreateAction( + userColumns, h, h, fProxiedPtr, userColumns.size()); } return CreateAction( columnList, h, h, fProxiedPtr, columnList.size()); diff --git a/tree/dataframe/test/dataframe_histomodels.cxx b/tree/dataframe/test/dataframe_histomodels.cxx index f7d01ca3597c2..5ea6adfb057fe 100644 --- a/tree/dataframe/test/dataframe_histomodels.cxx +++ b/tree/dataframe/test/dataframe_histomodels.cxx @@ -1,3 +1,4 @@ +#include "ROOT/TestSupport.hxx" #include "ROOT/RDataFrame.hxx" #include "ROOT/TSeq.hxx" @@ -407,3 +408,90 @@ TEST(RDataFrameHisto, FillVecBool) EXPECT_EQ(h->GetBinContent(2), n); EXPECT_EQ(h->GetBinContent(3), 0u); } + +TEST(RDataFrameHistoModels, HistoNDWeight) +{ + ROOT::RDataFrame tdf(10); + auto x = 0.; + auto d = tdf.Define("x0", [&x]() { return x++; }) + .Define("x1", [&x]() { return x + .1; }) + .Define("x2", [&x]() { return x + .1; }) + .Define("x3", [&x]() { return x + .1; }) + .Define("w", []() { return 0.5; }); + int nbins[4] = {10, 5, 2, 2}; + double xmin[4] = {0., 0., 0., 0.}; + double xmax[4] = {10., 10., 10., 10.}; + + // Passing a list of columns with an extra name for the weights is deprecated + ROOT_EXPECT_WARNING(d.HistoND(::THnD("w", "w", 4, nbins, xmin, xmax), {"x0", "x1", "x2", "x3", "w"}), "HistoND", + "Passing the column with the weights as the last column in the list is deprecated. Instead, " + "pass it as a separate argument, e.g. 'HistoND(model, cols, weightCol)'."); + + // Passing both a list with the extra weight column and also the weight column as a separate argument throws + EXPECT_THROW( + try { + d.HistoND(::THnD("e", "e", 4, nbins, xmin, xmax), {"x0", "x1", "x2", "x3", "w"}, "w"); + } catch (const std::invalid_argument &err) { + EXPECT_EQ(std::string(err.what()), + "The weight column was passed as an argument and at the same time the list of " + "input columns contains one column more than the number of dimensions of the " + "histogram. Call as 'HistoND(model, cols, weightCol)'."); + throw; + }, + std::invalid_argument); + + auto hw = d.HistoND(::THnD("hw", "hw", 4, nbins, xmin, xmax), {"x0", "x1", "x2", "x3"}, "w"); + std::vector ref0({0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10.}); + std::vector ref1({0., 2., 4., 6., 8., 10.}); + std::vector ref2({0., 5., 10.}); + std::vector ref3({0., 5., 10.}); + + std::vector> ref = {ref0, ref1, ref2, ref3}; + for (unsigned int idim = 0; idim < ref.size(); ++idim) { + CheckBins(hw->GetAxis(idim), ref[idim]); + } +} + +TEST(RDataFrameHistoModels, HistoNSparseDWeight) +{ + ROOT::RDataFrame tdf(10); + auto x = 0.; + auto d = tdf.Define("x0", [&x]() { return x++; }) + .Define("x1", [&x]() { return x + .1; }) + .Define("x2", [&x]() { return x + .1; }) + .Define("x3", [&x]() { return x + .1; }) + .Define("w", []() { return 0.5; }); + int nbins[4] = {10, 5, 2, 2}; + double xmin[4] = {0., 0., 0., 0.}; + double xmax[4] = {10., 10., 10., 10.}; + + // Passing a list of columns with an extra name for the weights is deprecated + ROOT_EXPECT_WARNING(d.HistoNSparseD(::THnSparseD("w", "w", 4, nbins, xmin, xmax), {"x0", "x1", "x2", "x3", "w"}), + "HistoNSparseD", + "Passing the column with the weights as the last column in the list is deprecated. Instead, " + "pass it as a separate argument, e.g. 'HistoNSparseD(model, cols, weightCol)'."); + + // Passing both a list with the extra weight column and also the weight column as a separate argument throws + EXPECT_THROW( + try { + d.HistoNSparseD(::THnSparseD("e", "e", 4, nbins, xmin, xmax), {"x0", "x1", "x2", "x3", "w"}, "w"); + } catch (const std::invalid_argument &err) { + EXPECT_EQ(std::string(err.what()), + "The weight column was passed as an argument and at the same time the list of " + "input columns contains one column more than the number of dimensions of the " + "histogram. Call as 'HistoNSparseD(model, cols, weightCol)'."); + throw; + }, + std::invalid_argument); + + auto hw = d.HistoNSparseD(::THnSparseD("hw", "hw", 4, nbins, xmin, xmax), {"x0", "x1", "x2", "x3"}, "w"); + std::vector ref0({0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10.}); + std::vector ref1({0., 2., 4., 6., 8., 10.}); + std::vector ref2({0., 5., 10.}); + std::vector ref3({0., 5., 10.}); + + std::vector> ref = {ref0, ref1, ref2, ref3}; + for (unsigned int idim = 0; idim < ref.size(); ++idim) { + CheckBins(hw->GetAxis(idim), ref[idim]); + } +}