taehyounpark
diff --git a/‎docs/images/pth.png
25.3 KB b/‎docs/images/pth.png
25.3 KB
diff --git a/‎docs/pages/conceptual.md
Lines changed: 6 additions & 4 deletions b/‎docs/pages/conceptual.md
Lines changed: 6 additions & 4 deletions
diff --git a/‎docs/pages/example.md
Lines changed: 139 additions & 78 deletions b/‎docs/pages/example.md
Lines changed: 139 additions & 78 deletions
diff --git a/‎docs/pages/guide.md
Lines changed: 6 additions & 6 deletions b/‎docs/pages/guide.md
Lines changed: 6 additions & 6 deletions
diff --git a/‎examples/example-03.cxx
Lines changed: 0 additions & 4 deletions b/‎examples/example-03.cxx
Lines changed: 0 additions & 4 deletions
@@ -10,15 +10,17 @@ A `dataflow` consists of a directed, acyclic graph of tasks performed for each e
 An action is a node belonging to one of three task sub-graphs, each of which is associated with a set of applicable methods.
 Actions of each task graph can receive actions of the previous graphs as inputs:
 
-| Action | Description | Methods | Description | Task Graph | Input actions |
+| Action | Description | Methods | Description | Task Graph | Inputs (optional) |
 | :--- | :-- | :-- | :-- | :-- | :-- | 
 | `column` | Quantity of interest | `read()` | Read a column. | Computation | (`column`) |
-| | | `define()` | Evaluate a column. | | |
+| | | `define()` | Compute a column. | | |
 | `selection` | Boolean decision | `filter()` | Apply a cut. | Cutflow | `column` |
 | | Floating-point decision | `weight()` | Apply a statistical significance. | | |
-| `query` | Perform a query | `make()` | Plan a query. | Experiment | `column` & `selection` |
+| | | `book()` | Perform a query at the selection. | | |
+| `query` | Perform a query | `get()` | Define an output. | Experiment | (`column`) & `selection` |
 | | | `fill()` | Populate with column value(s). | | |
-| | | `book()` | Perform over selected entries. | | |
+| | | `at()` | Perform over selected entries. | | |
+| | | `result()` | Get the result. | | |
 
 @section conceptual-lazy Lazy actions
 
 
@@ -209,19 +209,19 @@ auto [yield_a, yield_b, yield_c] =
 
 @section example-hep More examples
 
-- [HepQuery](https://github.com/taehyounpark/queryosity-hep)
+[HepQuery](https://github.com/taehyounpark/queryosity-hep) provides the extensions for ROOT TTree datasets and ROOT `TH1`-based outputs.
 
 @subsection example-hep-hww ROOT TTree
 
 - Simulated ggF HWW* events: [ATLAS open data](https://opendata.cern.ch/record/3825).
 
 1. Apply the MC event weight.
 2. Select entries for which there are exactly two opposite-sign leptons in the event.
-3. Separate into different/same-flavour channel for electrons and muons.
-  - @f$m_{\ell\ell} > 12(10)\,\mathrm{GeV}@f$ for same(different)-flavour.
-5. Merge channels to form flavour-inclusive opposite-sign region.
+3. Separate into different/same-flavour channels for electrons and muons.
+4. Require @f$m_{\ell\ell} > 10(12)\,\mathrm{GeV}@f$ for different(same)-flavour channel.
+5. Merge channels to form flavour-inclusive opposite-sign region post-@f$m_{\ell\ell}@f$ cut.
 6. In each region, plot the distribution of @f$p_{\mathrm{T}}^H = \left| \mathbf{p}_{\mathrm{T}}^{\ell\ell} + \mathbf{p}_{\mathrm{T}}^{\mathrm{miss}} \right|@f$.
-	- Scale lepton energy scale by @f$\pm 2\,\%@f$ as systematic variations.
+	- Scale electron(muon) energy scale by @f$\pm 1(2)\,\%@f$ as systematic variations.
 
 @cpp
 #include "HepQuery/Hist.h"
@@ -257,15 +257,15 @@ using P4 = ROOT::Math::PtEtaPhiEVector;
 #include <sstream>
 
 // compute the nth-leading four-momentum out of (pt, eta, phi, m) arrays
-class NthP4 : public column::definition<P4(VecD, VecD, VecD, VecD)> {
+class NthP4 : public column::definition<P4(VecF, VecF, VecF, VecF)> {
 
 public:
   NthP4(unsigned int index) : m_index(index) {}
   virtual ~NthP4() = default;
 
-  virtual P4 evaluate(column::observable<VecD> pt, column::observable<VecD> eta,
-                      column::observable<VecD> phi,
-                      column::observable<VecD> es) const override {
+  virtual P4 evaluate(column::observable<VecF> pt, column::observable<VecF> eta,
+                      column::observable<VecF> phi,
+                      column::observable<VecF> es) const override {
     return P4(pt->at(m_index), eta->at(m_index), phi->at(m_index),
               es->at(m_index));
   }
@@ -276,62 +276,106 @@ protected:
 
 int main() {
 
+  // load dataset (not enough events to multithread)
   std::vector<std::string> tree_files{"hww.root"};
   std::string tree_name = "mini";
-
-  // not enough events to multithread
   dataflow df(multithread::disable());
   auto ds = df.load(dataset::input<HepQ::Tree>(tree_files, tree_name));
 
   // weights
   auto mc_weight = ds.read(dataset::column<float>("mcWeight"));
-  auto mu_sf = ds.read(dataset::column<float>("scaleFactor_MUON"));
-  auto el_sf = ds.read(dataset::column<float>("scaleFactor_ELE"));
-
+  auto mu_SF = ds.read(dataset::column<float>("scaleFactor_MUON"));
+  auto el_SF = ds.read(dataset::column<float>("scaleFactor_ELE"));
   // leptons
-  auto [lep_pt_MeV, lep_eta, lep_phi, lep_E_MeV, lep_Q, lep_type] = ds.read(
+  auto [lep_pT, lep_eta, lep_phi, lep_E, lep_Q, lep_type] = ds.read(
       dataset::column<VecF>("lep_pt"), dataset::column<VecF>("lep_eta"),
       dataset::column<VecF>("lep_phi"), dataset::column<VecF>("lep_E"),
       dataset::column<VecF>("lep_charge"), dataset::column<VecUI>("lep_type"));
-
   // missing transverse energy
-  auto [met_MeV, met_phi] = ds.read(dataset::column<float>("met_et"),
+  auto [met, met_phi] = ds.read(dataset::column<float>("met_et"),
                                     dataset::column<float>("met_phi"));
-
   // units
-  auto MeV = df.define(column::constant(1000.0));
-  auto lep_pt = lep_pt_MeV / MeV;
-  auto lep_E = lep_E_MeV / MeV;
-  auto met = met_MeV / MeV;
-
-  // vary the energy scale by +/-2%
-  auto Escale = df.vary(column::expression([](VecD E) { return E; }),
-                        {{"eg_up", [](VecD E) { return E * 1.02; }},
-                         {"eg_dn", [](VecD E) { return E * 0.98; }}});
-
-  // apply the energy scale (uncertainties) and select within acceptance
-  auto lep_pt_min = df.define(column::constant(15.0));
+  auto MeV = df.define(column::constant<float>(1000.0));
+  lep_pT = lep_pT / MeV;
+  lep_E = lep_E / MeV;
+  met = met / MeV;
+
+  // select electrons
+  auto el_sel = lep_type == df.define(column::constant(11));
+  auto el_pT_nom = lep_pT[el_sel];
+  auto el_eta = lep_eta[el_sel];
+  auto el_phi = lep_phi[el_sel];
+  auto el_E_nom = lep_E[el_sel];
+  auto el_Q = lep_Q[el_sel];
+  auto el_type = lep_type[el_sel];
+  // select muons
+  auto mu_sel = lep_type == df.define(column::constant(13));
+  auto mu_pT_nom = lep_pT[mu_sel];
+  auto mu_eta = lep_eta[mu_sel];
+  auto mu_phi = lep_phi[mu_sel];
+  auto mu_E_nom = lep_E[mu_sel];
+  auto mu_Q = lep_Q[mu_sel];
+  auto mu_type = lep_type[mu_sel];
+
+  // vary the energy scale by +/-1(2)% for electrons(muons)
+  auto el_scale = df.vary(column::expression([](VecF const &E) { return E; }),
+                          {{"el_up", [](VecF const &E) { return E * 1.01; }},
+                           {"el_dn", [](VecF const &E) { return E * 0.99; }}});
+  auto mu_scale = df.vary(column::expression([](VecF const &E) { return E; }),
+                          {{"mu_up", [](VecF const &E) { return E * 1.02; }},
+                           {"mu_dn", [](VecF const &E) { return E * 0.98; }}});
+  auto el_pT = el_scale(el_pT_nom);
+  auto el_E = el_scale(el_E_nom);
+  auto mu_pT = mu_scale(mu_pT_nom);
+  auto mu_E = mu_scale(mu_E_nom);
+
+  // re-concatenate into el+mu arrays
+  auto concat = [](VecF const &v1, VecF const &v2) {
+    return ROOT::VecOps::Concatenate(v1, v2);
+  };
+  auto el_mu_pT = df.define(column::expression(concat))(el_pT, mu_pT);
+  auto el_mu_eta = df.define(column::expression(concat))(el_eta, mu_eta);
+  auto el_mu_phi = df.define(column::expression(concat))(el_phi, mu_phi);
+  auto el_mu_E = df.define(column::expression(concat))(el_E, mu_E);
+  auto el_mu_Q = df.define(column::expression(concat))(el_Q, mu_Q);
+  auto el_mu_type = df.define(column::expression(concat))(el_type, mu_type);
+
+  // take sorted lepton arrays
+  auto take = df.define(column::expression([](VecF const &v, VecUI const &is) {
+    return ROOT::VecOps::Take(v, is);
+  }));
+  auto lep_indices = df.define(column::expression(
+      [](VecF const &v) { return ROOT::VecOps::Argsort(v); }))(el_mu_pT);
+  auto lep_pT_syst = take(el_mu_pT, lep_indices);
+  auto lep_eta_syst = take(el_mu_eta, lep_indices);
+  auto lep_phi_syst = take(el_mu_phi, lep_indices);
+  auto lep_E_syst = take(el_mu_E, lep_indices);
+  auto lep_Q_syst = take(el_mu_Q, lep_indices);
+  auto lep_type_syst = take(el_mu_type, lep_indices);
+
+  // apply acceptance selections
+  auto lep_pT_min = df.define(column::constant(15.0));
   auto lep_eta_max = df.define(column::constant(2.4));
-  auto lep_selection = (lep_eta < lep_eta_max) && (lep_eta > (-lep_eta_max)) &&
-                       (lep_pt > lep_pt_min);
-  auto lep_pt_sel = Escale(lep_pt)[lep_selection];
-  auto lep_E_sel = Escale(lep_E)[lep_selection];
-  auto lep_eta_sel = lep_eta[lep_selection];
-  auto lep_phi_sel = lep_phi[lep_selection];
-  auto lep_Q_sel = lep_Q[lep_selection];
-  auto lep_type_sel = lep_type[lep_selection];
+  auto lep_sel = (lep_eta_syst < lep_eta_max) &&
+                 (lep_eta_syst > (-lep_eta_max)) && (lep_pT_syst > lep_pT_min);
+  auto lep_pT_sel = lep_pT_syst[lep_sel];
+  auto lep_E_sel = lep_E_syst[lep_sel];
+  auto lep_eta_sel = lep_eta_syst[lep_sel];
+  auto lep_phi_sel = lep_phi_syst[lep_sel];
+  auto lep_Q_sel = lep_Q_syst[lep_sel];
+  auto lep_type_sel = lep_type_syst[lep_sel];
 
   // compute (sub-)leading lepton four-momentum
-  auto l1p4 = df.define(column::definition<NthP4>(0))(lep_pt_sel, lep_eta_sel,
+  auto l1p4 = df.define(column::definition<NthP4>(0))(lep_pT_sel, lep_eta_sel,
                                                       lep_phi_sel, lep_E_sel);
-  auto l2p4 = df.define(column::definition<NthP4>(1))(lep_pt_sel, lep_eta_sel,
+  auto l2p4 = df.define(column::definition<NthP4>(1))(lep_pT_sel, lep_eta_sel,
                                                       lep_phi_sel, lep_E_sel);
 
   // compute dilepton invariant mass & higgs transverse momentum
   auto llp4 = l1p4 + l2p4;
   auto mll =
       df.define(column::expression([](const P4 &p4) { return p4.M(); }))(llp4);
-  auto higgs_pt =
+  auto higgs_pT =
       df.define(column::expression([](const P4 &p4, float q, float q_phi) {
         TVector2 p2;
         p2.SetMagPhi(p4.Pt(), p4.Phi());
@@ -343,10 +387,10 @@ int main() {
   // compute number of leptons
   auto nlep_req = df.define(column::constant<unsigned int>(2));
   auto nlep_sel = df.define(column::expression(
-      [](VecD const &lep) { return lep.size(); }))(lep_pt_sel);
+      [](VecF const &lep) { return lep.size(); }))(lep_pT_sel);
 
   // apply MC event weight * electron & muon scale factors
-  auto weighted = df.weight(mc_weight * el_sf * mu_sf);
+  auto weighted = df.weight(mc_weight * el_SF * mu_SF);
 
   // require 2 opposite-signed leptons
   auto cut_2l = weighted.filter(nlep_sel == nlep_req);
@@ -355,60 +399,77 @@ int main() {
   }))(lep_Q_sel);
 
   // branch out into different/same-flavour channels
-  auto cut_2ldf = cut_2los.filter(column::expression([](const VecI &lep_type) {
+  auto cut_df = cut_2los.filter(column::expression([](const VecI &lep_type) {
     return lep_type[0] + lep_type[1] == 24;
   }))(lep_type_sel);
-  auto cut_2lsf = cut_2los.filter(column::expression([](const VecI &lep_type) {
-    return (lep_type[0] + lep_type[1] == 22) ||
-           (lep_type[0] + lep_type[1] == 26);
+  auto cut_ee = cut_2los.filter(column::expression([](const VecI &lep_type) {
+    return (lep_type[0] + lep_type[1] == 22);
+  }))(lep_type_sel);
+  auto cut_mm = cut_2los.filter(column::expression([](const VecI &lep_type) {
+    return (lep_type[0] + lep_type[1] == 26);
   }))(lep_type_sel);
 
   // apply (different) cuts for each channel
   auto mll_min_df = df.define(column::constant(10.0));
-  auto cut_2ldf_presel = cut_2ldf.filter(mll > mll_min_df);
+  auto cut_df_presel = cut_df.filter(mll > mll_min_df);
   auto mll_min_sf = df.define(column::constant(12.0));
-  auto cut_2lsf_presel = cut_2lsf.filter(mll > mll_min_sf);
+  auto cut_ee_presel = cut_ee.filter(mll > mll_min_sf);
+  auto cut_mm_presel = cut_mm.filter(mll > mll_min_sf);
 
   // merge df+sf channels
-  // once two selections are joined, they "forget" everything upstream
-  // i.e. need to re-apply the event weight
-  auto cut_2los_presel = df.filter(cut_2ldf_presel || cut_2lsf_presel)
-                             .weight(mc_weight * el_sf * mu_sf);
+  // evaluate the merged selection and apply it as a filter on top of cut_2los
+  auto cut_2los_presel =
+      cut_2los.filter(cut_df_presel || cut_ee_presel || cut_mm_presel);
 
   // make histograms
-  auto [pth_2los_presel, pth_2ldf_presel, pth_2lsf_presel] =
-      df.get(query::output<HepQ::Hist<1, float>>("pth", 30, 0, 150))
-          .fill(higgs_pt)
-          .at(cut_2los_presel, cut_2ldf_presel, cut_2lsf_presel);
+  auto [pTH_2los_presel, pTH_df_presel, pTH_ee_presel, pTH_mm_presel] =
+      df.get(query::output<HepQ::Hist<1, float>>("pTH", 30, 0, 150))
+          .fill(higgs_pT)
+          .at(cut_2los, cut_df_presel, cut_ee_presel, cut_mm_presel);
 
   // plot results
   Double_t w = 1600;
-  Double_t h = 800;
+  Double_t h = 1600;
   TCanvas c("c", "c", w, h);
   c.SetWindowSize(w + (w - c.GetWw()), h + (h - c.GetWh()));
-  c.Divide(3, 1);
+  c.Divide(2, 2);
   c.cd(1);
-  pth_2los_presel.nominal()->SetLineColor(kBlack);
-  pth_2los_presel.nominal()->Draw("ep");
-  pth_2los_presel["eg_up"]->SetLineColor(kRed);
-  pth_2los_presel["eg_up"]->Draw("same hist");
-  pth_2los_presel["eg_dn"]->SetLineColor(kBlue);
-  pth_2los_presel["eg_dn"]->Draw("same hist");
+  pTH_2los_presel.nominal()->SetTitle("2LOS");
+  pTH_2los_presel.nominal()->SetLineColor(kBlack);
+  pTH_2los_presel.nominal()->Draw("ep");
+  pTH_2los_presel["el_up"]->SetLineColor(kRed);
+  pTH_2los_presel["el_up"]->Draw("same hist");
+  pTH_2los_presel["mu_dn"]->SetLineColor(kBlue);
+  pTH_2los_presel["mu_dn"]->Draw("same hist");
+  pTH_2los_presel.nominal()->Draw("same hist");
   c.cd(2);
-  pth_2ldf_presel.nominal()->SetLineColor(kBlack);
-  pth_2ldf_presel.nominal()->Draw("ep");
-  pth_2ldf_presel["eg_up"]->SetLineColor(kRed);
-  pth_2ldf_presel["eg_up"]->Draw("same hist");
-  pth_2ldf_presel["eg_dn"]->SetLineColor(kBlue);
-  pth_2ldf_presel["eg_dn"]->Draw("same hist");
+  pTH_df_presel.nominal()->SetTitle("2LDF");
+  pTH_df_presel.nominal()->SetLineColor(kBlack);
+  pTH_df_presel.nominal()->Draw("ep");
+  pTH_df_presel["el_up"]->SetLineColor(kRed);
+  pTH_df_presel["el_up"]->Draw("same hist");
+  pTH_df_presel["mu_dn"]->SetLineColor(kBlue);
+  pTH_df_presel["mu_dn"]->Draw("same hist");
+  pTH_df_presel.nominal()->Draw("same hist");
   c.cd(3);
-  pth_2lsf_presel.nominal()->SetLineColor(kBlack);
-  pth_2lsf_presel.nominal()->Draw("ep");
-  pth_2lsf_presel["eg_up"]->SetLineColor(kRed);
-  pth_2lsf_presel["eg_up"]->Draw("same hist");
-  pth_2lsf_presel["eg_dn"]->SetLineColor(kBlue);
-  pth_2lsf_presel["eg_dn"]->Draw("same hist");
-  c.SaveAs("pth.png");
+  pTH_ee_presel.nominal()->SetTitle("2LSF (ee)");
+  pTH_ee_presel.nominal()->SetLineColor(kBlack);
+  pTH_ee_presel.nominal()->Draw("ep");
+  pTH_ee_presel["el_up"]->SetLineColor(kRed);
+  pTH_ee_presel["el_up"]->Draw("same hist");
+  pTH_ee_presel["mu_dn"]->SetLineColor(kBlue);
+  pTH_ee_presel["mu_dn"]->Draw("same hist");
+  pTH_ee_presel.nominal()->Draw("same hist");
+  c.cd(4);
+  pTH_mm_presel.nominal()->SetTitle("2LSF (mm)");
+  pTH_mm_presel.nominal()->SetLineColor(kBlack);
+  pTH_mm_presel.nominal()->Draw("ep");
+  pTH_mm_presel["el_up"]->SetLineColor(kRed);
+  pTH_mm_presel["el_up"]->Draw("same hist");
+  pTH_mm_presel["mu_dn"]->SetLineColor(kBlue);
+  pTH_mm_presel["mu_dn"]->Draw("same hist");
+  pTH_mm_presel.nominal()->Draw("same hist");
+  c.SaveAs("pTH.png");
 
   return 0;
 }
 
@@ -96,19 +96,19 @@ auto three = one + two;
 auto v_0 = v[zero];
 // reminder: actions are *lazy*, i.e. no undefined behaviour (yet)
 
-// self-assignment operators are not possible
-// one += two;
+// can be re-assigned as long as data type remains unchanged
+two = three - one;
 
 // C++ function, functor, lambda, etc. evaluated out of input columns
 // tip: pass large values by const& to prevent copies
 auto s_length = df.define(
     column::expression([](const std::string &txt) { return txt.length(); }))(s);
 @endcpp
 
-A column can also be computed through a custom definition (see @ref example-stirling for an example), which enables full control over
+A column can also be computed through a @ref example-stirling, which enables full control over its
 
 - Customization: user-defined constructor arguments and member variables/functions.
-- Optimization: the computation of each input column is deferred until value is invoked.
+- Optimization: the computation of each input column is deferred until its value is invoked.
 
 @see 
 - queryosity::column::definition (API)
@@ -200,7 +200,7 @@ auto h2xy_c = q_2xy_c.result(); // instantaneous
 
 @section guide-vary Systematic variations
 
-Specifying systematic variations on a column is as simple as it can be: provide the nominal argument and a mapping of variation name to alternate arguments to queryosity::dataflow::vary() in lieu of the usual queryosity::dataflow::define().
+To specify systematic variations on a column, provide the nominal argument and a mapping of variation name to alternate arguments to queryosity::dataflow::vary() instead of the usual queryosity::dataflow::define().
 
 @cpp
 // dataset columns must be varied from the loaded dataset
@@ -250,7 +250,7 @@ systematic::get_variation_names(
     x, yn); // {"shift_x", "smear_x", "plus_1", "minus_1", "kill_x", "no"}
 
 auto cut = df.filter(yn);
-auto q = df.get(column::series(x)).book(cut);
+auto q = df.get(column::series(x)).at(cut);
 
 q.get_variation_names(); // same set as (x, yn) above
 
 
@@ -67,8 +67,4 @@ int main() {
       df.define(column::definition<Factorial>(/*20*/))(n, n_f_fast, n_f_full);
   // time elapsed = t(n) + { t(n_fast) if n >= 10, t(n_slow) if n < 10 }
   // :)
-
-  // advanced: access per-thread instance
-  dataflow::node::invoke([n_threshold](Factorial *n_f) { n_f->adjust_threshold(n_threshold); },
-                         n_f_best);
 }
Original file line number	Diff line number	Diff line change
`@@ -67,8 +67,4 @@ int main() {`
`67`	`67`	`df.define(column::definition<Factorial>(/*20*/))(n, n_f_fast, n_f_full);`
`68`	`68`	`// time elapsed = t(n) + { t(n_fast) if n >= 10, t(n_slow) if n < 10 }`
`69`	`69`	`// :)`
`70`		`-`
`71`		`- // advanced: access per-thread instance`
`72`		`- dataflow::node::invoke([n_threshold](Factorial *n_f) { n_f->adjust_threshold(n_threshold); },`
`73`		`- n_f_best);`
`74`	`70`	`}`