Skip to content

Commit 47c9920

Browse files
committed
type-erased columns
1 parent 85f0756 commit 47c9920

File tree

17 files changed

+230
-180
lines changed

17 files changed

+230
-180
lines changed

docs/images/pth.png

25.3 KB
Loading

docs/pages/conceptual.md

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,17 @@ A `dataflow` consists of a directed, acyclic graph of tasks performed for each e
1010
An action is a node belonging to one of three task sub-graphs, each of which are associated with a set of applicable methods.
1111
Actions of each task graph can receive ones of the previous graphs as inputs:
1212

13-
| Action | Description | Methods | Description | Task Graph | Input actions |
13+
| Action | Description | Methods | Description | Task Graph | Inputs (optional) |
1414
| :--- | :-- | :-- | :-- | :-- | :-- |
1515
| `column` | Quantity of interest | `read()` | Read a column. | Computation | (`column`) |
16-
| | | `define()` | Evaluate a column. | | |
16+
| | | `define()` | Compute a column. | | |
1717
| `selection` | Boolean decision | `filter()` | Apply a cut. | Cutflow | `column` |
1818
| | Floating-point decision | `weight()` | Apply a statistical significance. | | |
19-
| `query` | Perform a query | `make()` | Plan a query. | Experiment | `column` & `selection` |
19+
| | | `book()` | Perform a query at the selection. | | |
20+
| `query` | Perform a query | `get()` | Define an output. | Experiment | (`column`) & `selection` |
2021
| | | `fill()` | Populate with column value(s). | | |
21-
| | | `book()` | Perform over selected entries. | | |
22+
| | | `at()` | Perform over selected entries. | | |
23+
| | | `result()` | Get the result. | | |
2224

2325
@section conceptual-lazy Lazy actions
2426

docs/pages/example.md

Lines changed: 139 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -209,19 +209,19 @@ auto [yield_a, yield_b, yield_c] =
209209

210210
@section example-hep More examples
211211

212-
- [HepQuery](https://github.com/taehyounpark/queryosity-hep)
212+
[HepQuery](https://github.com/taehyounpark/queryosity-hep) provides the extensions for ROOT TTree datasets and ROOT `TH1`-based outputs.
213213

214214
@subsection example-hep-hww ROOT TTree
215215

216216
- Simulated ggF HWW* events: [ATLAS open data](https://opendata.cern.ch/record/3825).
217217

218218
1. Apply the MC event weight.
219219
2. Select entries for which there are exactly two opposite-sign leptons in the event.
220-
3. Separate into different/same-flavour channel for electrons and muons.
221-
- @f$m_{\ell\ell} > 12(10)\,\mathrm{GeV}@f$ for same(different)-flavour.
222-
5. Merge channels to form flavour-inclusive opposite-sign region.
220+
3. Separate into different/same-flavour channels for electrons and muons.
221+
4. Require @f$m_{\ell\ell} > 10(12)\,\mathrm{GeV}@f$ for different(same)-flavour channel.
222+
5. Merge channels to form flavour-inclusive opposite-sign region post-@f$m_{\ell\ell}@f$ cut.
223223
6. In each region, plot the distribution of @f$p_{\mathrm{T}}^H = \left| \mathbf{p}_{\mathrm{T}}^{\ell\ell} + \mathbf{p}_{\mathrm{T}}^{\mathrm{miss}} \right|@f$.
224-
- Scale lepton energy scale by @f$\pm 2\,\%@f$ as systematic variations.
224+
- Scale electron(muon) energy scale by @f$\pm 1(2)\,\%@f$ as systematic variations.
225225

226226
@cpp
227227
#include "HepQuery/Hist.h"
@@ -257,15 +257,15 @@ using P4 = ROOT::Math::PtEtaPhiEVector;
257257
#include <sstream>
258258

259259
// compute the nth-leading four-momentum out of (pt, eta, phi, m) arrays
260-
class NthP4 : public column::definition<P4(VecD, VecD, VecD, VecD)> {
260+
class NthP4 : public column::definition<P4(VecF, VecF, VecF, VecF)> {
261261

262262
public:
263263
NthP4(unsigned int index) : m_index(index) {}
264264
virtual ~NthP4() = default;
265265

266-
virtual P4 evaluate(column::observable<VecD> pt, column::observable<VecD> eta,
267-
column::observable<VecD> phi,
268-
column::observable<VecD> es) const override {
266+
virtual P4 evaluate(column::observable<VecF> pt, column::observable<VecF> eta,
267+
column::observable<VecF> phi,
268+
column::observable<VecF> es) const override {
269269
return P4(pt->at(m_index), eta->at(m_index), phi->at(m_index),
270270
es->at(m_index));
271271
}
@@ -276,62 +276,106 @@ protected:
276276

277277
int main() {
278278

279+
// load dataset (not enough events to multithread)
279280
std::vector<std::string> tree_files{"hww.root"};
280281
std::string tree_name = "mini";
281-
282-
// not enough events to multithread
283282
dataflow df(multithread::disable());
284283
auto ds = df.load(dataset::input<HepQ::Tree>(tree_files, tree_name));
285284

286285
// weights
287286
auto mc_weight = ds.read(dataset::column<float>("mcWeight"));
288-
auto mu_sf = ds.read(dataset::column<float>("scaleFactor_MUON"));
289-
auto el_sf = ds.read(dataset::column<float>("scaleFactor_ELE"));
290-
287+
auto mu_SF = ds.read(dataset::column<float>("scaleFactor_MUON"));
288+
auto el_SF = ds.read(dataset::column<float>("scaleFactor_ELE"));
291289
// leptons
292-
auto [lep_pt_MeV, lep_eta, lep_phi, lep_E_MeV, lep_Q, lep_type] = ds.read(
290+
auto [lep_pT, lep_eta, lep_phi, lep_E, lep_Q, lep_type] = ds.read(
293291
dataset::column<VecF>("lep_pt"), dataset::column<VecF>("lep_eta"),
294292
dataset::column<VecF>("lep_phi"), dataset::column<VecF>("lep_E"),
295293
dataset::column<VecF>("lep_charge"), dataset::column<VecUI>("lep_type"));
296-
297294
// missing transverse energy
298-
auto [met_MeV, met_phi] = ds.read(dataset::column<float>("met_et"),
295+
auto [met, met_phi] = ds.read(dataset::column<float>("met_et"),
299296
dataset::column<float>("met_phi"));
300-
301297
// units
302-
auto MeV = df.define(column::constant(1000.0));
303-
auto lep_pt = lep_pt_MeV / MeV;
304-
auto lep_E = lep_E_MeV / MeV;
305-
auto met = met_MeV / MeV;
306-
307-
// vary the energy scale by +/-2%
308-
auto Escale = df.vary(column::expression([](VecD E) { return E; }),
309-
{{"eg_up", [](VecD E) { return E * 1.02; }},
310-
{"eg_dn", [](VecD E) { return E * 0.98; }}});
311-
312-
// apply the energy scale (uncertainties) and select within acceptance
313-
auto lep_pt_min = df.define(column::constant(15.0));
298+
auto MeV = df.define(column::constant<float>(1000.0));
299+
lep_pT = lep_pT / MeV;
300+
lep_E = lep_E / MeV;
301+
met = met / MeV;
302+
303+
// select electrons
304+
auto el_sel = lep_type == df.define(column::constant(11));
305+
auto el_pT_nom = lep_pT[el_sel];
306+
auto el_eta = lep_eta[el_sel];
307+
auto el_phi = lep_phi[el_sel];
308+
auto el_E_nom = lep_E[el_sel];
309+
auto el_Q = lep_Q[el_sel];
310+
auto el_type = lep_type[el_sel];
311+
// select muons
312+
auto mu_sel = lep_type == df.define(column::constant(13));
313+
auto mu_pT_nom = lep_pT[mu_sel];
314+
auto mu_eta = lep_eta[mu_sel];
315+
auto mu_phi = lep_phi[mu_sel];
316+
auto mu_E_nom = lep_E[mu_sel];
317+
auto mu_Q = lep_Q[mu_sel];
318+
auto mu_type = lep_type[mu_sel];
319+
320+
// vary the energy scale by +/-1(2)% for electrons(muons)
321+
auto el_scale = df.vary(column::expression([](VecF const &E) { return E; }),
322+
{{"el_up", [](VecF const &E) { return E * 1.01; }},
323+
{"el_dn", [](VecF const &E) { return E * 0.99; }}});
324+
auto mu_scale = df.vary(column::expression([](VecF const &E) { return E; }),
325+
{{"mu_up", [](VecF const &E) { return E * 1.02; }},
326+
{"mu_dn", [](VecF const &E) { return E * 0.98; }}});
327+
auto el_pT = el_scale(el_pT_nom);
328+
auto el_E = el_scale(el_E_nom);
329+
auto mu_pT = mu_scale(mu_pT_nom);
330+
auto mu_E = mu_scale(mu_E_nom);
331+
332+
// re-concatenate into el+mu arrays
333+
auto concat = [](VecF const &v1, VecF const &v2) {
334+
return ROOT::VecOps::Concatenate(v1, v2);
335+
};
336+
auto el_mu_pT = df.define(column::expression(concat))(el_pT, mu_pT);
337+
auto el_mu_eta = df.define(column::expression(concat))(el_eta, mu_eta);
338+
auto el_mu_phi = df.define(column::expression(concat))(el_phi, mu_phi);
339+
auto el_mu_E = df.define(column::expression(concat))(el_E, mu_E);
340+
auto el_mu_Q = df.define(column::expression(concat))(el_Q, mu_Q);
341+
auto el_mu_type = df.define(column::expression(concat))(el_type, mu_type);
342+
343+
// take sorted lepton arrays
344+
auto take = df.define(column::expression([](VecF const &v, VecUI const &is) {
345+
return ROOT::VecOps::Take(v, is);
346+
}));
347+
auto lep_indices = df.define(column::expression(
348+
[](VecF const &v) { return ROOT::VecOps::Argsort(v); }))(el_mu_pT);
349+
auto lep_pT_syst = take(el_mu_pT, lep_indices);
350+
auto lep_eta_syst = take(el_mu_eta, lep_indices);
351+
auto lep_phi_syst = take(el_mu_phi, lep_indices);
352+
auto lep_E_syst = take(el_mu_E, lep_indices);
353+
auto lep_Q_syst = take(el_mu_Q, lep_indices);
354+
auto lep_type_syst = take(el_mu_type, lep_indices);
355+
356+
// apply acceptance selections
357+
auto lep_pT_min = df.define(column::constant(15.0));
314358
auto lep_eta_max = df.define(column::constant(2.4));
315-
auto lep_selection = (lep_eta < lep_eta_max) && (lep_eta > (-lep_eta_max)) &&
316-
(lep_pt > lep_pt_min);
317-
auto lep_pt_sel = Escale(lep_pt)[lep_selection];
318-
auto lep_E_sel = Escale(lep_E)[lep_selection];
319-
auto lep_eta_sel = lep_eta[lep_selection];
320-
auto lep_phi_sel = lep_phi[lep_selection];
321-
auto lep_Q_sel = lep_Q[lep_selection];
322-
auto lep_type_sel = lep_type[lep_selection];
359+
auto lep_sel = (lep_eta_syst < lep_eta_max) &&
360+
(lep_eta_syst > (-lep_eta_max)) && (lep_pT_syst > lep_pT_min);
361+
auto lep_pT_sel = lep_pT_syst[lep_sel];
362+
auto lep_E_sel = lep_E_syst[lep_sel];
363+
auto lep_eta_sel = lep_eta_syst[lep_sel];
364+
auto lep_phi_sel = lep_phi_syst[lep_sel];
365+
auto lep_Q_sel = lep_Q_syst[lep_sel];
366+
auto lep_type_sel = lep_type_syst[lep_sel];
323367

324368
// compute (sub-)leading lepton four-momentum
325-
auto l1p4 = df.define(column::definition<NthP4>(0))(lep_pt_sel, lep_eta_sel,
369+
auto l1p4 = df.define(column::definition<NthP4>(0))(lep_pT_sel, lep_eta_sel,
326370
lep_phi_sel, lep_E_sel);
327-
auto l2p4 = df.define(column::definition<NthP4>(1))(lep_pt_sel, lep_eta_sel,
371+
auto l2p4 = df.define(column::definition<NthP4>(1))(lep_pT_sel, lep_eta_sel,
328372
lep_phi_sel, lep_E_sel);
329373

330374
// compute dilepton invariant mass & higgs transverse momentum
331375
auto llp4 = l1p4 + l2p4;
332376
auto mll =
333377
df.define(column::expression([](const P4 &p4) { return p4.M(); }))(llp4);
334-
auto higgs_pt =
378+
auto higgs_pT =
335379
df.define(column::expression([](const P4 &p4, float q, float q_phi) {
336380
TVector2 p2;
337381
p2.SetMagPhi(p4.Pt(), p4.Phi());
@@ -343,10 +387,10 @@ int main() {
343387
// compute number of leptons
344388
auto nlep_req = df.define(column::constant<unsigned int>(2));
345389
auto nlep_sel = df.define(column::expression(
346-
[](VecD const &lep) { return lep.size(); }))(lep_pt_sel);
390+
[](VecF const &lep) { return lep.size(); }))(lep_pT_sel);
347391

348392
// apply MC event weight * electron & muon scale factors
349-
auto weighted = df.weight(mc_weight * el_sf * mu_sf);
393+
auto weighted = df.weight(mc_weight * el_SF * mu_SF);
350394

351395
// require 2 opoosite-signed leptons
352396
auto cut_2l = weighted.filter(nlep_sel == nlep_req);
@@ -355,60 +399,77 @@ int main() {
355399
}))(lep_Q_sel);
356400

357401
// branch out into differet/same-flavour channels
358-
auto cut_2ldf = cut_2los.filter(column::expression([](const VecI &lep_type) {
402+
auto cut_df = cut_2los.filter(column::expression([](const VecI &lep_type) {
359403
return lep_type[0] + lep_type[1] == 24;
360404
}))(lep_type_sel);
361-
auto cut_2lsf = cut_2los.filter(column::expression([](const VecI &lep_type) {
362-
return (lep_type[0] + lep_type[1] == 22) ||
363-
(lep_type[0] + lep_type[1] == 26);
405+
auto cut_ee = cut_2los.filter(column::expression([](const VecI &lep_type) {
406+
return (lep_type[0] + lep_type[1] == 22);
407+
}))(lep_type_sel);
408+
auto cut_mm = cut_2los.filter(column::expression([](const VecI &lep_type) {
409+
return (lep_type[0] + lep_type[1] == 26);
364410
}))(lep_type_sel);
365411

366412
// apply (different) cuts for each channel
367413
auto mll_min_df = df.define(column::constant(10.0));
368-
auto cut_2ldf_presel = cut_2ldf.filter(mll > mll_min_df);
414+
auto cut_df_presel = cut_df.filter(mll > mll_min_df);
369415
auto mll_min_sf = df.define(column::constant(12.0));
370-
auto cut_2lsf_presel = cut_2lsf.filter(mll > mll_min_sf);
416+
auto cut_ee_presel = cut_ee.filter(mll > mll_min_sf);
417+
auto cut_mm_presel = cut_mm.filter(mll > mll_min_sf);
371418

372419
// merge df+sf channels
373-
// once two selections are joined, they "forget" everything upstream
374-
// i.e. need to re-apply the event weight
375-
auto cut_2los_presel = df.filter(cut_2ldf_presel || cut_2lsf_presel)
376-
.weight(mc_weight * el_sf * mu_sf);
420+
// evaluate the merged selection and apply it as a
421+
auto cut_2los_presel =
422+
cut_2los.filter(cut_df_presel || cut_ee_presel || cut_mm_presel);
377423

378424
// make histograms
379-
auto [pth_2los_presel, pth_2ldf_presel, pth_2lsf_presel] =
380-
df.get(query::output<HepQ::Hist<1, float>>("pth", 30, 0, 150))
381-
.fill(higgs_pt)
382-
.at(cut_2los_presel, cut_2ldf_presel, cut_2lsf_presel);
425+
auto [pTH_2los_presel, pTH_df_presel, pTH_ee_presel, pTH_mm_presel] =
426+
df.get(query::output<HepQ::Hist<1, float>>("pTH", 30, 0, 150))
427+
.fill(higgs_pT)
428+
.at(cut_2los, cut_df_presel, cut_ee_presel, cut_mm_presel);
383429

384430
// plot results
385431
Double_t w = 1600;
386-
Double_t h = 800;
432+
Double_t h = 1600;
387433
TCanvas c("c", "c", w, h);
388434
c.SetWindowSize(w + (w - c.GetWw()), h + (h - c.GetWh()));
389-
c.Divide(3, 1);
435+
c.Divide(2, 2);
390436
c.cd(1);
391-
pth_2los_presel.nominal()->SetLineColor(kBlack);
392-
pth_2los_presel.nominal()->Draw("ep");
393-
pth_2los_presel["eg_up"]->SetLineColor(kRed);
394-
pth_2los_presel["eg_up"]->Draw("same hist");
395-
pth_2los_presel["eg_dn"]->SetLineColor(kBlue);
396-
pth_2los_presel["eg_dn"]->Draw("same hist");
437+
pTH_2los_presel.nominal()->SetTitle("2LOS");
438+
pTH_2los_presel.nominal()->SetLineColor(kBlack);
439+
pTH_2los_presel.nominal()->Draw("ep");
440+
pTH_2los_presel["el_up"]->SetLineColor(kRed);
441+
pTH_2los_presel["el_up"]->Draw("same hist");
442+
pTH_2los_presel["mu_dn"]->SetLineColor(kBlue);
443+
pTH_2los_presel["mu_dn"]->Draw("same hist");
444+
pTH_2los_presel.nominal()->Draw("same hist");
397445
c.cd(2);
398-
pth_2ldf_presel.nominal()->SetLineColor(kBlack);
399-
pth_2ldf_presel.nominal()->Draw("ep");
400-
pth_2ldf_presel["eg_up"]->SetLineColor(kRed);
401-
pth_2ldf_presel["eg_up"]->Draw("same hist");
402-
pth_2ldf_presel["eg_dn"]->SetLineColor(kBlue);
403-
pth_2ldf_presel["eg_dn"]->Draw("same hist");
446+
pTH_df_presel.nominal()->SetTitle("2LDF");
447+
pTH_df_presel.nominal()->SetLineColor(kBlack);
448+
pTH_df_presel.nominal()->Draw("ep");
449+
pTH_df_presel["el_up"]->SetLineColor(kRed);
450+
pTH_df_presel["el_up"]->Draw("same hist");
451+
pTH_df_presel["mu_dn"]->SetLineColor(kBlue);
452+
pTH_df_presel["mu_dn"]->Draw("same hist");
453+
pTH_df_presel.nominal()->Draw("same hist");
404454
c.cd(3);
405-
pth_2lsf_presel.nominal()->SetLineColor(kBlack);
406-
pth_2lsf_presel.nominal()->Draw("ep");
407-
pth_2lsf_presel["eg_up"]->SetLineColor(kRed);
408-
pth_2lsf_presel["eg_up"]->Draw("same hist");
409-
pth_2lsf_presel["eg_dn"]->SetLineColor(kBlue);
410-
pth_2lsf_presel["eg_dn"]->Draw("same hist");
411-
c.SaveAs("pth.png");
455+
pTH_ee_presel.nominal()->SetTitle("2LSF (ee)");
456+
pTH_ee_presel.nominal()->SetLineColor(kBlack);
457+
pTH_ee_presel.nominal()->Draw("ep");
458+
pTH_ee_presel["el_up"]->SetLineColor(kRed);
459+
pTH_ee_presel["el_up"]->Draw("same hist");
460+
pTH_ee_presel["mu_dn"]->SetLineColor(kBlue);
461+
pTH_ee_presel["mu_dn"]->Draw("same hist");
462+
pTH_ee_presel.nominal()->Draw("same hist");
463+
c.cd(4);
464+
pTH_mm_presel.nominal()->SetTitle("2LSF (mm)");
465+
pTH_mm_presel.nominal()->SetLineColor(kBlack);
466+
pTH_mm_presel.nominal()->Draw("ep");
467+
pTH_mm_presel["el_up"]->SetLineColor(kRed);
468+
pTH_mm_presel["el_up"]->Draw("same hist");
469+
pTH_mm_presel["mu_dn"]->SetLineColor(kBlue);
470+
pTH_mm_presel["mu_dn"]->Draw("same hist");
471+
pTH_mm_presel.nominal()->Draw("same hist");
472+
c.SaveAs("pTH.png");
412473

413474
return 0;
414475
}

docs/pages/guide.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -96,19 +96,19 @@ auto three = one + two;
9696
auto v_0 = v[zero];
9797
// reminder: actions are *lazy*, i.e. no undefined behaviour (yet)
9898

99-
// self-assignment operators are not possible
100-
// one += two;
99+
// can be re-assigned as long as data type remains unchanged
100+
two = three - one;
101101

102102
// C++ function, functor, lambda, etc. evaluated out of input columns
103103
// tip: pass large values by const& to prevent copies
104104
auto s_length = df.define(
105105
column::expression([](const std::string &txt) { return txt.length(); }))(s);
106106
@endcpp
107107

108-
A column can also be computed through a custom definition (see @ref example-stirling for an example), which enables full control over
108+
A column can also be computed through a @ref example-stirling, which enables full control over its
109109

110110
- Customization: user-defined constructor arguments and member variables/functions.
111-
- Optimization: the computation of each input column is deferred until value is invoked.
111+
- Optimization: the computation of each input column is deferred until its value is invoked.
112112

113113
@see
114114
- queryosity::column::definition (API)
@@ -200,7 +200,7 @@ auto h2xy_c = q_2xy_c.result(); // instantaneous
200200

201201
@section guide-vary Systematic variations
202202

203-
Specifying systematic variations on a column is as simple as it can be: provide the nominal argument and a mapping of variation name to alternate arguments to queryosity::dataflow::vary() in lieu of the usual queryosity::dataflow::define().
203+
To specifying systematic variations on a column, provide the nominal argument and a mapping of variation name to alternate arguments to queryosity::dataflow::vary() instead of the usual queryosity::dataflow::define().
204204

205205
@cpp
206206
// dataset columns must be varied from the loaded dataset
@@ -250,7 +250,7 @@ systematic::get_variation_names(
250250
x, yn); // {"shift_x", "smear_x", "plus_1", "minus_1", "kill_x", "no"}
251251

252252
auto cut = df.filter(yn);
253-
auto q = df.get(column::series(x)).book(cut);
253+
auto q = df.get(column::series(x)).at(cut);
254254

255255
q.get_variation_names(); // same set as (x, yn) above
256256

examples/example-03.cxx

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,4 @@ int main() {
6767
df.define(column::definition<Factorial>(/*20*/))(n, n_f_fast, n_f_full);
6868
// time elapsed = t(n) + { t(n_fast) if n >= 10, t(n_slow) if n < 10 }
6969
// :)
70-
71-
// advanced: access per-thread instance
72-
dataflow::node::invoke([n_threshold](Factorial *n_f) { n_f->adjust_threshold(n_threshold); },
73-
n_f_best);
7470
}

0 commit comments

Comments
 (0)