Rename

taehyounpark · Nov 2, 2023 · 0e53e3e · 0e53e3e
1 parent e062a40
commit 0e53e3e
Show file tree

Hide file tree

Showing 14 changed files with 222 additions and 323 deletions.
diff --git a/docs/demo/hww/notebook.md b/docs/demo/hww/notebook.md
@@ -21,9 +21,13 @@ The following tasks will be performed:
 ## Nominal
 
 ```cpp title="Setup"
-// This specific TTree has no sub-clusters available for concurrent processing
-ana::multithread::disable(); 
-auto df = ana::dataflow<Tree>({"hww.root"}, "mini");
+
+using dataflow = ana::dataflow;
+auto df = dataflow();
+
+auto tree_files = std::vector<std::string>{"hww.root"};
+auto tree_name = "mini";
+auto ds = df.open<Tree>(tree_files, tree_name);
 ```
 ```cpp title="Read out columns"
 // std::vector-like container types with useful array operations
@@ -32,47 +36,56 @@ using VecF = ROOT::RVec<float>;
 using VecD = ROOT::RVec<double>;  // double-precision counterpart of VecF
 
 // event weights
-auto mc_weight = data.read<float>("mcWeight");
-auto el_sf = data.read<float>("scaleFactor_ELE");
-auto mu_sf = df.read<float>("scaleFactor_MUON");
+auto mc_weight = ds.read<float>("mcWeight");
+
+// scale factors
+auto [el_sf, mu_sf] = ds.read<float,float>({"scaleFactor_ELE","scaleFactor_MUON"});
 
 // lepton quantities
-auto lep_pt_MeV = df.read<VecF>("lep_pt");
-auto lep_eta = df.read<VecF>("lep_eta");
-auto lep_phi = df.read<VecF>("lep_phi");
-auto lep_E_MeV = df.read<VecF>("lep_E");
-auto lep_Q = df.read<VecF>("lep_charge");
-auto lep_type = df.read<VecUI>("lep_type");
+auto [
+  lep_pt_MeV,
+  lep_eta,
+  lep_phi,
+  lep_E_MeV,
+  lep_Q,
+  lep_type
+  ] = ds.read<
+    VecF,
+    VecF,
+    VecF,
+    VecF,
+    VecF,
+    VecUI>({
+      "lep_pt",
+      "lep_eta",
+      "lep_phi",
+      "lep_E",
+      "lep_charge",
+      "lep_type"
+      });
 
 // MET quantities
-auto met_MeV = df.read<float>("met_et");
-auto met_phi = df.read<float>("met_phi");
+auto [met_MeV, met_phi] = ds.read<float,float>({"met_et","met_phi"});
 ```
-
 ```cpp title="Convert from MeV to GeV"
 auto MeV = ana.constant(1000.0);
 auto lep_pt = lep_pt_MeV / MeV;
 auto lep_E = lep_pt_MeV / MeV;
 auto met = met_MeV / MeV;
 ```
-```cpp title="Select leptons within detector acceptance"
+```cpp title="Select leptons within acceptance"
 auto lep_eta_max = df.constant(2.4);
 auto lep_pt_sel = lep_pt[ lep_eta < lep_eta_max && lep_eta > (-lep_eta_max) ];
 ```
 ```cpp title="Compute dilepton+MET transverse momentum"
-using P4 = TLorentzVector;
-
-// first- & second-leading lepton four-momenta
-auto l1p4 = df.define<NthP4>(0)(lep_pt_sel, lep_eta_sel, lep_phi_sel, lep_E_sel);
-auto l2p4 = df.define<NthP4>(1)(lep_pt_sel, lep_eta_sel, lep_phi_sel, lep_E_sel);
+auto p4l1 = df.define<NthP4>(0)(lep_pt_sel, lep_eta_sel, lep_phi_sel, lep_E_sel);
+auto p4l2 = df.define<NthP4>(1)(lep_pt_sel, lep_eta_sel, lep_phi_sel, lep_E_sel);
 
-// dilepton four-momentum
-auto p4ll = l1p4+l2p4;
+auto p4ll = p4l1+p4l2;
 
-// dilepton invariant mass
+using P4 = TLorentzVector;
 auto mll = df.define([](const P4& p4){return p4.M();})(p4ll);
 
-// dilepton+MET(=higgs) transverse momentum
 auto pth = df.define(
   [](const P4& p4, float q, float q_phi) {
     TVector2 p2; p2.SetMagPhi(p4.Pt(), p4.Phi());
@@ -81,13 +94,12 @@ auto pth = df.define(
   })(p4ll, met, met_phi);
 ```
 ```cpp title="Apply selections"
-using weight = ana::selection::weight;
 
 auto n_lep_sel = df.define([](VecF const& lep){return lep.size();})(lep_pt_sel);
 auto n_lep_req = df.constant<unsigned int>(2);
 
 // apply event weight and require exactly two leptons
-auto cut_2l = df.filter<weight>("weight")(mc_weight * el_sf * mu_sf)\
+auto cut_2l = df.weight("weight")(mc_weight * el_sf * mu_sf)\
                 .filter("2l")(n_lep_sel == n_lep_req);
 
 // opposite-sign
@@ -129,23 +141,23 @@ delete out_file;
 
 ```cpp title="Vary columns"
 // use a different scale factor (electron vs. pileup...? purely for illustration)
-auto el_sf = df.read<float>("scaleFactor_ELE").vary("sf_var","scaleFactor_PILEUP");
+auto el_sf = ds.read<float>("scaleFactor_ELE").vary("sf_var","scaleFactor_PILEUP");
 
 // change the energy scale by +/-2%
 auto Escale = df.define([](VecD E){return E;}).vary("lp4_up",[](VecD E){return E*1.02;}).vary("lp4_dn",[](VecD E){return E*0.98;});
 auto lep_pt_sel = Escale(lep_pt)[ lep_eta < lep_eta_max && lep_eta > (-lep_eta_max) ];
 auto lep_E_sel = Escale(lep_E)[ lep_eta < lep_eta_max && lep_eta > (-lep_eta_max) ];
 ```
 ```cpp title="Everything else is the same..."
-auto l1p4 = df.define<NthP4>(0)(lep_pt, lep_eta, lep_phi, lep_E);
-auto l2p4 = df.define<NthP4>(1)(lep_pt, lep_eta, lep_phi, lep_E);
-l1p4.has_variation("lp4_up");  // true
-l1p4.has_variation("sf_var");  // false
+auto p4l1 = df.define<NthP4>(0)(lep_pt, lep_eta, lep_phi, lep_E);
+auto p4l2 = df.define<NthP4>(1)(lep_pt, lep_eta, lep_phi, lep_E);
+p4l1.has_variation("lp4_up");  // true
+p4l1.has_variation("sf_var");  // false
 
 // ...
 
-auto cut_2l = df.filter<weight>("weight")(mc_weight * el_sf * mu_sf)\
-                 .filter("2l")(n_lep_sel == n_lep_req);
+auto cut_2l = df.weight("weight")(mc_weight * el_sf * mu_sf)\
+                .filter("2l")(n_lep_sel == n_lep_req);
 cut_2l.has_variation("lp4_up");  // true
 cut_2l.has_variation("sf_var");  // true
 

diff --git a/docs/home/quickstart.md b/docs/home/quickstart.md
@@ -0,0 +1,19 @@
+```cpp
+#include "analogical.h"
+
+// create the dataflow with multithreading enabled
+auto df = ana::dataflow( ana::multithread::enable(10) );
+
+// open the dataset and read two of its columns
+auto [x, w] = df.open<Json>("data.json")\
+                .read<std::vector<float>, float>({"x", "w"});
+
+// element of x at index zero
+auto zero = df.constant(0);
+auto x0 = x[zero];
+
+// select on the size of x, then apply w as the weight
+auto mask = [](std::vector<float> const& v){return v.size();};
+auto masked = df.filter("mask",mask)(x).weight("weight")(w);
+
+// define the histogram, then book it at the selection to obtain its result
+auto hist_x0 = df.agg<Hist<float>>(axis::regular(10,0,1.0)).fill(x0);
+auto hist_x0_result = masked.book(hist_x0).result();
+
+// print the booked result (hist_x0 itself has not been booked at any selection)
+std::cout << *hist_x0_result << std::endl;
+```
diff --git a/mkdocs.yml b/mkdocs.yml
@@ -7,8 +7,9 @@ repo_name: taehyounpark/analogical
 nav:
   - Home:
     - Welcome: index.md
-    - Installation: home/installation.md
     - Design goals: home/design.md
+    - Installation: home/installation.md
+    - Quickstart: home/quickstart.md
   - Features:
       - Basic concepts: features/basic.md
       - Dataflow: features/dataflow/dataflow.md 
@@ -27,7 +28,7 @@ nav:
       - Lazy actions: features/lazy.md
       - Implicit multithreading: features/multithread.md
   - Demos:
-    - Higgs boson: demo/hww/notebook.md
+    - Higgs WW: demo/hww/notebook.md
   - API:
     - dataflow:
       - dataflow: api/dataflow/dataflow.md

diff --git a/plugins/ana/vecx.h → plugins/ana/Columnar.h b/plugins/ana/vecx.h → plugins/ana/Columnar.h
@@ -1,19 +1,16 @@
 #pragma once
 
-#include <numeric>
 #include <unordered_map>
 #include <variant>
 
 #include "ana/analogical.h"
 
-namespace {
-
 template <typename T>
-class vecx : public ana::aggregation::logic<std::vector<T>(T)> {
+class Columnar : public ana::aggregation::logic<std::vector<T>(T)> {
 
 public:
-  vecx() = default;
-  ~vecx() = default;
+  Columnar() = default;
+  ~Columnar() = default;
 
   virtual void fill(ana::observable<T>, double) override;
   virtual std::vector<T> result() const override;
@@ -24,18 +21,16 @@ class vecx : public ana::aggregation::logic<std::vector<T>(T)> {
   std::vector<T> m_result;
 };
 
-} // namespace
-
-template <typename T> void vecx<T>::fill(ana::observable<T> x, double) {
+template <typename T> void Columnar<T>::fill(ana::observable<T> x, double) { // the second (double) argument is unnamed and unused
   m_result.push_back(x.value()); // append the current value of x to the collected values
 }
 
-template <typename T> std::vector<T> vecx<T>::result() const {
+template <typename T> std::vector<T> Columnar<T>::result() const { // returns, by value, the values collected so far
   return m_result;
 }
 
 template <typename T>
-std::vector<T> vecx<T>::merge(std::vector<std::vector<T>> results) const {
+std::vector<T> Columnar<T>::merge(std::vector<std::vector<T>> results) const {
   std::vector<T> merged;
   for (const auto &result : results) {
     merged.insert(merged.end(), result.begin(), result.end());

diff --git a/plugins/ana/Histogram.h b/plugins/ana/Histogram.h
@@ -0,0 +1,61 @@
+#pragma once
+
+#include <functional>          // std::ref
+#include <memory>              // std::shared_ptr, std::make_shared
+#include <utility>
+#include <vector>              // std::vector
+
+#include <boost/histogram.hpp> // make_histogram, regular, weight, indexed
+
+#include "ana/analogical.h"
+
+using BooleanAxis = boost::histogram::axis::boolean<>;
+using IntegerAxis = boost::histogram::axis::integer<>;
+using LinearAxis = boost::histogram::axis::regular<>;
+using VariableAxis = boost::histogram::axis::variable<>;
+
+using Axis_t = boost::histogram::axis::variant<BooleanAxis, IntegerAxis,
+                                               LinearAxis, VariableAxis>;
+using Axes_t = std::vector<Axis_t>;
+using Histogram_t = boost::histogram::histogram<Axes_t>;
+
+/// Aggregation that fills a boost::histogram from one observable per entry
+/// in Cols and exposes it through a shared pointer to Histogram_t.
+template <typename... Cols>
+class Histogram
+    : public ana::aggregation::logic<std::shared_ptr<Histogram_t>(Cols...)> {
+
+public:
+  /// Build the underlying histogram from the given axes.
+  template <typename... Axes> Histogram(Axes &&...axes);
+  ~Histogram() = default;
+
+  /// Fill one entry from the current column values with weight w.
+  virtual void fill(ana::observable<Cols>... columns, double w) override;
+  /// Return the histogram held by this instance.
+  virtual std::shared_ptr<Histogram_t> result() const override;
+  /// Sum the given histograms into a newly allocated one.
+  virtual std::shared_ptr<Histogram_t>
+  merge(std::vector<std::shared_ptr<Histogram_t>> results) const override;
+
+protected:
+  std::shared_ptr<Histogram_t> m_hist; ///< histogram being filled
+};
+
+/// Construct the histogram from the supplied axes, which are forwarded to
+/// boost::histogram::make_weighted_histogram.
+template <typename... Cols>
+template <typename... Axes>
+Histogram<Cols...>::Histogram(Axes &&...axes)
+    // the factory already returns a temporary, so no std::move is needed
+    : m_hist(std::make_shared<Histogram_t>(
+          boost::histogram::make_weighted_histogram(
+              std::forward<Axes>(axes)...))) {}
+
+/// Fill the histogram once with the current value of every column, using w
+/// as the weight of the entry.
+template <typename... Cols>
+void Histogram<Cols...>::fill(ana::observable<Cols>... columns, double w) {
+  auto &hist = *m_hist;
+  hist(columns.value()..., boost::histogram::weight(w));
+}
+
+/// Hand out the histogram held by this instance; the pointer is shared, the
+/// histogram itself is not copied.
+template <typename... Cols>
+auto Histogram<Cols...>::result() const -> std::shared_ptr<Histogram_t> {
+  return this->m_hist;
+}
+
+/// Sum the given histograms into one newly allocated histogram.
+/// Returns nullptr when `results` is empty.
+template <typename... Cols>
+std::shared_ptr<Histogram_t> Histogram<Cols...>::merge(
+    std::vector<std::shared_ptr<Histogram_t>> results) const {
+  // guard: indexing the first element of an empty vector is undefined behaviour
+  if (results.empty()) {
+    return nullptr;
+  }
+  // copy the first histogram to obtain its axes, then zero the copy so that
+  // the loop below adds every input exactly once
+  auto sum = std::make_shared<Histogram_t>(*results.front());
+  sum->reset();
+  for (const auto &result : results) {
+    *sum += *result;
+  }
+  return sum;
+}
diff --git a/plugins/ana/sumw.h → plugins/ana/SumOfWeights.h b/plugins/ana/sumw.h → plugins/ana/SumOfWeights.h
@@ -8,11 +8,11 @@
 
 namespace {
 
-class sumw : public ana::aggregation::output<double> {
+class SumOfWeights : public ana::aggregation::output<double> {
 
 public:
-  sumw() = default;
-  ~sumw() = default;
+  SumOfWeights() = default;
+  ~SumOfWeights() = default;
 
   virtual void count(double w) override;
   virtual double result() const override;
@@ -24,10 +24,10 @@ class sumw : public ana::aggregation::output<double> {
 
 } // namespace
 
-void sumw::count(double w) { m_result += w; }
+void SumOfWeights::count(double w) { m_result += w; } // add w to the running total
 
-double sumw::result() const { return m_result; }
+double SumOfWeights::result() const { return m_result; } // current running total
 
-double sumw::merge(std::vector<double> results) const {
+double SumOfWeights::merge(std::vector<double> results) const { // combine the given partial totals
   return std::accumulate(results.begin(), results.end(), 0.0); // plain sum starting from 0.0
 }
diff --git a/plugins/ana/wsum.h → plugins/ana/WeightedSum.h b/plugins/ana/wsum.h → plugins/ana/WeightedSum.h
@@ -6,11 +6,11 @@
 
 #include "ana/analogical.h"
 
-class wsum : public ana::aggregation::logic<double(double)> {
+class WeightedSum : public ana::aggregation::logic<double(double)> {
 
 public:
-  wsum() = default;
-  ~wsum() = default;
+  WeightedSum() = default;
+  ~WeightedSum() = default;
 
   virtual void fill(ana::observable<double>, double) override;
   virtual double result() const override;
@@ -20,12 +20,12 @@ class wsum : public ana::aggregation::logic<double(double)> {
   double m_result;
 };
 
-void wsum::fill(ana::observable<double> x, double w) {
+// Add the value of x, multiplied by the weight w, to the running sum.
+// `inline` because this non-template definition lives in a header and would
+// otherwise be multiply defined when included from several translation units.
+inline void WeightedSum::fill(ana::observable<double> x, double w) {
   m_result += w * x.value();
 }
 
-double wsum::result() const { return m_result; }
+// Current running sum; `inline` keeps this header-defined function safe to
+// include from several translation units.
+inline double WeightedSum::result() const { return m_result; }
 
-double wsum::merge(std::vector<double> results) const {
+// Combine the given partial sums by adding them up, starting from 0.0.
+// `inline` keeps this header-defined function safe to include from several
+// translation units.
+inline double WeightedSum::merge(std::vector<double> results) const {
   return std::accumulate(results.begin(), results.end(), 0.0);
 }