taehyounpark
diff --git a/‎data/data.json
Lines changed: 162838 additions & 79998 deletions b/‎data/data.json
Lines changed: 162838 additions & 79998 deletions
diff --git a/‎docs/overrides/partials/copyright.html
Lines changed: 0 additions & 18 deletions b/‎docs/overrides/partials/copyright.html
Lines changed: 0 additions & 18 deletions
diff --git a/‎docs/pages/example.md
Lines changed: 68 additions & 31 deletions b/‎docs/pages/example.md
Lines changed: 68 additions & 31 deletions
diff --git a/‎docs/pages/guide.md
Lines changed: 43 additions & 29 deletions b/‎docs/pages/guide.md
Lines changed: 43 additions & 29 deletions
diff --git a/‎docs/stylesheets/extra.css
Lines changed: 0 additions & 3 deletions b/‎docs/stylesheets/extra.css
Lines changed: 0 additions & 3 deletions
diff --git a/‎examples/CMakeLists.txt
Lines changed: 5 additions & 0 deletions b/‎examples/CMakeLists.txt
Lines changed: 5 additions & 0 deletions
diff --git a/‎examples/example-hello_world.cxx
Lines changed: 26 additions & 28 deletions b/‎examples/example-hello_world.cxx
Lines changed: 26 additions & 28 deletions
diff --git a/‎extensions/CMakeLists.txt
Lines changed: 5 additions & 2 deletions b/‎extensions/CMakeLists.txt
Lines changed: 5 additions & 2 deletions
@@ -3,15 +3,25 @@
 
 @section example-hello-world Hello World
 
-```cpp
-#include "queryosity/json.h"
-#include "queryosity/hist.h"
+- Example row (`v` may be empty, and `w` may be zero):
 
-#include "queryosity.h"
+@code{.json}
+[
+  { "x": 98.47054757472436, "v": [ 190.07135783114677, 14.80181202905574, 2.8667177988676418 ], "w": 1.0 }
+]
+@endcode
 
+- Select entries with non-empty `v` and `x > 100.0`.
+- Fill histogram with `v[0]` weighted by `w`.
+
+@cpp
 #include <fstream>
-#include <vector>
 #include <sstream>
+#include <vector>
+
+#include "queryosity.h"
+#include "queryosity/hist.h"
+#include "queryosity/json.h"
 
 using dataflow = qty::dataflow;
 namespace multithread = qty::multithread;
@@ -24,33 +34,60 @@ using h1d = qty::hist::hist<double>;
 using linax = qty::hist::axis::regular;
 
 int main() {
-
-	dataflow df( multithread::enable() );
-
-	std::ifstream data("data.json");
-	auto [x, w] = df.read( 
-		dataset::input<json>(data), 
-		dataset::column<std::vector<double>>("x"),
-		dataset::column<double>("w") 
-		);
-
-	auto zero = df.define( column::constant(0) );
-	auto x0 = x[zero];
-
-	auto sel = df.weight(w).filter(
-		column::expression([](std::vector<double> const& v){return v.size()}), x
-		);
-
-	auto h_x0_w = df.make( 
-		query::plan<h1d>( linax(100,0.0,1.0) ) 
-		).fill(x0).book(sel).result();
-
-	std::ostringstream os;
-	os << *h_x0_w;
-	std::cout << os.str() << std::endl;
-
+  dataflow df(multithread::enable(10));
+
+  std::ifstream data("data.json");
+  auto [x, v, w] = df.read(
+      dataset::input<json>(data), dataset::column<double>("x"),
+      dataset::column<std::vector<double>>("v"), dataset::column<double>("w"));
+
+  auto zero = df.define(column::constant(0));
+  auto v0 = v[zero];
+
+  auto sel =
+      df.weight(w)
+          .filter(column::expression(
+                      [](std::vector<double> const &v) { return v.size(); }),
+                  v)
+          .filter(column::expression([](double x) { return x > 100.0; }), x);
+
+  auto h_x0_w = df.make(query::plan<h1d>(linax(20, 0.0, 200.0)))
+                    .fill(v0)
+                    .book(sel)
+                    .result();
+
+  std::ostringstream os;
+  os << *h_x0_w;
+  std::cout << os.str() << std::endl;
 }
-```
+@endcpp
+@out
+histogram(regular(20, 0, 200, options=underflow | overflow))
+                ┌────────────────────────────────────────────────────────────┐
+[-inf,   0) 0   │                                                            │
+[   0,  10) 455 │███████████████████████████████████████████████████████████ │
+[  10,  20) 432 │████████████████████████████████████████████████████████    │
+[  20,  30) 368 │███████████████████████████████████████████████▊            │
+[  30,  40) 359 │██████████████████████████████████████████████▌             │
+[  40,  50) 309 │████████████████████████████████████████▏                   │
+[  50,  60) 249 │████████████████████████████████▎                           │
+[  60,  70) 208 │███████████████████████████                                 │
+[  70,  80) 175 │██████████████████████▊                                     │
+[  80,  90) 141 │██████████████████▎                                         │
+[  90, 100) 99  │████████████▉                                               │
+[ 100, 110) 82  │██████████▋                                                 │
+[ 110, 120) 79  │██████████▎                                                 │
+[ 120, 130) 58  │███████▌                                                    │
+[ 130, 140) 40  │█████▏                                                      │
+[ 140, 150) 20  │██▋                                                         │
+[ 150, 160) 27  │███▌                                                        │
+[ 160, 170) 19  │██▌                                                         │
+[ 170, 180) 20  │██▋                                                         │
+[ 180, 190) 18  │██▍                                                         │
+[ 190, 200) 7   │▉                                                           │
+[ 200, inf) 29  │███▊                                                        │
+                └────────────────────────────────────────────────────────────┘
+@endout
 
 @section example-hww ROOT TTree with systematic variations
 
 
@@ -37,50 +37,42 @@ dataflow df(multithread::enable(), dataset::weight(1.234), dataset::head(100));
 | `dataset::weight(scale)` | Apply a global `scale` to all weights. | `1.0` |
 | `dataset::head(nrows)` | Process the first `nrows` of the dataset. | `-1` (all entries) |
 
-@section guide-dataset-reader Reading a dataset
+@section guide-dataset-reader Reading-in dataset(s)
 
 Call queryosity::dataflow::load() with an input dataset and its constructor arguments.
 The loaded dataset can then read out columns, provided their data types and names.
 
 @cpp
 using json = qty::json;
 
-// load a dataset
 std::ifstream data("data.json");
 auto ds = df.load(dataset::input<json>(data));
 
-// read a dataset column
 auto x = ds.read(dataset::column<double>("x"));
 @endcpp
 
 A dataflow can load multiple datasets, as long as all valid partitions reported by queryosity::dataset::source::partition() have the same number of total entries.
-Or, a dataset can report an empty partition, which signals that it relinquishes the control to the other datasets.
+A dataset can report an empty partition, which signals that it relinquishes control to the other datasets.
 
 @cpp
-// no need to be another json -- whatever else!
-std::ifstream more_data("more_data.json");
-auto ds_another = df.load(dataset::input<json>(more_data));
-
-// no need to be another double -- whatever else!
-auto y = ds_another.read(dataset::column<double>("y"));
-
-// shortcut: implicitly load a dataset and read out all columns at once.
-std::ifstream even_more_data("even_more_data.json");
-auto [s, v] = df.read(
-  dataset::input<json>(even_more_data),
-  dataset::column<std::string>("s"),
-  dataset::column<std::vector<double>>("v")
-  );
+using csv = qty::csv;
+
+std::ifstream data_csv("data.csv");
+auto y = df.load(dataset::input<csv>(data_csv)).read(dataset::column<double>("y"));
+
+auto z = x+y;
 @endcpp
 
 @see 
 - queryosity::dataset::input (API)
   - queryosity::dataset::source (ABC)
   - queryosity::dataset::reader (ABC)
   - queryosity::json (Extension)
+  - queryosity::csv (Extension)
 - queryosity::dataset::column (API)
   - queryosity::column::reader (ABC)
   - queryosity::json::item (Extension)
+  - queryosity::csv::cell (Extension)
 
 @section guide-column Computing quantities
 
@@ -135,21 +127,43 @@ auto v_selected = df.define(column::definition<>(), v);
 Call queryosity::dataflow::filter() or queryosity::dataflow::weight() to initiate a selection in the cutflow, and apply subsequent selections from existing nodes to compound them. 
 
 @cpp
+// -----------------------------------------------------------------------------
 // initiate a cutflow 
-auto inclusive = df.filter(c); // using an existing column as the decision
+// -----------------------------------------------------------------------------
+
+// pass all entries, apply a weight
+auto weighted = df.weight(w);
+
+// -----------------------------------------------------------------------------
+// compounding 
+// -----------------------------------------------------------------------------
+// cuts and weights can be compounded in any order.
 
-// selections can be compounded regardless of their type (cut or weight)
-auto weighted = cut.weight(
-  column::expression([](double w){return (w<0 ? 0.0: w);}), w
-  ); // using an exprssion evaluated out of input columns
+// ignore entry if weight is negative
+auto cut = weighted.filter(
+  column::expression([](double w){return (w>=0);}), w
+  );
+
+// -----------------------------------------------------------------------------
+// branching out
+// -----------------------------------------------------------------------------
+// applying more than one selection from a node creates a branching point.
+
+auto cat = ds.read<std::string>("cat");
 
-// compounding multiple selections from a common node creates a branching point
-auto cut_a = weighted.filter(a);
-auto cut_b = weighted.filter(b);
-auto cut_c = weighted.filter(c);
+auto a = df.define(column::constant<std::string>("a"));
+auto b = df.define(column::constant<std::string>("b"));
+auto c = df.define(column::constant<std::string>("c"));
+
+auto cut_a = cut.filter(cat == a);
+auto cut_b = cut.filter(cat == b);
+auto cut_c = cut.filter(cat == c);
+
+// -----------------------------------------------------------------------------
+// merging
+// -----------------------------------------------------------------------------
+// selections can be merged based on their decision values.
 
-// selections are columns whose values are their decisions along the cutflow,
-// which can also be used to evaluate new selections.
 auto cut_a_and_b = df.filter(cut_a && cut_b);
 auto cut_b_or_c = df.filter(cut_b || cut_c);
 @endcpp
 
@@ -5,4 +5,9 @@ if (QUERYOSITY_EXAMPLES)
   target_link_libraries(example-hello_world queryosity::extensions pthread ${Boost_LIBRARIES})
   add_test(NAME example-hello_world COMMAND example-hello_world)
 
+  add_executable(example-multiple_datasets ./example-multiple_datasets.cxx)
+  target_compile_features(example-multiple_datasets PUBLIC cxx_std_17)
+  target_link_libraries(example-multiple_datasets queryosity::extensions pthread ${Boost_LIBRARIES})
+  add_test(NAME example-multiple_datasets COMMAND example-multiple_datasets)
+
 endif()
@@ -1,11 +1,10 @@
-#include "queryosity/json.h"
-#include "queryosity/hist.h"
-
-#include "queryosity.h"
-
 #include <fstream>
-#include <vector>
 #include <sstream>
+#include <vector>
+
+#include "queryosity.h"
+#include "queryosity/hist.h"
+#include "queryosity/json.h"
 
 using dataflow = qty::dataflow;
 namespace multithread = qty::multithread;
@@ -17,31 +16,30 @@ using json = qty::json;
 using h1d = qty::hist::hist<double>;
 using linax = qty::hist::axis::regular;
 
-int main()
-{
-
-	dataflow df(multithread::enable(10));
+int main() {
+  dataflow df(multithread::enable(10));
 
-	std::ifstream data("data.json");
-	auto [x, w] = df.read(
-			dataset::input<json>(data),
-			dataset::column<std::vector<double>>("x_nom"),
-			dataset::column<double>("w_nom"));
+  std::ifstream data("data.json");
+  auto [x, v, w] = df.read(
+      dataset::input<json>(data), dataset::column<double>("x"),
+      dataset::column<std::vector<double>>("v"), dataset::column<double>("w"));
 
-	auto zero = df.define(column::constant(0));
-	auto x0 = x[zero];
+  auto zero = df.define(column::constant(0));
+  auto v0 = v[zero];
 
-	auto sel = df.weight(w).filter(
-			column::expression([](std::vector<double> const &v)
-												 { return v.size(); }),
-			x);
+  auto sel =
+      df.weight(w)
+          .filter(column::expression(
+                      [](std::vector<double> const &v) { return v.size(); }),
+                  v)
+          .filter(column::expression([](double x) { return x > 100.0; }), x);
 
-	auto h_x0_w = df.make(
-											query::plan<h1d>(linax(100, 0.0, 1.0)))
-										.fill(x0)
-										.book(sel)
-										.result();
+  auto h_x0_w = df.make(query::plan<h1d>(linax(20, 0.0, 200.0)))
+                    .fill(v0)
+                    .book(sel)
+                    .result();
 
-	// std::ostringstream os; os << *h_x0_w;
-	// std::cout << os.str() << std::endl;
+  std::ostringstream os;
+  os << *h_x0_w;
+  std::cout << os.str() << std::endl;
 }
@@ -25,7 +25,10 @@ include(FetchContent)
 FetchContent_Declare(json URL https://github.com/nlohmann/json/releases/download/v3.11.3/json.tar.xz)
 FetchContent_MakeAvailable(json)
 
-FetchContent_Declare(csv URL https://github.com/d99kris/rapidcsv/archive/refs/tags/v8.82.tar.gz)
+FetchContent_Declare(csv   
+  GIT_REPOSITORY https://github.com/d99kris/rapidcsv.git
+  GIT_TAG        7e87d8c
+)
 FetchContent_MakeAvailable(csv)
 
 target_include_directories(
@@ -37,5 +40,5 @@ include(ExternalProject)
 target_link_libraries(
   queryosity_extensions
   INTERFACE queryosity::queryosity
-  PUBLIC nlohmann_json::nlohmann_json ${Boost_LIBRARIES}
+  PUBLIC nlohmann_json::nlohmann_json rapidcsv ${Boost_LIBRARIES}
 )