Skip to content

Commit d74ecc0

Browse files
committed
Renaming, cleaning up & docs
1 parent 8af03b6 commit d74ecc0

25 files changed

+163925
-81087
lines changed

data/data.json

Lines changed: 162838 additions & 79998 deletions
Large diffs are not rendered by default.

docs/overrides/partials/copyright.html

Lines changed: 0 additions & 18 deletions
This file was deleted.

docs/pages/example.md

Lines changed: 68 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,25 @@
33

44
@section example-hello-world Hello World
55

6-
```cpp
7-
#include "queryosity/json.h"
8-
#include "queryosity/hist.h"
6+
- Example row (`v` may be empty, and `w` may be zero):
97

10-
#include "queryosity.h"
8+
@code{.json}
9+
[
10+
{ "x": 98.47054757472436, "v": [ 190.07135783114677, 14.80181202905574, 2.8667177988676418 ], "w": 1.0 }
11+
]
12+
@endcode
1113

14+
- Select entries with non-empty `v` and `x > 100.0`.
15+
- Fill histogram with `v[0]` weighted by `w`.
16+
17+
@cpp
1218
#include <fstream>
13-
#include <vector>
1419
#include <sstream>
20+
#include <vector>
21+
22+
#include "queryosity.h"
23+
#include "queryosity/hist.h"
24+
#include "queryosity/json.h"
1525

1626
using dataflow = qty::dataflow;
1727
namespace multithread = qty::multithread;
@@ -24,33 +34,60 @@ using h1d = qty::hist::hist<double>;
2434
using linax = qty::hist::axis::regular;
2535

2636
int main() {
27-
28-
dataflow df( multithread::enable() );
29-
30-
std::ifstream data("data.json");
31-
auto [x, w] = df.read(
32-
dataset::input<json>(data),
33-
dataset::column<std::vector<double>>("x"),
34-
dataset::column<double>("w")
35-
);
36-
37-
auto zero = df.define( column::constant(0) );
38-
auto x0 = x[zero];
39-
40-
auto sel = df.weight(w).filter(
41-
column::expression([](std::vector<double> const& v){return v.size()}), x
42-
);
43-
44-
auto h_x0_w = df.make(
45-
query::plan<h1d>( linax(100,0.0,1.0) )
46-
).fill(x0).book(sel).result();
47-
48-
std::ostringstream os;
49-
os << *h_x0_w;
50-
std::cout << os.str() << std::endl;
51-
37+
dataflow df(multithread::enable(10));
38+
39+
std::ifstream data("data.json");
40+
auto [x, v, w] = df.read(
41+
dataset::input<json>(data), dataset::column<double>("x"),
42+
dataset::column<std::vector<double>>("v"), dataset::column<double>("w"));
43+
44+
auto zero = df.define(column::constant(0));
45+
auto v0 = v[zero];
46+
47+
auto sel =
48+
df.weight(w)
49+
.filter(column::expression(
50+
[](std::vector<double> const &v) { return v.size(); }),
51+
v)
52+
.filter(column::expression([](double x) { return x > 100.0; }), x);
53+
54+
auto h_x0_w = df.make(query::plan<h1d>(linax(20, 0.0, 200.0)))
55+
.fill(v0)
56+
.book(sel)
57+
.result();
58+
59+
std::ostringstream os;
60+
os << *h_x0_w;
61+
std::cout << os.str() << std::endl;
5262
}
53-
```
63+
@endcpp
64+
@out
65+
histogram(regular(20, 0, 200, options=underflow | overflow))
66+
┌────────────────────────────────────────────────────────────┐
67+
[-inf, 0) 0 │ │
68+
[ 0, 10) 455 │███████████████████████████████████████████████████████████ │
69+
[ 10, 20) 432 │████████████████████████████████████████████████████████ │
70+
[ 20, 30) 368 │███████████████████████████████████████████████▊ │
71+
[ 30, 40) 359 │██████████████████████████████████████████████▌ │
72+
[ 40, 50) 309 │████████████████████████████████████████▏ │
73+
[ 50, 60) 249 │████████████████████████████████▎ │
74+
[ 60, 70) 208 │███████████████████████████ │
75+
[ 70, 80) 175 │██████████████████████▊ │
76+
[ 80, 90) 141 │██████████████████▎ │
77+
[ 90, 100) 99 │████████████▉ │
78+
[ 100, 110) 82 │██████████▋ │
79+
[ 110, 120) 79 │██████████▎ │
80+
[ 120, 130) 58 │███████▌ │
81+
[ 130, 140) 40 │█████▏ │
82+
[ 140, 150) 20 │██▋ │
83+
[ 150, 160) 27 │███▌ │
84+
[ 160, 170) 19 │██▌ │
85+
[ 170, 180) 20 │██▋ │
86+
[ 180, 190) 18 │██▍ │
87+
[ 190, 200) 7 │▉ │
88+
[ 200, inf) 29 │███▊ │
89+
└────────────────────────────────────────────────────────────┘
90+
@endout
5491

5592
@section example-hww ROOT TTree with systematic variations
5693

docs/pages/guide.md

Lines changed: 43 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -37,50 +37,42 @@ dataflow df(multithread::enable(), dataset::weight(1.234), dataset::head(100));
3737
| `dataset::weight(scale)` | Apply a global `scale` to all weights. | `1.0` |
3838
| `dataset::head(nrows)` | Process the first `nrows` of the dataset. | `-1` (all entries) |
3939

40-
@section guide-dataset-reader Reading a dataset
40+
@section guide-dataset-reader Reading-in dataset(s)
4141

4242
Call queryosity::dataflow::load() with an input dataset and its constructor arguments.
4343
The loaded dataset can then read out columns, provided their data types and names.
4444

4545
@cpp
4646
using json = qty::json;
4747

48-
// load a dataset
4948
std::ifstream data("data.json");
5049
auto ds = df.load(dataset::input<json>(data));
5150

52-
// read a dataset column
5351
auto x = ds.read(dataset::column<double>("x"));
5452
@endcpp
5553

5654
A dataflow can load multiple datasets, as long as all valid partitions reported by queryosity::dataset::source::partition() have the same number of total entries.
57-
Or, a dataset can report an empty partition, which signals that it relinquishes the control to the other datasets.
55+
A dataset can report an empty partition, which signals that it relinquishes control to the other datasets.
5856

5957
@cpp
60-
// no need to be another json -- whatever else!
61-
std::ifstream more_data("more_data.json");
62-
auto ds_another = df.load(dataset::input<json>(more_data));
63-
64-
// no need to be another double -- whatever else!
65-
auto y = ds_another.read(dataset::column<double>("y"));
66-
67-
// shortcut: implicitly load a dataset and read out all columns at once.
68-
std::ifstream even_more_data("even_more_data.json");
69-
auto [s, v] = df.read(
70-
dataset::input<json>(even_more_data),
71-
dataset::column<std::string>("s"),
72-
dataset::column<std::vector<double>>("v")
73-
);
58+
using csv = qty::csv;
59+
60+
std::ifstream data_csv("data.csv");
61+
auto y = df.load(dataset::input<csv>(data_csv)).read(dataset::column<double>("y"));
62+
63+
auto z = x+y;
7464
@endcpp
7565

7666
@see
7767
- queryosity::dataset::input (API)
7868
- queryosity::dataset::source (ABC)
7969
- queryosity::dataset::reader (ABC)
8070
- queryosity::json (Extension)
71+
- queryosity::csv (Extension)
8172
- queryosity::dataset::column (API)
8273
- queryosity::column::reader (ABC)
8374
- queryosity::json::item (Extension)
75+
- queryosity::csv::cell (Extension)
8476

8577
@section guide-column Computing quantities
8678

@@ -135,21 +127,43 @@ auto v_selected = df.define(column::definition<>(), v);
135127
Call queryosity::dataflow::filter() or queryosity::dataflow::weight() to initiate a selection in the cutflow, and apply subsequent selections from existing nodes to compound them.
136128

137129
@cpp
130+
// -----------------------------------------------------------------------------
138131
// initiate a cutflow
139-
auto inclusive = df.filter(c); // using an existing column as the decision
132+
// -----------------------------------------------------------------------------
133+
134+
// pass all entries, apply a weight
135+
auto weighted = df.weight(w);
136+
137+
// -----------------------------------------------------------------------------
138+
// compounding
139+
// -----------------------------------------------------------------------------
140+
// cuts and weights can be compounded in any order.
140141

141-
// selections can be compounded regardless of their type (cut or weight)
142-
auto weighted = cut.weight(
143-
column::expression([](double w){return (w<0 ? 0.0: w);}), w
144-
); // using an exprssion evaluated out of input columns
142+
// ignore entry if weight is negative
143+
auto cut = weighted.filter(
144+
column::expression([](double w){return (w>=0);}), w
145+
);
146+
147+
// -----------------------------------------------------------------------------
148+
// branching out
149+
// -----------------------------------------------------------------------------
150+
// applying more than one selection from a node creates a branching point.
151+
152+
auto cat = ds.read(dataset::column<std::string>("cat"));
145153

146-
// compounding multiple selections from a common node creates a branching point
147-
auto cut_a = weighted.filter(a);
148-
auto cut_b = weighted.filter(b);
149-
auto cut_c = weighted.filter(c);
154+
auto a = df.define(column::constant<std::string>("a"));
155+
auto b = df.define(column::constant<std::string>("b"));
156+
auto c = df.define(column::constant<std::string>("c"));
157+
158+
auto cut_a = cut.filter(cat == a);
159+
auto cut_b = cut.filter(cat == b);
160+
auto cut_c = cut.filter(cat == c);
161+
162+
// -----------------------------------------------------------------------------
163+
// merging
164+
// -----------------------------------------------------------------------------
165+
// selections can be merged based on their decision values.
150166

151-
// selections are columns whose values are their decisions along the cutflow,
152-
// which can also be used to evaluate new selections.
153167
auto cut_a_and_b = df.filter(cut_a && cut_b);
154168
auto cut_b_or_c = df.filter(cut_b || cut_c);
155169
@endcpp

docs/stylesheets/extra.css

Lines changed: 0 additions & 3 deletions
This file was deleted.

examples/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,9 @@ if (QUERYOSITY_EXAMPLES)
55
target_link_libraries(example-hello_world queryosity::extensions pthread ${Boost_LIBRARIES})
66
add_test(NAME example-hello_world COMMAND example-hello_world)
77

8+
add_executable(example-multiple_datasets ./example-multiple_datasets.cxx)
9+
target_compile_features(example-multiple_datasets PUBLIC cxx_std_17)
10+
target_link_libraries(example-multiple_datasets queryosity::extensions pthread ${Boost_LIBRARIES})
11+
add_test(NAME example-multiple_datasets COMMAND example-multiple_datasets)
12+
813
endif()

examples/example-hello_world.cxx

Lines changed: 26 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
1-
#include "queryosity/json.h"
2-
#include "queryosity/hist.h"
3-
4-
#include "queryosity.h"
5-
61
#include <fstream>
7-
#include <vector>
82
#include <sstream>
3+
#include <vector>
4+
5+
#include "queryosity.h"
6+
#include "queryosity/hist.h"
7+
#include "queryosity/json.h"
98

109
using dataflow = qty::dataflow;
1110
namespace multithread = qty::multithread;
@@ -17,31 +16,30 @@ using json = qty::json;
1716
using h1d = qty::hist::hist<double>;
1817
using linax = qty::hist::axis::regular;
1918

20-
int main()
21-
{
22-
23-
dataflow df(multithread::enable(10));
19+
int main() {
20+
dataflow df(multithread::enable(10));
2421

25-
std::ifstream data("data.json");
26-
auto [x, w] = df.read(
27-
dataset::input<json>(data),
28-
dataset::column<std::vector<double>>("x_nom"),
29-
dataset::column<double>("w_nom"));
22+
std::ifstream data("data.json");
23+
auto [x, v, w] = df.read(
24+
dataset::input<json>(data), dataset::column<double>("x"),
25+
dataset::column<std::vector<double>>("v"), dataset::column<double>("w"));
3026

31-
auto zero = df.define(column::constant(0));
32-
auto x0 = x[zero];
27+
auto zero = df.define(column::constant(0));
28+
auto v0 = v[zero];
3329

34-
auto sel = df.weight(w).filter(
35-
column::expression([](std::vector<double> const &v)
36-
{ return v.size(); }),
37-
x);
30+
auto sel =
31+
df.weight(w)
32+
.filter(column::expression(
33+
[](std::vector<double> const &v) { return v.size(); }),
34+
v)
35+
.filter(column::expression([](double x) { return x > 100.0; }), x);
3836

39-
auto h_x0_w = df.make(
40-
query::plan<h1d>(linax(100, 0.0, 1.0)))
41-
.fill(x0)
42-
.book(sel)
43-
.result();
37+
auto h_x0_w = df.make(query::plan<h1d>(linax(20, 0.0, 200.0)))
38+
.fill(v0)
39+
.book(sel)
40+
.result();
4441

45-
// std::ostringstream os; os << *h_x0_w;
46-
// std::cout << os.str() << std::endl;
42+
std::ostringstream os;
43+
os << *h_x0_w;
44+
std::cout << os.str() << std::endl;
4745
}

extensions/CMakeLists.txt

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,10 @@ include(FetchContent)
2525
FetchContent_Declare(json URL https://github.com/nlohmann/json/releases/download/v3.11.3/json.tar.xz)
2626
FetchContent_MakeAvailable(json)
2727

28-
FetchContent_Declare(csv URL https://github.com/d99kris/rapidcsv/archive/refs/tags/v8.82.tar.gz)
28+
FetchContent_Declare(csv
29+
GIT_REPOSITORY https://github.com/d99kris/rapidcsv.git
30+
GIT_TAG 7e87d8c
31+
)
2932
FetchContent_MakeAvailable(csv)
3033

3134
target_include_directories(
@@ -37,5 +40,5 @@ include(ExternalProject)
3740
target_link_libraries(
3841
queryosity_extensions
3942
INTERFACE queryosity::queryosity
40-
PUBLIC nlohmann_json::nlohmann_json ${Boost_LIBRARIES}
43+
PUBLIC nlohmann_json::nlohmann_json rapidcsv ${Boost_LIBRARIES}
4144
)

0 commit comments

Comments
 (0)