Skip to content

Commit 4301a95

Browse files
authored
Benchmark of Python backends (#84)
* Python benchmark option * Benchmark LTO & floating point * Series type fix * Cuml draft * Benchmark - uncovered the faulty test
1 parent 3ad4c00 commit 4301a95

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+743
-60
lines changed

.github/workflows/build.yml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,11 @@ jobs:
110110
- name: build
111111
run: ./ci-default --compiler ${{ matrix.compiler }} --lto -j 1 ${{ matrix.options }} --native --unity --build-r
112112
- name: run demo
113-
run: ./ci-default --compiler ${{ matrix.compiler }} --lto -j 1 ${{ matrix.options }} --native --unity --build-r --run-demo
113+
run: ./ci-default --compiler ${{ matrix.compiler }} --lto ${{ matrix.options }} --native --unity --build-r --run-demo
114+
- name: benchmark across frameworks
115+
run: |
116+
pip3 install darts
117+
./ci-default --compiler ${{ matrix.compiler }} --lto ${{ matrix.options }} --native --unity --build-r --benchmark
114118
115119
build-macos:
116120
runs-on: ${{ matrix.os }}

README.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,20 @@ export LD_LIBRARY_PATH=$R_HOME/lib
152152
./tsqsim
153153
```
154154

155+
## Python backends
156+
`tsqsim` is able to wrap third-party Python time series analysis (TSA) frameworks. You may either write your own wrapper or use one of those already available.
157+
158+
### Available python backends
159+
The following Python backends are currently available:
160+
161+
| Name | Installation | Script |
162+
| ----- | ------------ | ------ |
163+
| [statsmodels](https://www.statsmodels.org/) | pip install statsmodels | scripts/py_statsmodels.py |
164+
| [darts](https://github.com/unit8co/darts) | pip install darts | scripts/py_darts.py |
165+
166+
### Extending python backends
167+
TODO: Describe how to wrap a framework's `predict` call, convert its result from a dataframe, and return a time series.
168+
155169
# Development
156170
For the development use case, it's recommended to turn on certain optimizations that reduce the recompilation and linking time when the source code changes often. The optimizations are: dynamic linking (*shared*), precompiled headers (*pch*) and (optionally) a networked parallel compiler wrapper, [icecc](https://github.com/icecc/icecream).
157171

src/lib-base/src/CLI.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ EnjoLib::Result<CLIResult> CLI::GetConfigs(int argc, char ** argv) const
2727
const char * OPT_OUT_DIR = "out";
2828
const char * OPT_LAGS = "lags";
2929
const char * OPT_PER_SEASONAL = "per-seasonal";
30+
const char * OPT_BENCHMARK = "benchmark";
31+
const char * OPT_PLOT = "plot";
3032

3133

3234
EnjoLib::ProgramOptionsState popState;
@@ -43,6 +45,9 @@ EnjoLib::Result<CLIResult> CLI::GetConfigs(int argc, char ** argv) const
4345

4446
popState.AddInt(OPT_LAGS, ConfigTS::DESCR_PLOT_LAGS_NUM);
4547
popState.AddInt(OPT_PER_SEASONAL, ConfigTS::DESCR_PLOT_PERIOD_NUM);
48+
49+
popState.AddBool(OPT_BENCHMARK, ConfigTS::DESCR_BENCHMARK);
50+
popState.AddBool(OPT_PLOT, ConfigTS::DESCR_PLOT_PYTHON);
4651

4752

4853
popState.ReadArgs(argc, argv);
@@ -91,6 +96,8 @@ EnjoLib::Result<CLIResult> CLI::GetConfigs(int argc, char ** argv) const
9196
confTS.PLOT_LAGS_NUM = pops.GetIntFromMap(OPT_LAGS);
9297
confTS.PLOT_PERIOD_NUM = pops.GetIntFromMap(OPT_PER_SEASONAL);
9398
confTS.m_outDir = pops.GetStrFromMap(OPT_OUT_DIR);
99+
confTS.BENCHMARK = pops.GetBoolFromMap(OPT_BENCHMARK);
100+
confTS.PLOT_PYTHON = pops.GetBoolFromMap(OPT_PLOT);
94101

95102
//auto pluginName = pops.GetStrFromMap (OPT_PLUGIN);
96103

src/lib-base/src/ConfigTS.cpp

Lines changed: 35 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
#include "ConfigDirs.h"
44
#include "Consts.h"
5+
#include "PriceType.h"
56

67
//#include <Util/Except.hpp>
78
#include <Ios/Osstream.hpp>
@@ -11,14 +12,20 @@
1112
using namespace std;
1213
using namespace EnjoLib;
1314

14-
const char * ConfigTS::DEFAULT_SCRIPT_FILE_NAME = "ts-xform-script.txt";
15-
const char * ConfigTS::DEFAULT_SCRIPT_FILE_NAME_R = "r-custom.R";
15+
const char * ConfigTS::DEFAULT_SCRIPT_FILE_NAME = "ts-xform-script.txt";
16+
const char * ConfigTS::DEFAULT_SCRIPT_FILE_NAME_R = "r-custom.R";
17+
const char * ConfigTS::DEFAULT_SCRIPT_FILE_NAME_PY = "py_statsmodels.py";
1618
const char * ConfigTS::DEFAULT_SCRIPT_FILE_NAME_GEN = "data-generation.txt";
1719

1820

1921
ConfigTS::~ConfigTS(){}
2022
ConfigTS::ConfigTS()
2123
{
24+
const ConfigDirs dirs;
25+
m_scriptPathTxt = dirs.DIR_SCRIPTS2 + DEFAULT_SCRIPT_FILE_NAME;
26+
m_scriptPathTxtR = dirs.DIR_SCRIPTS2 + DEFAULT_SCRIPT_FILE_NAME_R;
27+
m_scriptPathTxtGen = dirs.DIR_SCRIPTS2 + DEFAULT_SCRIPT_FILE_NAME_GEN;
28+
m_scriptPathTxtPy = dirs.DIR_SCRIPTS2 + DEFAULT_SCRIPT_FILE_NAME_PY;
2229
}
2330

2431
EnjoLib::Str ConfigTS::GetFileNameProt() const
@@ -35,17 +42,19 @@ void ConfigTS::RegisterAndReadBools(EnjoLib::Istream & f)
3542
RegisterAndReadBool(f, crashOnRecoverableErrors, 0, "Crash on err", "Crash on recoverable errors");
3643
RegisterAndReadBool(f, PLOT_SERIES, 1, "Plot series", "Plot output series after evaluation");
3744
RegisterAndReadBool(f, PLOT_BASELINE, 1, "Plot baseline", "Plot baseline prediction in QT app");
38-
RegisterAndReadBool(f, PLOT_PYTHON, 0, "Plot with Python", "Plot the series in Python backend");
45+
RegisterAndReadBool(f, PLOT_PYTHON, 0, "Plot with Python", DESCR_PLOT_PYTHON);
3946
RegisterAndReadBool(f, PLOT_PYTHON_ACF, 0, "Plot ACF with Python", "Plot the Auto Correlation Function in Python backend");
4047
RegisterAndReadBool(f, MT_XFORM, 1, "MT xform", "Perform the transformations in a multithreaded way (still unstable)");
4148
RegisterAndReadBool(f, MT_REPORT, 0, "MT report", "Generate report in multithreaded way (still unstable)");
4249
RegisterAndReadBool(f, USE_VECTOR_PRED, 1, "Opti vec pred", "Use optimized vectored prediction");
50+
RegisterAndReadBool(f, BENCHMARK, 0, "Benchmark", DESCR_BENCHMARK);
51+
4352
}
4453

4554
void ConfigTS::RegisterAndReadInts(EnjoLib::Istream & f)
4655
{
4756
RegisterAndReadInt (f, PRED_TYPE, 0);
48-
RegisterAndReadInt (f, PRICE_TYPE, 0);
57+
RegisterAndReadInt (f, PRICE_TYPE, static_cast<long int>(PriceType::HIGH));
4958
RegisterAndReadInt (f, PLOT_LAGS_NUM, 30);
5059
RegisterAndReadInt (f, PLOT_PERIOD_NUM, 30);
5160
}
@@ -54,13 +63,19 @@ void ConfigTS::RegisterAndReadFloats(EnjoLib::Istream & f)
5463
}
5564
void ConfigTS::RegisterAndReadStrs(EnjoLib::Istream & f)
5665
{
57-
const ConfigDirs dirs;
58-
RegisterAndReadStr(f, m_scriptPathTxt, dirs.DIR_SCRIPTS2 + DEFAULT_SCRIPT_FILE_NAME);
59-
RegisterAndReadStr(f, m_scriptPathTxtR, dirs.DIR_SCRIPTS2 + DEFAULT_SCRIPT_FILE_NAME_R);
60-
RegisterAndReadStr(f, m_scriptPathTxtGen, dirs.DIR_SCRIPTS2 + DEFAULT_SCRIPT_FILE_NAME_GEN);
66+
RegisterAndReadStr(f, m_scriptPathTxt, m_scriptPathTxt);
67+
RegisterAndReadStr(f, m_scriptPathTxtR, m_scriptPathTxtR);
68+
RegisterAndReadStr(f, m_scriptPathTxtGen, m_scriptPathTxtGen);
69+
RegisterAndReadStr(f, m_scriptPathTxtPy, m_scriptPathTxtPy);
6170
//RegisterAndReadStr(f, m_outDir, "");
6271
}
6372

73+
void ConfigTS::SetScriptNamePy(const EnjoLib::Str & name)
74+
{
75+
const ConfigDirs dirs;
76+
m_scriptPathTxtPy = dirs.DIR_SCRIPTS2 + name;
77+
}
78+
6479
PredictorType ConfigTS::GetPredType() const
6580
{
6681
return static_cast<PredictorType>(PRED_TYPE);
@@ -92,4 +107,16 @@ void ConfigTS::UpdateFromOther(const ConfigTS & cfgTSCmdLine)
92107
{
93108
PLOT_PERIOD_NUM = cfgTSCmdLine.PLOT_PERIOD_NUM;
94109
}
110+
if (cfgTSCmdLine.PRED_TYPE >= 0)
111+
{
112+
PRED_TYPE = cfgTSCmdLine.PRED_TYPE;
113+
}
114+
if (cfgTSCmdLine.BENCHMARK)
115+
{
116+
BENCHMARK = cfgTSCmdLine.BENCHMARK;
117+
}
118+
if (cfgTSCmdLine.PLOT_PYTHON)
119+
{
120+
PLOT_PYTHON = cfgTSCmdLine.PLOT_PYTHON;
121+
}
95122
}

src/lib-base/src/ConfigTS.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,27 +25,32 @@ class ConfigTS : public ConfigBase
2525

2626
PriceType GetPriceType() const;
2727
void SetPriceType(const PriceType & type);
28+
29+
void SetScriptNamePy(const EnjoLib::Str & name);
2830

2931
void UpdateFromOther(const ConfigTS & cfgTSCmdLine);
3032

3133

3234
static const char * DEFAULT_SCRIPT_FILE_NAME;
3335
static const char * DEFAULT_SCRIPT_FILE_NAME_R;
36+
static const char * DEFAULT_SCRIPT_FILE_NAME_PY;
3437
static const char * DEFAULT_SCRIPT_FILE_NAME_GEN;
3538

3639
EnjoLib::Str m_scriptPathTxt;
3740
EnjoLib::Str m_scriptPathTxtR;
41+
EnjoLib::Str m_scriptPathTxtPy;
3842
EnjoLib::Str m_scriptPathTxtGen;
3943
EnjoLib::Str m_outDir;
4044

4145
bool crashOnRecoverableErrors = false;
4246
bool PLOT_SERIES = true;
4347
bool PLOT_BASELINE = true;
44-
bool PLOT_PYTHON = false;
48+
bool PLOT_PYTHON = false; constexpr static const char * DESCR_PLOT_PYTHON = "Plot the series in Python backend";
4549
bool PLOT_PYTHON_ACF = false;
4650
bool MT_REPORT = false;
4751
bool MT_XFORM = false;
4852
bool USE_VECTOR_PRED = false;
53+
bool BENCHMARK = false; constexpr static const char * DESCR_BENCHMARK = "Benchmark the simulator against other frameworks";
4954

5055
long int PRED_TYPE = 0;
5156
long int PRICE_TYPE = 0;

src/lib-base/src/IPredictor.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,3 +78,8 @@ void IPredictor::AssertNoLookaheadBias(const EnjoLib::VecD & data, const EnjoLib
7878
/// TODO: eps must be a percentage!
7979
Assertions::IsTrue (Logic::DoublesEqual(predLast, predVec.Last(), 0.01), "DoublesEqual(predLast, predVec)");
8080
}
81+
82+
bool IPredictor::IsCustomScript() const
83+
{
84+
return false;
85+
}

src/lib-base/src/IPredictor.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ class IPredictor : public IOptimizable, public IHasLen
4646

4747
virtual EnjoLib::VecD AssertNoLookaheadBiasGetVec(const EnjoLib::VecD & data) const;
4848
virtual void AssertNoLookaheadBias(const EnjoLib::VecD & data, const EnjoLib::VecD & predVec) const;
49+
50+
virtual bool IsCustomScript() const;
4951

5052
/// The error returned when there's not enough data for the prediction yet.
5153
static const double ERROR;

src/lib-base/src/PredictorAR.cpp

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#include "PredictorUtil.h"
33
#include "ITSFun.h"
44

5-
//#include <Util/CoutBuf.hpp>
5+
#include <Util/CoutBuf.hpp>
66

77
using namespace EnjoLib;
88

@@ -12,21 +12,38 @@ PredictorAR::PredictorAR(const IDataProvider & dat, const EnjoLib::Str & name)
1212

1313
PredictorAR::~PredictorAR(){}
1414

15+
/*
1516
EnjoLib::VecD PredictorAR::PredictVec(const EnjoLib::VecD & data) const
1617
{
1718
const int numSamples = GetLags();
1819
const EnjoLib::VecD & predAR = PredictorUtil().Regression(numSamples, data, false);
1920
return predAR;
2021
}
21-
22+
*/
2223
double PredictorAR::PredictNext(const BufferDouble & datExpanding) const
2324
{
24-
const VecD & vec = PredictVec(datExpanding.GetData());
25-
return vec.Last();
25+
const int lags = GetLags();
26+
if (datExpanding.Len() < lags + 1)
27+
{
28+
return IPredictor::ERROR;
29+
}
30+
VecD ret;
31+
for (int lag = 0; lag < lags; ++lag)
32+
{
33+
//const EnjoLib::VecD & predVec = PredictorUtil().RegressionProt(lag, datExpanding.GetData(), datExpanding.Len(), false);
34+
//const double pred = predVec.Last();
35+
const double pred = PredictorUtil().RegressionProt(lag, datExpanding.GetData(), datExpanding.Len(), false);
36+
ret.Add(pred);
37+
//LOGL << "Pred lag " << lag << " = " << pred << ", ret = " << ret.Mean() << Nl;
38+
}
39+
return ret.Mean();
40+
41+
//const VecD & vec = PredictVec(datExpanding.GetData());
42+
//return vec.Last();
2643
}
2744

2845
unsigned PredictorAR::GetLags() const
2946
{
30-
//return 11;
47+
return 2;
3148
return GetLag1().GetVal();
3249
}

src/lib-base/src/PredictorAR.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ class PredictorAR : public PredictorBase
1010
PredictorAR(const IDataProvider & dat, const EnjoLib::Str & name = "AR");
1111
virtual ~PredictorAR();
1212

13-
EnjoLib::VecD PredictVec(const EnjoLib::VecD & data) const override;
13+
//EnjoLib::VecD PredictVec(const EnjoLib::VecD & data) const override;
1414
double PredictNext(const BufferDouble & datExpanding) const override;
1515
unsigned GetLags() const override;
1616

src/lib-base/src/PredictorARMA.cpp

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ PredictorARMA::PredictorARMA(const IDataProvider & dat)
1616
}
1717
PredictorARMA::~PredictorARMA(){}
1818

19+
/*
1920
EnjoLib::VecD PredictorARMA::PredictVec(const EnjoLib::VecD & data) const
2021
{
2122
const int numSamplesMA = GetLag1().GetVal(); // ParQ
@@ -27,11 +28,33 @@ EnjoLib::VecD PredictorARMA::PredictVec(const EnjoLib::VecD & data) const
2728
2829
return predARMA;
2930
}
30-
31+
*/
3132
double PredictorARMA::PredictNext(const BufferDouble & datExpanding) const
3233
{
33-
const VecD & vec = PredictVec(datExpanding.GetData());
34-
return vec.Last();
34+
const int lags = GetLags();
35+
if (datExpanding.Len() < lags + 1)
36+
{
37+
return IPredictor::ERROR;
38+
}
39+
const PredictorUtil util;
40+
VecD ret;
41+
for (int lag = 0; lag < lags; ++lag)
42+
{
43+
const EnjoLib::VecD & predAR = m_predAR.Predict(datExpanding.GetData());
44+
const EnjoLib::VecD & errors = util.GetErrorsCorrected(predAR, datExpanding.GetData());
45+
//const EnjoLib::VecD & predVec = PredictorUtil().RegressionProt(lag, datExpanding.GetData(), datExpanding.Len(), false);
46+
//const double pred = predVec.Last();
47+
const double pred = PredictorUtil().RegressionProt(lag, datExpanding.GetData(), datExpanding.Len(), false);
48+
ret.Add(pred);
49+
//LOGL << "Pred lag " << lag << " = " << pred << ", ret = " << ret.Mean() << Nl;
50+
}
51+
return ret.Mean();
52+
53+
//const VecD & vec = PredictVec(datExpanding.GetData());
54+
//return vec.Last();
55+
56+
//const VecD & vec = PredictVec(datExpanding.GetData());
57+
//return vec.Last();
3558
}
3659

3760
unsigned PredictorARMA::GetLags() const

0 commit comments

Comments
 (0)