From f6f3b9954bd8506e9cbeda0bda18c422e9e0ad25 Mon Sep 17 00:00:00 2001
From: ben-j-l2f <57758400+ben-j-l2f@users.noreply.github.com>
Date: Fri, 20 Dec 2019 14:40:32 +0100
Subject: [PATCH 1/7] Update README.rst
---
README.rst | 182 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 181 insertions(+), 1 deletion(-)
diff --git a/README.rst b/README.rst
index cae0608..ba3069a 100644
--- a/README.rst
+++ b/README.rst
@@ -9,5 +9,185 @@
giotto-time
===========
-Time series analysis suite
+Machine learning based time series forecasting tools for python.
+
+Overview
+========
+
+giotto-time is a time series forecasting library in Python. The main novelties compared to traditional time series libraries are the following:
+
+* Feature creation, model selection, model assessment and prediction pipeline for time series models.
+
+* Plug-and-play availability of any scikit-learn-compatible (i.e., in the fit-transform framework) regression or classification models for forecasting.
+
+* Minimization of standard and custom loss functions for time series (SMAPE, max error, etc.).
+
+* Easy-to-use scikit-learn-familiar and pandas-familiar API.
+
+* Additionally, we provide causality tests with a scikit-learn-like transformer interface.
+
+Time Series Forecasting Model
+=============================
+
+Giotto-time provides the GAR class (Generalized Auto Regressive model). It operates in a similar way to the standard AR, but with an arbitrary number of features and with an arbitrary underlying regression model.
+
+.. raw:: html
+
+
+
+
+
+This model allows the full force of machine learning regressors (compatible with the fit-transform framework of scikit-learn) to be combined with advanced feature creation strategies to forecast time series in a convenient API.
+
+>>> from giottotime.feature_creation import FeaturesCreation
+>>> from giottotime.feature_creation.index_independent_features import ShiftFeature, MovingAverageFeature
+>>> from giottotime.model_selection.train_test_splitter import TrainTestSplitter
+>>> from giottotime.regressors import LinearRegressor
+>>> from giottotime.models.time_series_models import GAR
+>>>
+>>> time_series = get_time_series()
+>>>
+>>> features_creation = FeaturesCreation(
+>>> horizon=4,
+>>> features = [ShiftFeature(1), ShiftFeature(2), MovingAverageFeature(5)]
+>>> )
+>>>
+>>> train_test_splitter = TrainTestSplitter()
+>>> time_series_model = GAR(base_model=LinearRegressor())
+>>>
+>>> X, y = features_creation.transform(time_series)
+>>> X_train, y_train, X_test, y_test = train_test_splitter.transform(X, y)
+>>>
+>>> time_series_model.fit(X_train, y_train)
+>>> predictions = time_series_model.predict(X_test)
+
+Time Series Preparation
+=======================
+
+To transform an input array-like structure into a DataFrame with a PeriodIndex we provide the classes:
+
+* TimeSeriesPreparation
+* TimeSeriesConversion
+* SequenceToTimeIndexSeries
+* PandasSeriesToTimeIndexSeries
+* TimeIndexSeriesToPeriodIndexSeries
+
+Feature Creation
+================
+
+The following time series features are currently supported:
+
+* CalendarFeature
+* PeriodicSeasonalFeature
+* ShiftFeature
+* MovingAverageFeature
+* ConstantFeature
+* PolynomialFeature
+* ExogenousFeature
+* CustomFeature
+
+These features all have a scikit-learn-like interface and behave as transformers.
+
+The class FeatureCreation wraps a list of features together and returns the X and y matrices from a time series given as input.
+
+Time Series Trend Model
+=======================
+
+We provide main classes to analyze and remove trends from time series in order to create trend stationary time series.
+
+Specifically, giotto-time includes ExponentialTrend, PolynomialTrend model classes and de-trending transformers.
+
+>>> import numpy as np
+>>> import pandas as pd
+>>>
+>>> import matplotlib.pyplot as plt
+>>>
+>>> from giottotime.models.regressors.linear_regressor import LinearRegressor
+>>> from giottotime.loss_functions.loss_functions import max_error, smape
+>>>
+>>> from giottotime.models.trend_models.polynomial_trend import PolynomialTrend
+>>>
+>>> from math import pi
+>>>
+>>> d = pd.read_csv('trend.csv', index_col=0, parse_dates=True)
+>>> tm = PolynomialTrend(order=3)
+>>>
+>>> tm.fit(d)
+>>>
+>>> d.plot(figsize=(10, 10))
+>>> plt.show()
+>>>
+>>> detrended = tm.transform(d)
+>>>
+>>> detrended.plot(figsize=(10, 10))
+>>> plt.show()
+
+.. raw:: html
+
+
+
+  |
+  |
+
+
+
+Before applying the detrending transformer, a clear quadratic trend is present in the data. For additional information on trend stationarity, see: Trend stationarity: Wikipedia - https://en.wikipedia.org/wiki/Trend_stationary.
+
+Custom Regressors
+=================
+
+LinearRegressor is a linear regressor class that minimizes a custom loss function (compatible with all scikit-learn metrics).
+
+.. raw:: html
+
+
+
+
+
+In time series forecasting, it can be essential to minimize error metrics other than the standard R squared. Using this regressor class, it is possible to fit smape, max error and a range of other time series forecasting metrics easily with a simple interface via the GAR class.
+
+>>> from giottotime.models.regressors.linear_regressor import LinearRegressor
+>>> from giottotime.loss_functions import max_error
+>>> import numpy as np
+>>> import pandas as pd
+>>> X = np.random.random((100, 10))
+>>> y = np.random.random(100)
+>>> lr = LinearRegressor(loss=max_error)
+>>> X_train, y_train = X[:90], y[:90]
+>>> X_test, y_test = X[90:], y[90:]
+>>> x0 = [0]*11
+>>> lr.fit(X_train, y_train, x0=x0)
+>>> y_pred = lr.predict(X_test)
+
+Causality Tests
+===============
+
+We provide two tests: ShiftedLinearCoefficient and ShiftedPearsonCorrelation.
+
+These tests (which are implemented as scikit-learn compatible transformers) determine which shift of each time series maximizes the correlation to each other input time series. This is a very similar construction to the Granger test.
+
+An example use is shown below.
+
+>>> from giottotime.causality_tests.shifted_linear_coefficient import ShiftedLinearCoefficient
+>>> import pandas.util.testing as testing
+>>> data = testing.makeTimeDataFrame(freq="s")
+>>> slc = ShiftedLinearCoefficient(target_col="A")
+>>> slc.fit(data)
+>>> slc.best_shifts_
+y A B C D
+x
+A 3 6 8 5
+B 9 9 4 1
+C 8 2 4 9
+D 3 9 4 3
+>>> slc.max_corrs_
+y A B C D
+x
+A 0.460236 0.420005 0.339370 0.267143
+B 0.177856 0.300350 0.367150 0.550490
+C 0.484860 0.263036 0.456046 0.251342
+D 0.580068 0.344688 0.253626 0.256220
+
+The target_col input variable to the constructor is used in the transform method. It determines which set of shifts are applied to all inputs. For example, if 'A' is selected, each column will be transformed by a shift corresponding to the 'A' row of the best_shifts_ pivot table.
+
From b7cc240d7bbbde3cf863a78ac0d00563e438b11c Mon Sep 17 00:00:00 2001
From: Alessio Baccelli
Date: Fri, 20 Dec 2019 15:17:20 +0100
Subject: [PATCH 2/7] Fix in polynomial order (#91)
---
.../tests/test_trend_features.py | 80 +++++++++----------
.../trend_features.py | 42 +++++-----
.../models/trend_models/polynomial_trend.py | 5 +-
3 files changed, 64 insertions(+), 63 deletions(-)
diff --git a/giottotime/feature_creation/index_dependent_features/tests/test_trend_features.py b/giottotime/feature_creation/index_dependent_features/tests/test_trend_features.py
index 0b9ee92..3cf249b 100644
--- a/giottotime/feature_creation/index_dependent_features/tests/test_trend_features.py
+++ b/giottotime/feature_creation/index_dependent_features/tests/test_trend_features.py
@@ -18,26 +18,26 @@ def test_correct_index_features():
ts_t = detrend_feature.transform(ts)
expected_ts = pd.DataFrame(
[
- -2.334165e-07,
- -2.080005e-07,
- -1.825846e-07,
- -1.571686e-07,
- -1.317526e-07,
- -1.063366e-07,
- -8.092063e-08,
- -5.550465e-08,
- -3.008866e-08,
- -4.672680e-09,
- 2.074330e-08,
- 4.615929e-08,
- 7.157527e-08,
- 9.699125e-08,
- 1.224072e-07,
- 1.478232e-07,
- 1.732392e-07,
- 1.986552e-07,
- 2.240712e-07,
- 2.494872e-07,
+ 1.22681324e-05,
+ 8.34525141e-06,
+ 4.86108426e-06,
+ 1.81563099e-06,
+ -7.91108403e-07,
+ -2.95913392e-06,
+ -4.68844555e-06,
+ -5.97904330e-06,
+ -6.83092717e-06,
+ -7.24409716e-06,
+ -7.21855327e-06,
+ -6.75429551e-06,
+ -5.85132385e-06,
+ -4.50963832e-06,
+ -2.72923891e-06,
+ -5.10125625e-07,
+ 2.14770155e-06,
+ 5.24424260e-06,
+ 8.77949753e-06,
+ 1.27534663e-05,
],
columns=[output_name],
index=time_index,
@@ -54,26 +54,26 @@ def test_correct_polynomial_trend():
ts_t = detrend_feature.transform(ts)
expected_ts = pd.DataFrame(
[
- 1.226813e-05,
- 8.345251e-06,
- 4.861084e-06,
- 1.815631e-06,
- -7.911084e-07,
- -2.959134e-06,
- -4.688446e-06,
- -5.979043e-06,
- -6.830927e-06,
- -7.244097e-06,
- -7.218553e-06,
- -6.754296e-06,
- -5.851324e-06,
- -4.509638e-06,
- -2.729239e-06,
- -5.101256e-07,
- 2.147702e-06,
- 5.244243e-06,
- 8.779498e-06,
- 1.275347e-05,
+ -6.59832896e-04,
+ -2.39271025e-04,
+ 6.38171382e-05,
+ 2.63303644e-04,
+ 3.73060540e-04,
+ 4.06959876e-04,
+ 3.78873701e-04,
+ 3.02674064e-04,
+ 1.92233012e-04,
+ 6.14225966e-05,
+ -7.58851350e-05,
+ -2.05818134e-04,
+ -3.14504350e-04,
+ -3.88071736e-04,
+ -4.12648241e-04,
+ -3.74361819e-04,
+ -2.59340418e-04,
+ -5.37119912e-05,
+ 2.56395511e-04,
+ 6.84854138e-04,
],
columns=[output_name],
index=time_index,
diff --git a/giottotime/feature_creation/index_dependent_features/trend_features.py b/giottotime/feature_creation/index_dependent_features/trend_features.py
index 442e991..c29c4a7 100644
--- a/giottotime/feature_creation/index_dependent_features/trend_features.py
+++ b/giottotime/feature_creation/index_dependent_features/trend_features.py
@@ -38,16 +38,16 @@ class DetrendedFeature(IndexDependentFeature):
>>> X = pd.DataFrame(range(0, 10), index=time_index)
>>> detrend_feature.transform(X)
DetrendedFeature
- 2020-01-01 9.180937e-07
- 2020-01-02 8.020709e-07
- 2020-01-03 6.860481e-07
- 2020-01-04 5.700253e-07
- 2020-01-05 4.540024e-07
- 2020-01-06 3.379796e-07
- 2020-01-07 2.219568e-07
- 2020-01-08 1.059340e-07
- 2020-01-09 -1.008878e-08
- 2020-01-10 -1.261116e-07
+ 2020-01-01 2.092234e-06
+ 2020-01-02 6.590209e-07
+ 2020-01-03 -4.104701e-07
+ 2020-01-04 -1.116238e-06
+ 2020-01-05 -1.458284e-06
+ 2020-01-06 -1.436607e-06
+ 2020-01-07 -1.051207e-06
+ 2020-01-08 -3.020852e-07
+ 2020-01-09 8.107597e-07
+ 2020-01-10 2.287327e-06
"""
@@ -101,22 +101,22 @@ class RemovePolynomialTrend(DetrendedFeature):
>>> X = pd.DataFrame(range(0, 10), index=time_index)
>>> detrend_feature.transform(X)
RemovePolynomialTrend
- 2020-01-01 -0.000036
- 2020-01-02 0.000022
- 2020-01-03 0.000042
- 2020-01-04 0.000035
- 2020-01-05 0.000012
- 2020-01-06 -0.000016
- 2020-01-07 -0.000037
- 2020-01-08 -0.000040
- 2020-01-09 -0.000015
- 2020-01-10 0.000050
+ 2020-01-01 0.000969
+ 2020-01-02 -0.001216
+ 2020-01-03 -0.000915
+ 2020-01-04 0.000201
+ 2020-01-05 0.001021
+ 2020-01-06 0.000995
+ 2020-01-07 0.000133
+ 2020-01-08 -0.000993
+ 2020-01-09 -0.001249
+ 2020-01-10 0.001059
"""
def __init__(
self,
- polynomial_order: int = 1,
+ polynomial_order: int = 2,
loss: Callable = mean_squared_error,
output_name: str = "RemovePolynomialTrend",
):
diff --git a/giottotime/models/trend_models/polynomial_trend.py b/giottotime/models/trend_models/polynomial_trend.py
index ec13664..b6e71f7 100644
--- a/giottotime/models/trend_models/polynomial_trend.py
+++ b/giottotime/models/trend_models/polynomial_trend.py
@@ -58,7 +58,7 @@ def prediction_loss(weights: np.ndarray) -> float:
predictions = [p(t) for t in range(0, ts.shape[0])]
return self.loss(ts.values, predictions)
- model_weights = np.zeros(self.order)
+ model_weights = np.zeros(self.order + 1)
res = minimize(
prediction_loss, model_weights, method=self.method, options={"disp": False}
@@ -114,8 +114,9 @@ def transform(self, ts: pd.DataFrame) -> pd.DataFrame:
The transformed time series, without the trend.
"""
- p = np.poly1d(self.model_weights_)
+ check_is_fitted(self)
+ p = np.poly1d(self.model_weights_)
time_steps = (ts.index - self.t0_) / self.period_
predictions = pd.Series(index=ts.index, data=[p(t) for t in time_steps])
From 2273cb84254b340910c9e87f9dc8f9f7686d6ff8 Mon Sep 17 00:00:00 2001
From: Stefano
Date: Thu, 19 Dec 2019 11:17:34 +0100
Subject: [PATCH 3/7] Added examples folder with README
Signed-off-by: Stefano
---
examples/README.rst | 27 +++++++++++++++++++++++++++
1 file changed, 27 insertions(+)
create mode 100644 examples/README.rst
diff --git a/examples/README.rst b/examples/README.rst
new file mode 100644
index 0000000..51189c4
--- /dev/null
+++ b/examples/README.rst
@@ -0,0 +1,27 @@
+.. image:: https://www.giotto.ai/static/vector/logo.svg
+ :width: 850
+
+Examples and Tutorials
+======================
+
+In this folder you can find basic tutorials and examples: you can read through them to
+understand how `giotto-time` works.
+
+Quick start
+-----------
+
+This tutorial is about giving an overview on the basic features of `giotto-time`.
+You will learn how to train a simple time series model with custom features.
+Some considerations on input-output are presented.
+
+Details and advanced features
+-----------------------------
+
+This tutorial details more advanced features of `giotto-time`.
+You will learn more details on the feature generation and on custom linear regressor
+model for time series forecasting.
+
+Causality Tests
+---------------
+
+This tutorial details the causality tests built-in in `giotto-time`.
From b311189399bc2686047df6cc486680706cc7d7d4 Mon Sep 17 00:00:00 2001
From: Stefano
Date: Thu, 19 Dec 2019 15:59:46 +0100
Subject: [PATCH 4/7] Bug fix in time series preparation
Signed-off-by: Stefano
---
.../tests/test_time_series_preparation.py | 33 +++++++++++++++++++
.../time_series_preparation.py | 12 ++++---
2 files changed, 41 insertions(+), 4 deletions(-)
diff --git a/giottotime/time_series_preparation/tests/test_time_series_preparation.py b/giottotime/time_series_preparation/tests/test_time_series_preparation.py
index 33e8821..c455320 100644
--- a/giottotime/time_series_preparation/tests/test_time_series_preparation.py
+++ b/giottotime/time_series_preparation/tests/test_time_series_preparation.py
@@ -128,6 +128,39 @@ def test_wrong_input_type(self, wrong_input: Tuple):
with pytest.raises(TypeError):
time_series_preparation._to_time_index_series(wrong_input)
+ @given(series_with_period_index(), st.datetimes(), available_freqs())
+ def test_period_index_dataframe_unchanged(
+ self, period_index_series: pd.Series, start: pd.datetime, freq: pd.Timedelta,
+ ):
+ period_index_dataframe = pd.DataFrame(period_index_series)
+ time_series_preparation = TimeSeriesPreparation(start=start, freq=freq)
+ computed_time_series = time_series_preparation._to_time_index_series(
+ period_index_dataframe
+ )
+ assert_series_equal(computed_time_series, period_index_series)
+
+ @given(series_with_datetime_index(), st.datetimes(), available_freqs())
+ def test_datetime_index_dataframe_unchanged(
+ self, datetime_index_series: pd.Series, start: pd.datetime, freq: pd.Timedelta,
+ ):
+ datetime_index_dataframe = pd.DataFrame(datetime_index_series)
+ time_series_preparation = TimeSeriesPreparation(start=start, freq=freq)
+ computed_time_series = time_series_preparation._to_time_index_series(
+ datetime_index_dataframe
+ )
+ assert_series_equal(computed_time_series, datetime_index_series)
+
+ @given(series_with_timedelta_index(), st.datetimes(), available_freqs())
+ def test_timedelta_index_dataframe_unchanged(
+ self, timedelta_index_series: pd.Series, start: pd.datetime, freq: pd.Timedelta,
+ ):
+ timedelta_index_dataframe = pd.DataFrame(timedelta_index_series)
+ time_series_preparation = TimeSeriesPreparation(start=start, freq=freq)
+ computed_time_series = time_series_preparation._to_time_index_series(
+ timedelta_index_dataframe
+ )
+ assert_series_equal(computed_time_series, timedelta_index_series)
+
class TestToEquispacedTimeSeries:
@given(
diff --git a/giottotime/time_series_preparation/time_series_preparation.py b/giottotime/time_series_preparation/time_series_preparation.py
index 2e909e6..7b63914 100644
--- a/giottotime/time_series_preparation/time_series_preparation.py
+++ b/giottotime/time_series_preparation/time_series_preparation.py
@@ -78,13 +78,15 @@ def __init__(
self.freq
)
- def transform(self, time_series: Union[List, np.array, pd.Series]) -> pd.DataFrame:
+ def transform(
+ self, time_series: Union[List, np.array, pd.Series, pd.DataFrame]
+ ) -> pd.DataFrame:
"""Transforms an array-like sequence in a period-index DataFrame with a single
column.
Parameters
----------
- time_series : Union[List, np.array, pd.Series], required
+ time_series : Union[List, np.array, pd.Series, pd.DataFrame], required
The input time series.
Returns
@@ -104,9 +106,11 @@ def transform(self, time_series: Union[List, np.array, pd.Series]) -> pd.DataFra
return period_index_dataframe
def _to_time_index_series(
- self, array_like_object: Union[List, np.array, pd.Series]
+ self, array_like_object: Union[List, np.array, pd.Series, pd.DataFrame]
) -> pd.Series:
- if isinstance(array_like_object, pd.Series):
+ if isinstance(array_like_object, pd.DataFrame):
+ return self.pandas_converter.transform(array_like_object.iloc[:, 0])
+ elif isinstance(array_like_object, pd.Series):
return self.pandas_converter.transform(array_like_object)
elif any(
isinstance(array_like_object, type_) for type_ in SUPPORTED_SEQUENCE_TYPES
From 323269fd25791b2b21ad83a302e84a743d14ea20 Mon Sep 17 00:00:00 2001
From: Stefano
Date: Thu, 19 Dec 2019 17:19:43 +0100
Subject: [PATCH 5/7] Added quick-start notebook
Signed-off-by: Stefano
---
examples/quick-start.ipynb | 577 +++++++++++++++++++++++++++++++++++++
1 file changed, 577 insertions(+)
create mode 100644 examples/quick-start.ipynb
diff --git a/examples/quick-start.ipynb b/examples/quick-start.ipynb
new file mode 100644
index 0000000..c24d038
--- /dev/null
+++ b/examples/quick-start.ipynb
@@ -0,0 +1,577 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2019-12-19T10:34:45.079040Z",
+ "start_time": "2019-12-19T10:34:45.075853Z"
+ }
+ },
+ "source": [
+ "# Giotto-Time\n",
+ "\n",
+ "Welcome to `giotto-time`, our new library for time series forecasting!\n",
+ "\n",
+ "Let's start with an example."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2019-12-19T10:37:13.829605Z",
+ "start_time": "2019-12-19T10:37:13.827033Z"
+ }
+ },
+ "source": [
+ "## First example"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2019-12-19T10:51:37.701263Z",
+ "start_time": "2019-12-19T10:51:37.698686Z"
+ }
+ },
+ "source": [
+ "### Ingredients"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2019-12-19T10:43:03.249232Z",
+ "start_time": "2019-12-19T10:43:03.244743Z"
+ }
+ },
+ "source": [
+ "These are the main ingredients of `giotto-time`:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2019-12-19T14:12:59.958832Z",
+ "start_time": "2019-12-19T14:12:59.307286Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "from giottotime.time_series_preparation import TimeSeriesPreparation\n",
+ "from giottotime.feature_creation import FeatureCreation, ShiftFeature, MovingAverageFeature\n",
+ "from giottotime.model_selection import FeatureSplitter\n",
+ "from giottotime.models import GAR"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2019-12-19T10:43:23.300668Z",
+ "start_time": "2019-12-19T10:43:23.100775Z"
+ }
+ },
+ "source": [
+ "- `TimeSeriesPreparation`: checks the input format of the time series and converts it to the expected format.\n",
+ "- `FeatureCreation`, `ShiftFeature`, `MovingAverageFeature`: create the desired features on the time series that are \n",
+ " used for the forecasting.\n",
+ "- `FeatureSplitter`: prepares the custom `giotto-time` train-test matrices that are used in the model\n",
+ "- `GAR`: generalized-auto-regressive model. This is the only time series model that we provide for the first release."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2019-12-19T10:50:42.797962Z",
+ "start_time": "2019-12-19T10:50:42.792529Z"
+ }
+ },
+ "source": [
+ "We also need a `scikit-learn`-model. We go for a standard linear regressor for this example"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2019-12-19T14:13:00.294607Z",
+ "start_time": "2019-12-19T14:13:00.291612Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "from sklearn.linear_model import LinearRegression"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2019-12-19T10:51:15.298065Z",
+ "start_time": "2019-12-19T10:51:15.295733Z"
+ }
+ },
+ "source": [
+ "### Data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2019-12-19T10:55:55.362286Z",
+ "start_time": "2019-12-19T10:55:55.358045Z"
+ }
+ },
+ "source": [
+ "We use the `pandas.testing` module to create a testing time series"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2019-12-19T14:13:00.964858Z",
+ "start_time": "2019-12-19T14:13:00.961460Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "def test_time_series():\n",
+ " from pandas.util import testing as testing\n",
+ " \n",
+ " testing.N, testing.K = 500, 1\n",
+ " df = testing.makeTimeDataFrame( freq=\"D\" )\n",
+ " return df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2019-12-19T14:13:01.303804Z",
+ "start_time": "2019-12-19T14:13:01.299245Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "time_series = test_time_series()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2019-12-19T13:37:09.941132Z",
+ "start_time": "2019-12-19T13:37:09.938476Z"
+ }
+ },
+ "source": [
+ "### Time Series Preparation"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2019-12-19T13:37:33.357619Z",
+ "start_time": "2019-12-19T13:37:33.347192Z"
+ }
+ },
+ "source": [
+ "The input time series has to be a `pandas.DataFrame` with a `PeriodIndex`. Use the provided class `TimeSeriesPreparation` to convert the time series in this format"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2019-12-19T14:13:02.320075Z",
+ "start_time": "2019-12-19T14:13:02.317384Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "time_series_preparation = TimeSeriesPreparation()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2019-12-19T14:13:04.685763Z",
+ "start_time": "2019-12-19T14:13:04.681195Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "period_index_time_series = time_series_preparation.transform(time_series)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2019-12-19T14:15:31.332440Z",
+ "start_time": "2019-12-19T14:15:31.322583Z"
+ }
+ },
+ "source": [
+ "### Feature Creation"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2019-12-19T14:20:25.312078Z",
+ "start_time": "2019-12-19T14:20:25.307741Z"
+ }
+ },
+ "source": [
+ "The feature creation part is one of the core part of our library and the bridge between traditional time series forecasting techniques and machine learning.\n",
+ "\n",
+ "Starting with a time series in a `pandas.DataFrame`, we create two matrices `X` and `y` which can be used for training and testing."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2019-12-19T14:49:59.729021Z",
+ "start_time": "2019-12-19T14:49:59.718573Z"
+ }
+ },
+ "source": [
+ "We provide 12 different features. For simplicity we train a model using only `ShiftFeature` and `MovingAverageFeature`. \n",
+ "\n",
+ "`ShiftFeature` provides a temporal shift of the time series. Adding two `ShiftFeature` with shifts 1 and 2 is equivalent to an `AR(2)` model. \n",
+ "\n",
+ "The possibility to add the features that you want allows you to choose the model that best fits your data."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2019-12-19T14:17:42.856996Z",
+ "start_time": "2019-12-19T14:17:42.853237Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "features = [\n",
+ " ShiftFeature(1, output_name='shift_1'),\n",
+ " ShiftFeature(2, output_name='shift_2'),\n",
+ " MovingAverageFeature(3, output_name='moving_average_3'),\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2019-12-19T14:18:17.215100Z",
+ "start_time": "2019-12-19T14:18:17.211908Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "feature_creation = FeatureCreation(time_series_features=features, horizon=3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2019-12-19T14:21:07.990558Z",
+ "start_time": "2019-12-19T14:21:07.970206Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "features_X, features_y = feature_creation.fit_transform(period_index_time_series)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2019-12-19T14:42:32.549572Z",
+ "start_time": "2019-12-19T14:42:32.547124Z"
+ }
+ },
+ "source": [
+ "### Train-Test split"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2019-12-19T14:43:02.820280Z",
+ "start_time": "2019-12-19T14:43:02.817384Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "feature_splitter = FeatureSplitter()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2019-12-19T14:43:40.401560Z",
+ "start_time": "2019-12-19T14:43:40.380814Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "X_train, y_train, X_test, y_test = feature_splitter.transform(features_X, features_y)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2019-12-19T14:44:02.820817Z",
+ "start_time": "2019-12-19T14:44:02.818276Z"
+ }
+ },
+ "source": [
+ "### Training"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2019-12-19T14:45:09.230395Z",
+ "start_time": "2019-12-19T14:45:09.227402Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "model = GAR(base_model=LinearRegression())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2019-12-19T14:45:09.467974Z",
+ "start_time": "2019-12-19T14:45:09.458956Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "model = model.fit(X_train, y_train)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2019-12-19T14:45:20.428649Z",
+ "start_time": "2019-12-19T14:45:20.414290Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "predictions = model.predict(X_test)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2019-12-19T15:49:35.236013Z",
+ "start_time": "2019-12-19T15:49:35.225037Z"
+ },
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " y_0 | \n",
+ " y_1 | \n",
+ " y_2 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 2001-05-13 | \n",
+ " 0.498604 | \n",
+ " -1.11394 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2001-05-14 | \n",
+ " -1.113940 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " y_0 y_1 y_2\n",
+ "2001-05-13 0.498604 -1.11394 NaN\n",
+ "2001-05-14 -1.113940 NaN NaN"
+ ]
+ },
+ "execution_count": 36,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "y_test"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2019-12-19T14:45:22.160170Z",
+ "start_time": "2019-12-19T14:45:22.152303Z"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " y_0 | \n",
+ " y_1 | \n",
+ " y_2 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 2001-05-13 | \n",
+ " 0.168957 | \n",
+ " 0.024196 | \n",
+ " 0.083588 | \n",
+ "
\n",
+ " \n",
+ " 2001-05-14 | \n",
+ " 0.019240 | \n",
+ " 0.075777 | \n",
+ " 0.110530 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " y_0 y_1 y_2\n",
+ "2001-05-13 0.168957 0.024196 0.083588\n",
+ "2001-05-14 0.019240 0.075777 0.110530"
+ ]
+ },
+ "execution_count": 33,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "predictions"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.9"
+ },
+ "toc": {
+ "base_numbering": 1,
+ "nav_menu": {},
+ "number_sections": true,
+ "sideBar": true,
+ "skip_h1_title": false,
+ "title_cell": "Table of Contents",
+ "title_sidebar": "Contents",
+ "toc_cell": false,
+ "toc_position": {},
+ "toc_section_display": true,
+ "toc_window_display": false
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
From bee8c3f440f8fe56413c175f8a453b912dd6f465 Mon Sep 17 00:00:00 2001
From: Stefano
Date: Fri, 20 Dec 2019 12:13:50 +0100
Subject: [PATCH 6/7] Basic example done
Signed-off-by: Stefano
---
examples/quick-start.ipynb | 233 +++++++++++++++++++------------------
1 file changed, 118 insertions(+), 115 deletions(-)
diff --git a/examples/quick-start.ipynb b/examples/quick-start.ipynb
index c24d038..ad099c0 100644
--- a/examples/quick-start.ipynb
+++ b/examples/quick-start.ipynb
@@ -57,8 +57,8 @@
"execution_count": 1,
"metadata": {
"ExecuteTime": {
- "end_time": "2019-12-19T14:12:59.958832Z",
- "start_time": "2019-12-19T14:12:59.307286Z"
+ "end_time": "2019-12-20T11:08:40.433188Z",
+ "start_time": "2019-12-20T11:08:39.863805Z"
}
},
"outputs": [],
@@ -102,8 +102,8 @@
"execution_count": 2,
"metadata": {
"ExecuteTime": {
- "end_time": "2019-12-19T14:13:00.294607Z",
- "start_time": "2019-12-19T14:13:00.291612Z"
+ "end_time": "2019-12-20T11:08:41.268423Z",
+ "start_time": "2019-12-20T11:08:41.265378Z"
}
},
"outputs": [],
@@ -140,8 +140,8 @@
"execution_count": 3,
"metadata": {
"ExecuteTime": {
- "end_time": "2019-12-19T14:13:00.964858Z",
- "start_time": "2019-12-19T14:13:00.961460Z"
+ "end_time": "2019-12-20T11:08:42.074384Z",
+ "start_time": "2019-12-20T11:08:42.070697Z"
}
},
"outputs": [],
@@ -159,8 +159,8 @@
"execution_count": 4,
"metadata": {
"ExecuteTime": {
- "end_time": "2019-12-19T14:13:01.303804Z",
- "start_time": "2019-12-19T14:13:01.299245Z"
+ "end_time": "2019-12-20T11:08:42.366492Z",
+ "start_time": "2019-12-20T11:08:42.361791Z"
}
},
"outputs": [],
@@ -197,8 +197,8 @@
"execution_count": 5,
"metadata": {
"ExecuteTime": {
- "end_time": "2019-12-19T14:13:02.320075Z",
- "start_time": "2019-12-19T14:13:02.317384Z"
+ "end_time": "2019-12-20T11:08:43.161252Z",
+ "start_time": "2019-12-20T11:08:43.158360Z"
}
},
"outputs": [],
@@ -208,11 +208,11 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 6,
"metadata": {
"ExecuteTime": {
- "end_time": "2019-12-19T14:13:04.685763Z",
- "start_time": "2019-12-19T14:13:04.681195Z"
+ "end_time": "2019-12-20T11:08:43.428293Z",
+ "start_time": "2019-12-20T11:08:43.421929Z"
}
},
"outputs": [],
@@ -264,11 +264,11 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 7,
"metadata": {
"ExecuteTime": {
- "end_time": "2019-12-19T14:17:42.856996Z",
- "start_time": "2019-12-19T14:17:42.853237Z"
+ "end_time": "2019-12-20T11:08:44.450001Z",
+ "start_time": "2019-12-20T11:08:44.446647Z"
}
},
"outputs": [],
@@ -282,11 +282,11 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 8,
"metadata": {
"ExecuteTime": {
- "end_time": "2019-12-19T14:18:17.215100Z",
- "start_time": "2019-12-19T14:18:17.211908Z"
+ "end_time": "2019-12-20T11:08:44.737915Z",
+ "start_time": "2019-12-20T11:08:44.734648Z"
}
},
"outputs": [],
@@ -296,11 +296,11 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 9,
"metadata": {
"ExecuteTime": {
- "end_time": "2019-12-19T14:21:07.990558Z",
- "start_time": "2019-12-19T14:21:07.970206Z"
+ "end_time": "2019-12-20T11:08:45.045070Z",
+ "start_time": "2019-12-20T11:08:45.022402Z"
}
},
"outputs": [],
@@ -320,13 +320,25 @@
"### Train-Test split"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2019-12-20T10:59:18.112521Z",
+ "start_time": "2019-12-20T10:59:18.108823Z"
+ }
+ },
+ "source": [
+ "We use `FeatureSplitter` to split the matrices X and y into train and test sets. "
+ ]
+ },
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 10,
"metadata": {
"ExecuteTime": {
- "end_time": "2019-12-19T14:43:02.820280Z",
- "start_time": "2019-12-19T14:43:02.817384Z"
+ "end_time": "2019-12-20T11:08:45.885739Z",
+ "start_time": "2019-12-20T11:08:45.882557Z"
}
},
"outputs": [],
@@ -336,11 +348,11 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 11,
"metadata": {
"ExecuteTime": {
- "end_time": "2019-12-19T14:43:40.401560Z",
- "start_time": "2019-12-19T14:43:40.380814Z"
+ "end_time": "2019-12-20T11:08:46.240108Z",
+ "start_time": "2019-12-20T11:08:46.221414Z"
}
},
"outputs": [],
@@ -360,13 +372,31 @@
"### Training"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2019-12-20T11:01:12.922844Z",
+ "start_time": "2019-12-20T11:01:12.919591Z"
+ }
+ },
+ "source": [
+ "We provide a `GAR` (Generalized Auto Regressive) model to forecast the time series.\n",
+ "\n",
+ "The traditional `AR` model is equivalent to our `GAR` model that uses only `ShiftFeature` columns in the `X` matrix.\n",
+ "`GAR` supports all the features compatible with the feature creation step.\n",
+ "\n",
+ "Moreover, `GAR` internally uses a `scikit-learn` compatible model for the internal time series regression. \n",
+ "In this example we use `LinearRegression`. In principle, all the `fit-transform-predict` models are compatible (e.g. ridge regression, random forest, boosting, etc.). "
+ ]
+ },
{
"cell_type": "code",
- "execution_count": 30,
+ "execution_count": 12,
"metadata": {
"ExecuteTime": {
- "end_time": "2019-12-19T14:45:09.230395Z",
- "start_time": "2019-12-19T14:45:09.227402Z"
+ "end_time": "2019-12-20T11:08:47.555831Z",
+ "start_time": "2019-12-20T11:08:47.553017Z"
}
},
"outputs": [],
@@ -376,11 +406,11 @@
},
{
"cell_type": "code",
- "execution_count": 31,
+ "execution_count": 13,
"metadata": {
"ExecuteTime": {
- "end_time": "2019-12-19T14:45:09.467974Z",
- "start_time": "2019-12-19T14:45:09.458956Z"
+ "end_time": "2019-12-20T11:08:48.059122Z",
+ "start_time": "2019-12-20T11:08:48.050062Z"
}
},
"outputs": [],
@@ -389,95 +419,54 @@
]
},
{
- "cell_type": "code",
- "execution_count": 32,
+ "cell_type": "markdown",
"metadata": {
"ExecuteTime": {
- "end_time": "2019-12-19T14:45:20.428649Z",
- "start_time": "2019-12-19T14:45:20.414290Z"
+ "end_time": "2019-12-20T11:01:01.280526Z",
+ "start_time": "2019-12-20T11:01:01.278125Z"
}
},
- "outputs": [],
"source": [
- "predictions = model.predict(X_test)"
+ "### Forecasting"
]
},
{
- "cell_type": "code",
- "execution_count": 36,
+ "cell_type": "markdown",
"metadata": {
"ExecuteTime": {
- "end_time": "2019-12-19T15:49:35.236013Z",
- "start_time": "2019-12-19T15:49:35.225037Z"
- },
- "scrolled": true
+ "end_time": "2019-12-20T11:10:02.544672Z",
+ "start_time": "2019-12-20T11:10:02.540859Z"
+ }
},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " y_0 | \n",
- " y_1 | \n",
- " y_2 | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 2001-05-13 | \n",
- " 0.498604 | \n",
- " -1.11394 | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 2001-05-14 | \n",
- " -1.113940 | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " y_0 y_1 y_2\n",
- "2001-05-13 0.498604 -1.11394 NaN\n",
- "2001-05-14 -1.113940 NaN NaN"
- ]
- },
- "execution_count": 36,
- "metadata": {},
- "output_type": "execute_result"
+ "source": [
+ "We forecast 3 time steps of the time series (we set this parameter in `FeatureCreation`).\n",
+ "\n",
+ "The format of the output is the following:\n",
+ "- the index is the step at which the prediction is made.\n",
+ "- the column `y_1` is the prediction one time step ahead, and so on for `y_2` and `y_3`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2019-12-20T11:08:48.939181Z",
+ "start_time": "2019-12-20T11:08:48.931145Z"
}
- ],
+ },
+ "outputs": [],
"source": [
- "y_test"
+ "predictions = model.predict(X_test)"
]
},
{
"cell_type": "code",
- "execution_count": 33,
+ "execution_count": 15,
"metadata": {
"ExecuteTime": {
- "end_time": "2019-12-19T14:45:22.160170Z",
- "start_time": "2019-12-19T14:45:22.152303Z"
+ "end_time": "2019-12-20T11:08:50.014625Z",
+ "start_time": "2019-12-20T11:08:49.989948Z"
}
},
"outputs": [
@@ -502,35 +491,42 @@
" \n",
" \n",
" | \n",
- " y_0 | \n",
" y_1 | \n",
" y_2 | \n",
+ " y_3 | \n",
"
\n",
" \n",
" \n",
" \n",
+ " 2001-05-12 | \n",
+ " -0.149298 | \n",
+ " -0.164899 | \n",
+ " -0.092473 | \n",
+ "
\n",
+ " \n",
" 2001-05-13 | \n",
- " 0.168957 | \n",
- " 0.024196 | \n",
- " 0.083588 | \n",
+ " -0.150681 | \n",
+ " -0.085710 | \n",
+ " -0.063871 | \n",
"
\n",
" \n",
" 2001-05-14 | \n",
- " 0.019240 | \n",
- " 0.075777 | \n",
- " 0.110530 | \n",
+ " -0.066199 | \n",
+ " -0.134353 | \n",
+ " -0.095745 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " y_0 y_1 y_2\n",
- "2001-05-13 0.168957 0.024196 0.083588\n",
- "2001-05-14 0.019240 0.075777 0.110530"
+ " y_1 y_2 y_3\n",
+ "2001-05-12 -0.149298 -0.164899 -0.092473\n",
+ "2001-05-13 -0.150681 -0.085710 -0.063871\n",
+ "2001-05-14 -0.066199 -0.134353 -0.095745"
]
},
- "execution_count": 33,
+ "execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
@@ -538,6 +534,13 @@
"source": [
"predictions"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
}
],
"metadata": {
From 8484f61b86b96bf9487ad99bff5789750873bba4 Mon Sep 17 00:00:00 2001
From: Stefano
Date: Fri, 20 Dec 2019 17:11:52 +0100
Subject: [PATCH 7/7] Added examples
Signed-off-by: Stefano
---
.gitignore | 1 +
.../time_series_preparation/__init__.py | 2 +-
.../tests/test_time_series_conversion.py | 14 ++++
.../time_series_conversion.py | 73 ++++++++++++++++++-
.../time_series_preparation.py | 49 +++++++++++++
5 files changed, 136 insertions(+), 3 deletions(-)
diff --git a/.gitignore b/.gitignore
index 632e72b..0ea875b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -78,6 +78,7 @@ target/
# Jupyter NB Checkpoints
.ipynb_checkpoints/
+Untitled*
# exclude data from source control by default
/data/
diff --git a/giottotime/time_series_preparation/__init__.py b/giottotime/time_series_preparation/__init__.py
index de21fa1..d59471a 100755
--- a/giottotime/time_series_preparation/__init__.py
+++ b/giottotime/time_series_preparation/__init__.py
@@ -1,6 +1,6 @@
"""
The :mod:`giottotime.feature_creation` module deals with the preparation of time series
-data, such as index conversions and resampling.
+data, such as conversion to `pandas.DataFrame` with a `PeriodIndex`.
"""
from .time_series_conversion import (
diff --git a/giottotime/time_series_preparation/tests/test_time_series_conversion.py b/giottotime/time_series_preparation/tests/test_time_series_conversion.py
index f23265a..884eb5d 100644
--- a/giottotime/time_series_preparation/tests/test_time_series_conversion.py
+++ b/giottotime/time_series_preparation/tests/test_time_series_conversion.py
@@ -332,6 +332,20 @@ def test_only_timedelta_index_as_input(self, timedelta_index_series: pd.Series):
)
assert_series_equal(computed_series, expected_series)
+ def test_basic_timedelta_index_as_input(self):
+ timedelta_index_series = pd.Series(
+ index=pd.timedelta_range(start=pd.Timedelta(days=1), freq="10D", periods=3),
+ data=[1, 2, 3],
+ )
+ expected_series = pd.Series(
+ index=pd.PeriodIndex(["1970-01-02", "1970-01-12", "1970-01-22"], freq="D"),
+ data=[1, 2, 3],
+ )
+ computed_series = transform_time_index_series_into_period_index_series(
+ timedelta_index_series
+ )
+ assert_series_equal(computed_series, expected_series)
+
@given(series_with_timedelta_index(), available_freqs())
def test_timedelta_index_and_freq_as_input(
self, timedelta_index_series: pd.Series, freq: pd.Timedelta
diff --git a/giottotime/time_series_preparation/time_series_conversion.py b/giottotime/time_series_preparation/time_series_conversion.py
index bf0c2f1..cd04546 100644
--- a/giottotime/time_series_preparation/time_series_conversion.py
+++ b/giottotime/time_series_preparation/time_series_conversion.py
@@ -186,6 +186,19 @@ class SequenceToTimeIndexSeries(TimeSeriesConversion):
frequency of the output time series. Not mandatory for all time series
conversion.
+ Examples
+ --------
+ >>> from giottotime.time_series_preparation import SequenceToTimeIndexSeries
+ >>> time_series = [1,2,3,5,5,7]
+ >>> sequence_to_time_index = SequenceToTimeIndexSeries(start='01-01-2010', freq='10D')
+ >>> sequence_to_time_index.transform(time_series)
+ 2010-01-01 1
+ 2010-01-11 2
+ 2010-01-21 3
+ 2010-01-31 5
+ 2010-02-10 5
+ 2010-02-20 7
+ Freq: 10D, dtype: int64
"""
def __init__(
@@ -209,7 +222,7 @@ def _get_values_from(
class PandasSeriesToTimeIndexSeries(TimeSeriesConversion):
"""Returns a Pandas Series with time index (DatetimeIndex, TimedeltaIndex or
- PeriodIndex from a standard Pandas Series
+ PeriodIndex) from a standard Pandas Series
Parameters
----------
@@ -222,6 +235,20 @@ class PandasSeriesToTimeIndexSeries(TimeSeriesConversion):
freq : pd.Timedelta``, optional, default: ``None``
The frequency of the time series.
+ Examples
+ --------
+ >>> import pandas as pd
+ >>> from giottotime.time_series_preparation import PandasSeriesToTimeIndexSeries
+ >>> time_series = pd.Series([1,2,3,5,5,7])
+ >>> sequence_to_time_index = PandasSeriesToTimeIndexSeries(start='01-01-2010', freq='10D')
+ >>> sequence_to_time_index.transform(time_series)
+ 2010-01-01 1
+ 2010-01-11 2
+ 2010-01-21 3
+ 2010-01-31 5
+ 2010-02-10 5
+ 2010-02-20 7
+ Freq: 10D, dtype: int64
"""
def __init__(
@@ -255,7 +282,8 @@ def _has_time_index(self, time_series: pd.Series) -> bool:
class TimeIndexSeriesToPeriodIndexSeries(TimeSeriesConversion):
- """Converts a series with a time index to a series with a PeriodIndex.
+ """Converts a series with a time index (DatetimeIndex, TimedeltaIndex or
+ PeriodIndex) to a series with a PeriodIndex.
It may be necessary to specify a `freq` if not already provided.
@@ -264,6 +292,47 @@ class TimeIndexSeriesToPeriodIndexSeries(TimeSeriesConversion):
freq : pd.Timedelta, optional, default: ``None``
The frequency of the time series.
+ Examples
+ --------
+ >>> import pandas as pd
+ >>> from giottotime.time_series_preparation import TimeIndexSeriesToPeriodIndexSeries
+ >>> period_index_time_series = pd.Series(
+ ... index = pd.period_range(start='01-01-2010', freq='10D', periods=6),
+ ... data=[1,2,3,5,5,7]
+ ... )
+ >>> datetime_index_time_series = pd.Series(
+ ... index = pd.date_range(start='01-01-2010', freq='10D', periods=6),
+ ... data=[1,2,3,5,5,7]
+ ... )
+ >>> timedelta_index_time_series = pd.Series(
+ ... index = pd.timedelta_range(start=pd.Timedelta(days=1), freq='10D', periods=6),
+ ... data=[1,2,3,5,5,7]
+ ... )
+ >>> sequence_to_time_index = TimeIndexSeriesToPeriodIndexSeries()
+ >>> sequence_to_time_index.transform(period_index_time_series)
+ 2010-01-01 1
+ 2010-01-11 2
+ 2010-01-21 3
+ 2010-01-31 5
+ 2010-02-10 5
+ 2010-02-20 7
+ Freq: 10D, dtype: int64
+ >>> sequence_to_time_index.transform(datetime_index_time_series)
+ 2010-01-01 1
+ 2010-01-11 2
+ 2010-01-21 3
+ 2010-01-31 5
+ 2010-02-10 5
+ 2010-02-20 7
+ Freq: 10D, dtype: int64
+ >>> sequence_to_time_index.transform(timedelta_index_time_series)
+ 1970-01-02 1
+ 1970-01-12 2
+ 1970-01-22 3
+ 1970-02-01 5
+ 1970-02-11 5
+ 1970-02-21 7
+ Freq: D, dtype: int64
"""
def __init__(self, freq: Optional[pd.Timedelta] = None):
diff --git a/giottotime/time_series_preparation/time_series_preparation.py b/giottotime/time_series_preparation/time_series_preparation.py
index 7b63914..d5685fe 100644
--- a/giottotime/time_series_preparation/time_series_preparation.py
+++ b/giottotime/time_series_preparation/time_series_preparation.py
@@ -51,6 +51,55 @@ class TimeSeriesPreparation:
ValueError
Of the three parameters: start, end, and periods, exactly two must be specified.
+ Examples
+ --------
+ >>> time_series = [1,2,3,5,5,7]
+ >>> period_index_time_series = pd.Series(
+ ... index = pd.period_range(start='01-01-2010', freq='10D', periods=6),
+ ... data=[1,2,3,5,5,7]
+ ... )
+ >>> datetime_index_time_series = pd.Series(
+ ... index = pd.date_range(start='01-01-2010', freq='10D', periods=6),
+ ... data=[1,2,3,5,5,7]
+ ... )
+ >>> timedelta_index_time_series = pd.Series(
+ ... index = pd.timedelta_range(start=pd.Timedelta(days=1), freq='10D', periods=6),
+ ... data=[1,2,3,5,5,7]
+ ... )
+ >>> time_series_preparation = TimeSeriesPreparation()
+ >>> time_series_preparation.transform(time_series)
+ time_series
+ 1970-01-01 1
+ 1970-01-02 2
+ 1970-01-03 3
+ 1970-01-04 5
+ 1970-01-05 5
+ 1970-01-06 7
+ >>> time_series_preparation.transform(period_index_time_series)
+ time_series
+ 2010-01-01 1
+ 2010-01-11 2
+ 2010-01-21 3
+ 2010-01-31 5
+ 2010-02-10 5
+ 2010-02-20 7
+ >>> time_series_preparation.transform(datetime_index_time_series)
+ time_series
+ 2010-01-01 1
+ 2010-01-11 2
+ 2010-01-21 3
+ 2010-01-31 5
+ 2010-02-10 5
+ 2010-02-20 7
+ >>> time_series_preparation.transform(timedelta_index_time_series)
+ time_series
+ 1970-01-02 1
+ 1970-01-12 2
+ 1970-01-22 3
+ 1970-02-01 5
+ 1970-02-11 5
+ 1970-02-21 7
+
"""
def __init__(