From 2273cb84254b340910c9e87f9dc8f9f7686d6ff8 Mon Sep 17 00:00:00 2001
From: Stefano <stefano.savare@gmail.com>
Date: Thu, 19 Dec 2019 11:17:34 +0100
Subject: [PATCH 1/5] Added examples folder with README

Signed-off-by: Stefano <stefano.savare@gmail.com>
---
 examples/README.rst | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 examples/README.rst

diff --git a/examples/README.rst b/examples/README.rst
new file mode 100644
index 0000000..51189c4
--- /dev/null
+++ b/examples/README.rst
@@ -0,0 +1,27 @@
+.. image:: https://www.giotto.ai/static/vector/logo.svg
+   :width: 850
+
+Examples and Tutorials
+======================
+
+In this folder you can find basic tutorials and examples: you can read through them to
+understand how `giotto-time` works.
+
+Quick start
+-----------
+
+This tutorial gives an overview of the basic features of `giotto-time`.
+You will learn how to train a simple time series model with custom features.
+Some considerations on input-output are presented.
+
+Details and advanced features
+-----------------------------
+
+This tutorial details more advanced features of `giotto-time`.
+You will learn more about feature generation and about the custom linear regressor
+model for time series forecasting.
+
+Causality Tests
+---------------
+
+This tutorial details the causality tests built into `giotto-time`.

From b311189399bc2686047df6cc486680706cc7d7d4 Mon Sep 17 00:00:00 2001
From: Stefano <stefano.savare@gmail.com>
Date: Thu, 19 Dec 2019 15:59:46 +0100
Subject: [PATCH 2/5] Bug fix in time series preparation

Signed-off-by: Stefano <stefano.savare@gmail.com>
---
 .../tests/test_time_series_preparation.py     | 33 +++++++++++++++++++
 .../time_series_preparation.py                | 12 ++++---
 2 files changed, 41 insertions(+), 4 deletions(-)

diff --git a/giottotime/time_series_preparation/tests/test_time_series_preparation.py b/giottotime/time_series_preparation/tests/test_time_series_preparation.py
index 33e8821..c455320 100644
--- a/giottotime/time_series_preparation/tests/test_time_series_preparation.py
+++ b/giottotime/time_series_preparation/tests/test_time_series_preparation.py
@@ -128,6 +128,39 @@ def test_wrong_input_type(self, wrong_input: Tuple):
         with pytest.raises(TypeError):
             time_series_preparation._to_time_index_series(wrong_input)
 
+    @given(series_with_period_index(), st.datetimes(), available_freqs())
+    def test_period_index_dataframe_unchanged(
+        self, period_index_series: pd.Series, start: pd.datetime, freq: pd.Timedelta,
+    ):
+        period_index_dataframe = pd.DataFrame(period_index_series)
+        time_series_preparation = TimeSeriesPreparation(start=start, freq=freq)
+        computed_time_series = time_series_preparation._to_time_index_series(
+            period_index_dataframe
+        )
+        assert_series_equal(computed_time_series, period_index_series)
+
+    @given(series_with_datetime_index(), st.datetimes(), available_freqs())
+    def test_datetime_index_dataframe_unchanged(
+        self, datetime_index_series: pd.Series, start: pd.datetime, freq: pd.Timedelta,
+    ):
+        datetime_index_dataframe = pd.DataFrame(datetime_index_series)
+        time_series_preparation = TimeSeriesPreparation(start=start, freq=freq)
+        computed_time_series = time_series_preparation._to_time_index_series(
+            datetime_index_dataframe
+        )
+        assert_series_equal(computed_time_series, datetime_index_series)
+
+    @given(series_with_timedelta_index(), st.datetimes(), available_freqs())
+    def test_timedelta_index_dataframe_unchanged(
+        self, timedelta_index_series: pd.Series, start: pd.datetime, freq: pd.Timedelta,
+    ):
+        timedelta_index_dataframe = pd.DataFrame(timedelta_index_series)
+        time_series_preparation = TimeSeriesPreparation(start=start, freq=freq)
+        computed_time_series = time_series_preparation._to_time_index_series(
+            timedelta_index_dataframe
+        )
+        assert_series_equal(computed_time_series, timedelta_index_series)
+
 
 class TestToEquispacedTimeSeries:
     @given(
diff --git a/giottotime/time_series_preparation/time_series_preparation.py b/giottotime/time_series_preparation/time_series_preparation.py
index 2e909e6..7b63914 100644
--- a/giottotime/time_series_preparation/time_series_preparation.py
+++ b/giottotime/time_series_preparation/time_series_preparation.py
@@ -78,13 +78,15 @@ def __init__(
             self.freq
         )
 
-    def transform(self, time_series: Union[List, np.array, pd.Series]) -> pd.DataFrame:
+    def transform(
+        self, time_series: Union[List, np.array, pd.Series, pd.DataFrame]
+    ) -> pd.DataFrame:
         """Transforms an array-like sequence in a period-index DataFrame with a single
         column.
 
         Parameters
         ----------
-        time_series : Union[List, np.array, pd.Series], required
+        time_series : Union[List, np.array, pd.Series, pd.DataFrame], required
             The input time series.
 
         Returns
@@ -104,9 +106,11 @@ def transform(self, time_series: Union[List, np.array, pd.Series]) -> pd.DataFra
         return period_index_dataframe
 
     def _to_time_index_series(
-        self, array_like_object: Union[List, np.array, pd.Series]
+        self, array_like_object: Union[List, np.array, pd.Series, pd.DataFrame]
     ) -> pd.Series:
-        if isinstance(array_like_object, pd.Series):
+        if isinstance(array_like_object, pd.DataFrame):
+            return self.pandas_converter.transform(array_like_object.iloc[:, 0])
+        elif isinstance(array_like_object, pd.Series):
             return self.pandas_converter.transform(array_like_object)
         elif any(
             isinstance(array_like_object, type_) for type_ in SUPPORTED_SEQUENCE_TYPES

From 323269fd25791b2b21ad83a302e84a743d14ea20 Mon Sep 17 00:00:00 2001
From: Stefano <stefano.savare@gmail.com>
Date: Thu, 19 Dec 2019 17:19:43 +0100
Subject: [PATCH 3/5] Added quick-start notebook

Signed-off-by: Stefano <stefano.savare@gmail.com>
---
 examples/quick-start.ipynb | 577 +++++++++++++++++++++++++++++++++++++
 1 file changed, 577 insertions(+)
 create mode 100644 examples/quick-start.ipynb

diff --git a/examples/quick-start.ipynb b/examples/quick-start.ipynb
new file mode 100644
index 0000000..c24d038
--- /dev/null
+++ b/examples/quick-start.ipynb
@@ -0,0 +1,577 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-12-19T10:34:45.079040Z",
+     "start_time": "2019-12-19T10:34:45.075853Z"
+    }
+   },
+   "source": [
+    "# Giotto-Time\n",
+    "\n",
+    "Welcome to `giotto-time`, our new library for time series forecasting!\n",
+    "\n",
+    "Let's start with an example."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-12-19T10:37:13.829605Z",
+     "start_time": "2019-12-19T10:37:13.827033Z"
+    }
+   },
+   "source": [
+    "## First example"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-12-19T10:51:37.701263Z",
+     "start_time": "2019-12-19T10:51:37.698686Z"
+    }
+   },
+   "source": [
+    "### Ingredients"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-12-19T10:43:03.249232Z",
+     "start_time": "2019-12-19T10:43:03.244743Z"
+    }
+   },
+   "source": [
+    "These are the main ingredients of `giotto-time`:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-12-19T14:12:59.958832Z",
+     "start_time": "2019-12-19T14:12:59.307286Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from giottotime.time_series_preparation import TimeSeriesPreparation\n",
+    "from giottotime.feature_creation import FeatureCreation, ShiftFeature, MovingAverageFeature\n",
+    "from giottotime.model_selection import FeatureSplitter\n",
+    "from giottotime.models import GAR"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-12-19T10:43:23.300668Z",
+     "start_time": "2019-12-19T10:43:23.100775Z"
+    }
+   },
+   "source": [
+    "- `TimeSeriesPreparation`: checks the input format of the time series and converts it to the expected format.\n",
+    "- `FeatureCreation`, `ShiftFeature`, `MovingAverageFeature`: create the desired features on the time series that are \n",
+    "    used for the forecasting.\n",
+    "- `FeatureSplitter`: prepares the custom `giotto-time` train-test matrices that are used in the model.\n",
+    "- `GAR`: generalized-auto-regressive model. This is the only time series model that we provide for the first release."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-12-19T10:50:42.797962Z",
+     "start_time": "2019-12-19T10:50:42.792529Z"
+    }
+   },
+   "source": [
+    "We also need a `scikit-learn` model. We use a standard linear regressor for this example."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-12-19T14:13:00.294607Z",
+     "start_time": "2019-12-19T14:13:00.291612Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from sklearn.linear_model import LinearRegression"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-12-19T10:51:15.298065Z",
+     "start_time": "2019-12-19T10:51:15.295733Z"
+    }
+   },
+   "source": [
+    "### Data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-12-19T10:55:55.362286Z",
+     "start_time": "2019-12-19T10:55:55.358045Z"
+    }
+   },
+   "source": [
+    "We use the `pandas.util.testing` module to create a test time series."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-12-19T14:13:00.964858Z",
+     "start_time": "2019-12-19T14:13:00.961460Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "def test_time_series():\n",
+    "    from pandas.util import testing as testing\n",
+    "    \n",
+    "    testing.N, testing.K = 500, 1\n",
+    "    df = testing.makeTimeDataFrame( freq=\"D\" )\n",
+    "    return df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-12-19T14:13:01.303804Z",
+     "start_time": "2019-12-19T14:13:01.299245Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "time_series = test_time_series()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-12-19T13:37:09.941132Z",
+     "start_time": "2019-12-19T13:37:09.938476Z"
+    }
+   },
+   "source": [
+    "### Time Series Preparation"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-12-19T13:37:33.357619Z",
+     "start_time": "2019-12-19T13:37:33.347192Z"
+    }
+   },
+   "source": [
+    "The input time series has to be a `pandas.DataFrame` with a `PeriodIndex`. Use the provided class `TimeSeriesPreparation` to convert the time series into this format."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-12-19T14:13:02.320075Z",
+     "start_time": "2019-12-19T14:13:02.317384Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "time_series_preparation = TimeSeriesPreparation()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-12-19T14:13:04.685763Z",
+     "start_time": "2019-12-19T14:13:04.681195Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "period_index_time_series = time_series_preparation.transform(time_series)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-12-19T14:15:31.332440Z",
+     "start_time": "2019-12-19T14:15:31.322583Z"
+    }
+   },
+   "source": [
+    "### Feature Creation"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-12-19T14:20:25.312078Z",
+     "start_time": "2019-12-19T14:20:25.307741Z"
+    }
+   },
+   "source": [
+    "The feature creation part is one of the core parts of our library and the bridge between traditional time series forecasting techniques and machine learning.\n",
+    "\n",
+    "Starting with a time series in a `pandas.DataFrame`, we create two matrices `X` and `y` which can be used for training and testing."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-12-19T14:49:59.729021Z",
+     "start_time": "2019-12-19T14:49:59.718573Z"
+    }
+   },
+   "source": [
+    "We provide 12 different features. For simplicity we train a model using only `ShiftFeature` and `MovingAverageFeature`. \n",
+    "\n",
+    "`ShiftFeature` provides a temporal shift of the time series. Adding two `ShiftFeature` with shifts 1 and 2 is equivalent to an `AR(2)` model. \n",
+    "\n",
+    "The possibility to add the features that you want allows you to choose the model that best fits your data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-12-19T14:17:42.856996Z",
+     "start_time": "2019-12-19T14:17:42.853237Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "features = [\n",
+    "    ShiftFeature(1, output_name='shift_1'),\n",
+    "    ShiftFeature(2, output_name='shift_2'),\n",
+    "    MovingAverageFeature(3, output_name='moving_average_3'),\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-12-19T14:18:17.215100Z",
+     "start_time": "2019-12-19T14:18:17.211908Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "feature_creation = FeatureCreation(time_series_features=features, horizon=3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-12-19T14:21:07.990558Z",
+     "start_time": "2019-12-19T14:21:07.970206Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "features_X, features_y = feature_creation.fit_transform(period_index_time_series)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-12-19T14:42:32.549572Z",
+     "start_time": "2019-12-19T14:42:32.547124Z"
+    }
+   },
+   "source": [
+    "### Train-Test split"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-12-19T14:43:02.820280Z",
+     "start_time": "2019-12-19T14:43:02.817384Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "feature_splitter = FeatureSplitter()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-12-19T14:43:40.401560Z",
+     "start_time": "2019-12-19T14:43:40.380814Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "X_train, y_train, X_test, y_test = feature_splitter.transform(features_X, features_y)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-12-19T14:44:02.820817Z",
+     "start_time": "2019-12-19T14:44:02.818276Z"
+    }
+   },
+   "source": [
+    "### Training"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-12-19T14:45:09.230395Z",
+     "start_time": "2019-12-19T14:45:09.227402Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "model = GAR(base_model=LinearRegression())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-12-19T14:45:09.467974Z",
+     "start_time": "2019-12-19T14:45:09.458956Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "model = model.fit(X_train, y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-12-19T14:45:20.428649Z",
+     "start_time": "2019-12-19T14:45:20.414290Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "predictions = model.predict(X_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-12-19T15:49:35.236013Z",
+     "start_time": "2019-12-19T15:49:35.225037Z"
+    },
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>y_0</th>\n",
+       "      <th>y_1</th>\n",
+       "      <th>y_2</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>2001-05-13</th>\n",
+       "      <td>0.498604</td>\n",
+       "      <td>-1.11394</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2001-05-14</th>\n",
+       "      <td>-1.113940</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                 y_0      y_1  y_2\n",
+       "2001-05-13  0.498604 -1.11394  NaN\n",
+       "2001-05-14 -1.113940      NaN  NaN"
+      ]
+     },
+     "execution_count": 36,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "y_test"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-12-19T14:45:22.160170Z",
+     "start_time": "2019-12-19T14:45:22.152303Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>y_0</th>\n",
+       "      <th>y_1</th>\n",
+       "      <th>y_2</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>2001-05-13</th>\n",
+       "      <td>0.168957</td>\n",
+       "      <td>0.024196</td>\n",
+       "      <td>0.083588</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2001-05-14</th>\n",
+       "      <td>0.019240</td>\n",
+       "      <td>0.075777</td>\n",
+       "      <td>0.110530</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                 y_0       y_1       y_2\n",
+       "2001-05-13  0.168957  0.024196  0.083588\n",
+       "2001-05-14  0.019240  0.075777  0.110530"
+      ]
+     },
+     "execution_count": 33,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "predictions"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.9"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {},
+   "toc_section_display": true,
+   "toc_window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

From bee8c3f440f8fe56413c175f8a453b912dd6f465 Mon Sep 17 00:00:00 2001
From: Stefano <stefano.savare@gmail.com>
Date: Fri, 20 Dec 2019 12:13:50 +0100
Subject: [PATCH 4/5] Basic example done

Signed-off-by: Stefano <stefano.savare@gmail.com>
---
 examples/quick-start.ipynb | 233 +++++++++++++++++++------------------
 1 file changed, 118 insertions(+), 115 deletions(-)

diff --git a/examples/quick-start.ipynb b/examples/quick-start.ipynb
index c24d038..ad099c0 100644
--- a/examples/quick-start.ipynb
+++ b/examples/quick-start.ipynb
@@ -57,8 +57,8 @@
    "execution_count": 1,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2019-12-19T14:12:59.958832Z",
-     "start_time": "2019-12-19T14:12:59.307286Z"
+     "end_time": "2019-12-20T11:08:40.433188Z",
+     "start_time": "2019-12-20T11:08:39.863805Z"
     }
    },
    "outputs": [],
@@ -102,8 +102,8 @@
    "execution_count": 2,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2019-12-19T14:13:00.294607Z",
-     "start_time": "2019-12-19T14:13:00.291612Z"
+     "end_time": "2019-12-20T11:08:41.268423Z",
+     "start_time": "2019-12-20T11:08:41.265378Z"
     }
    },
    "outputs": [],
@@ -140,8 +140,8 @@
    "execution_count": 3,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2019-12-19T14:13:00.964858Z",
-     "start_time": "2019-12-19T14:13:00.961460Z"
+     "end_time": "2019-12-20T11:08:42.074384Z",
+     "start_time": "2019-12-20T11:08:42.070697Z"
     }
    },
    "outputs": [],
@@ -159,8 +159,8 @@
    "execution_count": 4,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2019-12-19T14:13:01.303804Z",
-     "start_time": "2019-12-19T14:13:01.299245Z"
+     "end_time": "2019-12-20T11:08:42.366492Z",
+     "start_time": "2019-12-20T11:08:42.361791Z"
     }
    },
    "outputs": [],
@@ -197,8 +197,8 @@
    "execution_count": 5,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2019-12-19T14:13:02.320075Z",
-     "start_time": "2019-12-19T14:13:02.317384Z"
+     "end_time": "2019-12-20T11:08:43.161252Z",
+     "start_time": "2019-12-20T11:08:43.158360Z"
     }
    },
    "outputs": [],
@@ -208,11 +208,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 6,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2019-12-19T14:13:04.685763Z",
-     "start_time": "2019-12-19T14:13:04.681195Z"
+     "end_time": "2019-12-20T11:08:43.428293Z",
+     "start_time": "2019-12-20T11:08:43.421929Z"
     }
    },
    "outputs": [],
@@ -264,11 +264,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 7,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2019-12-19T14:17:42.856996Z",
-     "start_time": "2019-12-19T14:17:42.853237Z"
+     "end_time": "2019-12-20T11:08:44.450001Z",
+     "start_time": "2019-12-20T11:08:44.446647Z"
     }
    },
    "outputs": [],
@@ -282,11 +282,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 8,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2019-12-19T14:18:17.215100Z",
-     "start_time": "2019-12-19T14:18:17.211908Z"
+     "end_time": "2019-12-20T11:08:44.737915Z",
+     "start_time": "2019-12-20T11:08:44.734648Z"
     }
    },
    "outputs": [],
@@ -296,11 +296,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 9,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2019-12-19T14:21:07.990558Z",
-     "start_time": "2019-12-19T14:21:07.970206Z"
+     "end_time": "2019-12-20T11:08:45.045070Z",
+     "start_time": "2019-12-20T11:08:45.022402Z"
     }
    },
    "outputs": [],
@@ -320,13 +320,25 @@
     "### Train-Test split"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-12-20T10:59:18.112521Z",
+     "start_time": "2019-12-20T10:59:18.108823Z"
+    }
+   },
+   "source": [
+    "We use `FeatureSplitter` to split the matrices `X` and `y` into train and test sets."
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 10,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2019-12-19T14:43:02.820280Z",
-     "start_time": "2019-12-19T14:43:02.817384Z"
+     "end_time": "2019-12-20T11:08:45.885739Z",
+     "start_time": "2019-12-20T11:08:45.882557Z"
     }
    },
    "outputs": [],
@@ -336,11 +348,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 11,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2019-12-19T14:43:40.401560Z",
-     "start_time": "2019-12-19T14:43:40.380814Z"
+     "end_time": "2019-12-20T11:08:46.240108Z",
+     "start_time": "2019-12-20T11:08:46.221414Z"
     }
    },
    "outputs": [],
@@ -360,13 +372,31 @@
     "### Training"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-12-20T11:01:12.922844Z",
+     "start_time": "2019-12-20T11:01:12.919591Z"
+    }
+   },
+   "source": [
+    "We provide a `GAR` (Generalized Auto Regressive) model to forecast the time series.\n",
+    "\n",
+    "The traditional `AR` model is equivalent to our `GAR` model that uses only `ShiftFeature` columns in the `X` matrix.\n",
+    "`GAR` supports all the features compatible with the feature creation step.\n",
+    "\n",
+    "Moreover, `GAR` internally uses a `scikit-learn` compatible model for the internal time series regression. \n",
+    "In this example we use `LinearRegression`. A priori all the `fit-transform-predict` models are compatible (e.g. ridge regression, random forest, boosting, etc.)."
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 12,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2019-12-19T14:45:09.230395Z",
-     "start_time": "2019-12-19T14:45:09.227402Z"
+     "end_time": "2019-12-20T11:08:47.555831Z",
+     "start_time": "2019-12-20T11:08:47.553017Z"
     }
    },
    "outputs": [],
@@ -376,11 +406,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 13,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2019-12-19T14:45:09.467974Z",
-     "start_time": "2019-12-19T14:45:09.458956Z"
+     "end_time": "2019-12-20T11:08:48.059122Z",
+     "start_time": "2019-12-20T11:08:48.050062Z"
     }
    },
    "outputs": [],
@@ -389,95 +419,54 @@
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": 32,
+   "cell_type": "markdown",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2019-12-19T14:45:20.428649Z",
-     "start_time": "2019-12-19T14:45:20.414290Z"
+     "end_time": "2019-12-20T11:01:01.280526Z",
+     "start_time": "2019-12-20T11:01:01.278125Z"
     }
    },
-   "outputs": [],
    "source": [
-    "predictions = model.predict(X_test)"
+    "### Forecasting"
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": 36,
+   "cell_type": "markdown",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2019-12-19T15:49:35.236013Z",
-     "start_time": "2019-12-19T15:49:35.225037Z"
-    },
-    "scrolled": true
+     "end_time": "2019-12-20T11:10:02.544672Z",
+     "start_time": "2019-12-20T11:10:02.540859Z"
+    }
    },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>y_0</th>\n",
-       "      <th>y_1</th>\n",
-       "      <th>y_2</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>2001-05-13</th>\n",
-       "      <td>0.498604</td>\n",
-       "      <td>-1.11394</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2001-05-14</th>\n",
-       "      <td>-1.113940</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                 y_0      y_1  y_2\n",
-       "2001-05-13  0.498604 -1.11394  NaN\n",
-       "2001-05-14 -1.113940      NaN  NaN"
-      ]
-     },
-     "execution_count": 36,
-     "metadata": {},
-     "output_type": "execute_result"
+   "source": [
+    "We forecast 3 time steps of the time series (we set this parameter in `FeatureCreation`).\n",
+    "\n",
+    "The format of the output is the following:\n",
+    "- the index is the step at which the prediction is made.\n",
+    "- the column `y_1` is the prediction one time step ahead, and so on for `y_2` and `y_3`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-12-20T11:08:48.939181Z",
+     "start_time": "2019-12-20T11:08:48.931145Z"
     }
-   ],
+   },
+   "outputs": [],
    "source": [
-    "y_test"
+    "predictions = model.predict(X_test)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": 15,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2019-12-19T14:45:22.160170Z",
-     "start_time": "2019-12-19T14:45:22.152303Z"
+     "end_time": "2019-12-20T11:08:50.014625Z",
+     "start_time": "2019-12-20T11:08:49.989948Z"
     }
    },
    "outputs": [
@@ -502,35 +491,42 @@
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
-       "      <th>y_0</th>\n",
        "      <th>y_1</th>\n",
        "      <th>y_2</th>\n",
+       "      <th>y_3</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
+       "      <th>2001-05-12</th>\n",
+       "      <td>-0.149298</td>\n",
+       "      <td>-0.164899</td>\n",
+       "      <td>-0.092473</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
        "      <th>2001-05-13</th>\n",
-       "      <td>0.168957</td>\n",
-       "      <td>0.024196</td>\n",
-       "      <td>0.083588</td>\n",
+       "      <td>-0.150681</td>\n",
+       "      <td>-0.085710</td>\n",
+       "      <td>-0.063871</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2001-05-14</th>\n",
-       "      <td>0.019240</td>\n",
-       "      <td>0.075777</td>\n",
-       "      <td>0.110530</td>\n",
+       "      <td>-0.066199</td>\n",
+       "      <td>-0.134353</td>\n",
+       "      <td>-0.095745</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "                 y_0       y_1       y_2\n",
-       "2001-05-13  0.168957  0.024196  0.083588\n",
-       "2001-05-14  0.019240  0.075777  0.110530"
+       "                 y_1       y_2       y_3\n",
+       "2001-05-12 -0.149298 -0.164899 -0.092473\n",
+       "2001-05-13 -0.150681 -0.085710 -0.063871\n",
+       "2001-05-14 -0.066199 -0.134353 -0.095745"
       ]
      },
-     "execution_count": 33,
+     "execution_count": 15,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -538,6 +534,13 @@
    "source": [
     "predictions"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {

From 8484f61b86b96bf9487ad99bff5789750873bba4 Mon Sep 17 00:00:00 2001
From: Stefano <stefano.savare@gmail.com>
Date: Fri, 20 Dec 2019 17:11:52 +0100
Subject: [PATCH 5/5] Added examples

Signed-off-by: Stefano <stefano.savare@gmail.com>
---
 .gitignore                                    |  1 +
 .../time_series_preparation/__init__.py       |  2 +-
 .../tests/test_time_series_conversion.py      | 14 ++++
 .../time_series_conversion.py                 | 73 ++++++++++++++++++-
 .../time_series_preparation.py                | 49 +++++++++++++
 5 files changed, 136 insertions(+), 3 deletions(-)

diff --git a/.gitignore b/.gitignore
index 632e72b..0ea875b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -78,6 +78,7 @@ target/
 
 # Jupyter NB Checkpoints
 .ipynb_checkpoints/
+Untitled*
 
 # exclude data from source control by default
 /data/
diff --git a/giottotime/time_series_preparation/__init__.py b/giottotime/time_series_preparation/__init__.py
index de21fa1..d59471a 100755
--- a/giottotime/time_series_preparation/__init__.py
+++ b/giottotime/time_series_preparation/__init__.py
@@ -1,6 +1,6 @@
 """
 The :mod:`giottotime.feature_creation` module deals with the preparation of time series
-data, such as index conversions and resampling.
+data, such as conversion to `pandas.DataFrame` with a `PeriodIndex`.
 """
 
 from .time_series_conversion import (
diff --git a/giottotime/time_series_preparation/tests/test_time_series_conversion.py b/giottotime/time_series_preparation/tests/test_time_series_conversion.py
index f23265a..884eb5d 100644
--- a/giottotime/time_series_preparation/tests/test_time_series_conversion.py
+++ b/giottotime/time_series_preparation/tests/test_time_series_conversion.py
@@ -332,6 +332,20 @@ def test_only_timedelta_index_as_input(self, timedelta_index_series: pd.Series):
         )
         assert_series_equal(computed_series, expected_series)
 
+    def test_basic_timedelta_index_as_input(self):
+        timedelta_index_series = pd.Series(
+            index=pd.timedelta_range(start=pd.Timedelta(days=1), freq="10D", periods=3),
+            data=[1, 2, 3],
+        )
+        expected_series = pd.Series(
+            index=pd.PeriodIndex(["1970-01-02", "1970-01-12", "1970-01-22"], freq="D"),
+            data=[1, 2, 3],
+        )
+        computed_series = transform_time_index_series_into_period_index_series(
+            timedelta_index_series
+        )
+        assert_series_equal(computed_series, expected_series)
+
     @given(series_with_timedelta_index(), available_freqs())
     def test_timedelta_index_and_freq_as_input(
         self, timedelta_index_series: pd.Series, freq: pd.Timedelta
diff --git a/giottotime/time_series_preparation/time_series_conversion.py b/giottotime/time_series_preparation/time_series_conversion.py
index bf0c2f1..cd04546 100644
--- a/giottotime/time_series_preparation/time_series_conversion.py
+++ b/giottotime/time_series_preparation/time_series_conversion.py
@@ -186,6 +186,19 @@ class SequenceToTimeIndexSeries(TimeSeriesConversion):
         frequency of the output time series. Not mandatory for all time series
         conversion.
 
+    Examples
+    --------
+    >>> from giottotime.time_series_preparation import SequenceToTimeIndexSeries
+    >>> time_series = [1,2,3,5,5,7]
+    >>> sequence_to_time_index = SequenceToTimeIndexSeries(start='01-01-2010', freq='10D')
+    >>> sequence_to_time_index.transform(time_series)
+    2010-01-01    1
+    2010-01-11    2
+    2010-01-21    3
+    2010-01-31    5
+    2010-02-10    5
+    2010-02-20    7
+    Freq: 10D, dtype: int64
     """
 
     def __init__(
@@ -209,7 +222,7 @@ def _get_values_from(
 
 class PandasSeriesToTimeIndexSeries(TimeSeriesConversion):
     """Returns a Pandas Series with time index (DatetimeIndex, TimedeltaIndex or
-    PeriodIndex from a standard Pandas Series
+    PeriodIndex) from a standard Pandas Series
 
     Parameters
     ----------
@@ -222,6 +235,20 @@ class PandasSeriesToTimeIndexSeries(TimeSeriesConversion):
     freq : pd.Timedelta``, optional, default: ``None``
         The frequency of the time series.
 
+    Examples
+    --------
+    >>> import pandas as pd
+    >>> from giottotime.time_series_preparation import PandasSeriesToTimeIndexSeries
+    >>> time_series = pd.Series([1,2,3,5,5,7])
+    >>> sequence_to_time_index = PandasSeriesToTimeIndexSeries(start='01-01-2010', freq='10D')
+    >>> sequence_to_time_index.transform(time_series)
+    2010-01-01    1
+    2010-01-11    2
+    2010-01-21    3
+    2010-01-31    5
+    2010-02-10    5
+    2010-02-20    7
+    Freq: 10D, dtype: int64
     """
 
     def __init__(
@@ -255,7 +282,8 @@ def _has_time_index(self, time_series: pd.Series) -> bool:
 
 
 class TimeIndexSeriesToPeriodIndexSeries(TimeSeriesConversion):
-    """Converts a series with a time index to a series with a PeriodIndex.
+    """Converts a series with a time index (DatetimeIndex, TimedeltaIndex or
+    PeriodIndex) to a series with a PeriodIndex.
 
     It may be necessary to specify a `freq` if not already provided.
 
@@ -264,6 +292,47 @@ class TimeIndexSeriesToPeriodIndexSeries(TimeSeriesConversion):
     freq : pd.Timedelta, optional, default: ``None``
         The frequency of the time series.
 
+    Examples
+    --------
+    >>> import pandas as pd
+    >>> from giottotime.time_series_preparation import TimeIndexSeriesToPeriodIndexSeries
+    >>> period_index_time_series = pd.Series(
+    ...     index = pd.period_range(start='01-01-2010', freq='10D', periods=6),
+    ...     data=[1,2,3,5,5,7]
+    ... )
+    >>> datetime_index_time_series = pd.Series(
+    ...     index = pd.date_range(start='01-01-2010', freq='10D', periods=6),
+    ...     data=[1,2,3,5,5,7]
+    ... )
+    >>> timedelta_index_time_series = pd.Series(
+    ...     index = pd.timedelta_range(start=pd.Timedelta(days=1), freq='10D', periods=6),
+    ...     data=[1,2,3,5,5,7]
+    ... )
+    >>> sequence_to_time_index = TimeIndexSeriesToPeriodIndexSeries()
+    >>> sequence_to_time_index.transform(period_index_time_series)
+    2010-01-01    1
+    2010-01-11    2
+    2010-01-21    3
+    2010-01-31    5
+    2010-02-10    5
+    2010-02-20    7
+    Freq: 10D, dtype: int64
+    >>> sequence_to_time_index.transform(datetime_index_time_series)
+    2010-01-01    1
+    2010-01-11    2
+    2010-01-21    3
+    2010-01-31    5
+    2010-02-10    5
+    2010-02-20    7
+    Freq: 10D, dtype: int64
+    >>> sequence_to_time_index.transform(timedelta_index_time_series)
+    1970-01-02    1
+    1970-01-12    2
+    1970-01-22    3
+    1970-02-01    5
+    1970-02-11    5
+    1970-02-21    7
+    Freq: D, dtype: int64
     """
 
     def __init__(self, freq: Optional[pd.Timedelta] = None):
diff --git a/giottotime/time_series_preparation/time_series_preparation.py b/giottotime/time_series_preparation/time_series_preparation.py
index 7b63914..d5685fe 100644
--- a/giottotime/time_series_preparation/time_series_preparation.py
+++ b/giottotime/time_series_preparation/time_series_preparation.py
@@ -51,6 +51,55 @@ class TimeSeriesPreparation:
     ValueError
         Of the three parameters: start, end, and periods, exactly two must be specified.
 
+    Examples
+    --------
+    >>> time_series = [1,2,3,5,5,7]
+    >>> period_index_time_series = pd.Series(
+    ...     index = pd.period_range(start='01-01-2010', freq='10D', periods=6),
+    ...     data=[1,2,3,5,5,7]
+    ... )
+    >>> datetime_index_time_series = pd.Series(
+    ...     index = pd.date_range(start='01-01-2010', freq='10D', periods=6),
+    ...     data=[1,2,3,5,5,7]
+    ... )
+    >>> timedelta_index_time_series = pd.Series(
+    ...     index = pd.timedelta_range(start=pd.Timedelta(days=1), freq='10D', periods=6),
+    ...     data=[1,2,3,5,5,7]
+    ... )
+    >>> time_series_preparation = TimeSeriesPreparation()
+    >>> time_series_preparation.transform(time_series)
+                time_series
+    1970-01-01            1
+    1970-01-02            2
+    1970-01-03            3
+    1970-01-04            5
+    1970-01-05            5
+    1970-01-06            7
+    >>> time_series_preparation.transform(period_index_time_series)
+                time_series
+    2010-01-01            1
+    2010-01-11            2
+    2010-01-21            3
+    2010-01-31            5
+    2010-02-10            5
+    2010-02-20            7
+    >>> time_series_preparation.transform(datetime_index_time_series)
+                time_series
+    2010-01-01            1
+    2010-01-11            2
+    2010-01-21            3
+    2010-01-31            5
+    2010-02-10            5
+    2010-02-20            7
+    >>> time_series_preparation.transform(timedelta_index_time_series)
+                time_series
+    1970-01-02            1
+    1970-01-12            2
+    1970-01-22            3
+    1970-02-01            5
+    1970-02-11            5
+    1970-02-21            7
+
     """
 
     def __init__(