From 5cb2dbdba8cc45245d1cc0555fec0fa578e382fb Mon Sep 17 00:00:00 2001 From: Jialue Chen Date: Thu, 1 Aug 2024 01:22:28 -0400 Subject: [PATCH] preparing next major version --- README.md | 305 ++++------------ deepfolio/backtester.py | 45 +++ .../constraints/tracking_error_constraints.py | 12 - deepfolio/constraints/turnover_constraints.py | 13 - deepfolio/constraints/weight_constraints.py | 10 - .../combinatorial_purged_cv.py | 35 -- deepfolio/{backtesting => data}/__init__.py | 0 deepfolio/data/data_loader.py | 23 ++ deepfolio/data/data_preprocessor.py | 0 deepfolio/data/real_time_data.py | 33 -- deepfolio/distance/kendall_distance.py | 25 -- deepfolio/distance/pearson_distance.py | 0 .../distance/variation_of_information.py | 34 -- deepfolio/estimators/equilibrium_estimator.py | 0 deepfolio/estimators/returns_estimators.py | 26 -- deepfolio/estimators/shrinkage_estimator.py | 0 deepfolio/hyperparamater_tuning.py | 31 ++ deepfolio/models/automated_trading.py | 22 -- deepfolio/models/base.py | 17 - deepfolio/models/deep_learning_layer.py | 16 + deepfolio/models/diffopt_portfolio.py | 48 +++ .../models/distributionally_robust_cvar.py | 22 -- deepfolio/models/equal_weighted.py | 8 - deepfolio/models/factor_model.py | 12 + deepfolio/models/feature_extractor.py | 14 + deepfolio/models/hierachical_risk_parity.py | 35 -- deepfolio/models/mean_risk.py | 26 -- deepfolio/models/merge_layer.py | 10 + deepfolio/models/parameter_predictor.py | 12 + deepfolio/models/qp_layer.py | 19 + deepfolio/models/random_weights.py | 9 - deepfolio/models/rnn.py | 50 --- deepfolio/models/transformer.py | 201 ----------- .../{constraints => optimizers}/__init__.py | 0 deepfolio/optimizers/custom_optimizer.py | 25 ++ .../optimizers/multi_period_optimization.py | 75 ++++ deepfolio/optimizers/robust_optimization.py | 60 ++++ .../pre_selection/drop_correlated_assets.py | 23 -- .../pre_selection/non_dominated_selection.py | 27 -- deepfolio/pre_selection/select_k_extremes.py | 18 - deepfolio/risk_measures/__init__.py | 0 deepfolio/risk_measures/kurtosis.py | 0 .../risk_measures/mean_absolute_deviation.py | 4 - deepfolio/risk_measures/semi_variance.py | 5 - deepfolio/risk_measures/skew.py | 9 - deepfolio/risk_measures/variance.py | 3 - deepfolio/{distance => solvers}/__init__.py | 0 deepfolio/solvers/solvers.py | 87 +++++ deepfolio/train.py | 53 +++ deepfolio/utils.py | 0 deepfolio/{estimators => utils}/__init__.py | 0 deepfolio/utils/metrics.py | 10 + deepfolio/utils/risk_manaagement.py | 28 ++ deepfolio/utils/visualization.py | 20 ++ examples/1_mean_risk/README.txt | 7 - .../1_mean_risk/plot_10_tracking_error.py | 127 ------- .../1_mean_risk/plot_11_empirical_prior.py | 87 ----- .../plot_12_black_and_litterman.py | 106 ------ examples/1_mean_risk/plot_13_factor_model.py | 167 --------- .../plot_14_black_litterman_factor_model.py | 128 ------- .../plot_1_maximum_sharpe_ratio.py | 126 ------- examples/1_mean_risk/plot_2_minimum_CVaR.py | 127 ------- .../1_mean_risk/plot_3_efficient_frontier.py | 103 ------ .../1_mean_risk/plot_4_mean_variance_cdar.py | 130 ------- .../1_mean_risk/plot_5_weight_constraints.py | 219 ------------ .../1_mean_risk/plot_6_transaction_costs.py | 199 ----------- .../1_mean_risk/plot_7_management_fees.py | 135 ------- examples/1_mean_risk/plot_8_regularization.py | 331 ------------------ .../1_mean_risk/plot_9_uncertainty_set.py | 229 ------------ examples/2_risk_budgeting/README.txt | 7 - .../plot_1_risk_parity_variance.py | 92 ----- .../plot_2_risk_busgeting_CVaR.py | 103 ------ 
.../plot_3_risk_parity_ledoit_wolf.py | 80 ----- .../3_maxiumum_diversification/README.txt | 7 - .../plot_1_maximum_divesification.py | 84 ----- .../4_distributionally_robust_cvar/README.txt | 7 - .../plot_1_distributionally_robust_cvar.py | 110 ------ examples/5_clustering/README.txt | 6 - examples/5_clustering/plot_1_hrp_cvar.py | 197 ----------- examples/5_clustering/plot_2_herc_cdar.py | 165 --------- examples/5_clustering/plot_3_hrp_vs_herc.py | 226 ------------ examples/5_clustering/plot_4_nco.py | 181 ---------- .../5_clustering/plot_5_nco_grid_search.py | 194 ---------- examples/6_ensemble/README.txt | 7 - examples/6_ensemble/plot_1_stacking.py | 252 ------------- examples/7_pre_selection/README.txt | 8 - .../7_pre_selection/plot_1_drop_correlated.py | 159 --------- .../plot_2_select_best_performers.py | 204 ----------- examples/8_data_preparation/README.txt | 8 - .../plot_1_investment_horizon.py | 78 ----- examples/deep_learning/example_transformer.py | 115 ------ requirements.txt | Bin 190 -> 306 bytes setup.py | 4 +- 93 files changed, 658 insertions(+), 5417 deletions(-) create mode 100644 deepfolio/backtester.py delete mode 100644 deepfolio/constraints/tracking_error_constraints.py delete mode 100644 deepfolio/constraints/turnover_constraints.py delete mode 100644 deepfolio/constraints/weight_constraints.py delete mode 100644 deepfolio/cross_validation/combinatorial_purged_cv.py rename deepfolio/{backtesting => data}/__init__.py (100%) delete mode 100644 deepfolio/data/data_preprocessor.py delete mode 100644 deepfolio/data/real_time_data.py delete mode 100644 deepfolio/distance/kendall_distance.py delete mode 100644 deepfolio/distance/pearson_distance.py delete mode 100644 deepfolio/distance/variation_of_information.py delete mode 100644 deepfolio/estimators/equilibrium_estimator.py delete mode 100644 deepfolio/estimators/returns_estimators.py delete mode 100644 deepfolio/estimators/shrinkage_estimator.py create mode 100644 deepfolio/hyperparamater_tuning.py delete mode 100644 deepfolio/models/automated_trading.py delete mode 100644 deepfolio/models/base.py create mode 100644 deepfolio/models/deep_learning_layer.py create mode 100644 deepfolio/models/diffopt_portfolio.py delete mode 100644 deepfolio/models/distributionally_robust_cvar.py delete mode 100644 deepfolio/models/equal_weighted.py create mode 100644 deepfolio/models/factor_model.py create mode 100644 deepfolio/models/feature_extractor.py delete mode 100644 deepfolio/models/hierachical_risk_parity.py delete mode 100644 deepfolio/models/mean_risk.py create mode 100644 deepfolio/models/merge_layer.py create mode 100644 deepfolio/models/parameter_predictor.py create mode 100644 deepfolio/models/qp_layer.py delete mode 100644 deepfolio/models/random_weights.py delete mode 100644 deepfolio/models/rnn.py delete mode 100644 deepfolio/models/transformer.py rename deepfolio/{constraints => optimizers}/__init__.py (100%) create mode 100644 deepfolio/optimizers/custom_optimizer.py create mode 100644 deepfolio/optimizers/multi_period_optimization.py create mode 100644 deepfolio/optimizers/robust_optimization.py delete mode 100644 deepfolio/pre_selection/drop_correlated_assets.py delete mode 100644 deepfolio/pre_selection/non_dominated_selection.py delete mode 100644 deepfolio/pre_selection/select_k_extremes.py delete mode 100644 deepfolio/risk_measures/__init__.py delete mode 100644 deepfolio/risk_measures/kurtosis.py delete mode 100644 deepfolio/risk_measures/mean_absolute_deviation.py delete mode 100644 
deepfolio/risk_measures/semi_variance.py delete mode 100644 deepfolio/risk_measures/skew.py delete mode 100644 deepfolio/risk_measures/variance.py rename deepfolio/{distance => solvers}/__init__.py (100%) create mode 100644 deepfolio/solvers/solvers.py create mode 100644 deepfolio/train.py delete mode 100644 deepfolio/utils.py rename deepfolio/{estimators => utils}/__init__.py (100%) create mode 100644 deepfolio/utils/metrics.py create mode 100644 deepfolio/utils/risk_manaagement.py create mode 100644 deepfolio/utils/visualization.py delete mode 100644 examples/1_mean_risk/README.txt delete mode 100644 examples/1_mean_risk/plot_10_tracking_error.py delete mode 100644 examples/1_mean_risk/plot_11_empirical_prior.py delete mode 100644 examples/1_mean_risk/plot_12_black_and_litterman.py delete mode 100644 examples/1_mean_risk/plot_13_factor_model.py delete mode 100644 examples/1_mean_risk/plot_14_black_litterman_factor_model.py delete mode 100644 examples/1_mean_risk/plot_1_maximum_sharpe_ratio.py delete mode 100644 examples/1_mean_risk/plot_2_minimum_CVaR.py delete mode 100644 examples/1_mean_risk/plot_3_efficient_frontier.py delete mode 100644 examples/1_mean_risk/plot_4_mean_variance_cdar.py delete mode 100644 examples/1_mean_risk/plot_5_weight_constraints.py delete mode 100644 examples/1_mean_risk/plot_6_transaction_costs.py delete mode 100644 examples/1_mean_risk/plot_7_management_fees.py delete mode 100644 examples/1_mean_risk/plot_8_regularization.py delete mode 100644 examples/1_mean_risk/plot_9_uncertainty_set.py delete mode 100644 examples/2_risk_budgeting/README.txt delete mode 100644 examples/2_risk_budgeting/plot_1_risk_parity_variance.py delete mode 100644 examples/2_risk_budgeting/plot_2_risk_busgeting_CVaR.py delete mode 100644 examples/2_risk_budgeting/plot_3_risk_parity_ledoit_wolf.py delete mode 100644 examples/3_maxiumum_diversification/README.txt delete mode 100644 examples/3_maxiumum_diversification/plot_1_maximum_divesification.py delete mode 100644 examples/4_distributionally_robust_cvar/README.txt delete mode 100644 examples/4_distributionally_robust_cvar/plot_1_distributionally_robust_cvar.py delete mode 100644 examples/5_clustering/README.txt delete mode 100644 examples/5_clustering/plot_1_hrp_cvar.py delete mode 100644 examples/5_clustering/plot_2_herc_cdar.py delete mode 100644 examples/5_clustering/plot_3_hrp_vs_herc.py delete mode 100644 examples/5_clustering/plot_4_nco.py delete mode 100644 examples/5_clustering/plot_5_nco_grid_search.py delete mode 100644 examples/6_ensemble/README.txt delete mode 100644 examples/6_ensemble/plot_1_stacking.py delete mode 100644 examples/7_pre_selection/README.txt delete mode 100644 examples/7_pre_selection/plot_1_drop_correlated.py delete mode 100644 examples/7_pre_selection/plot_2_select_best_performers.py delete mode 100644 examples/8_data_preparation/README.txt delete mode 100644 examples/8_data_preparation/plot_1_investment_horizon.py delete mode 100644 examples/deep_learning/example_transformer.py diff --git a/README.md b/README.md index bddd08a..2164565 100644 --- a/README.md +++ b/README.md @@ -7,13 +7,12 @@ ![PyPI - Version](https://img.shields.io/pypi/v/deepfolio) [![License](https://img.shields.io/badge/License-BSD_2--Clause-orange.svg)](https://opensource.org/licenses/BSD-2-Clause) -![Python versions](https://img.shields.io/badge/python-3.12%2B-green) +![Python versions](https://img.shields.io/badge/python-3.6%2B-green) ![PyPI downloads](https://img.shields.io/pypi/dm/deepfolio) 
[![Keras](https://img.shields.io/badge/Keras-3.x-red)](https://keras.io/) - -**DeepFolio** is a Python library for portfolio optimization built on top of Keras 3 and TensorFlow 2. It offers a unified interface and tools compatible with Keras to build, fine-tune, and cross-validate portfolio models. +**DeepFolio** is a Python library for portfolio optimization built on top of TensorFlow. It combines traditional optimization techniques with deep learning to provide a toolkit for investment professionals and researchers. ## Installation @@ -23,281 +22,113 @@ Install the package using pip: pip install deepfolio ``` -## Quick Start +## Features + +- Differentiable portfolio optimization +- Real-time optimization +- Multi-asset class support +- Backtesting system +- Risk management tools +- Factor model integration +- Automated hyperparameter tuning +- Trade execution simulation +- Event-driven rebalancing +- Comprehensive reporting +- Sentiment analysis integration +- Tax-aware optimization +- Interactive visualization dashboard -Here's a simple example to get you started with deepfolio: +## Installation + +```bash +pip install deepfolio +``` + +## Quick Start ```python -import numpy as np -from deepfolio.models import MeanRisk -from deepfolio.estimators import EmpiricalReturnsEstimator -from deepfolio.risk_measures import Variance +from deepfolio import DiffOptPortfolio, CustomOptimizer, Backtester -# Generate sample data -returns = np.random.randn(100, 10) # 100 time steps, 10 assets +# Initialize the model +model = DiffOptPortfolio(input_dim=50, n_assets=10, hidden_dim=64) -# Initialize estimators and risk measure -returns_estimator = EmpiricalReturnsEstimator() -risk_measure = Variance() +# Create an optimizer (CustomOptimizer takes a learning rate, not model parameters) +optimizer = CustomOptimizer(learning_rate=0.001) -# Create and fit the model -model = MeanRisk(returns_estimator=returns_estimator, risk_measure=risk_measure) -model.fit(returns) +# Load your data +features, returns = load_your_data() -# Get optimal weights -optimal_weights = model.predict(returns) -print("Optimal portfolio weights:", optimal_weights) -``` +# Create a backtester +backtester = Backtester(model, {'features': features, 'returns': returns}) +# Run backtesting +backtester.run() -## Available Models and Features +# Get results +results = backtester.get_results() +print(f"Sharpe Ratio: {results['sharpe_ratio']}") +print(f"Max Drawdown: {results['max_drawdown']}") +``` ### Automated Trading +## Advanced Usage -DeepFolio now supports automated trading through integration with the Alpaca API. This feature allows users to: +### Real-time Optimization -Place Trades: Automatically place buy/sell orders based on portfolio optimization results. -Execution Logic: Execute trades with customizable order parameters. -Example usage: ```python -from deepfolio.models.automated_trading import AutomatedTrading +from deepfolio import RealtimeOptimizer, DataSource -api_key = 'APCA-API-KEY-ID' -secret_key = 'APCA-API-SECRET-KEY' -base_url = 'https://paper-api.alpaca.markets' - -trader = AutomatedTrading(api_key, secret_key, base_url) -trader.place_trade('AAPL', 10, 'buy') +data_source = DataSource(api_key="your_api_key") +optimizer = RealtimeOptimizer(model, data_source) +optimizer.start() ``` -### Real-Time Data Integration -DeepFolio now includes real-time data integration using WebSocket. This feature enables: -Real-Time Market Data: Receive and process streaming market data for dynamic portfolio adjustments.
-Data Feeds: Integration with IEX Cloud for real-time data streaming. -Example usage: +### Multi-Asset Optimization ```python -from deepfolio.data.real_time_data import RealTimeData - -socket_url = "wss://cloud-sse.iexapis.com/stable/stocksUSNoUTP?token=YOUR_IEX_CLOUD_TOKEN" -real_time_data = RealTimeData(socket_url) -real_time_data.run() +from deepfolio import MultiAssetDiffOptPortfolio +asset_classes = ['stocks', 'bonds', 'commodities'] +input_dims = {'stocks': 50, 'bonds': 30, 'commodities': 20} +hidden_dims = {'stocks': 64, 'bonds': 32, 'commodities': 32} +model = MultiAssetDiffOptPortfolio(asset_classes, input_dims, hidden_dims) ``` -### Portfolio Optimization -- Naive: Equal-Weighted, Random (Dirichlet) -- Convex: Mean-Risk, Distributionally Robust CVaR -- Clustering: Hierarchical Risk Parity, Hierarchical Equal Risk Contribution, Nested Clusters Optimization - -### Expected Returns Estimator -- Empirical -- Equilibrium -- Shrinkage - -### Distance Estimator -- Pearson Distance -- Kendall Distance -- Variation of Information - -### Pre-Selection Transformer -- Non-Dominated Selection -- Select K Extremes (Best or Worst) -- Drop Highly Correlated Assets - -### Risk Measures -- Variance -- Semi-Variance -- Mean Absolute Deviation -- Skew -- Kurtosis - -### Cross-Validation and Model Selection -- Walk Forward -- Combinatorial Purged Cross-Validation - -### Optimization Features -- Minimize Risk -- Transaction Costs -- L1 and L2 Regularization -- Weight Constraints -- Tracking Error Constraints -- Turnover Constraints - -### Deep Learning Models -- Transformer -- RNN - -## Examples - -### Using Hierarchical Risk Parity +### Tax-Aware Optimization ```python -from deepfolio.models import HierarchicalRiskParity -from deepfolio.estimators import EmpiricalReturnsEstimator -from deepfolio.distance import PearsonDistance - -returns = np.random.randn(200, 20) # 200 time steps, 20 assets - -model = HierarchicalRiskParity( - returns_estimator=EmpiricalReturnsEstimator(), - distance_estimator=PearsonDistance() -) -model.fit(returns) -weights = model.predict(returns) -print("HRP portfolio weights:", weights) -``` +from deepfolio import TaxOptimizer -### Optimization with Transformer -```python -from deepfolio.models.transformer import Transformer -import pandas as pd -from sklearn.model_selection import train_test_split -from sklearn.preprocessing import MinMaxScaler -import yfinance as yf -import tensorflow as tf -import numpy as np -import matplotlib.pyplot as plt - -# Set random seeds for reproducibility -tf.random.set_seed(42) -np.random.seed(42) - -# Model parameters -n_feature = 5 # Number of features per asset -n_assets = 10 # Number of assets -n_timestep = 30 # Number of time steps -n_layer = 3 # Number of Transformer layers -n_head = 8 # Number of attention heads -n_hidden = 64 # Number of hidden units -n_dropout = 0.1 # Dropout rate -batch_size = 32 -epochs = 50 -lb = 0.0 # Lower bound for asset weights -ub = 1.0 # Upper bound for asset weights - -def get_stock_data(tickers, start_date, end_date): - data = yf.download(tickers, start=start_date, end=end_date) - return data['Adj Close'] - -# Get the first 10 stocks of S&P 500 as an example -sp500 = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0] -tickers = sp500['Symbol'].tolist()[:n_assets] - -# Download stock data -stock_data = get_stock_data(tickers, '2010-01-01', '2023-01-01') - -# Calculate daily returns -returns = stock_data.pct_change().dropna() - -def calculate_features(returns): - features = 
pd.DataFrame() - for ticker in returns.columns: - # Calculate 5-day, 10-day, and 20-day moving averages - features[f'{ticker}_MA5'] = returns[ticker].rolling(window=5).mean() - features[f'{ticker}_MA10'] = returns[ticker].rolling(window=10).mean() - features[f'{ticker}_MA20'] = returns[ticker].rolling(window=20).mean() - # Calculate 5-day, 10-day, and 20-day volatility - features[f'{ticker}_VOL5'] = returns[ticker].rolling(window=5).std() - features[f'{ticker}_VOL10'] = returns[ticker].rolling(window=10).std() - features[f'{ticker}_VOL20'] = returns[ticker].rolling(window=20).std() - return features.dropna() - -features = calculate_features(returns) - -# Prepare input data -scaler = MinMaxScaler() -scaled_features = scaler.fit_transform(features) - -X = [] -y = [] -for i in range(len(scaled_features) - n_timestep): - X.append(scaled_features[i:i+n_timestep]) - y.append(returns.iloc[i+n_timestep].values) - -X = np.array(X) -y = np.array(y) - -# Split into training and test sets -X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) - -# Custom loss function: negative Sharpe ratio -def negative_sharpe_ratio(y_true, y_pred): - returns = tf.reduce_sum(y_true * y_pred, axis=1) - expected_return = tf.reduce_mean(returns) - stddev = tf.math.reduce_std(returns) - return -expected_return / (stddev + 1e-6) # Add small value to avoid division by zero - -# Create and compile the model -model = Transformer(n_feature * n_assets, n_timestep, n_layer, n_head, n_hidden, n_dropout, n_assets, lb, ub) -model.compile(optimizer='adam', loss=negative_sharpe_ratio) - -# Train the model -history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_split=0.2) - -# Evaluate the model -test_loss = model.evaluate(X_test, y_test) -print(f"Test loss: {test_loss}") - -# Make predictions using the model -predictions = model.predict(X_test) - -# Calculate Sharpe ratio on the test set -test_returns = np.sum(y_test * predictions, axis=1) -sharpe_ratio = np.mean(test_returns) / np.std(test_returns) -print(f"Sharpe Ratio on test set: {sharpe_ratio}") - -# Visualize results -plt.figure(figsize=(10, 5)) -plt.plot(history.history['loss'], label='Training Loss') -plt.plot(history.history['val_loss'], label='Validation Loss') -plt.title('Model Loss') -plt.xlabel('Epoch') -plt.ylabel('Loss') -plt.legend() -plt.show() - -# Visualize asset allocation for the last time step -plt.figure(figsize=(10, 5)) -plt.bar(tickers, predictions[-1]) -plt.title('Asset Allocation for Last Time Step') -plt.xlabel('Assets') -plt.ylabel('Weight') -plt.xticks(rotation=45) -plt.show() +tax_optimizer = TaxOptimizer() +optimal_trades = tax_optimizer.optimize(current_portfolio, target_weights, prices, cost_basis, holding_period) ``` -### Cross-Validation +### Interactive Dashboard ```python -from deepfolio.cross_validation import WalkForward -from deepfolio.models import MeanRisk -from deepfolio.risk_measures import SemiVariance - -cv = WalkForward(n_splits=5, test_size=20) -model = MeanRisk(risk_measure=SemiVariance()) - -for train_index, test_index in cv.split(returns): - train_returns, test_returns = returns[train_index], returns[test_index] - model.fit(train_returns) - weights = model.predict(test_returns) - # Evaluate performance... +from deepfolio import PortfolioDashboard + +dashboard = PortfolioDashboard(portfolio_data, benchmark_data) +dashboard.run() ``` ## Documentation -For full documentation, please visit our [documentation site](https://deepfolio.readthedocs.io/). 
+For detailed documentation, please visit our [documentation site](https://diffopt-portfolio.readthedocs.io). ## Contributing We welcome contributions! Please see our [contributing guidelines](CONTRIBUTING.md) for more details. + ## License This project is licensed under the BSD-2-Clause License- see the [LICENSE](LICENSE) file for details. ## Acknowledgments -- This package leverages the power of Keras 3 for efficient portfolio optimization. +- This package leverages the power of TensorFlow for efficient portfolio optimization. - Thanks to the financial machine learning community for inspiring many of the implemented methods. + + + + diff --git a/deepfolio/backtester.py b/deepfolio/backtester.py new file mode 100644 index 0000000..bacf0ad --- /dev/null +++ b/deepfolio/backtester.py @@ -0,0 +1,45 @@ +import tensorflow as tf +import numpy as np + +class Backtester: + def __init__(self, model, data): + self.model = model + self.data = data + self.results = None + + def run(self, initial_capital=10000): + features = self.data['features'] + returns = self.data['returns'] + + portfolio_values = [initial_capital] + weights_history = [] + + for i in range(len(features)): + current_features = tf.convert_to_tensor(features[i:i+1], dtype=tf.float32) + current_returns = returns[i] + + weights = self.model(current_features) + weights = weights.numpy().flatten() + + portfolio_return = np.sum(weights * current_returns) + portfolio_values.append(portfolio_values[-1] * (1 + portfolio_return)) + weights_history.append(weights) + + self.results = { + 'portfolio_values': portfolio_values, + 'weights_history': weights_history, + 'sharpe_ratio': self.calculate_sharpe_ratio(portfolio_values), + 'max_drawdown': self.calculate_max_drawdown(portfolio_values) + } + + def calculate_sharpe_ratio(self, portfolio_values): + returns = np.diff(portfolio_values) / portfolio_values[:-1] + return np.sqrt(252) * np.mean(returns) / np.std(returns) + + def calculate_max_drawdown(self, portfolio_values): + peak = np.maximum.accumulate(portfolio_values) + drawdown = (peak - portfolio_values) / peak + return np.max(drawdown) + + def get_results(self): + return self.results \ No newline at end of file diff --git a/deepfolio/constraints/tracking_error_constraints.py b/deepfolio/constraints/tracking_error_constraints.py deleted file mode 100644 index d1645b9..0000000 --- a/deepfolio/constraints/tracking_error_constraints.py +++ /dev/null @@ -1,12 +0,0 @@ -import keras -import tensorflow as tf - -class TrackingErrorConstraint(keras.constraints.Constraint): - def __init__(self, benchmark_weights, max_tracking_error): - self.benchmark_weights = benchmark_weights - self.max_tracking_error = max_tracking_error - - def __call__(self, w): - tracking_error = tf.sqrt(tf.reduce_sum(tf.square(w - self.benchmark_weights))) - scale = tf.minimum(1.0, self.max_tracking_error / (tracking_error + 1e-8)) - return self.benchmark_weights + scale * (w - self.benchmark_weights) \ No newline at end of file diff --git a/deepfolio/constraints/turnover_constraints.py b/deepfolio/constraints/turnover_constraints.py deleted file mode 100644 index 41cb504..0000000 --- a/deepfolio/constraints/turnover_constraints.py +++ /dev/null @@ -1,13 +0,0 @@ -import keras -import tensorflow as tf - -class TurnoverConstraint(keras.constraints.Constraint): - def __init__(self, previous_weights, max_turnover): - self.previous_weights = previous_weights - self.max_turnover = max_turnover - - def __call__(self, w): - turnover = tf.reduce_sum(tf.abs(w - self.previous_weights)) - 
if turnover > self.max_turnover: - return self.previous_weights + (w - self.previous_weights) * (self.max_turnover / turnover) - return w \ No newline at end of file diff --git a/deepfolio/constraints/weight_constraints.py b/deepfolio/constraints/weight_constraints.py deleted file mode 100644 index af2d952..0000000 --- a/deepfolio/constraints/weight_constraints.py +++ /dev/null @@ -1,10 +0,0 @@ -import keras -import tensorflow as tf - -class WeightConstraints(keras.constraints.Constraint): - def __init__(self, lower_bound=0.0, upper_bound=1.0): - self.lower_bound = lower_bound - self.upper_bound = upper_bound - - def __call__(self, w): - return tf.clip_by_value(w, self.lower_bound, self.upper_bound) \ No newline at end of file diff --git a/deepfolio/cross_validation/combinatorial_purged_cv.py b/deepfolio/cross_validation/combinatorial_purged_cv.py deleted file mode 100644 index ddd1946..0000000 --- a/deepfolio/cross_validation/combinatorial_purged_cv.py +++ /dev/null @@ -1,35 +0,0 @@ -import keras -import tensorflow as tf -import itertools - -class CombinatorialPurgedCV(keras.layers.Layer): - def __init__(self, n_splits, purge_period, **kwargs): - super().__init__(**kwargs) - self.n_splits = n_splits - self.purge_period = purge_period - - def call(self, inputs): - n_samples = tf.shape(inputs)[1] - indices = tf.range(n_samples) - - split_points = tf.linspace(0.0, tf.cast(n_samples, tf.float32), self.n_splits + 1) - split_points = tf.cast(split_points, tf.int32) - - all_splits = [] - for train_indices in itertools.combinations(range(1, self.n_splits), self.n_splits - 1): - train_indices = list(train_indices) - test_index = list(set(range(1, self.n_splits)) - set(train_indices))[0] - - train_mask = tf.zeros(n_samples, dtype=tf.bool) - for i in train_indices: - train_mask = tf.logical_or(train_mask, (indices >= split_points[i-1]) & (indices < split_points[i])) - - test_mask = (indices >= split_points[test_index-1]) & (indices < split_points[test_index]) - - # Apply purging - purge_mask = (indices < split_points[test_index-1] - self.purge_period) | (indices >= split_points[test_index]) - train_mask = tf.logical_and(train_mask, purge_mask) - - all_splits.append((train_mask, test_mask)) - - return all_splits \ No newline at end of file diff --git a/deepfolio/backtesting/__init__.py b/deepfolio/data/__init__.py similarity index 100% rename from deepfolio/backtesting/__init__.py rename to deepfolio/data/__init__.py diff --git a/deepfolio/data/data_loader.py b/deepfolio/data/data_loader.py index e69de29..7a7ba53 100644 --- a/deepfolio/data/data_loader.py +++ b/deepfolio/data/data_loader.py @@ -0,0 +1,23 @@ +import tensorflow as tf + +''' +class PortfolioDataset(Dataset): + def __init__(self, features, returns): + self.features = features + self.returns = returns + + def __len__(self): + return len(self.features) + + def __getitem__(self, idx): + return self.features[idx], self.returns[idx] + +def get_data_loader(features, returns, batch_size=32): + dataset = PortfolioDataset(features, returns) + return DataLoader(dataset, batch_size=batch_size, shuffle=True) +''' + + +def get_data_loader(features, returns, batch_size=32): + dataset = tf.data.Dataset.from_tensor_slices((features, returns)) + return dataset.shuffle(buffer_size=1000).batch(batch_size).prefetch(tf.data.AUTOTUNE) \ No newline at end of file diff --git a/deepfolio/data/data_preprocessor.py b/deepfolio/data/data_preprocessor.py deleted file mode 100644 index e69de29..0000000 diff --git a/deepfolio/data/real_time_data.py 
b/deepfolio/data/real_time_data.py deleted file mode 100644 index 5694e5f..0000000 --- a/deepfolio/data/real_time_data.py +++ /dev/null @@ -1,33 +0,0 @@ -import websocket -import json - -class RealTimeData: - def __init__(self, socket_url): - self.socket_url = socket_url - - def on_message(self, ws, message): - data = json.loads(message) - # Process real-time data (e.g., update portfolio) - print(data) - - def on_error(self, ws, error): - print(error) - - def on_close(self, ws): - print("### closed ###") - - def on_open(self, ws): - print("### opened ###") - - def run(self): - ws = websocket.WebSocketApp(self.socket_url, - on_message=self.on_message, - on_error=self.on_error, - on_close=self.on_close) - ws.on_open = self.on_open - ws.run_forever() - -if __name__ == "__main__": - socket_url = "wss://cloud-sse.iexapis.com/stable/stocksUSNoUTP?token=YOUR_IEX_CLOUD_TOKEN" - real_time_data = RealTimeData(socket_url) - real_time_data.run() diff --git a/deepfolio/distance/kendall_distance.py b/deepfolio/distance/kendall_distance.py deleted file mode 100644 index 208c02d..0000000 --- a/deepfolio/distance/kendall_distance.py +++ /dev/null @@ -1,25 +0,0 @@ -import keras -import tensorflow as tf - -class KendallDistance(keras.layers.Layer): - def call(self, inputs): - def kendall_tau(x, y): - n = tf.shape(x)[0] - pairs = tf.shape(x)[0] * (tf.shape(x)[0] - 1) // 2 - concordant = tf.reduce_sum(tf.sign(x[None, :] - x[:, None]) * tf.sign(y[None, :] - y[:, None])) - tau = concordant / tf.cast(pairs, tf.float32) - return 1.0 - tau - - n_assets = tf.shape(inputs)[2] - distance_matrix = tf.zeros((tf.shape(inputs)[0], n_assets, n_assets)) - - for i in range(n_assets): - for j in range(i+1, n_assets): - distance = kendall_tau(inputs[:, :, i], inputs[:, :, j]) - distance_matrix = tf.tensor_scatter_nd_update( - distance_matrix, - [[k, i, j] for k in range(tf.shape(inputs)[0])] + [[k, j, i] for k in range(tf.shape(inputs)[0])], - tf.tile(distance[tf.newaxis], [2 * tf.shape(inputs)[0]]) - ) - - return distance_matrix \ No newline at end of file diff --git a/deepfolio/distance/pearson_distance.py b/deepfolio/distance/pearson_distance.py deleted file mode 100644 index e69de29..0000000 diff --git a/deepfolio/distance/variation_of_information.py b/deepfolio/distance/variation_of_information.py deleted file mode 100644 index 47303ed..0000000 --- a/deepfolio/distance/variation_of_information.py +++ /dev/null @@ -1,34 +0,0 @@ -import keras -import tensorflow as tf - -class VariationOfInformation(keras.layers.Layer): - def call(self, inputs): - def entropy(x): - hist = tf.histogram_fixed_width(x, [tf.reduce_min(x), tf.reduce_max(x)], nbins=20) - probs = hist / tf.reduce_sum(hist) - return -tf.reduce_sum(probs * tf.math.log(probs + 1e-10)) - - def mutual_information(x, y): - joint_hist = tf.histogram2d(x, y, bins=[20, 20])[0] - joint_probs = joint_hist / tf.reduce_sum(joint_hist) - marginal_x = tf.reduce_sum(joint_probs, axis=1) - marginal_y = tf.reduce_sum(joint_probs, axis=0) - mi = tf.reduce_sum(joint_probs * tf.math.log(joint_probs / (marginal_x[:, tf.newaxis] * marginal_y[tf.newaxis, :]) + 1e-10)) - return mi - - n_assets = tf.shape(inputs)[2] - vi_matrix = tf.zeros((tf.shape(inputs)[0], n_assets, n_assets)) - - for i in range(n_assets): - for j in range(i+1, n_assets): - h_i = entropy(inputs[:, :, i]) - h_j = entropy(inputs[:, :, j]) - mi = mutual_information(inputs[:, :, i], inputs[:, :, j]) - vi = h_i + h_j - 2 * mi - vi_matrix = tf.tensor_scatter_nd_update( - vi_matrix, - [[k, i, j] for k in 
range(tf.shape(inputs)[0])] + [[k, j, i] for k in range(tf.shape(inputs)[0])], - tf.tile(vi[tf.newaxis], [2 * tf.shape(inputs)[0]]) - ) - - return vi_matrix \ No newline at end of file diff --git a/deepfolio/estimators/equilibrium_estimator.py b/deepfolio/estimators/equilibrium_estimator.py deleted file mode 100644 index e69de29..0000000 diff --git a/deepfolio/estimators/returns_estimators.py b/deepfolio/estimators/returns_estimators.py deleted file mode 100644 index da51f1c..0000000 --- a/deepfolio/estimators/returns_estimators.py +++ /dev/null @@ -1,26 +0,0 @@ -import keras -import tensorflow as tf - -class EmpiricalReturnsEstimator(keras.layers.Layer): - def call(self, inputs): - return tf.reduce_mean(inputs, axis=1) - -class EquilibriumEstimator(keras.layers.Layer): - def __init__(self, risk_aversion=1.0, **kwargs): - super().__init__(**kwargs) - self.risk_aversion = risk_aversion - - def call(self, inputs): - cov = tfp.stats.covariance(inputs) - market_weights = tf.ones_like(inputs[:, 0, :]) / tf.shape(inputs)[2] - return self.risk_aversion * tf.linalg.matvec(cov, market_weights) - -class ShrinkageEstimator(keras.layers.Layer): - def __init__(self, shrinkage_factor=0.5, **kwargs): - super().__init__(**kwargs) - self.shrinkage_factor = shrinkage_factor - - def call(self, inputs): - sample_mean = tf.reduce_mean(inputs, axis=1) - grand_mean = tf.reduce_mean(sample_mean, axis=1, keepdims=True) - return self.shrinkage_factor * grand_mean + (1 - self.shrinkage_factor) * sample_mean \ No newline at end of file diff --git a/deepfolio/estimators/shrinkage_estimator.py b/deepfolio/estimators/shrinkage_estimator.py deleted file mode 100644 index e69de29..0000000 diff --git a/deepfolio/hyperparamater_tuning.py b/deepfolio/hyperparamater_tuning.py new file mode 100644 index 0000000..0273435 --- /dev/null +++ b/deepfolio/hyperparamater_tuning.py @@ -0,0 +1,31 @@ +import tensorflow as tf +from kerastuner import RandomSearch +from models.diffopt_portfolio import DiffOptPortfolio +def build_model(hp): + model = DiffOptPortfolio( + input_dim=hp.Int('input_dim', 10, 100), + n_assets=hp.Int('n_assets', 5, 50), + hidden_dim=hp.Int('hidden_dim', 32, 256, step=32) + ) + model.compile( + optimizer=tf.keras.optimizers.Adam(hp.Float('learning_rate', 1e-4, 1e-2, sampling='log')), + loss='mse' + ) + return model + +def tune_hyperparameters(x, y, epochs=10, max_trials=10): + tuner = RandomSearch( + build_model, + objective='val_loss', + max_trials=max_trials, + executions_per_trial=3, + directory='hyperparam_tuning', + project_name='diffopt_portfolio' + ) + + tuner.search(x, y, epochs=epochs, validation_split=0.2) + + best_model = tuner.get_best_models(num_models=1)[0] + best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0] + + return best_model, best_hyperparameters \ No newline at end of file diff --git a/deepfolio/models/automated_trading.py b/deepfolio/models/automated_trading.py deleted file mode 100644 index dfa7693..0000000 --- a/deepfolio/models/automated_trading.py +++ /dev/null @@ -1,22 +0,0 @@ -import alpaca_trade_api as tradeapi - -class AutomatedTrading: - def __init__(self, api_key, secret_key, base_url): - self.api = tradeapi.REST(api_key, secret_key, base_url) - - def place_trade(self, symbol, qty, side): - self.api.submit_order( - symbol=symbol, - qty=qty, - side=side, - type='market', - time_in_force='gtc' - ) - -if __name__ == "__main__": - api_key = 'APCA-API-KEY-ID' - secret_key = 'APCA-API-SECRET-KEY' - base_url = 'https://paper-api.alpaca.markets' - - trader = 
AutomatedTrading(api_key, secret_key, base_url) - trader.place_trade('AAPL', 10, 'buy') diff --git a/deepfolio/models/base.py b/deepfolio/models/base.py deleted file mode 100644 index 305cea4..0000000 --- a/deepfolio/models/base.py +++ /dev/null @@ -1,17 +0,0 @@ -import keras - -class BaseModel(keras.Model): - def __init__(self): - super().__init__() - - def call(self, inputs): - raise NotImplementedError("Subclasses must implement this method") - - def fit(self, returns, **kwargs): - super().fit(returns, returns, **kwargs) - - def predict(self, returns): - return super().predict(returns) - - - diff --git a/deepfolio/models/deep_learning_layer.py b/deepfolio/models/deep_learning_layer.py new file mode 100644 index 0000000..9ee2cbd --- /dev/null +++ b/deepfolio/models/deep_learning_layer.py @@ -0,0 +1,16 @@ +import torch.nn as nn + +class DeepLearningLayer(nn.Module): + def __init__(self, input_dim, hidden_dim, output_dim): + super().__init__() + self.network = nn.Sequential( + nn.Linear(input_dim, hidden_dim), + nn.ReLU(), + nn.Linear(hidden_dim, hidden_dim), + nn.ReLU(), + nn.Linear(hidden_dim, output_dim), + nn.Softmax(dim=1) + ) + + def forward(self, x): + return self.network(x) \ No newline at end of file diff --git a/deepfolio/models/diffopt_portfolio.py b/deepfolio/models/diffopt_portfolio.py new file mode 100644 index 0000000..3b56260 --- /dev/null +++ b/deepfolio/models/diffopt_portfolio.py @@ -0,0 +1,48 @@ +import tensorflow as tf + +class DiffOptPortfolio(tf.keras.Model): + def __init__(self, input_dim, n_assets, hidden_dim): + super(DiffOptPortfolio, self).__init__() + self.feature_extractor = tf.keras.Sequential([ + tf.keras.layers.Dense(hidden_dim, activation='relu', input_shape=(input_dim,)), + tf.keras.layers.Dense(hidden_dim, activation='relu') + ]) + self.mu_predictor = tf.keras.layers.Dense(n_assets) + self.sigma_predictor = tf.keras.layers.Dense(n_assets * n_assets) + self.qp_layer = QPLayer(n_assets) + + def call(self, inputs): + features = self.feature_extractor(inputs) + mu = self.mu_predictor(features) + sigma = tf.reshape(self.sigma_predictor(features), (-1, mu.shape[1], mu.shape[1])) + return self.qp_layer([mu, sigma]) + +class QPLayer(tf.keras.layers.Layer): + def __init__(self, n_assets): + super(QPLayer, self).__init__() + self.n_assets = n_assets + + def build(self, input_shape): + self.W = self.add_weight(shape=(self.n_assets, self.n_assets), + initializer='random_normal', + trainable=True) + + def call(self, inputs): + mu, Sigma = inputs + def objective_fn(w): + return tf.reduce_sum(tf.matmul(w, tf.matmul(Sigma, w, transpose_b=True))) - tf.reduce_sum(mu * w) + + constraints = [{'type': 'eq', 'fun': lambda w: tf.reduce_sum(w) - 1.0}] + bounds = [(0, None) for _ in range(self.n_assets)] + + initial_w = tf.ones((self.n_assets,)) / self.n_assets + + optimized_w = tf.py_function( + lambda: scipy.optimize.minimize( + objective_fn, initial_w, method='SLSQP', + constraints=constraints, bounds=bounds + ).x, + [], tf.float32 + ) + + return optimized_w \ No newline at end of file diff --git a/deepfolio/models/distributionally_robust_cvar.py b/deepfolio/models/distributionally_robust_cvar.py deleted file mode 100644 index 2d56a64..0000000 --- a/deepfolio/models/distributionally_robust_cvar.py +++ /dev/null @@ -1,22 +0,0 @@ -import keras -import tensorflow as tf -from .base import BaseModel - -class DistributionallyRobustCVaR(BaseModel): - def __init__(self, alpha=0.05, epsilon=0.1, **kwargs): - super().__init__(**kwargs) - self.alpha = alpha - self.epsilon = epsilon - 
self.output_layer = keras.layers.Dense(1, activation='softmax') - - def call(self, inputs): - # Simplified implementation of DR-CVaR - returns = inputs - sorted_returns = tf.sort(returns, axis=1) - var = sorted_returns[:, int(self.alpha * tf.shape(returns)[1])] - cvar = tf.reduce_mean(tf.where(returns <= var[:, tf.newaxis], returns, 0), axis=1) - - # Add distributional robustness (simplified) - robust_cvar = cvar - self.epsilon * tf.math.reduce_std(returns, axis=1) - - return self.output_layer(robust_cvar[:, tf.newaxis]) \ No newline at end of file diff --git a/deepfolio/models/equal_weighted.py b/deepfolio/models/equal_weighted.py deleted file mode 100644 index 88e7eb3..0000000 --- a/deepfolio/models/equal_weighted.py +++ /dev/null @@ -1,8 +0,0 @@ -import keras -import tensorflow as tf -from .base import BaseModel - -class EqualWeighted(BaseModel): - def call(self, inputs): - n_assets = tf.shape(inputs)[2] - return tf.ones_like(inputs[:, 0, :]) / tf.cast(n_assets, tf.float32) \ No newline at end of file diff --git a/deepfolio/models/factor_model.py b/deepfolio/models/factor_model.py new file mode 100644 index 0000000..f5ca36e --- /dev/null +++ b/deepfolio/models/factor_model.py @@ -0,0 +1,12 @@ +import tensorflow as tf + +class FactorModel(tf.keras.Model): + def __init__(self, n_factors): + super(FactorModel, self).__init__() + self.factor_loadings = tf.keras.layers.Dense(n_factors, use_bias=False) + + def call(self, factor_returns): + return self.factor_loadings(factor_returns) + + def get_factor_exposures(self): + return self.factor_loadings.weights[0] diff --git a/deepfolio/models/feature_extractor.py b/deepfolio/models/feature_extractor.py new file mode 100644 index 0000000..02a61d0 --- /dev/null +++ b/deepfolio/models/feature_extractor.py @@ -0,0 +1,14 @@ +import torch.nn as nn + +class FeatureExtractor(nn.Module): + def __init__(self, input_dim, hidden_dim): + super().__init__() + self.network = nn.Sequential( + nn.Linear(input_dim, hidden_dim), + nn.ReLU(), + nn.Linear(hidden_dim, hidden_dim), + nn.ReLU() + ) + + def forward(self, x): + return self.network(x) \ No newline at end of file diff --git a/deepfolio/models/hierachical_risk_parity.py b/deepfolio/models/hierachical_risk_parity.py deleted file mode 100644 index 45a0e9a..0000000 --- a/deepfolio/models/hierachical_risk_parity.py +++ /dev/null @@ -1,35 +0,0 @@ -import keras -import tensorflow as tf -from .base import BaseModel - -class HierarchicalClusteringLayer(keras.layers.Layer): - def __init__(self, distance_estimator, **kwargs): - super().__init__(**kwargs) - self.distance_estimator = distance_estimator - - def call(self, inputs): - distances = self.distance_estimator(inputs) - # Implement hierarchical clustering algorithm here - # This is a placeholder and needs to be implemented - clusters = tf.eye(tf.shape(inputs)[1]) # Placeholder - return clusters - -class HRPWeightLayer(keras.layers.Layer): - def call(self, inputs): - returns, clusters = inputs - # Implement HRP weight calculation here - # This is a placeholder and needs to be implemented - weights = tf.ones_like(returns[:, 0, :]) / tf.shape(returns)[2] - return weights - -class HierarchicalRiskParity(BaseModel): - def __init__(self, returns_estimator, distance_estimator): - super().__init__() - self.returns_estimator = returns_estimator - self.clustering_layer = HierarchicalClusteringLayer(distance_estimator) - self.weight_layer = HRPWeightLayer() - - def call(self, inputs): - x = self.returns_estimator(inputs) - clusters = self.clustering_layer(x) - return 
self.weight_layer([x, clusters]) \ No newline at end of file diff --git a/deepfolio/models/mean_risk.py b/deepfolio/models/mean_risk.py deleted file mode 100644 index 6ed51e1..0000000 --- a/deepfolio/models/mean_risk.py +++ /dev/null @@ -1,26 +0,0 @@ - -import keras -import tensorflow as tf -from .base import BaseModel - -class MeanRiskLayer(keras.layers.Layer): - def __init__(self, risk_measure, **kwargs): - super().__init__(**kwargs) - self.risk_measure = risk_measure - - def call(self, inputs): - mean_returns = tf.reduce_mean(inputs, axis=1) - risks = self.risk_measure(inputs) - return tf.stack([mean_returns, risks], axis=1) - -class MeanRisk(BaseModel): - def __init__(self, returns_estimator, risk_measure): - super().__init__() - self.returns_estimator = returns_estimator - self.mean_risk_layer = MeanRiskLayer(risk_measure) - self.output_layer = keras.layers.Dense(1, activation='softmax') - - def call(self, inputs): - x = self.returns_estimator(inputs) - x = self.mean_risk_layer(x) - return self.output_layer(x) \ No newline at end of file diff --git a/deepfolio/models/merge_layer.py b/deepfolio/models/merge_layer.py new file mode 100644 index 0000000..18d41bf --- /dev/null +++ b/deepfolio/models/merge_layer.py @@ -0,0 +1,10 @@ +import torch + +class MergeLayer(torch.nn.Module): + def __init__(self, input_dim): + super().__init__() + self.weight = torch.nn.Parameter(torch.rand(input_dim)) + + def forward(self, qp_output, dl_output): + w = torch.sigmoid(self.weight) + return w * qp_output + (1 - w) * dl_output \ No newline at end of file diff --git a/deepfolio/models/parameter_predictor.py b/deepfolio/models/parameter_predictor.py new file mode 100644 index 0000000..9e7092d --- /dev/null +++ b/deepfolio/models/parameter_predictor.py @@ -0,0 +1,12 @@ +import torch.nn as nn + +class ParameterPredictor(nn.Module): + def __init__(self, input_dim, n_assets): + super().__init__() + self.mu_predictor = nn.Linear(input_dim, n_assets) + self.sigma_predictor = nn.Linear(input_dim, n_assets * n_assets) + + def forward(self, x): + mu = self.mu_predictor(x) + sigma = self.sigma_predictor(x).view(-1, mu.size(1), mu.size(1)) + return mu, sigma \ No newline at end of file diff --git a/deepfolio/models/qp_layer.py b/deepfolio/models/qp_layer.py new file mode 100644 index 0000000..73d9468 --- /dev/null +++ b/deepfolio/models/qp_layer.py @@ -0,0 +1,19 @@ +import cvxpy as cp +from cvxpylayers.torch import CvxpyLayer +import torch.nn as nn + +class QPLayer(nn.Module): + def __init__(self, n_assets): + super().__init__() + mu = cp.Parameter(n_assets) + Sigma = cp.Parameter((n_assets, n_assets)) + w = cp.Variable(n_assets) + + obj = cp.Minimize(cp.quad_form(w, Sigma) - mu.T @ w) + constraints = [cp.sum(w) == 1, w >= 0] + + problem = cp.Problem(obj, constraints) + self.qp_layer = CvxpyLayer(problem, parameters=[mu, Sigma], variables=[w]) + + def forward(self, mu, Sigma): + return self.qp_layer(mu, Sigma)[0] \ No newline at end of file diff --git a/deepfolio/models/random_weights.py b/deepfolio/models/random_weights.py deleted file mode 100644 index ac60bbf..0000000 --- a/deepfolio/models/random_weights.py +++ /dev/null @@ -1,9 +0,0 @@ -import keras -import tensorflow as tf -from .base import BaseModel - -class RandomWeights(BaseModel): - def call(self, inputs): - n_assets = tf.shape(inputs)[2] - random_weights = tf.random.uniform(shape=(tf.shape(inputs)[0], n_assets)) - return random_weights / tf.reduce_sum(random_weights, axis=1, keepdims=True) \ No newline at end of file diff --git a/deepfolio/models/rnn.py
b/deepfolio/models/rnn.py deleted file mode 100644 index 16b62c3..0000000 --- a/deepfolio/models/rnn.py +++ /dev/null @@ -1,50 +0,0 @@ -import tensorflow as tf -import numpy as np - -class RNN(tf.keras.Model): - def __init__(self, n_feature, n_timestep, n_hidden, n_layer, n_dropout, n_output, lb, ub): - super().__init__() - self.n_feature = n_feature - self.n_timestep = n_timestep - self.n_hidden = n_hidden - self.n_layer = n_layer - self.n_dropout = n_dropout - self.n_output = n_output - self.lb = lb - self.ub = ub - - # LSTM layers - self.lstm_layers = [ - tf.keras.layers.LSTM(n_hidden, return_sequences=True, dropout=n_dropout) - for _ in range(n_layer - 1) - ] - self.lstm_layers.append(tf.keras.layers.LSTM(n_hidden, dropout=n_dropout)) - - # Output layer - self.out = tf.keras.layers.Dense(n_output) - - def call(self, x, training=False): - # x shape: (batch_size, n_timestep, n_feature) - for lstm_layer in self.lstm_layers: - x = lstm_layer(x, training=training) - - # x shape after LSTM: (batch_size, n_hidden) - output = self.out(x) - output = tf.nn.softmax(output, axis=1) - output = tf.map_fn(lambda w: self.rebalance(w, self.lb, self.ub), output) - return output - - def rebalance(self, weight, lb, ub): - old = weight - weight_clamped = tf.clip_by_value(old, lb, ub) - while True: - leftover = tf.reduce_sum(old - weight_clamped) - nominees = tf.boolean_mask(weight_clamped, weight_clamped != ub) - gift = leftover * (nominees / tf.reduce_sum(nominees)) - weight_clamped = tf.where(weight_clamped != ub, weight_clamped + gift, weight_clamped) - old = weight_clamped - if tf.reduce_sum(tf.cast(weight_clamped > ub, tf.int32)) == 0: - break - else: - weight_clamped = tf.clip_by_value(old, lb, ub) - return weight_clamped \ No newline at end of file diff --git a/deepfolio/models/transformer.py b/deepfolio/models/transformer.py deleted file mode 100644 index 70b608d..0000000 --- a/deepfolio/models/transformer.py +++ /dev/null @@ -1,201 +0,0 @@ -import tensorflow as tf -import numpy as np -import math - -def create_mask(batch, sequence_length): - mask = tf.zeros((batch, sequence_length, sequence_length)) - for i in range(sequence_length): - mask = mask[:, i, :i+1].assign(1) - return mask - -class Norm(tf.keras.layers.Layer): - def __init__(self, d_model, eps=1e-6): - super().__init__() - self.size = d_model - self.eps = eps - self.alpha = tf.Variable(tf.ones(self.size)) - self.bias = tf.Variable(tf.zeros(self.size)) - - def call(self, x): - mean = tf.reduce_mean(x, axis=-1, keepdims=True) - std = tf.math.reduce_std(x, axis=-1, keepdims=True) - return self.alpha * (x - mean) / (std + self.eps) + self.bias - -def attention(q, k, v, d_k, mask=None, dropout=None, return_weights=False): - scores = tf.matmul(q, k, transpose_b=True) / tf.math.sqrt(tf.cast(d_k, tf.float32)) - - if mask is not None: - scores += (mask * -1e9) - - scores = tf.nn.softmax(scores, axis=-1) - - if dropout is not None: - scores = dropout(scores) - - output = tf.matmul(scores, v) - - if return_weights: - return output, scores - return output - -class MultiHeadAttention(tf.keras.layers.Layer): - def __init__(self, heads, d_model, dropout=0.1): - super().__init__() - self.d_model = d_model - self.d_k = d_model // heads - self.h = heads - - self.q_linear = tf.keras.layers.Dense(d_model) - self.v_linear = tf.keras.layers.Dense(d_model) - self.k_linear = tf.keras.layers.Dense(d_model) - - self.dropout = tf.keras.layers.Dropout(dropout) - self.out = tf.keras.layers.Dense(d_model) - - def call(self, q, k, v, mask=None, return_weights=False): 
- bs = tf.shape(q)[0] - - k = self.k_linear(k) - q = self.q_linear(q) - v = self.v_linear(v) - - k = tf.reshape(k, (bs, -1, self.h, self.d_k)) - q = tf.reshape(q, (bs, -1, self.h, self.d_k)) - v = tf.reshape(v, (bs, -1, self.h, self.d_k)) - - k = tf.transpose(k, perm=[0, 2, 1, 3]) - q = tf.transpose(q, perm=[0, 2, 1, 3]) - v = tf.transpose(v, perm=[0, 2, 1, 3]) - - if return_weights: - scores, weights = attention(q, k, v, self.d_k, mask, self.dropout, return_weights=return_weights) - else: - scores = attention(q, k, v, self.d_k, mask, self.dropout) - - concat = tf.transpose(scores, perm=[0, 2, 1, 3]) - concat = tf.reshape(concat, (bs, -1, self.d_model)) - output = self.out(concat) - - if return_weights: - return output, weights - else: - return output - -class FeedForward(tf.keras.layers.Layer): - def __init__(self, d_model, d_ff=400, dropout=0.1): - super().__init__() - self.linear_1 = tf.keras.layers.Dense(d_ff) - self.dropout = tf.keras.layers.Dropout(dropout) - self.linear_2 = tf.keras.layers.Dense(d_model) - - def call(self, x): - x = self.dropout(tf.nn.relu(self.linear_1(x))) - x = self.linear_2(x) - return x - -class EncoderLayer(tf.keras.layers.Layer): - def __init__(self, d_model, heads, dropout=0.1): - super().__init__() - self.norm_1 = Norm(d_model) - self.norm_2 = Norm(d_model) - self.attn = MultiHeadAttention(heads, d_model, dropout=dropout) - self.ff = FeedForward(d_model, dropout=dropout) - self.dropout_1 = tf.keras.layers.Dropout(dropout) - self.dropout_2 = tf.keras.layers.Dropout(dropout) - - def call(self, x, mask=None, return_weights=False): - x2 = self.norm_1(x) - if return_weights: - attn_output, attn_weights = self.attn(x2, x2, x2, mask, return_weights=return_weights) - else: - attn_output = self.attn(x2, x2, x2, mask) - x = x + self.dropout_1(attn_output) - x2 = self.norm_2(x) - x = x + self.dropout_2(self.ff(x2)) - if return_weights: - return x, attn_weights - else: - return x - -class PositionalEncoder(tf.keras.layers.Layer): - def __init__(self, d_model, max_seq_len=100, dropout=0.1): - super().__init__() - self.d_model = d_model - self.dropout = tf.keras.layers.Dropout(dropout) - - pe = np.zeros((max_seq_len, d_model)) - for pos in range(max_seq_len): - for i in range(0, d_model, 2): - pe[pos, i] = math.sin(pos / (10000 ** ((2 * i) / d_model))) - pe[pos, i + 1] = math.cos(pos / (10000 ** ((2 * (i + 1)) / d_model))) - - self.pe = tf.constant(pe, dtype=tf.float32) - - def call(self, x): - x = x * tf.math.sqrt(tf.cast(self.d_model, tf.float32)) - seq_len = tf.shape(x)[1] - pe = self.pe[:seq_len, :] - return self.dropout(x + pe) - -class Encoder(tf.keras.layers.Layer): - def __init__(self, input_size, seq_len, N, heads, dropout): - super().__init__() - self.N = N - self.pe = PositionalEncoder(input_size, seq_len, dropout=dropout) - self.layers = [EncoderLayer(input_size, heads, dropout) for _ in range(N)] - self.norm = Norm(input_size) - - def call(self, x, mask=None, return_weights=False): - x = self.pe(x) - for i in range(self.N): - if i == 0 and return_weights: - x, weights = self.layers[i](x, mask=mask, return_weights=return_weights) - else: - x = self.layers[i](x, mask=mask) - - if return_weights: - return self.norm(x), weights - else: - return self.norm(x) - -class Transformer(tf.keras.Model): - def __init__(self, n_feature, n_timestep, n_layer, n_head, n_dropout, n_output, lb, ub): - super().__init__() - self.encoder = Encoder(n_feature, n_timestep, n_layer, n_head, n_dropout) - self.out = tf.keras.layers.Dense(n_output) - self.tempmaxpool = 
tf.keras.layers.GlobalMaxPooling1D() - self.lb = lb - self.ub = ub - - def call(self, src, return_weights=False): - mask = create_mask(tf.shape(src)[0], tf.shape(src)[1]) - - if return_weights: - e_outputs, weights = self.encoder(src, mask, return_weights=return_weights) - else: - e_outputs = self.encoder(src, mask) - - e_outputs = self.tempmaxpool(e_outputs) - output = self.out(e_outputs) - output = tf.nn.softmax(output, axis=1) - output = tf.map_fn(lambda x: self.rebalance(x, self.lb, self.ub), output) - - if return_weights: - return output, weights - else: - return output - - def rebalance(self, weight, lb, ub): - old = weight - weight_clamped = tf.clip_by_value(old, lb, ub) - while True: - leftover = tf.reduce_sum(old - weight_clamped) - nominees = tf.boolean_mask(weight_clamped, weight_clamped != ub) - gift = leftover * (nominees / tf.reduce_sum(nominees)) - weight_clamped = tf.where(weight_clamped != ub, weight_clamped + gift, weight_clamped) - old = weight_clamped - if tf.reduce_sum(tf.cast(weight_clamped > ub, tf.int32)) == 0: - break - else: - weight_clamped = tf.clip_by_value(old, lb, ub) - return weight_clamped diff --git a/deepfolio/constraints/__init__.py b/deepfolio/optimizers/__init__.py similarity index 100% rename from deepfolio/constraints/__init__.py rename to deepfolio/optimizers/__init__.py diff --git a/deepfolio/optimizers/custom_optimizer.py b/deepfolio/optimizers/custom_optimizer.py new file mode 100644 index 0000000..98786f0 --- /dev/null +++ b/deepfolio/optimizers/custom_optimizer.py @@ -0,0 +1,25 @@ +import tensorflow as tf + +class CustomOptimizer(tf.keras.optimizers.Optimizer): + def __init__(self, learning_rate=0.001, name="CustomOptimizer", **kwargs): + super(CustomOptimizer, self).__init__(name, **kwargs) + self._lr = learning_rate + + def _create_slots(self, var_list): + for var in var_list: + self.add_slot(var, "momentum") + + @tf.function + def _resource_apply_dense(self, grad, var, apply_state): + var_dtype = var.dtype.base_dtype + lr_t = self._lr + momentum = self.get_slot(var, "momentum") + + momentum_t = momentum.assign(0.9 * momentum + 0.1 * grad) + var_update = var.assign_sub(lr_t * momentum_t) + + return tf.group(*[var_update, momentum_t]) + + def get_config(self): + base_config = super(CustomOptimizer, self).get_config() + return {**base_config, "learning_rate": self._lr} \ No newline at end of file diff --git a/deepfolio/optimizers/multi_period_optimization.py b/deepfolio/optimizers/multi_period_optimization.py new file mode 100644 index 0000000..ef0d830 --- /dev/null +++ b/deepfolio/optimizers/multi_period_optimization.py @@ -0,0 +1,75 @@ +import tensorflow as tf +import cvxpy as cp +import numpy as np + +class MultiPeriodOptimizer(tf.keras.layers.Layer): + def __init__(self, n_assets, n_periods, transaction_cost=0.001): + super(MultiPeriodOptimizer, self).__init__() + self.n_assets = n_assets + self.n_periods = n_periods + self.transaction_cost = transaction_cost + + def call(self, inputs): + mu_sequence, Sigma_sequence = inputs + + def solve_multi_period_qp(mu_sequence, Sigma_sequence): + w = cp.Variable((self.n_periods, self.n_assets)) + risk_aversion = cp.Parameter(nonneg=True) + + objective = 0 + constraints = [cp.sum(w[0]) == 1, w[0] >= 0] + + for t in range(self.n_periods): + objective += mu_sequence[t] @ w[t] - risk_aversion * cp.quad_form(w[t], Sigma_sequence[t]) + if t > 0: + objective -= self.transaction_cost * cp.sum(cp.abs(w[t] - w[t-1])) + constraints += [cp.sum(w[t]) == 1, w[t] >= 0] + + prob = cp.Problem(cp.Maximize(objective), 
constraints) + risk_aversion.value = 1.0 # Initial value for risk aversion + + try: + prob.solve(solver=cp.SCS) + if prob.status != cp.OPTIMAL: + raise ValueError('Optimization problem not solved optimally') + return w.value + except: + # Fallback to equal-weight portfolio if optimization fails + return np.ones((self.n_periods, self.n_assets)) / self.n_assets + + optimized_w = tf.py_function( + func=solve_multi_period_qp, + inp=[mu_sequence, Sigma_sequence], + Tout=tf.float32 + ) + + return optimized_w + +class MultiPeriodDiffOptPortfolio(tf.keras.Model): + def __init__(self, input_dim, n_assets, n_periods, hidden_dim, transaction_cost=0.001): + super(MultiPeriodDiffOptPortfolio, self).__init__() + self.n_periods = n_periods + self.feature_extractor = tf.keras.Sequential([ + tf.keras.layers.Dense(hidden_dim, activation='relu', input_shape=(input_dim,)), + tf.keras.layers.Dense(hidden_dim, activation='relu') + ]) + self.mu_predictor = tf.keras.layers.Dense(n_assets) + self.sigma_predictor = tf.keras.layers.Dense(n_assets * n_assets) + self.multi_period_optimizer = MultiPeriodOptimizer(n_assets, n_periods, transaction_cost) + + def call(self, inputs): + features_sequence = tf.unstack(inputs, axis=1) # Unstack along time dimension + mu_sequence = [] + sigma_sequence = [] + + for features in features_sequence: + extracted_features = self.feature_extractor(features) + mu = self.mu_predictor(extracted_features) + sigma = tf.reshape(self.sigma_predictor(extracted_features), (-1, mu.shape[1], mu.shape[1])) + mu_sequence.append(mu) + sigma_sequence.append(sigma) + + mu_sequence = tf.stack(mu_sequence, axis=1) + sigma_sequence = tf.stack(sigma_sequence, axis=1) + + return self.multi_period_optimizer([mu_sequence, sigma_sequence]) \ No newline at end of file diff --git a/deepfolio/optimizers/robust_optimization.py b/deepfolio/optimizers/robust_optimization.py new file mode 100644 index 0000000..efa698a --- /dev/null +++ b/deepfolio/optimizers/robust_optimization.py @@ -0,0 +1,60 @@ +import tensorflow as tf +import cvxpy as cp +import numpy as np + +class RobustMeanVarianceOptimizer(tf.keras.layers.Layer): + def __init__(self, n_assets, uncertainty_budget=0.1): + super(RobustMeanVarianceOptimizer, self).__init__() + self.n_assets = n_assets + self.uncertainty_budget = uncertainty_budget + + def call(self, inputs): + mu, Sigma = inputs + + def solve_robust_qp(mu, Sigma): + w = cp.Variable(self.n_assets) + kappa = cp.Parameter(nonneg=True) + + obj = cp.Maximize(mu @ w - kappa * cp.quad_form(w, Sigma)) + constraints = [ + cp.sum(w) == 1, + w >= 0, + cp.norm(cp.sqrt(Sigma) @ w) <= self.uncertainty_budget + ] + + prob = cp.Problem(obj, constraints) + kappa.value = 1.0 # Initial value for risk aversion + + try: + prob.solve(solver=cp.SCS) + if prob.status != cp.OPTIMAL: + raise ValueError('Optimization problem not solved optimally') + return w.value + except: + # Fallback to equal-weight portfolio if optimization fails + return np.ones(self.n_assets) / self.n_assets + + optimized_w = tf.py_function( + func=solve_robust_qp, + inp=[mu, Sigma], + Tout=tf.float32 + ) + + return optimized_w + +class RobustDiffOptPortfolio(tf.keras.Model): + def __init__(self, input_dim, n_assets, hidden_dim, uncertainty_budget=0.1): + super(RobustDiffOptPortfolio, self).__init__() + self.feature_extractor = tf.keras.Sequential([ + tf.keras.layers.Dense(hidden_dim, activation='relu', input_shape=(input_dim,)), + tf.keras.layers.Dense(hidden_dim, activation='relu') + ]) + self.mu_predictor = tf.keras.layers.Dense(n_assets) + 
self.sigma_predictor = tf.keras.layers.Dense(n_assets * n_assets) + self.robust_optimizer = RobustMeanVarianceOptimizer(n_assets, uncertainty_budget) + + def call(self, inputs): + features = self.feature_extractor(inputs) + mu = self.mu_predictor(features) + sigma = tf.reshape(self.sigma_predictor(features), (-1, mu.shape[1], mu.shape[1])) + return self.robust_optimizer([mu, sigma]) diff --git a/deepfolio/pre_selection/drop_correlated_assets.py b/deepfolio/pre_selection/drop_correlated_assets.py deleted file mode 100644 index 787d389..0000000 --- a/deepfolio/pre_selection/drop_correlated_assets.py +++ /dev/null @@ -1,23 +0,0 @@ -import keras -import tensorflow as tf - -class DropCorrelatedAssets(keras.layers.Layer): - def __init__(self, threshold=0.95, **kwargs): - super().__init__(**kwargs) - self.threshold = threshold - - def call(self, inputs): - correlation_matrix = tfp.stats.correlation(inputs, sample_axis=1) - n_assets = tf.shape(correlation_matrix)[1] - - # Create a mask for assets to keep - mask = tf.ones((tf.shape(inputs)[0], n_assets), dtype=tf.float32) - - for i in range(n_assets): - for j in range(i+1, n_assets): - corr = correlation_matrix[:, i, j] - condition = tf.abs(corr) > self.threshold - # If assets are highly correlated, keep the first one (arbitrarily) - mask = tf.where(condition[:, tf.newaxis], mask * tf.one_hot(i, n_assets), mask) - - return mask \ No newline at end of file diff --git a/deepfolio/pre_selection/non_dominated_selection.py b/deepfolio/pre_selection/non_dominated_selection.py deleted file mode 100644 index d245b01..0000000 --- a/deepfolio/pre_selection/non_dominated_selection.py +++ /dev/null @@ -1,27 +0,0 @@ -import keras -import tensorflow as tf - -class NonDominatedSelectionLayer(keras.layers.Layer): - def __init__(self, risk_measure, **kwargs): - super().__init__(**kwargs) - self.risk_measure = risk_measure - - def call(self, inputs): - mean_returns = tf.reduce_mean(inputs, axis=1) - risks = self.risk_measure(inputs) - - # This is a simplified version and may not be efficient for large datasets - # A more efficient implementation would be needed for production use - dominated = tf.zeros_like(mean_returns, dtype=tf.bool) - for i in range(tf.shape(mean_returns)[1]): - for j in range(tf.shape(mean_returns)[1]): - if i != j: - dominated = tf.logical_or( - dominated, - tf.logical_and( - mean_returns[:, j] > mean_returns[:, i], - risks[:, j] < risks[:, i] - ) - ) - - return tf.cast(tf.logical_not(dominated), tf.float32) \ No newline at end of file diff --git a/deepfolio/pre_selection/select_k_extremes.py b/deepfolio/pre_selection/select_k_extremes.py deleted file mode 100644 index 778250e..0000000 --- a/deepfolio/pre_selection/select_k_extremes.py +++ /dev/null @@ -1,18 +0,0 @@ -import keras -import tensorflow as tf - -class SelectKExtremes(keras.layers.Layer): - def __init__(self, k, select_best=True, **kwargs): - super().__init__(**kwargs) - self.k = k - self.select_best = select_best - - def call(self, inputs): - mean_returns = tf.reduce_mean(inputs, axis=1) - if self.select_best: - _, indices = tf.nn.top_k(mean_returns, k=self.k) - else: - _, indices = tf.nn.top_k(-mean_returns, k=self.k) - - mask = tf.reduce_sum(tf.one_hot(indices, depth=tf.shape(inputs)[2]), axis=1) - return mask \ No newline at end of file diff --git a/deepfolio/risk_measures/__init__.py b/deepfolio/risk_measures/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/deepfolio/risk_measures/kurtosis.py b/deepfolio/risk_measures/kurtosis.py deleted file mode 100644 
index e69de29..0000000 diff --git a/deepfolio/risk_measures/mean_absolute_deviation.py b/deepfolio/risk_measures/mean_absolute_deviation.py deleted file mode 100644 index 2835592..0000000 --- a/deepfolio/risk_measures/mean_absolute_deviation.py +++ /dev/null @@ -1,4 +0,0 @@ -class MeanAbsoluteDeviation(keras.layers.Layer): - def call(self, inputs): - mean_return = tf.reduce_mean(inputs, axis=1, keepdims=True) - return tf.reduce_mean(tf.abs(inputs - mean_return), axis=1) \ No newline at end of file diff --git a/deepfolio/risk_measures/semi_variance.py b/deepfolio/risk_measures/semi_variance.py deleted file mode 100644 index 946cb11..0000000 --- a/deepfolio/risk_measures/semi_variance.py +++ /dev/null @@ -1,5 +0,0 @@ -class SemiVariance(keras.layers.Layer): - def call(self, inputs): - mean_return = tf.reduce_mean(inputs, axis=1, keepdims=True) - negative_returns = tf.minimum(inputs - mean_return, 0) - return tf.reduce_mean(tf.square(negative_returns), axis=1) \ No newline at end of file diff --git a/deepfolio/risk_measures/skew.py b/deepfolio/risk_measures/skew.py deleted file mode 100644 index 480543f..0000000 --- a/deepfolio/risk_measures/skew.py +++ /dev/null @@ -1,9 +0,0 @@ -import keras -import tensorflow as tf - -class Skew(keras.layers.Layer): - def call(self, inputs): - mean = tf.reduce_mean(inputs, axis=1, keepdims=True) - std = tf.math.reduce_std(inputs, axis=1, keepdims=True) - z_scores = (inputs - mean) / std - return tf.reduce_mean(tf.pow(z_scores, 3), axis=1) \ No newline at end of file diff --git a/deepfolio/risk_measures/variance.py b/deepfolio/risk_measures/variance.py deleted file mode 100644 index 55681f1..0000000 --- a/deepfolio/risk_measures/variance.py +++ /dev/null @@ -1,3 +0,0 @@ -class Variance(keras.layers.Layer): - def call(self, inputs): - return tf.math.reduce_variance(inputs, axis=1) \ No newline at end of file diff --git a/deepfolio/distance/__init__.py b/deepfolio/solvers/__init__.py similarity index 100% rename from deepfolio/distance/__init__.py rename to deepfolio/solvers/__init__.py diff --git a/deepfolio/solvers/solvers.py b/deepfolio/solvers/solvers.py new file mode 100644 index 0000000..1819a66 --- /dev/null +++ b/deepfolio/solvers/solvers.py @@ -0,0 +1,87 @@ +import tensorflow as tf +import cvxpy as cp +import osqp +import scipy.sparse as sparse +import numpy as np + +class SolverWrapper: + def __init__(self, solver_type='OSQP'): + self.solver_type = solver_type + + def solve_qp(self, P, q, A, l, u): + if self.solver_type == 'OSQP': + return self._solve_osqp(P, q, A, l, u) + elif self.solver_type == 'CVXPY': + return self._solve_cvxpy(P, q, A, l, u) + elif self.solver_type == 'SCS': + return self._solve_scs(P, q, A, l, u) + else: + raise ValueError(f"Unsupported solver type: {self.solver_type}") + + def _solve_osqp(self, P, q, A, l, u): + # Convert to sparse matrices + P = sparse.csc_matrix(P) + A = sparse.csc_matrix(A) + + # Create an OSQP object + prob = osqp.OSQP() + + # Setup workspace and change alpha parameter + prob.setup(P, q, A, l, u, warm_start=True, verbose=False) + + # Solve problem + res = prob.solve() + + if res.info.status != 'solved': + raise ValueError('OSQP did not solve the problem!') + + return res.x + + def _solve_cvxpy(self, P, q, A, l, u): + n = P.shape[0] + x = cp.Variable(n) + objective = cp.Minimize(0.5 * cp.quad_form(x, P) + q.T @ x) + constraints = [A @ x <= u, A @ x >= l] + prob = cp.Problem(objective, constraints) + prob.solve(solver=cp.ECOS) + if prob.status != cp.OPTIMAL: + raise ValueError('CVXPY did not solve the 
problem!') + return x.value + + def _solve_scs(self, P, q, A, l, u): + n = P.shape[0] + x = cp.Variable(n) + objective = cp.Minimize(0.5 * cp.quad_form(x, P) + q.T @ x) + constraints = [A @ x <= u, A @ x >= l] + prob = cp.Problem(objective, constraints) + prob.solve(solver=cp.SCS) + if prob.status != cp.OPTIMAL: + raise ValueError('SCS did not solve the problem!') + return x.value + +class QPLayer(tf.keras.layers.Layer): + def __init__(self, n_assets, solver_type='OSQP'): + super(QPLayer, self).__init__() + self.n_assets = n_assets + self.solver = SolverWrapper(solver_type) + + def call(self, inputs): + mu, Sigma = inputs + P = Sigma + q = -mu + + # Constraints + A = np.vstack([np.ones((1, self.n_assets)), np.eye(self.n_assets)]) + l = np.array([1.0] + [0.0] * self.n_assets) + u = np.array([1.0] + [1.0] * self.n_assets) + + def solve_qp(P, q, A, l, u): + return self.solver.solve_qp(P.numpy(), q.numpy(), A, l, u) + + optimized_w = tf.py_function( + func=solve_qp, + inp=[P, q, A, l, u], + Tout=tf.float32 + ) + + return optimized_w \ No newline at end of file diff --git a/deepfolio/train.py b/deepfolio/train.py new file mode 100644 index 0000000..ac231a6 --- /dev/null +++ b/deepfolio/train.py @@ -0,0 +1,53 @@ +import torch +import torch.nn as nn +from deepfolio.models import FeatureExtractor, ParameterPredictor, QPLayer, DeepLearningLayer, MergeLayer +from deepfolio.optimizers import CustomOptimizer +from deepfolio.data import get_data_loader +from deepfolio.utils import sharpe_ratio, max_drawdown + +class DiffOptPortfolio(nn.Module): + def __init__(self, input_dim, n_assets, hidden_dim): + super().__init__() + self.feature_extractor = FeatureExtractor(input_dim, hidden_dim) + self.parameter_predictor = ParameterPredictor(hidden_dim, n_assets) + self.qp_layer = QPLayer(n_assets) + self.dl_layer = DeepLearningLayer(hidden_dim, hidden_dim, n_assets) + self.merge_layer = MergeLayer(n_assets) + + def forward(self, x): + features = self.feature_extractor(x) + mu, sigma = self.parameter_predictor(features) + qp_output = self.qp_layer(mu, sigma) + dl_output = self.dl_layer(features) + return self.merge_layer(qp_output, dl_output) + +def train(model, train_loader, optimizer, epochs=100): + model.train() + for epoch in range(epochs): + for features, returns in train_loader: + optimizer.zero_grad() + weights = model(features) + portfolio_returns = torch.sum(weights * returns, dim=1) + loss = -sharpe_ratio(portfolio_returns) # Maximize Sharpe ratio + loss.backward() + optimizer.step() + + if epoch % 10 == 0: + print(f"Epoch {epoch}, Loss: {loss.item()}") + +def main(): + # Assume we have prepared our data + features, returns = prepare_data() + train_loader = get_data_loader(features, returns) + + input_dim = features.shape[1] + n_assets = returns.shape[1] + hidden_dim = 64 + + model = DiffOptPortfolio(input_dim, n_assets, hidden_dim) + optimizer = CustomOptimizer(model.parameters()) + + train(model, train_loader, optimizer) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/deepfolio/utils.py b/deepfolio/utils.py deleted file mode 100644 index e69de29..0000000 diff --git a/deepfolio/estimators/__init__.py b/deepfolio/utils/__init__.py similarity index 100% rename from deepfolio/estimators/__init__.py rename to deepfolio/utils/__init__.py diff --git a/deepfolio/utils/metrics.py b/deepfolio/utils/metrics.py new file mode 100644 index 0000000..6c32fb4 --- /dev/null +++ b/deepfolio/utils/metrics.py @@ -0,0 +1,10 @@ +import torch + +def sharpe_ratio(returns, risk_free_rate=0): + 
return (torch.mean(returns) - risk_free_rate) / torch.std(returns) + +def max_drawdown(returns): + cumulative = torch.cumsum(returns, dim=0) + max_so_far = torch.cummax(cumulative, dim=0).values # running maximum of the cumulative return path + drawdowns = (max_so_far - cumulative) / max_so_far + return torch.max(drawdowns) \ No newline at end of file diff --git a/deepfolio/utils/risk_manaagement.py b/deepfolio/utils/risk_manaagement.py new file mode 100644 index 0000000..c049cef --- /dev/null +++ b/deepfolio/utils/risk_manaagement.py @@ -0,0 +1,28 @@ +import tensorflow as tf + +def value_at_risk(returns, confidence_level=0.95): + return tf.sort(returns)[tf.cast(tf.cast(tf.size(returns) - 1, tf.float32) * (1.0 - confidence_level), tf.int32)] # empirical (1 - confidence_level) quantile of returns + +def conditional_value_at_risk(returns, confidence_level=0.95): + var = value_at_risk(returns, confidence_level) + return tf.reduce_mean(tf.boolean_mask(returns, returns <= var)) + +def tracking_error(portfolio_returns, benchmark_returns): + return tf.math.reduce_std(portfolio_returns - benchmark_returns) + +class RiskManager(tf.keras.layers.Layer): + def __init__(self, max_leverage=1.5, max_position_size=0.3): + super(RiskManager, self).__init__() + self.max_leverage = max_leverage + self.max_position_size = max_position_size + + def call(self, weights): + weights = tf.maximum(weights, 0) + weights = tf.minimum(weights, self.max_position_size) + weights = weights / tf.reduce_sum(weights) + + leverage = tf.reduce_sum(tf.abs(weights)) + if leverage > self.max_leverage: + weights = weights * (self.max_leverage / leverage) + + return weights \ No newline at end of file diff --git a/deepfolio/utils/visualization.py b/deepfolio/utils/visualization.py new file mode 100644 index 0000000..de05f72 --- /dev/null +++ b/deepfolio/utils/visualization.py @@ -0,0 +1,20 @@ +import matplotlib.pyplot as plt + +def plot_portfolio_weights(weights, asset_names): + plt.figure(figsize=(10, 6)) + plt.bar(asset_names, weights) + plt.title('Portfolio Weights') + plt.xlabel('Assets') + plt.ylabel('Weight') + plt.xticks(rotation=45) + plt.tight_layout() + plt.show() + +def plot_returns(returns): + plt.figure(figsize=(10, 6)) + plt.plot(returns.cumsum()) + plt.title('Cumulative Returns') + plt.xlabel('Time') + plt.ylabel('Cumulative Return') + plt.tight_layout() + plt.show() \ No newline at end of file diff --git a/examples/1_mean_risk/README.txt b/examples/1_mean_risk/README.txt deleted file mode 100644 index c1de965..0000000 --- a/examples/1_mean_risk/README.txt +++ /dev/null @@ -1,7 +0,0 @@ -.. _mean_risk_examples: - -Mean-Risk ---------- - -Examples using the :class:`~deepfolio.optimization.MeanRisk` optimization. - diff --git a/examples/1_mean_risk/plot_10_tracking_error.py b/examples/1_mean_risk/plot_10_tracking_error.py deleted file mode 100644 index 3829752..0000000 --- a/examples/1_mean_risk/plot_10_tracking_error.py +++ /dev/null @@ -1,127 +0,0 @@ -r""" -============== -Tracking Error -============== - -This tutorial shows how to incorporate a tracking error constraint into the -:class:`~deepfolio.optimization.MeanRisk` optimization. - -The tracking error is defined as the RMSE (root-mean-square error) of the portfolio -returns compared to a target returns. - -In this example we will create a long-short portfolio of 20 stocks that tracks the -SPX Index with a tracking error constraint of 0.30% while minimizing the CVaR -(Conditional Value at Risk) at 95%.
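As a side note on the new solver plumbing: the SolverWrapper and QPLayer introduced in deepfolio/solvers/solvers.py above hand problems to OSQP in (P, q, A, l, u) form, with a ones row pinning the budget to 1 and identity rows bounding each weight in [0, 1]. The sketch below solves that same form standalone for a minimum-variance portfolio; the three-asset covariance values are illustrative assumptions, not part of the patch.

import numpy as np
import osqp
import scipy.sparse as sparse

n_assets = 3
Sigma = np.array([[0.04, 0.01, 0.00],
                  [0.01, 0.09, 0.02],
                  [0.00, 0.02, 0.16]])  # toy covariance matrix (assumption)

P = sparse.csc_matrix(Sigma)      # quadratic term: minimize 0.5 * w' P w
q = np.zeros(n_assets)            # no linear term -> pure minimum variance
# One ones-row forces sum(w) == 1 (l == u == 1); identity rows keep 0 <= w_i <= 1
A = sparse.csc_matrix(np.vstack([np.ones((1, n_assets)), np.eye(n_assets)]))
l = np.array([1.0] + [0.0] * n_assets)
u = np.array([1.0] + [1.0] * n_assets)

prob = osqp.OSQP()
prob.setup(P, q, A, l, u, verbose=False)
res = prob.solve()
print(res.info.status, res.x)     # optimal long-only weights summing to 1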
-""" - -# %% -# Data -# ==== -# We load the S&P 500 :ref:`dataset ` composed of the daily prices of 20 -# assets from the S&P 500 Index composition and the prices of the S&P 500 Index itself: - -import numpy as np -from plotly.io import show -from PyTorch import clone -from PyTorch.model_selection import train_test_split - -from deepfolio import Population, RiskMeasure -from deepfolio.datasets import load_sp500_dataset, load_sp500_index -from deepfolio.optimization import EqualWeighted, MeanRisk, ObjectiveFunction -from deepfolio.preprocessing import prices_to_returns - -prices = load_sp500_dataset() -prices = prices["2014":] -spx_prices = load_sp500_index() -spx_prices = spx_prices["2014":] - -X, y = prices_to_returns(prices, spx_prices) -X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, shuffle=False) - -# %% -# Model -# ===== -# We create two long-short models: a Minimum CVaR without tracking error and a -# Minimum CVaR with a 0.30% tracking error constraint versus the SPX Index. -# A 0.30% tracking error constraint is a constraint on the RMSE of the difference -# between the daily portfolio returns and the SPX Index returns. -# We first create the Minimum CVaR model without tracking error: -model_no_tracking = MeanRisk( - objective_function=ObjectiveFunction.MINIMIZE_RISK, - risk_measure=RiskMeasure.CVAR, - min_weights=-1, - portfolio_params=dict(name="Minimum-CVaR", tag="No Tracking"), -) -model_no_tracking.fit(X_train, y_train) -model_no_tracking.weights_ - -# %% -# Then we create the Minimum CVaR model with a 0.30% tracking error constraint -# versus the SPX Index: -model_tracking = clone(model_no_tracking) -model_tracking.set_params( - max_tracking_error=0.003, - portfolio_params=dict(name="Minimum-CVaR", tag="Tracking 0.30%"), -) -model_tracking.fit(X_train, y_train) -model_no_tracking.weights_ - -# %% -# For comparison, we create a single asset Portfolio model containing the SPX Index. -model_spx = EqualWeighted(portfolio_params=dict(name="SPX Index")) -model_spx.fit(y_train) -model_spx.weights_ - -# %% -# Now we plot both models and the SPX Index on the training set: -ptf_no_tracking_train = model_no_tracking.predict(X_train) -ptf_tracking_train = model_tracking.predict(X_train) -spx_train = model_spx.predict(y_train) -# Note that we coule have directly used: -# train_spx = Portfolio(y_train, weights=[1], name="SPX Index") - -population_train = Population([ptf_no_tracking_train, ptf_tracking_train, spx_train]) - -fig = population_train.plot_cumulative_returns() -show(fig) - -# %% -# | -# -# Let's print the tracking error and the CVaR: -for portfolio in [ptf_no_tracking_train, ptf_tracking_train]: - tracking_rmse = np.sqrt(np.mean((portfolio.returns - spx_train.returns) ** 2)) - print("========================") - print(portfolio.tag) - print("========================") - print(f"Tracking RMSE: {tracking_rmse:0.2%}") - print(f"CVaR at 95%: {portfolio.cvar:0.2%}") - print(f"CVaR ratio: {portfolio.cvar_ratio:0.2f}") - print("\n") - -# %% -# The model with tracking error achieved the required RMSE of 0.30% versus the SPX on -# the training set. The tradeoff of this constraint is the higher CVaR value versus -# the model without tracking error. 
- -# %% -# Prediction -# ========== -# Finally, we predict both models on the test set: -ptf_no_tracking_test = model_no_tracking.predict(X_test) -ptf_tracking_test = model_tracking.predict(X_test) -spx_test = model_spx.predict(y_test) - -for portfolio in [ptf_no_tracking_test, ptf_tracking_test]: - tracking_rmse = np.sqrt(np.mean((portfolio.returns - spx_test.returns) ** 2)) - print("========================") - print(portfolio.tag) - print("========================") - print(f"Tracking RMSE: {tracking_rmse:0.2%}") - print(f"CVaR at 95%: {portfolio.cvar:0.2%}") - print(f"CVaR ratio: {portfolio.cvar_ratio:0.2f}") - print("\n") - -# %% -# As expected, the model with tracking error also achieved a lower RMSE on the test set -# compared to the model without tracking error. diff --git a/examples/1_mean_risk/plot_11_empirical_prior.py b/examples/1_mean_risk/plot_11_empirical_prior.py deleted file mode 100644 index e4455c0..0000000 --- a/examples/1_mean_risk/plot_11_empirical_prior.py +++ /dev/null @@ -1,87 +0,0 @@ -r""" -=============== -Empirical Prior -=============== - -This tutorial shows how to use the :class:`~deepfolio.prior.EmpiricalPrior` estimator in -the :class:`~deepfolio.optimization.MeanRisk` optimization. - -A :ref:`prior estimator ` fits a :class:`~deepfolio.prior.PriorModel` containing -the distribution estimate of asset returns. It represents the investor's prior beliefs -about the model used to estimate such distribution. - -The `PriorModel` is a dataclass containing: - - * `mu`: Expected returns estimation - * `covariance`: Covariance matrix estimation - * `returns`: assets returns estimation - * `cholesky` : Lower-triangular Cholesky factor of the covariance estimation (optional) - -The `EmpiricalPrior` estimator simply estimates the `PriorModel` from a `mu_estimator` -and a `covariance_estimator`. - -In this tutorial we will build a Maximum Sharpe Ratio portfolio using the -`EmpiricalPrior` estimator with James-Stein shrinkage for the estimation of expected -returns and Denoising for the estimation of the covariance matrix. 
-""" - -# %% -# Data -# ==== -# We load the S&P 500 :ref:`dataset ` composed of the daily prices of 20 -# assets from the SPX Index composition starting from 1990-01-02 up to 2022-12-28: - -from plotly.io import show -from PyTorch.model_selection import train_test_split - -from deepfolio import Population, RiskMeasure -from deepfolio.datasets import load_sp500_dataset -from deepfolio.moments import DenoiseCovariance, ShrunkMu -from deepfolio.optimization import MeanRisk, ObjectiveFunction -from deepfolio.preprocessing import prices_to_returns -from deepfolio.prior import EmpiricalPrior - -prices = load_sp500_dataset() -X = prices_to_returns(prices) -X_train, X_test = train_test_split(X, test_size=0.33, shuffle=False) - -# %% -# Model -# ===== -# We create a Maximum Sharpe Ratio model with shrinkage for the estimation of the -# expected returns and denoising for the estimation of the covariance matrix: -model = MeanRisk( - risk_measure=RiskMeasure.VARIANCE, - objective_function=ObjectiveFunction.MAXIMIZE_RATIO, - prior_estimator=EmpiricalPrior( - mu_estimator=ShrunkMu(), covariance_estimator=DenoiseCovariance() - ), - portfolio_params=dict(name="Max Sharpe - ShrunkMu & DenoiseCovariance"), -) -model.fit(X_train) -model.weights_ - -# %% -# Benchmark -# ========= -# For comparison, we also create a Maximum Sharpe Ratio model using the default -# moments estimators: -bench = MeanRisk( - risk_measure=RiskMeasure.VARIANCE, - objective_function=ObjectiveFunction.MAXIMIZE_RATIO, - portfolio_params=dict(name="Max Sharpe"), -) -bench.fit(X_train) -bench.weights_ - -# %% -# Prediction -# ========== -# We predict both models on the test set: -pred_model = model.predict(X_test) -pred_bench = bench.predict(X_test) - -population = Population([pred_model, pred_bench]) - -fig = population.plot_cumulative_returns() -show(fig) diff --git a/examples/1_mean_risk/plot_12_black_and_litterman.py b/examples/1_mean_risk/plot_12_black_and_litterman.py deleted file mode 100644 index cccaafd..0000000 --- a/examples/1_mean_risk/plot_12_black_and_litterman.py +++ /dev/null @@ -1,106 +0,0 @@ -r""" -================= -Black & Litterman -================= - -This tutorial shows how to use the :class:`~deepfolio.prior.BlackLitterman` estimator in -the :class:`~deepfolio.optimization.MeanRisk` optimization. - -A :ref:`prior estimator ` fits a :class:`~deepfolio.prior.PriorModel` containing -the distribution estimate of asset returns. It represents the investor's prior beliefs -about the model used to estimate such distribution. - -The `PriorModel` is a dataclass containing: - - * `mu`: Expected returns estimation - * `covariance`: Covariance matrix estimation - * `returns`: assets returns estimation - * `cholesky` : Lower-triangular Cholesky factor of the covariance estimation (optional) - -The `BlackLitterman` estimator estimates the `PriorModel` using the Black & Litterman -model. It takes as input a prior estimator used to compute the prior expected returns -and prior covariance matrix, which are updated using the analyst's views to get the -posterior expected returns and posterior covariance matrix. - -In this tutorial we will build a Maximum Sharpe Ratio portfolio using the -`BlackLitterman` estimator. 
-""" - -# %% -# Data -# ==== -# We load the S&P 500 :ref:`dataset ` composed of the daily prices of 20 -# assets from the SPX Index composition starting from 1990-01-02 up to 2022-12-28: -from plotly.io import show -from PyTorch.model_selection import train_test_split - -from deepfolio import Population, RiskMeasure -from deepfolio.datasets import load_sp500_dataset -from deepfolio.optimization import MeanRisk, ObjectiveFunction -from deepfolio.preprocessing import prices_to_returns -from deepfolio.prior import BlackLitterman - -prices = load_sp500_dataset() -X = prices_to_returns(prices) -X_train, X_test = train_test_split(X, test_size=0.33, shuffle=False) - -# %% -# Analyst views -# ============= -# Let's assume we are able to accurately estimate views about future realization of the -# market. We estimate that Apple will have an expected return of 25% p.a. (absolute -# view) and will outperform General Electric by 22% p.a. (relative view). We also -# estimate that JPMorgan will outperform General Electric by 15% p.a (relative view). -# By converting these annualized estimates into daily estimates to be homogenous with -# the input `X`, we get: -analyst_views = [ - "AAPL == 0.00098", - "AAPL - GE == 0.00086", - "JPM - GE == 0.00059", -] - -# %% -# Black & Litterman Model -# ======================= -# We create a Maximum Sharpe Ratio model using the Black & Litterman estimator that we -# fit on the training set: -model_bl = MeanRisk( - risk_measure=RiskMeasure.VARIANCE, - objective_function=ObjectiveFunction.MAXIMIZE_RATIO, - prior_estimator=BlackLitterman(views=analyst_views), - portfolio_params=dict(name="Black & Litterman"), -) -model_bl.fit(X_train) -model_bl.weights_ - -# %% -# Empirical Model -# =============== -# For comparison, we also create a Maximum Sharpe Ratio model using the default -# Empirical estimator: -model_empirical = MeanRisk( - risk_measure=RiskMeasure.VARIANCE, - objective_function=ObjectiveFunction.MAXIMIZE_RATIO, - portfolio_params=dict(name="Empirical"), -) -model_empirical.fit(X_train) -model_empirical.weights_ - -# %% -# Prediction -# ========== -# We predict both models on the test set: -pred_bl = model_bl.predict(X_test) -pred_empirical = model_empirical.predict(X_test) - -population = Population([pred_bl, pred_empirical]) - -population.plot_cumulative_returns() - -# %% -# Because our views were accurate, the Black & Litterman model outperformed the -# Empirical model on the test set. From the below composition, we can see that Apple -# and JPMorgan were allocated more weights: - -fig = population.plot_composition() -show(fig) diff --git a/examples/1_mean_risk/plot_13_factor_model.py b/examples/1_mean_risk/plot_13_factor_model.py deleted file mode 100644 index bd26f8f..0000000 --- a/examples/1_mean_risk/plot_13_factor_model.py +++ /dev/null @@ -1,167 +0,0 @@ -r""" -============ -Factor Model -============ - -This tutorial shows how to use the :class:`~deepfolio.prior.FactorModel` estimator in -the :class:`~deepfolio.optimization.MeanRisk` optimization. - -A :ref:`prior estimator ` fits a :class:`~deepfolio.prior.PriorModel` containing -the distribution estimate of asset returns. It represents the investor's prior beliefs -about the model used to estimate such distribution. 
- -The `PriorModel` is a dataclass containing: - - * `mu`: Expected returns estimation - * `covariance`: Covariance matrix estimation - * `returns`: assets returns estimation - * `cholesky` : Lower-triangular Cholesky factor of the covariance estimation (optional) - -The `FactorModel` estimator estimates the :class:`PriorModel` using a factor model and a -:ref:`prior estimator ` of the factor's returns. The purpose of factor models is -to impose a structure on financial variables and their covariance matrix by explaining -them through a small number of common factors. This can help overcome estimation -error by reducing the number of parameters, i.e., the dimensionality of the estimation -problem, making portfolio optimization more robust against noise in the data. Factor -models also provide a decomposition of financial risk to systematic and security -specific components. - -To be compatible with `scikit-learn`, the `fit` method takes `X` as the assets returns -and `y` as the factors returns. Note that `y` is in lowercase even for a 2D array -(more than one factor). This is for consistency with the scikit-learn API. - -In this tutorial we will build a Maximum Sharpe Ratio portfolio using the `FactorModel` -estimator. -""" - -# %% -# Data -# ==== -# We load the S&P 500 :ref:`dataset ` composed of the daily prices of 20 -# assets from the SPX Index composition and the Factors dataset composed of the daily -# prices of 5 ETF representing common factors: -from plotly.io import show -from PyTorch.linear_model import RidgeCV -from PyTorch.model_selection import train_test_split - -from deepfolio import Population, RiskMeasure -from deepfolio.datasets import load_factors_dataset, load_sp500_dataset -from deepfolio.moments import GerberCovariance, ShrunkMu -from deepfolio.optimization import MeanRisk, ObjectiveFunction -from deepfolio.preprocessing import prices_to_returns -from deepfolio.prior import EmpiricalPrior, FactorModel, LoadingMatrixRegression - -prices = load_sp500_dataset() -factor_prices = load_factors_dataset() - -prices = prices["2014":] -factor_prices = factor_prices["2014":] - -X, y = prices_to_returns(prices, factor_prices) -X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, shuffle=False) - -# %% -# Factor Model -# ============= -# We create a Maximum Sharpe Ratio model using the Factor Model that we fit on the -# training set: -model_factor_1 = MeanRisk( - risk_measure=RiskMeasure.VARIANCE, - objective_function=ObjectiveFunction.MAXIMIZE_RATIO, - prior_estimator=FactorModel(), - portfolio_params=dict(name="Factor Model 1"), -) -model_factor_1.fit(X_train, y_train) -model_factor_1.weights_ - -# %% -# We can change the :class:`~deepfolio.prior.BaseLoadingMatrix` that estimates the loading -# matrix (betas) of the factors. -# -# The default is the :class:`LoadingMatrixRegression`, which fit the factors using a -# `LassoCV` on each asset separately. -# -# For example, let's change the `LassoCV` into a `RidgeCV` without intercept and use -# parallelization: -model_factor_2 = MeanRisk( - risk_measure=RiskMeasure.VARIANCE, - objective_function=ObjectiveFunction.MAXIMIZE_RATIO, - prior_estimator=FactorModel( - loading_matrix_estimator=LoadingMatrixRegression( - linear_regressor=RidgeCV(fit_intercept=False), n_jobs=-1 - ) - ), - portfolio_params=dict(name="Factor Model 2"), -) -model_factor_2.fit(X_train, y_train) -model_factor_2.weights_ - -# %% -# We can also change the :ref:`prior estimator ` of the factors. 
-# It is used to estimate the :class:`~deepfolio.prior.PriorModel` containing the factors -# expected returns and covariance matrix. -# -# For example, let's estimate the factors expected returns with James-Stein shrinkage -# and the factors covariance matrix with the Gerber covariance estimator: -model_factor_3 = MeanRisk( - risk_measure=RiskMeasure.VARIANCE, - objective_function=ObjectiveFunction.MAXIMIZE_RATIO, - prior_estimator=FactorModel( - factor_prior_estimator=EmpiricalPrior( - mu_estimator=ShrunkMu(), covariance_estimator=GerberCovariance() - ) - ), - portfolio_params=dict(name="Factor Model 3"), -) -model_factor_3.fit(X_train, y_train) -model_factor_3.weights_ - -# %% -# Factor Analysis -# =============== -# Each fitted estimator is saved with a trailing underscore. -# For example, we can access the fitted prior estimator with: -prior_estimator = model_factor_3.prior_estimator_ - -# %% -# We can access the prior model with: -prior_model = prior_estimator.prior_model_ - -# %% -# We can access the loading matrix with: -loading_matrix = prior_estimator.loading_matrix_estimator_.loading_matrix_ - -# %% -# Empirical Model -# =============== -# For comparison, we also create a Maximum Sharpe Ratio model using the default -# Empirical estimator: -model_empirical = MeanRisk( - risk_measure=RiskMeasure.VARIANCE, - objective_function=ObjectiveFunction.MAXIMIZE_RATIO, - portfolio_params=dict(name="Empirical"), -) -model_empirical.fit(X_train) -model_empirical.weights_ - -# %% -# Prediction -# ========== -# We predict all models on the test set: -ptf_factor_1_test = model_factor_1.predict(X_test) -ptf_factor_2_test = model_factor_2.predict(X_test) -ptf_factor_3_test = model_factor_3.predict(X_test) -ptf_empirical_test = model_empirical.predict(X_test) - -population = Population( - [ptf_factor_1_test, ptf_factor_2_test, ptf_factor_3_test, ptf_empirical_test] -) - -fig = population.plot_cumulative_returns() -show(fig) - -# %% -# | -# -# Let's plot the portfolios' composition: -population.plot_composition() diff --git a/examples/1_mean_risk/plot_14_black_litterman_factor_model.py b/examples/1_mean_risk/plot_14_black_litterman_factor_model.py deleted file mode 100644 index afcea33..0000000 --- a/examples/1_mean_risk/plot_14_black_litterman_factor_model.py +++ /dev/null @@ -1,128 +0,0 @@ -r""" -============================== -Black & Litterman Factor Model -============================== - -This tutorial shows how to use the :class:`~deepfolio.prior.FactorModel` estimator coupled -with the :class:`~deepfolio.prior.BlackLitterman` estimator in the -:class:`~deepfolio.optimization.MeanRisk` optimization. - -The Black & Litterman Factor Model is a Factor Model in which we incorporate views on -factors using the Black & Litterman Model. - -In the previous two tutorials, we introduced the Factor Model and the Black & Litterman -separately. In this tutorial we show how we can merge them together by building a -Maximum Sharpe Ratio portfolio using the `FactorModel` estimator. 
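The factor-model tutorials above rest on the idea that asset moments are rebuilt from a small number of common factors, which is what reduces the dimensionality of the estimation problem. A generic numpy illustration of that structure (standard factor-model algebra with made-up dimensions matching the 20-asset / 5-factor datasets; this is not the deepfolio implementation):

import numpy as np

rng = np.random.default_rng(0)
B = rng.normal(size=(20, 5))                          # loading matrix (betas), 20 assets x 5 factors
mu_f = rng.normal(scale=1e-4, size=5)                 # factor expected returns
Sigma_f = np.diag(rng.uniform(1e-5, 1e-4, size=5))    # factor covariance
D = np.diag(rng.uniform(1e-5, 1e-4, size=20))         # idiosyncratic (asset-specific) variances

mu_assets = B @ mu_f                  # systematic expected returns
Sigma_assets = B @ Sigma_f @ B.T + D  # structured covariance with far fewer free parameters
print(mu_assets.shape, Sigma_assets.shape)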
-""" - -# %% -# Data -# ==== -# We load the S&P 500 :ref:`dataset ` composed of the daily prices of 20 -# assets from the SPX Index composition and the Factors dataset composed of the daily -# prices of 5 ETF representing common factors: -from plotly.io import show -from PyTorch.model_selection import train_test_split - -from deepfolio import Population, RiskMeasure -from deepfolio.datasets import load_factors_dataset, load_sp500_dataset -from deepfolio.optimization import MeanRisk, ObjectiveFunction -from deepfolio.preprocessing import prices_to_returns -from deepfolio.prior import BlackLitterman, FactorModel - -prices = load_sp500_dataset() -factor_prices = load_factors_dataset() - -prices = prices["2014":] -factor_prices = factor_prices["2014":] - -X, y = prices_to_returns(prices, factor_prices) -X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, shuffle=False) - -# %% -# Analyst views -# ============= -# Let's assume we are able to accurately estimate views about future realization of the -# factors. We estimate that the factor Size will have an expected return of 10% p.a. -# (absolute view) and will outperform the factor Value by 3% p.a. (relative view). We -# also estimate the factor Momentum will outperform the factor Quality by 2% p.a -# (relative view). By converting these annualized estimates into daily estimates to be -# homogenous with the input `X`, we get: -factor_views = [ - "SIZE == 0.00039", - "SIZE - VLUE == 0.00011 ", - "MTUM - QUAL == 0.00007", -] - -# %% -# Black & Litterman Factor Model -# ============================== -# We create a Maximum Sharpe Ratio model using the Black & Litterman Factor Model that -# we fit on the training set: -model_bl_factor = MeanRisk( - risk_measure=RiskMeasure.VARIANCE, - objective_function=ObjectiveFunction.MAXIMIZE_RATIO, - prior_estimator=FactorModel( - factor_prior_estimator=BlackLitterman(views=factor_views), - ), - portfolio_params=dict(name="Black & Litterman Factor Model"), -) -model_bl_factor.fit(X_train, y_train) -model_bl_factor.weights_ - -# %% -# For comparison, we also create a Maximum Sharpe Ratio model using a simple Factor -# Model: -model_factor = MeanRisk( - risk_measure=RiskMeasure.VARIANCE, - objective_function=ObjectiveFunction.MAXIMIZE_RATIO, - prior_estimator=FactorModel(), - portfolio_params=dict(name="Factor Model"), -) -model_factor.fit(X_train, y_train) -model_factor.weights_ - -# %% -# Prediction -# ========== -# We predict both models on the test set: -ptf_bl_factor_test = model_bl_factor.predict(X_test) -ptf_factor_test = model_factor.predict(X_test) - -population = Population([ptf_bl_factor_test, ptf_factor_test]) - -population.plot_cumulative_returns() - -# %% -# Because our factor views were accurate, the Black & Litterman Factor Model -# outperformed the simple Factor Model on the test set. -# -# Let's plot the portfolios compositions: -fig = population.plot_composition() -show(fig) - - -# %% -# Going Further -# ============= -# The API design makes it possible to created nested models without limits. -# In the below example, we re-apply a Black & Litterman model incorporating assets -# views. 
But instead of using the empirical moments, we use the above Black & Litterman -# factor model: - -assets_views = [ - "AAPL == 0.00098", - "AAPL - GE == 0.00086", - "JPM - GE == 0.00059", -] - -model = BlackLitterman( - views=assets_views, - prior_estimator=FactorModel( - factor_prior_estimator=BlackLitterman(views=factor_views), - ) -) - -model.fit(X, y) -print(model.prior_model_.covariance.shape) - diff --git a/examples/1_mean_risk/plot_1_maximum_sharpe_ratio.py b/examples/1_mean_risk/plot_1_maximum_sharpe_ratio.py deleted file mode 100644 index 0371565..0000000 --- a/examples/1_mean_risk/plot_1_maximum_sharpe_ratio.py +++ /dev/null @@ -1,126 +0,0 @@ -""" -==================== -Maximum Sharpe Ratio -==================== - -This tutorial uses the :class:`~deepfolio.optimization.MeanRisk` optimization to find the -maximum Sharpe Ratio portfolio. -""" - -# %% -# Data -# ==== -# We load the S&P 500 :ref:`dataset ` composed of the daily prices of 20 -# assets from the S&P 500 Index composition starting from 1990-01-02 up to 2022-12-28. -# Prices are transformed into linear returns (see :ref:`data preparation -# `) and split into a training set and a test set without shuffling to -# avoid :ref:`data leakage `. - -import numpy as np -from plotly.io import show -from PyTorch.model_selection import train_test_split - -from deepfolio import Population, RiskMeasure -from deepfolio.datasets import load_sp500_dataset -from deepfolio.optimization import InverseVolatility, MeanRisk, ObjectiveFunction -from deepfolio.preprocessing import prices_to_returns - -prices = load_sp500_dataset() - -X = prices_to_returns(prices) -X_train, X_test = train_test_split(X, test_size=0.33, shuffle=False) - -print(X_train.head()) - -# %% -# Model -# ===== -# We create a Maximum Sharpe Ratio model and then fit it on the training set. -# `portfolio_params` are parameters passed to the :class:`~deepfolio.portfolio.Portfolio` -# returned by the `predict` method. It can be -# omitted, here we use it to give a name to our maximum Sharpe Ration portfolio: -model = MeanRisk( - risk_measure=RiskMeasure.VARIANCE, - objective_function=ObjectiveFunction.MAXIMIZE_RATIO, - portfolio_params=dict(name="Max Sharpe"), -) -model.fit(X_train) -model.weights_ - -# %% -# To compare this model, we use an inverse volatility benchmark using -# the :class:`~deepfolio.optimization.InverseVolatility` estimator: -benchmark = InverseVolatility(portfolio_params=dict(name="Inverse Vol")) -benchmark.fit(X_train) -benchmark.weights_ - -# %% -# Prediction -# ========== -# We predict the model and the benchmark on the test set: -pred_model = model.predict(X_test) -pred_bench = benchmark.predict(X_test) - -# %% -# The `predict` method returns a :class:`~deepfolio.portfolio.Portfolio` object. -# -# :class:`~deepfolio.portfolio.Portfolio` is an array-container making it compatible -# with `scikit-learn` tools: calling `np.asarray(pred_model)` gives the portfolio -# returns (same as `pred_model.returns`): -np.asarray(pred_model) - -# %% -# The :class:`~deepfolio.portfolio.Portfolio` class contains a vast number of properties -# and methods used for analysis. 
-# -# | For example: -# -# * pred_model.plot_cumulative_returns() -# * pred_model.plot_composition() -# * pred_model.summary() -print(pred_model.annualized_sharpe_ratio) -print(pred_bench.annualized_sharpe_ratio) - -# %% -# Analysis -# ======== -# For improved analysis, we load both predicted portfolios into a -# :class:`~deepfolio.population.Population`: -population = Population([pred_model, pred_bench]) - -# %% -# The :class:`~deepfolio.population.Population` class also contains a -# vast number of properties and methods used for analysis. -# Let's plot each portfolio composition: -population.plot_composition() - -# %% -# .. note:: -# Every `plot` methods in `deepfolio` returns a `plotly` figure. -# To display a plotly figure, you may need to call `show()` and change the -# default renderer: https://plotly.com/python/renderers/ -# -# Let's plot each portfolio cumulative returns: -fig = population.plot_cumulative_returns() -# show(fig) is only used for the documentation sticker. -show(fig) - -# %% -# | -# -# Finally, let's display the full summary of both strategies evaluated on the test -# set: -population.summary() - -# %% -# Conclusion -# ========== -# From the analysis on the test set, we see that the Maximum Sharpe Ratio portfolio -# outperform the inverse-volatility benchmark for the mean and the ratio -# measures including the Sharpe Ratio, and underperforms for the deviation and -# shortfall measures. -# -# .. seealso:: -# This was a toy example, for more advanced concepts check the :ref:`user guide -# ` or the :ref:`other examples `. -# diff --git a/examples/1_mean_risk/plot_2_minimum_CVaR.py b/examples/1_mean_risk/plot_2_minimum_CVaR.py deleted file mode 100644 index a5cf195..0000000 --- a/examples/1_mean_risk/plot_2_minimum_CVaR.py +++ /dev/null @@ -1,127 +0,0 @@ -""" -============ -Minimum CVaR -============ - -This tutorial uses the :class:`~deepfolio.optimization.MeanRisk` optimization to find the -minimum CVaR (Conditional Value at Risk) portfolio. -""" - -# %% -# Data -# ==== -# We load the S&P 500 :ref:`dataset ` composed of the daily prices of 20 -# assets from the S&P 500 Index composition starting from 1990-01-02 up to 2022-12-28. -# Prices are transformed into linear returns (see :ref:`data preparation -# `) and split into a training set and a test set without shuffling to -# avoid :ref:`data leakage `. - -import numpy as np -from plotly.io import show -from PyTorch.model_selection import train_test_split - -from deepfolio import Population, RiskMeasure -from deepfolio.datasets import load_sp500_dataset -from deepfolio.optimization import EqualWeighted, MeanRisk, ObjectiveFunction -from deepfolio.preprocessing import prices_to_returns - -prices = load_sp500_dataset() - -X = prices_to_returns(prices) -X_train, X_test = train_test_split(X, test_size=0.33, shuffle=False) - -print(X_train.head()) - -# %% -# Model -# ===== -# We create a Minimum CVaR model and then fit it on the training set. -# `portfolio_params` are parameters passed to the :class:`~deepfolio.portfolio.Portfolio` -# returned by the `predict` method. 
It can be -# omitted, here we use it to give a name to our minimum CVaR portfolio: -model = MeanRisk( - risk_measure=RiskMeasure.CVAR, - objective_function=ObjectiveFunction.MINIMIZE_RISK, - portfolio_params=dict(name="Min CVaR"), -) -model.fit(X_train) -model.weights_ - -# %% -# To compare this model, we use an equal-weighted benchmark using -# :class:`~deepfolio.optimization.EqualWeighted`: -benchmark = EqualWeighted(portfolio_params=dict(name="Equal Weighted")) -# Even if `X` has no impact (as it is equal weighted), we still need to call `fit` for -# API consistency. -benchmark.fit(X_train) -benchmark.weights_ - -# %% -# Prediction -# ========== -# We predict the model and the benchmark on the test set: -pred_model = model.predict(X_test) -pred_bench = benchmark.predict(X_test) - -# %% -# The `predict` method returns a :class:`~deepfolio.portfolio.Portfolio` object. -# -# :class:`~deepfolio.portfolio.Portfolio` is an array-container making it compatible -# with `scikit-learn` tools: calling `np.asarray(pred_model)` gives the portfolio -# returns (same as `pred_model.returns`): -np.asarray(pred_model) - -# %% -# The :class:`~deepfolio.portfolio.Portfolio` class contains a vast number of properties -# and methods used for analysis. -# -# | For example: -# -# * pred_model.plot_cumulative_returns() -# * pred_model.plot_composition() -# * pred_model.summary() -print(pred_model.cvar) -print(pred_bench.cvar) - -# %% -# Analysis -# ======== -# For improved analysis, we load both predicted portfolios into a -# :class:`~deepfolio.population.Population`: -population = Population([pred_model, pred_bench]) - -# %% -# The :class:`~deepfolio.population.Population` class also contains a -# vast number of properties and methods used for analysis. -# Let's plot each portfolio composition: -population.plot_composition() - -# %% -# .. note:: -# Every `plot` methods in `deepfolio` returns a `plotly` figure. -# To display a plotly figure, you may need to call `show()` and change the -# default renderer: https://plotly.com/python/renderers/ -# -# Let's plot each portfolio cumulative returns: -fig = population.plot_cumulative_returns() -# show(fig) is only used for the documentation sticker. -show(fig) - -# %% -# | -# -# Finally, let's display the full summary of both strategies evaluated on the test -# set: -population.summary() - -# %% -# Conclusion -# ========== -# From the analysis on the test set, we see that the Minimum CVaR portfolio outperforms -# the equal-weighted benchmark for all deviation and shortfall risk measures, except for -# the drawdown measures, and underperforms for the mean and ratio measures. -# -# .. seealso:: -# This was a toy example, for more advanced concepts check the -# :ref:`user guide ` or the :ref:`other examples `. -# diff --git a/examples/1_mean_risk/plot_3_efficient_frontier.py b/examples/1_mean_risk/plot_3_efficient_frontier.py deleted file mode 100644 index 0e80330..0000000 --- a/examples/1_mean_risk/plot_3_efficient_frontier.py +++ /dev/null @@ -1,103 +0,0 @@ -""" -================== -Efficient Frontier -================== - -This tutorial uses the :class:`~deepfolio.optimization.MeanRisk` optimization to find an -ensemble of portfolios belonging to the Mean-Variance efficient frontier (pareto font). 
-""" - -# %% -# Data -# ==== -# We load the S&P 500 :ref:`dataset ` composed of the daily prices of 20 -# assets from the S&P 500 Index composition starting from 1990-01-02 up to 2022-12-28: - -import numpy as np -from plotly.io import show -from PyTorch.model_selection import train_test_split - -from deepfolio import PerfMeasure, RatioMeasure, RiskMeasure -from deepfolio.datasets import load_sp500_dataset -from deepfolio.optimization import MeanRisk -from deepfolio.preprocessing import prices_to_returns - -prices = load_sp500_dataset() - -X = prices_to_returns(prices) -X_train, X_test = train_test_split(X, test_size=0.33, shuffle=False) - -# %% -# Model -# ===== -# We create the Mean-Variance model and then fit it on the training set. -# The parameter `efficient_frontier_size=30` is used to find 30 portfolios on the entire -# efficient frontier: -model = MeanRisk( - risk_measure=RiskMeasure.VARIANCE, - efficient_frontier_size=30, - portfolio_params=dict(name="Variance"), -) -model.fit(X_train) -print(model.weights_.shape) - -# %% -# Prediction -# ========== -# We predict this model on both the training set and the test set. -# The `predict` method returns the :class:`~deepfolio.population.Population` of -# 30 :class:`~deepfolio.portfolio.Portfolio`: -population_train = model.predict(X_train) -population_test = model.predict(X_test) - -# %% -# Analysis -# ======== -# For improved analysis, we add a "Train" and "Test" tag to the portfolios and -# concatenate the training and the test populations: -population_train.set_portfolio_params(tag="Train") -population_test.set_portfolio_params(tag="Test") - -population = population_train + population_test - -fig = population.plot_measures( - x=RiskMeasure.ANNUALIZED_VARIANCE, - y=PerfMeasure.ANNUALIZED_MEAN, - color_scale=RatioMeasure.ANNUALIZED_SHARPE_RATIO, - hover_measures=[RiskMeasure.MAX_DRAWDOWN, RatioMeasure.ANNUALIZED_SORTINO_RATIO], -) -show(fig) - -# %% -# | -# -# Let's plot the composition of the 30 portfolios: -population_train.plot_composition() - -# %% -# Let's print the Sharpe Ratio of the 30 portfolios on the test set: -population_test.measures(measure=RatioMeasure.ANNUALIZED_SHARPE_RATIO) - -# %% -# Finally, we can show a full summary of the 30 portfolios evaluated on the test set: -population.summary() - -# %% -# Instead of providing `efficient_frontier_size=30`, you can also provide an array of -# lower bounds for the expected returns using `min_return`. 
In the below example, we -# find the 5 portfolios that minimize the variance under a minimum return constraint of -# 15%, 20%, 25%, 30% and 35% (annualized): -model = MeanRisk( - risk_measure=RiskMeasure.VARIANCE, - min_return=np.array([0.15, 0.20, 0.25, 0.30, 0.35]) / 252, - portfolio_params=dict(name="Variance"), -) - -population = model.fit_predict(X_train) - -population.plot_measures( - x=RiskMeasure.ANNUALIZED_VARIANCE, - y=PerfMeasure.ANNUALIZED_MEAN, - color_scale=RatioMeasure.ANNUALIZED_SHARPE_RATIO, - hover_measures=[RiskMeasure.MAX_DRAWDOWN, RatioMeasure.ANNUALIZED_SORTINO_RATIO], -) diff --git a/examples/1_mean_risk/plot_4_mean_variance_cdar.py b/examples/1_mean_risk/plot_4_mean_variance_cdar.py deleted file mode 100644 index d7fc3df..0000000 --- a/examples/1_mean_risk/plot_4_mean_variance_cdar.py +++ /dev/null @@ -1,130 +0,0 @@ -""" -========================== -Mean-Variance-CDaR Surface -========================== - -This tutorial uses the :class:`~deepfolio.optimization.MeanRisk` optimization to find an -ensemble of portfolios belonging to the Mean-Variance-CDaR efficient frontier. -""" - -# %% -# Data -# ==== -# We load the S&P 500 :ref:`dataset ` composed of the daily prices of 20 -# assets from the S&P 500 Index composition starting from 2015-01-05 up to 2022-12-28: - -import numpy as np -from plotly.io import show -from PyTorch.model_selection import train_test_split - -from deepfolio import PerfMeasure, RatioMeasure, RiskMeasure -from deepfolio.datasets import load_sp500_dataset -from deepfolio.optimization import MeanRisk, ObjectiveFunction -from deepfolio.preprocessing import prices_to_returns - -prices = load_sp500_dataset() -prices = prices["2015":] - -X = prices_to_returns(prices) -X_train, X_test = train_test_split(X, test_size=0.33, shuffle=False) - -# %% -# Model -# ===== -# First, we create a Maximum Sharpe Ratio model that we fit on the training set: -model = MeanRisk( - risk_measure=RiskMeasure.VARIANCE, - objective_function=ObjectiveFunction.MAXIMIZE_RATIO, -) -portfolio = model.fit_predict(X_train) -print(portfolio.cdar) - -# %% -# Let's assume that we are not satisfied with the CDaR (Conditional Drawdown at Risk) -# of 17% corresponding to the maximum Sharpe portfolio. We want to analyze alternative -# portfolios that maximize the Sharpe under CDaR constraints. -# To have an idea of the feasible CDaR constraints, we analyze the Minimum CDaR -# portfolio: -model = MeanRisk(risk_measure=RiskMeasure.CDAR) -portfolio = model.fit_predict(X_train) -print(portfolio.cdar) - -# %% -# The minimum CDaR is 9.72%. 
-# Now we find the pareto optimal portfolios that maximizes the Sharpe under CDaR -# constraint ranging from 9.72% to 17%: -model = MeanRisk( - risk_measure=RiskMeasure.VARIANCE, - objective_function=ObjectiveFunction.MAXIMIZE_RATIO, - max_cdar=np.linspace(start=0.0972, stop=0.17, num=10), -) -model.fit(X_train) -print(model.weights_.shape) - -# %% -# Analysis -# ========== -# We predict this model on both the training set and the test set to analyze the -# deformation of the efficient frontier: -population_train = model.predict(X_train) -population_test = model.predict(X_test) - -population_train.set_portfolio_params(tag="Train") -population_test.set_portfolio_params(tag="Test") - -population = population_train + population_test - -population.plot_measures( - x=RiskMeasure.CDAR, - y=RatioMeasure.ANNUALIZED_SHARPE_RATIO, - color_scale=RatioMeasure.ANNUALIZED_SHARPE_RATIO, - hover_measures=[RiskMeasure.MAX_DRAWDOWN, RatioMeasure.ANNUALIZED_SORTINO_RATIO], -) - -# %% -# Pareto Optimal Surface -# ====================== -# Instead of analyzing the Sharpe-CDaR efficient frontier, we can analyze the -# mean-Variance-CDaR pareto optimal surface: -variance_upper = population_train.max_measure(PerfMeasure.MEAN).variance -x = np.linspace(start=0.00012, stop=variance_upper, num=10) -y = np.linspace(start=0.10, stop=0.17, num=10) -x, y = map(np.ravel, np.meshgrid(x, y)) - -model = MeanRisk( - objective_function=ObjectiveFunction.MAXIMIZE_RETURN, - max_variance=x, - max_cdar=y, - raise_on_failure=False, -) -model.fit(X_train) - -population_train = model.predict(X_train) - -fig = population_train.plot_measures( - x=RiskMeasure.ANNUALIZED_VARIANCE, - y=RiskMeasure.CDAR, - z=PerfMeasure.ANNUALIZED_MEAN, - to_surface=True, -) -fig.update_layout(scene_camera=dict(eye=dict(x=-2, y=-0.5, z=1))) -show(fig) - -# %% -# | -# -# Let's plot the composition of the portfolios: -population_train.plot_composition() - -# %% -# Let's compare the average and standard-deviation of the Sharpe Ratio and CDaR Ratio of -# the portfolios on the training set versus the test set: -# -# Train: -print(population_train.measures_mean(measure=RatioMeasure.ANNUALIZED_SHARPE_RATIO)) -print(population_train.measures_std(measure=RatioMeasure.ANNUALIZED_SHARPE_RATIO)) - -# %% -# Test: -print(population_test.measures_mean(measure=RatioMeasure.ANNUALIZED_SHARPE_RATIO)) -print(population_test.measures_std(measure=RatioMeasure.ANNUALIZED_SHARPE_RATIO)) diff --git a/examples/1_mean_risk/plot_5_weight_constraints.py b/examples/1_mean_risk/plot_5_weight_constraints.py deleted file mode 100644 index 4ac3229..0000000 --- a/examples/1_mean_risk/plot_5_weight_constraints.py +++ /dev/null @@ -1,219 +0,0 @@ -""" -================== -Weight Constraints -================== - -This tutorial shows how to incorporate weight constraints into the -:class:`~deepfolio.optimization.MeanRisk` optimization. - -We will show how to use the below parameters: - * min_weights - * max_weights - * budget - * min_budget - * max_budget - * max_short - * max_long - * linear_constraints - * groups - * left_inequality - * right_inequality -""" - -# %% -# Data -# ==== -# We load the S&P 500 :ref:`dataset ` composed of the daily prices of 20 -# assets from the S&P 500 Index composition starting from 1990-01-02 up to 2022-12-28. 
-# We select only 3 assets to make the example more readable, which are Apple (AAPL), -# General Electric (GE) and JPMorgan (JPM): - -import numpy as np -from plotly.io import show - -from deepfolio.datasets import load_sp500_dataset -from deepfolio.optimization import MeanRisk -from deepfolio.preprocessing import prices_to_returns - -prices = load_sp500_dataset() -prices = prices[["AAPL", "GE", "JPM"]] - -X = prices_to_returns(prices) - -# %% -# Model -# ===== -# In this tutorial, we will use a Minimum Variance model. -# By default, :class:`~deepfolio.optimization.MeanRisk` is long only (`min_weights=0`) -# and fully invested (`budget=1`). In other terms, all weights are positive and sum to -# one. -model = MeanRisk() -model.fit(X) -print(sum(model.weights_)) -model.weights_ - -# %% -# Budget -# ====== -# The budget is the sum of long positions and short positions (sum of all weights). -# It can be `None` or a float. `None` means that there are no budget constraints. -# The default is `1.0` (fully invested). -# -# Examples: -# -# * budget = 1 –> fully invested portfolio -# * budget = 0 –> market neutral portfolio -# * budget = None –> no constraints on the sum of weights - -model = MeanRisk(budget=0.5) -model.fit(X) -print(sum(model.weights_)) -model.weights_ - -# %% -# You can also set a constraint on the minimum and maximum budget using `min_budget` -# and `max_budget`, which are the lower and upper bounds of the sum of long and short -# positions (sum of all weights). The default is `None`. If provided, you must set -# `budget=None`. -model = MeanRisk(budget=None, min_budget=0.3, max_budget=0.5) -model.fit(X) -print(sum(model.weights_)) -model.weights_ - -# %% -# Lower and Upper Bounds on Weights -# ================================= -# The weights lower and upper bounds are controlled by the parameters `min_weights` and -# `max_weights` respectively. -# You can provide `None`, a float, an array-like or a dictionary. -# `None` is equivalent to `-np.Inf` (no lower bounds). -# If a float is provided, it is applied to each asset. -# If a dictionary is provided, its (key/value) pair must be the (asset name/asset -# weight bound) and the input `X` of the `fit` method must be a DataFrame with the -# assets names in columns. -# The default values are `min_weights=0.0` (no short selling) and `max_weights=1.0` -# (each asset is below 100%). When using a dictionary, you don't have to provide -# constraints for all assets. If not provided, the default values (0.0 for min_weights -# and 1.0 for max_weights) will be assigned to the assets not specified in the -# dictionary. -# -# .. note :: -# -# When incorporating a pre-selection transformer into a Pipeline, using a list for -# weight constraints is not feasible, as we don't know in advance which assets will -# be selected by the pre-selection process. This is where the dictionary proves -# useful. -# -# Example: -# * min_weights = 0 –> long only portfolio (no short selling). -# * min_weights = None –> no lower bound (same as -np.Inf). -# * min_weights = -2 –> each weight must be above -200%. -# * min_weights = [0, -2, 0.5] –> "AAPL", "GE" and "JPM" must be above 0%, -200% and 50% respectively. -# * min_weights = {"AAPL": 0, "GE": -2} -> "AAPL", "GE" and "JPM" must be above 0%, -200% and 0% (default) respectively. -# * max_weights = 0 –> no long position (short only portfolio). -# * max_weights = None –> no upper bound (same as +np.Inf). -# * max_weights = 2 –> each weight must be below 200%. 
-# * max_weights = [1, 2, -0.5] -> "AAPL", "GE" and "JPM" must be below 100%, 200% and -50% respectively. -# * max_weights = {"AAPL": 1, "GE": 2} -> "AAPL", "GE" and "JPM" must be below 100%, 200% and 100% (default). - -# %% -# Let's create a model that allows short positions with a budget of -100%: -model = MeanRisk(budget=-1, min_weights=-1) -model.fit(X) -print(sum(model.weights_)) -model.weights_ - -# %% -# Let's add weight constraints on "AAPL", "GE" and "JPM" to be above 0%, 50% and 10% -# respectively: -model = MeanRisk(min_weights=[0, 0.5, 0.1]) -model.fit(X) -print(sum(model.weights_)) -model.weights_ - -# %% -# Let's plot the composition: -portfolio = model.predict(X) -fig = portfolio.plot_composition() -show(fig) - -# %% -# | -# -# Let's create the same model as above but using partial dictionary: -model = MeanRisk(min_weights={"GE": 0.5, "JPM": 0.1}) -model.fit(X) -print(sum(model.weights_)) -model.weights_ - -# %% -# Let's create a model with a leverage of 3 and every weights below 150%: -model = MeanRisk(budget=3, max_weights=1.5) -model.fit(X) -print(sum(model.weights_)) -model.weights_ - -# %% -# Short and Long Position Constraints -# =================================== -# Constraints on the upper bound for short and long positions can be set using -# `max_short` and `max_long`. The short position is defined as the sum of negative -# weights (in absolute term) and the long position as the sum of positive weights. - -# %% -# Let's create a fully invested long-short portfolio model with a total short position -# less than 50%: -model = MeanRisk(min_weights=-1, max_short=0.5) -model.fit(X) -print(sum(model.weights_)) -model.weights_ - -# %% -# Group and Linear Constraints -# ============================ -# We can assign groups to each asset using the `groups` parameter and set -# constraints on these groups using the `linear_constraint` parameter. -# The `groups` parameter can be a 2D array-like or a dictionary. If a dictionary is -# provided, its (key/value) pair must be the (asset name/asset groups). -# You can reference these groups and/or the asset names in `linear_constraint`, which -# is a list if strings following the below patterns: -# -# * "2.5 * ref1 + 0.10 * ref2 + 0.0013 <= 2.5 * ref3" -# * "ref1 >= 2.9 * ref2" -# * "ref1 <= ref2" -# * "ref1 >= ref1" -# -# Let's create a model with groups constraints on "industry sector" and -# "capitalization": -groups = { - "AAPL": ["Technology", "Mega Cap"], - "GE": ["Industrial", "Big Cap"], - "JPM": ["Financial", "Big Cap"], -} -# You can also provide a 2D array-like: -# groups = [["Technology", "Industrial", "Financial"], ["Mega Cap", "Big Cap", "Big Cap"]] -linear_constraints = [ - "Technology + 1.5 * Industrial <= 2 * Financial", # First group - "Mega Cap >= 0.75 * Big Cap", # Second group - "Technology >= Big Cap", # Mix of first and second groups - "Mega Cap >= 2 * JPM", # Mix of groups and assets -] -# Note that only the first constraint would be sufficient in that case. 
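To make the link with the matrix form shown in the "Left and Right Inequalities" section further below explicit: with the assets ordered (AAPL, GE, JPM), each group constraint reduces to one row of A·w <= b. A small check of the first one (my reading of the example, written out as plain numpy):

import numpy as np

# "Technology + 1.5 * Industrial <= 2 * Financial" over (AAPL, GE, JPM)
# becomes 1*w_AAPL + 1.5*w_GE - 2*w_JPM <= 0, i.e. the row [1.0, 1.5, -2.0] with b = 0
row = np.array([1.0, 1.5, -2.0])
w = np.array([0.2, 0.3, 0.5])   # any candidate weight vector
print(row @ w <= 0.0)           # True when this group constraint is satisfied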
-
-model = MeanRisk(groups=groups, linear_constraints=linear_constraints)
-model.fit(X)
-model.weights_
-
-# %%
-# Left and Right Inequalities
-# ===========================
-# Finally, you can also directly provide the matrix :math:`A` and the vector
-# :math:`b` of the linear constraint :math:`A \cdot w \leq b`:
-left_inequality = np.array(
-    [[1.0, 1.5, -2.0], [-1.0, 0.75, 0.75], [-1.0, 1.0, 1.0], [-1.0, -0.0, 2.0]]
-)
-right_inequality = np.array([0.0, 0.0, 0.0, 0.0])
-
-model = MeanRisk(left_inequality=left_inequality, right_inequality=right_inequality)
-model.fit(X)
-model.weights_
diff --git a/examples/1_mean_risk/plot_6_transaction_costs.py b/examples/1_mean_risk/plot_6_transaction_costs.py
deleted file mode 100644
index 855b55e..0000000
--- a/examples/1_mean_risk/plot_6_transaction_costs.py
+++ /dev/null
@@ -1,199 +0,0 @@
-r"""
-=================
-Transaction Costs
-=================
-
-This tutorial shows how to incorporate transaction costs (TC) into the
-:class:`~deepfolio.optimization.MeanRisk` optimization.
-
-TC are fixed costs incurred when buying or selling an asset.
-
-By using the `transaction_costs` parameter, you can add linear TC to the optimization
-problem:
-
-.. math:: total\_cost = \sum_{i=1}^{N} c_{i} \times |w_{i} - w\_prev_{i}|
-
-with :math:`c_{i}` the TC of asset i, :math:`w_{i}` its weight and :math:`w\_prev_{i}`
-its previous weight (defined in `previous_weights`).
-The float :math:`total\_cost` impacts the portfolio expected return in the
-optimization:
-
-.. math:: expected\_return = \mu^{T} \cdot w - total\_cost
-
-with :math:`\mu` the vector of assets expected returns and :math:`w` the vector of
-assets weights.
-
-The `transaction_costs` parameter can be a float, a dictionary or an array-like of
-shape `(n_assets, )`. If a float is provided, it is applied to each asset.
-If a dictionary is provided, its (key/value) pair must be the (asset name/asset TC) and
-the input `X` of the `fit` method must be a DataFrame with the assets names in columns.
-The default is 0.0 (no transaction costs).
-
-.. warning::
-
-    According to the above formula, the periodicity of the transaction costs
-    needs to be homogeneous to the periodicity of :math:`\mu`. For example, if
-    the input `X` is composed of **daily** returns, the `transaction_costs` need
-    to be expressed as **daily** costs.
-
-This means that you need to convert these fixed transaction costs into daily costs. To
-achieve this, you need the notion of expected investment duration. This is crucial since
-the optimization problem has no notion of investment duration.
-
-For example, let's assume that asset A has an expected daily return of 0.01%
-with a TC of 1% and asset B has an expected daily return of 0.005% with no TC.
-Let's assume both assets have the same volatility and a correlation of 1.0.
-If the investment duration is only one month, we should allocate all the weights to
-asset B. However, if the investment duration is one year, we should allocate all the
-weights to asset A.
-
-Example:
-    * Duration = 1 month (21 business days):
-        * 1 month expected return A ≈ -0.8%
-        * 1 month expected return B ≈ 0.1%
-    * Duration = 1 year (252 business days):
-        * 1 year expected return A ≈ 1.5%
-        * 1 year expected return B ≈ 1.3%
-
-So in order to take that duration into account, you should divide the fixed TC by the
-expected investment duration.
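The arithmetic behind these numbers can be checked in a few lines. The sketch below is purely illustrative and not part of the original example; it only reproduces the net expected return (daily return times horizon, minus the fixed TC) for the two hypothetical assets above:

daily_return_a, fixed_tc_a = 0.0001, 0.01   # asset A: 0.01% per day, 1% fixed TC
daily_return_b, fixed_tc_b = 0.00005, 0.0   # asset B: 0.005% per day, no TC

for horizon in (21, 252):  # one month and one year of business days
    net_a = daily_return_a * horizon - fixed_tc_a
    net_b = daily_return_b * horizon - fixed_tc_b
    print(f"{horizon} days: A = {net_a:+.2%}, B = {net_b:+.2%}")
# 21 days:  A ≈ -0.8%, B ≈ +0.1%
# 252 days: A ≈ +1.5%, B ≈ +1.3%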
-""" - -# %% -# Data -# ==== -# We load the S&P 500 :ref:`dataset ` composed of the daily prices of 20 -# assets from the S&P 500 Index composition starting from 1990-01-02 up to 2022-12-28. -# We select only 3 assets to make the example more readable, which are Apple (AAPL), -# General Electric (GE) and JPMorgan (JPM): - -import numpy as np -from plotly.io import show - -from deepfolio import MultiPeriodPortfolio, Population, Portfolio -from deepfolio.datasets import load_sp500_dataset -from deepfolio.model_selection import WalkForward, cross_val_predict -from deepfolio.optimization import MeanRisk, ObjectiveFunction -from deepfolio.preprocessing import prices_to_returns - -prices = load_sp500_dataset() -prices = prices[["AAPL", "GE", "JPM"]] - -X = prices_to_returns(prices) - -# %% -# Model -# ===== -# In this tutorial, we will use the Maximum Mean-Variance Utility model with a risk -# aversion of 1.0: -model = MeanRisk(objective_function=ObjectiveFunction.MAXIMIZE_UTILITY) -model.fit(X) -model.weights_ - -# %% -# Transaction Cost -# ================ -# Let's assume we have the below TC: -# * Apple: 1% -# * General Electric: 0.50% -# * JPMorgan: 0.20% -# -# and an investment duration of one month (21 business days): -transaction_costs = {"AAPL": 0.01 / 21, "GE": 0.005 / 21, "JPM": 0.002 / 21} -# Same as transaction_costs = np.array([0.01, 0.005, 0.002]) / 21 - -# %% -# First, we assume that there is no previous position: -model_tc = MeanRisk( - objective_function=ObjectiveFunction.MAXIMIZE_UTILITY, - transaction_costs=transaction_costs, -) -model_tc.fit(X) -model_tc.weights_ - -# %% -# The higher TC of Apple induced a change of weights toward JPMorgan: -model_tc.weights_ - model.weights_ - -# %% -# Now, let's assume that the previous position was equal-weighted: -model_tc2 = MeanRisk( - objective_function=ObjectiveFunction.MAXIMIZE_UTILITY, - transaction_costs=transaction_costs, - previous_weights=np.ones(3) / 3, -) -model_tc2.fit(X) -model_tc2.weights_ - -# %% -# Notice that the weight of General Electric becomes non-negligible due to the cost of -# rebalancing the position: -model_tc2.weights_ - model.weights_ - -# %% -# Multi-period portfolio -# ====================== -# Let's assume that we want to rebalance our portfolio every 60 days by re-fitting the -# model on the latest 60 days. We test the impact of TC using Walk Forward Analysis: -holding_period = 60 -fitting_period = 60 -cv = WalkForward(train_size=fitting_period, test_size=holding_period) - - -# %% -# As explained above, we transform the fix TC into a daily cost by dividing the TC by -# the expected investment duration: -transaction_costs = np.array([0.01, 0.005, 0.002]) / holding_period - -# %% -# First, we train and test the model without TC: -model = MeanRisk(objective_function=ObjectiveFunction.MAXIMIZE_UTILITY) -# pred1 is a MultiPeriodPortfolio -pred1 = cross_val_predict(model, X, cv=cv, n_jobs=-1) -pred1.name = "pred1" - -# %% -# Then, we train the model without TC and test it with TC. The model trained without TC -# is the same as above so we can retrieve the results and simply update the prediction -# with the TC: -pred2 = MultiPeriodPortfolio(name="pred2") -previous_weights = None -for portfolio in pred1: - new_portfolio = Portfolio( - X=portfolio.X, - weights=portfolio.weights, - previous_weights=previous_weights, - transaction_costs=transaction_costs, - ) - previous_weights = portfolio.weights - pred2.append(new_portfolio) - -# %% -# Finally, we train and test the model with TC. 
Note that we cannot use the
-# `cross_val_predict` function anymore because it uses parallelization and cannot handle
-# the `previous_weights` dependency between folds:
-pred3 = MultiPeriodPortfolio(name="pred3")
-
-model.set_params(transaction_costs=transaction_costs)
-previous_weights = None
-for train, test in cv.split(X):
-    X_train = X.take(train)
-    X_test = X.take(test)
-    model.set_params(previous_weights=previous_weights)
-    model.fit(X_train)
-    portfolio = model.predict(X_test)
-    pred3.append(portfolio)
-    previous_weights = model.weights_
-
-# %%
-# We visualize the results by plotting the cumulative returns of the successive test
-# periods:
-population = Population([pred1, pred2, pred3])
-fig = population.plot_cumulative_returns()
-show(fig)
-
-# %%
-# |
-#
-# If we exclude the unrealistic prediction without TC, we notice that the model
-# **fitted with TC** outperforms the model **fitted without TC**.
diff --git a/examples/1_mean_risk/plot_7_management_fees.py b/examples/1_mean_risk/plot_7_management_fees.py
deleted file mode 100644
index 3c2ea11..0000000
--- a/examples/1_mean_risk/plot_7_management_fees.py
+++ /dev/null
@@ -1,135 +0,0 @@
-r"""
-===============
-Management Fees
-===============
-
-This tutorial shows how to incorporate management fees (MF) into the
-:class:`~deepfolio.optimization.MeanRisk` optimization.
-
-By using the `management_fees` parameter, you can add linear MF to the optimization
-problem:
-
-.. math:: total\_fee = \sum_{i=1}^{N} f_{i} \times w_{i}
-
-with :math:`f_{i}` the management fee of asset i and :math:`w_{i}` its weight.
-The float :math:`total\_fee` impacts the portfolio expected return in the optimization:
-
-.. math:: expected\_return = \mu^{T} \cdot w - total\_fee
-
-with :math:`\mu` the vector of assets expected returns and :math:`w` the vector of
-assets weights.
-
-The `management_fees` parameter can be a float, a dictionary or an array-like of
-shape `(n_assets, )`. If a float is provided, it is applied to each asset.
-If a dictionary is provided, its (key/value) pair must be the (asset name/asset MF) and
-the input `X` of the `fit` method must be a DataFrame with the assets names in
-columns. The default is 0.0 (no management fees).
-
-.. note::
-
-    Another approach is to directly apply the MF to the input `X` in order to express
-    the returns net of fees. However, when estimating the :math:`\mu` parameter using,
-    for example, Shrinkage estimators, this approach would mix a deterministic amount
-    with an uncertain one, leading to unwanted bias in the management fees.
-"""
-
-# %%
-# Data
-# ====
-# We load the S&P 500 :ref:`dataset ` composed of the daily prices of 20
-# assets from the S&P 500 Index composition starting from 1990-01-02 up to 2022-12-28.
-# We select only 3 assets to make the example more readable, which are Apple (AAPL),
-# General Electric (GE) and JPMorgan (JPM).
- -import numpy as np -from plotly.io import show - -from deepfolio import Population -from deepfolio.datasets import load_sp500_dataset -from deepfolio.model_selection import WalkForward, cross_val_predict -from deepfolio.optimization import MeanRisk, ObjectiveFunction -from deepfolio.preprocessing import prices_to_returns - -prices = load_sp500_dataset() -prices = prices[["AAPL", "GE", "JPM"]] - -X = prices_to_returns(prices) - -# %% -# Model -# ===== -# In this tutorial, we will use the Maximum Mean-Variance Utility model with a risk -# aversion of 1.0: -model = MeanRisk(objective_function=ObjectiveFunction.MAXIMIZE_UTILITY) -model.fit(X) -model.weights_ - -# %% -# Management Fees -# =============== -# Management fees are usually used in assets under management but for this example we -# will assume that it also applies for the below stocks: -# -# * Apple: 3% p.a. -# * General Electric: 6% p.a. -# * JPMorgan: 1% p.a. -# -# The MF are expressed in per annum, so we need to convert them in daily MF. -# We suppose 252 trading days in a year: -management_fees = {"AAPL": 0.03 / 252, "GE": 0.06 / 252, "JPM": 0.01 / 252} -# Same as management_fees = np.array([0.03, 0.06, 0.01]) / 252 - -model_mf = MeanRisk( - objective_function=ObjectiveFunction.MAXIMIZE_UTILITY, - management_fees=management_fees, -) -model_mf.fit(X) -model_mf.weights_ - -# %% -# The higher MF of Apple induced a change of weights toward JPMorgan: -model_mf.weights_ - model.weights_ - -# %% -# Multi-period portfolio -# ====================== -# Let's assume that we want to rebalance our portfolio every 60 days by re-fitting the -# model on the latest 60 days. We test the impact of MF using Walk Forward Analysis: -holding_period = 60 -fitting_period = 60 -cv = WalkForward(train_size=fitting_period, test_size=holding_period) - -# %% -# As explained above, we transform the yearly MF into a daily MF: -management_fees = np.array([0.03, 0.06, 0.01]) / 252 - -# %% -# First, we train the model without MF and test it with MF. -# Note that `portfolio_params` are parameters passed to the Portfolio during `predict` -# and **not** during `fit`: -model = MeanRisk( - objective_function=ObjectiveFunction.MAXIMIZE_UTILITY, - portfolio_params=dict(management_fees=management_fees), -) -# pred1 is a MultiPeriodPortfolio -pred1 = cross_val_predict(model, X, cv=cv, n_jobs=-1) -pred1.name = "pred1" - -# %% -# Then, we train and test the model with MF: -model.set_params(management_fees=management_fees) -pred2 = cross_val_predict(model, X, cv=cv, n_jobs=-1) -pred2.name = "pred2" - -# %% -# We visualize the results by plotting the cumulative returns of the successive test -# periods: -population = Population([pred1, pred2]) -fig = population.plot_cumulative_returns() -show(fig) - -# %% -# | -# -# We notice that the model **fitted with MF** outperform the model **fitted without -# MF**. diff --git a/examples/1_mean_risk/plot_8_regularization.py b/examples/1_mean_risk/plot_8_regularization.py deleted file mode 100644 index e081f76..0000000 --- a/examples/1_mean_risk/plot_8_regularization.py +++ /dev/null @@ -1,331 +0,0 @@ -r""" -======================== -L1 and L2 Regularization -======================== - -This tutorial shows how to incorporate regularization into the -:class:`~deepfolio.optimization.MeanRisk` optimization. - -Regularization tends to increase robustness and out-of-sample stability. - -The `l1_coef` parameter is used to penalize the objective function by the L1 norm: - -.. 
math:: l1\_coef \times \Vert w \Vert_{1} = l1\_coef \times \sum_{i=1}^{N} |w_{i}| - -and the `l2_coef` parameter is used to penalize the objective function by the L2 norm: - -.. math:: l2\_coef \times \Vert w \Vert_{2}^{2} = l2\_coef \times \sum_{i=1}^{N} w_{i}^2 - -.. warning :: - - Increasing the L1 coefficient may reduce the number of non-zero weights - (cardinality), which can reduce diversification. However, a reduction in - diversification does not necessarily equate to a reduction in robustness. - -.. note :: - - Increasing the L1 coefficient has no impact if the portfolio is long only. - -In this example we will use a dataset with a large number of assets and long-short -allocation to exacerbate overfitting. - -First, we will analyze the impact of regularization on the entire Mean-Variance efficient -frontier and its stability from the training set to the test set. Then, we will show how -to tune the regularization coefficients using cross-validation with `GridSearchCV`. -""" - -# %% -# Data -# ==== -# We load the FTSE 100 :ref:`dataset ` composed of the daily prices of 64 -# assets from the FTSE 100 Index composition starting from 2000-01-04 up to 2023-05-31. -import numpy as np -import plotly.graph_objects as go -from plotly.io import show -from scipy.stats import loguniform -from PyTorch import clone -from PyTorch.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split - -from deepfolio import PerfMeasure, Population, RatioMeasure, RiskMeasure -from deepfolio.datasets import load_ftse100_dataset -from deepfolio.metrics import make_scorer -from deepfolio.model_selection import WalkForward, cross_val_predict -from deepfolio.optimization import EqualWeighted, MeanRisk, ObjectiveFunction -from deepfolio.preprocessing import prices_to_returns - -prices = load_ftse100_dataset() -X = prices_to_returns(prices) - -X_train, X_test = train_test_split(X, test_size=0.33, shuffle=False) - -# %% -# Efficient Frontier -# ================== -# First, we create a Mean-Variance model to estimate the efficient frontier without -# regularization. We constrain the volatility to be below 30% p.a. -model = MeanRisk( - risk_measure=RiskMeasure.VARIANCE, - min_weights=-1, - max_variance=0.3**2 / 252, - efficient_frontier_size=30, - portfolio_params=dict(name="Mean-Variance", tag="No Regularization"), -) -model.fit(X_train) -model.weights_.shape - -# %% -# Now we create the two regularized models: -model_l1 = MeanRisk( - risk_measure=RiskMeasure.VARIANCE, - min_weights=-1, - max_variance=0.3**2 / 252, - efficient_frontier_size=30, - l1_coef=0.001, - portfolio_params=dict(name="Mean-Variance", tag="L1 Regularization"), -) -model_l1.fit(X_train) - -model_l2 = clone(model_l1) -model_l2.set_params( - l1_coef=0, - l2_coef=0.001, - portfolio_params=dict(name="Mean-Variance", tag="L2 Regularization"), -) -model_l2.fit(X_train) -model_l2.weights_.shape - -# %% -# Let's plot the efficient frontiers on the training set: -population_train = ( - model.predict(X_train) + model_l1.predict(X_train) + model_l2.predict(X_train) -) - -population_train.plot_measures( - x=RiskMeasure.ANNUALIZED_STANDARD_DEVIATION, - y=PerfMeasure.ANNUALIZED_MEAN, - color_scale=RatioMeasure.ANNUALIZED_SHARPE_RATIO, - hover_measures=[RiskMeasure.MAX_DRAWDOWN, RatioMeasure.ANNUALIZED_SORTINO_RATIO], -) - -# %% -# Prediction -# ========== -# The parameter `efficient_frontier_size=30` means that when we called the `fit` method, -# each model ran 30 optimizations along the efficient frontier. 
Therefore, the `predict` -# method will return a :class:`~deepfolio.population.Population` composed of 30 -# :class:`~deepfolio.portfolio.Portfolio`: -population_test = ( - model.predict(X_test) + model_l1.predict(X_test) + model_l2.predict(X_test) -) - -for tag in ["No Regularization", "L1 Regularization"]: - print("=================") - print(tag) - print("=================") - print( - "Avg Sharpe Ratio Train:" - f" {population_train.measures_mean(measure=RatioMeasure.ANNUALIZED_SHARPE_RATIO, tags=tag):0.2f}" - ) - print( - "Avg Sharpe Ratio Test:" - f" {population_test.measures_mean(measure=RatioMeasure.ANNUALIZED_SHARPE_RATIO, tags=tag):0.2f}" - ) - print( - "Avg non-zeros assets:" - f" {np.mean([len(ptf.nonzero_assets) for ptf in population_train.filter(tags=tag)]):0.2f}" - ) - print("\n") - -population_test.plot_measures( - x=RiskMeasure.ANNUALIZED_STANDARD_DEVIATION, - y=PerfMeasure.ANNUALIZED_MEAN, - color_scale=RatioMeasure.ANNUALIZED_SHARPE_RATIO, - hover_measures=[RiskMeasure.MAX_DRAWDOWN, RatioMeasure.ANNUALIZED_SORTINO_RATIO], -) - -# %% -# In this example we can clearly see that L1 regularization reduced the number of assets -# (from 64 down to 14) and made the model more robust: the portfolios without -# regularization have a higher Sharpe on the train set and a lower Sharpe on the test -# set compared to the portfolios with regularization. - -# %% -# Hyper-parameter Tuning -# ====================== -# In this section, we consider a 3 months rolling (60 business days) long-short -# allocation fitted on the preceding year of data (252 business days) that maximizes the -# return under a volatility constraint of 30% p.a. -# -# We use `GridSearchCV` to select the optimal L1 and L2 regularization coefficients on -# the training set using cross-validation that achieve the highest -# mean test score. We use the default score, which is the Sharpe ratio. -# Finally, we evaluate the model on the test set and compare it with the equal-weighted -# benchmark and a reference model without regularization: - -ref_model = MeanRisk( - risk_measure=RiskMeasure.VARIANCE, - objective_function=ObjectiveFunction.MAXIMIZE_RETURN, - max_variance=0.3**2 / 252, - min_weights=-1, -) - -cv = WalkForward(train_size=252, test_size=60) - -grid_search = GridSearchCV( - estimator=ref_model, - cv=cv, - n_jobs=-1, - param_grid={ - "l1_coef": [0.001, 0.01, 0.1], - "l2_coef": [0.001, 0.01, 0.1], - }, -) -grid_search.fit(X_train) -best_model = grid_search.best_estimator_ -print(best_model) - -# %% -# The optimal parameters among the above 3x3 grid are 0.01 for the L1 coefficient -# and the L2 coefficient. -# These parameters are the ones that achieved the highest mean out-of-sample Sharpe -# Ratio. Note that the score can be changed to another measure or function using the -# `scoring` parameter. -# -# For continuous parameters, such as L1 and L2 above, a better approach is to use -# `RandomizedSearchCV` and specify a continuous distribution to take full advantage of -# the randomization. -# -# A continuous log-uniform random variable is the continuous version of a log-spaced -# parameter. For example, to specify the equivalent of the L1 parameter from above, -# `loguniform(1e-3, 1e-1)` can be used instead of `[0.001, 0.01, 0.1]`. 
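As a quick illustration (not part of the original example) of what such a log-uniform distribution produces, we can draw a few samples; they spread roughly evenly across the decades between 1e-3 and 1e-1 instead of clustering near the upper end:

from scipy.stats import loguniform

print(loguniform(1e-3, 1e-1).rvs(size=5, random_state=0))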
-# -# Mirroring the example above in grid search, we can specify a continuous random -# variable that is log-uniformly distributed between 1e-3 and 1e-1: - -randomized_search = RandomizedSearchCV( - estimator=ref_model, - cv=cv, - n_jobs=-1, - param_distributions={ - "l2_coef": loguniform(1e-3, 1e-1), - }, - n_iter=100, - return_train_score=True, - scoring=make_scorer(RatioMeasure.ANNUALIZED_SHARPE_RATIO), -) -randomized_search.fit(X_train) -best_model_rd = randomized_search.best_estimator_ -print(best_model_rd) - -# %% -# Let's plot both the average in-sample and out-of-sample scores (annualized Sharpe -# ratio) as a function of `l2_coef`: - -cv_results = randomized_search.cv_results_ -x = np.asarray(cv_results["param_l2_coef"]).astype(float) -sort_idx = np.argsort(x) -y_train_mean = cv_results["mean_train_score"][sort_idx] -y_train_std = cv_results["std_train_score"][sort_idx] -y_test_mean = cv_results["mean_test_score"][sort_idx] -y_test_std = cv_results["std_test_score"][sort_idx] -x = x[sort_idx] - -fig = go.Figure([ - go.Scatter( - x=x, - y=y_train_mean, - name="Train", - mode="lines", - line=dict(color="rgb(31, 119, 180)"), - ), - go.Scatter( - x=x, - y=y_train_mean + y_train_std, - mode="lines", - line=dict(width=0), - showlegend=False, - ), - go.Scatter( - x=x, - y=y_train_mean - y_train_std, - mode="lines", - line=dict(width=0), - showlegend=False, - fillcolor="rgba(31, 119, 180,0.15)", - fill="tonexty", - ), - go.Scatter( - x=x, - y=y_test_mean, - name="Test", - mode="lines", - line=dict(color="rgb(255,165,0)"), - ), - go.Scatter( - x=x, - y=y_test_mean + y_test_std, - mode="lines", - line=dict(width=0), - showlegend=False, - ), - go.Scatter( - x=x, - y=y_test_mean - y_test_std, - line=dict(width=0), - mode="lines", - fillcolor="rgba(255,165,0, 0.15)", - fill="tonexty", - showlegend=False, - ), -]) -fig.add_vline( - x=randomized_search.best_params_["l2_coef"], - line_width=2, - line_dash="dash", - line_color="green", -) -fig.update_layout( - title="Train/Test score", - xaxis_title="L2 Coef", - yaxis_title="Annualized Sharpe Ratio", -) -fig.update_yaxes(tickformat=".2f") -show(fig) - -# %% -# | -# -# The highest mean out-of-sample Sharpe Ratio is 1.55 and is achieved for a L2 coef of -# 0.023. -# Also note that without regularization, the mean train Sharpe Ratio is around -# six time higher than the mean test Sharpe Ratio. That would be a clear indiction of -# overfitting. -# -# Now, we analyze all three models on the test set. By using `cross_val_predict` with -# `WalkForward`, we are able to compute efficiently the `MultiPeriodPortfolio` -# composed of 60 days rolling portfolios fitted on the preceding 252 days: - -benchmark = EqualWeighted() -pred_bench = cross_val_predict(benchmark, X_test, cv=cv) -pred_bench.name = "Benchmark" - -pred_no_reg = cross_val_predict(ref_model, X_test, cv=cv) -pred_no_reg.name = "No Regularization" - -pred_reg = cross_val_predict(best_model, X_test, cv=cv, n_jobs=-1) -pred_reg.name = "Regularization" - -population = Population([pred_no_reg, pred_reg, pred_bench]) -population.plot_cumulative_returns() - -# %% -# From the plot and the below summary, we can see that the un-regularized model is -# overfitted and perform poorly on the test set. Its annualized volatility is 54%, which -# is significantly above the model upper-bound of 30% and its Sharpe Ratio is 0.32 which -# is the lowest of all models. 
-
-population.summary()
-
-# %%
-# Finally, we plot the composition of the regularized multi-period portfolio:
-pred_reg.plot_composition()
diff --git a/examples/1_mean_risk/plot_9_uncertainty_set.py b/examples/1_mean_risk/plot_9_uncertainty_set.py
deleted file mode 100644
index 9d97f7e..0000000
--- a/examples/1_mean_risk/plot_9_uncertainty_set.py
+++ /dev/null
@@ -1,229 +0,0 @@
-r"""
-===============
-Uncertainty Set
-===============
-
-This tutorial shows how to incorporate expected returns uncertainty sets into the
-:class:`~deepfolio.optimization.MeanRisk` optimization.
-
-By using the :ref:`Mu Uncertainty set estimator `,
-the assets' expected returns are modelled with an ellipsoidal uncertainty set.
-This approach, known as worst-case optimization, falls under the umbrella of robust
-optimization. It reduces the instability that arises from the estimation errors of the
-expected returns.
-
-The worst-case portfolio expected return is:
-
-    .. math:: w^T\hat{\mu} - \kappa_{\mu}\lVert S_{\mu}^\frac{1}{2}w\rVert_{2}
-
-with :math:`\kappa` the size of the ellipsoid (confidence region) and :math:`S` its
-shape.
-
-In this example, we will use a Mean-CVaR model with an
-:class:`~deepfolio.uncertainty_set.EmpiricalMuUncertaintySet` estimator.
-
-Note that other uncertainty sets can be used, for example:
-:class:`~deepfolio.uncertainty_set.BootstrapMuUncertaintySet`.
-"""
-
-# %%
-# Data
-# ====
-# We load the FTSE 100 :ref:`dataset ` composed of the daily prices of 64
-# assets from the FTSE 100 Index composition starting from 2000-01-04 up to 2023-05-31:
-import numpy as np
-import plotly.graph_objects as go
-from plotly.io import show
-from scipy.stats import uniform
-from sklearn import clone
-from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split
-
-from deepfolio import PerfMeasure, Population, RatioMeasure, RiskMeasure
-from deepfolio.datasets import load_ftse100_dataset
-from deepfolio.metrics import make_scorer
-from deepfolio.model_selection import WalkForward, cross_val_predict
-from deepfolio.optimization import MeanRisk, ObjectiveFunction
-from deepfolio.preprocessing import prices_to_returns
-from deepfolio.uncertainty_set import EmpiricalMuUncertaintySet
-
-prices = load_ftse100_dataset()
-
-X = prices_to_returns(prices)
-X_train, X_test = train_test_split(X, test_size=0.33, shuffle=False)
-
-# %%
-# Efficient Frontier
-# ==================
-# First, we create a Mean-CVaR model to estimate the efficient frontier without
-# uncertainty set.
We constrain the CVaR at 95% to be below 2% (representing the -# average loss of the worst 5% daily returns over the period): -model = MeanRisk( - risk_measure=RiskMeasure.CVAR, - min_weights=-1, - max_cvar=0.02, - efficient_frontier_size=20, - portfolio_params=dict(name="Mean-CVaR", tag="No Uncertainty Set"), -) -model.fit(X_train) -model.weights_.shape - -# %% -# Now, we create a robust (worst case) Mean-CVaR model with an uncertainty set on the -# expected returns: -model_uncertainty = MeanRisk( - risk_measure=RiskMeasure.CVAR, - min_weights=-1, - max_cvar=0.02, - efficient_frontier_size=20, - mu_uncertainty_set_estimator=EmpiricalMuUncertaintySet(confidence_level=0.60), - portfolio_params=dict(name="Mean-CVaR", tag="Mu Uncertainty Set - 60%"), -) -model_uncertainty.fit(X_train) -model_uncertainty.weights_.shape - -# %% -# Let's plot both efficient frontiers on the training set: -population_train = model.predict(X_train) + model_uncertainty.predict(X_train) - -population_train.plot_measures( - x=RiskMeasure.CVAR, - y=PerfMeasure.ANNUALIZED_MEAN, - color_scale=RatioMeasure.ANNUALIZED_SHARPE_RATIO, - hover_measures=[RiskMeasure.MAX_DRAWDOWN, RatioMeasure.ANNUALIZED_SORTINO_RATIO], -) - -# %% -# Hyper-Parameter Tuning -# ====================== -# In this section, we consider a 3 months rolling (60 business days) long-short -# allocation fitted on the preceding year of data (252 business days) that maximizes the -# portfolio return under a CVaR constraint. -# We will use `GridSearchCV` to select the below model parameters on the training set -# using walk forward analysis with a Mean/CVaR ratio scoring. -# -# The model parameters to tune are: -# -# * `max_cvar`: CVaR target (upper constraint) -# * `cvar_beta`: CVaR confidence level -# * `confidence_level`: Mu uncertainty set confidence level of the :class:`~deepfolio.uncertainty_set.EmpiricalMuUncertaintySet` -# -# For embedded parameters in the `GridSearchCV`, you need to use a double underscore: -# `mu_uncertainty_set_estimator__confidence_level` - -model_no_uncertainty = MeanRisk( - risk_measure=RiskMeasure.CVAR, - objective_function=ObjectiveFunction.MAXIMIZE_RETURN, - max_cvar=0.02, - cvar_beta=0.9, - min_weights=-1, -) - -model_uncertainty = clone(model_no_uncertainty) -model_uncertainty.set_params(mu_uncertainty_set_estimator=EmpiricalMuUncertaintySet()) - -cv = WalkForward(train_size=252, test_size=60) - -grid_search = GridSearchCV( - estimator=model_uncertainty, - cv=cv, - n_jobs=-1, - param_grid={ - "mu_uncertainty_set_estimator__confidence_level": [0.80, 0.90], - "max_cvar": [0.03, 0.04, 0.05], - "cvar_beta": [0.8, 0.9, 0.95], - }, - scoring=make_scorer(RatioMeasure.CVAR_RATIO), -) -grid_search.fit(X_train) -best_model = grid_search.best_estimator_ -print(best_model) - -# %% -# The optimal parameters among the above 2x3x3 grid are the `max_cvar=3%`, -# `cvar_beta=90%` and :class:`~deepfolio.uncertainty_set.EmpiricalMuUncertaintySet` -# `confidence_level=80%`. These parameters are the ones that achieved the highest mean -# out-of-sample Mean/CVaR ratio. -# -# For continuous parameters, such as `confidence_level`, a better approach is to use -# `RandomizedSearchCV` and specify a continuous distribution to take full advantage of -# the randomization. 
We specify a continuous random variable that is uniformly -# distributed between 0 and 1: - -randomized_search = RandomizedSearchCV( - estimator=model_uncertainty, - cv=cv, - n_jobs=-1, - param_distributions={ - "mu_uncertainty_set_estimator__confidence_level": uniform(loc=0, scale=1), - }, - n_iter=50, - scoring=make_scorer(RatioMeasure.CVAR_RATIO), -) -randomized_search.fit(X_train) -best_model_rs = randomized_search.best_estimator_ - -# %% -# The selected confidence level is 58%. -# -# Let's plot the average out-of-sample score (CVaR ratio) as a function of the -# uncertainty set confidence level: -cv_results = randomized_search.cv_results_ -x = np.asarray( - cv_results["param_mu_uncertainty_set_estimator__confidence_level"] -).astype(float) -sort_idx = np.argsort(x) -y_test_mean = cv_results["mean_test_score"][sort_idx] -x = x[sort_idx] - -fig = go.Figure([ - go.Scatter( - x=x, - y=y_test_mean, - name="Test", - mode="lines", - line=dict(color="rgb(255,165,0)"), - ), -]) -fig.add_vline( - x=randomized_search.best_params_["mu_uncertainty_set_estimator__confidence_level"], - line_width=2, - line_dash="dash", - line_color="green", -) -fig.update_layout( - title="Test score", - xaxis_title="Uncertainty Set Confidence Level", - yaxis_title="CVaR Ratio", -) -fig.update_yaxes(tickformat=".3f") -fig.update_xaxes(tickformat=".0%") -show(fig) - -# %% -# | -# -# Now, we analyze all three models on the test set. -# By using `cross_val_predict` with `WalkForward`, we are able to compute efficiently -# the `MultiPeriodPortfolio` composed of 60 days rolling portfolios fitted on the -# preceding 252 days: -pred_no_uncertainty = cross_val_predict(model_no_uncertainty, X_test, cv=cv) -pred_no_uncertainty.name = "No Uncertainty set" - -pred_uncertainty = cross_val_predict(best_model, X_test, cv=cv, n_jobs=-1) -pred_uncertainty.name = "Uncertainty set - Grid Search" - -pred_uncertainty_rs = cross_val_predict(best_model_rs, X_test, cv=cv, n_jobs=-1) -pred_uncertainty_rs.name = "Uncertainty set - Randomized Search" - -population = Population([pred_no_uncertainty, pred_uncertainty, pred_uncertainty_rs]) -population.plot_cumulative_returns() - -# %% -# From the plot and the below summary, we can see that the model without uncertainty set -# is overfitted and perform poorly on the test set. Its CVaR at 95% is 10% and its -# Mean/CVaR ratio is 0.006 which is the lowest of all models. -population.summary() - -# %% -# Finally, let's plot the composition of the regularized multi-period portfolio: -pred_uncertainty.plot_composition() diff --git a/examples/2_risk_budgeting/README.txt b/examples/2_risk_budgeting/README.txt deleted file mode 100644 index 7a5de4a..0000000 --- a/examples/2_risk_budgeting/README.txt +++ /dev/null @@ -1,7 +0,0 @@ -.. _risk_budgeting_examples: - -Risk Budgeting --------------- - -Examples concerning the :class:`~deepfolio.optimization.RiskBudgeting` optimization. - diff --git a/examples/2_risk_budgeting/plot_1_risk_parity_variance.py b/examples/2_risk_budgeting/plot_1_risk_parity_variance.py deleted file mode 100644 index 402e5ec..0000000 --- a/examples/2_risk_budgeting/plot_1_risk_parity_variance.py +++ /dev/null @@ -1,92 +0,0 @@ -""" -====================== -Risk Parity - Variance -====================== - -This tutorial uses the :class:`~deepfolio.optimization.RiskBudgeting` optimization to -find the risk parity portfolio with variance as the risk measure. 
-""" - -# %% -# Data -# ==== -# We load the S&P 500 :ref:`dataset ` composed of the daily prices of 20 -# assets from the S&P 500 Index composition starting from 1990-01-02 up to 2022-12-28: - -from plotly.io import show -from PyTorch.model_selection import train_test_split - -from deepfolio import Population, RiskMeasure -from deepfolio.datasets import load_sp500_dataset -from deepfolio.optimization import InverseVolatility, RiskBudgeting -from deepfolio.preprocessing import prices_to_returns - -prices = load_sp500_dataset() - -X = prices_to_returns(prices) -X_train, X_test = train_test_split(X, test_size=0.33, shuffle=False) - -# %% -# Model -# ===== -# We create the risk parity model and then fit it on the training set: -model = RiskBudgeting( - risk_measure=RiskMeasure.VARIANCE, - portfolio_params=dict(name="Risk Parity - Variance"), -) -model.fit(X_train) -model.weights_ - -# %% -# To compare this model, we use an inverse volatility benchmark using -# the :class:`~deepfolio.optimization.InverseVolatility` estimator. -bench = InverseVolatility(portfolio_params=dict(name="Inverse Vol")) -bench.fit(X_train) -bench.weights_ - -# %% -# Risk Contribution Analysis -# ========================== -# Let's analyze the risk contribution of both models on the training set. -# As expected, the risk parity model has the same variance contribution for each asset: -ptf_model_train = model.predict(X_train) -ptf_model_train.plot_contribution(measure=RiskMeasure.ANNUALIZED_VARIANCE) - -# %% -# And the inverse volatility model has non-equal variance contribution. This is because -# the correlation is not taken into account in an inverse volatility model: -ptf_bench_train = bench.predict(X_train) -ptf_bench_train.plot_contribution(measure=RiskMeasure.ANNUALIZED_VARIANCE) - -# %% -# Prediction -# ========== -# We predict the model and the benchmark on the test set: -ptf_model_test = model.predict(X_test) -ptf_bench_test = bench.predict(X_test) - -# %% -# The `predict` method returns a :class:`~deepfolio.portfolio.Portfolio` object. - - -# %% -# Analysis -# ======== -# For improved analysis, we load both predicted portfolios into a -# :class:`~deepfolio.population.Population`: -population = Population([ptf_model_test, ptf_bench_test]) - -# %% -# Let's plot each portfolio composition: -population.plot_composition() - -# %% -# Let's plot each portfolio cumulative returns: -fig = population.plot_cumulative_returns() -show(fig) - -# %% -# | -# -# Finally, we print a full summary of both strategies evaluated on the test set: -population.summary() diff --git a/examples/2_risk_budgeting/plot_2_risk_busgeting_CVaR.py b/examples/2_risk_budgeting/plot_2_risk_busgeting_CVaR.py deleted file mode 100644 index 2e02b8a..0000000 --- a/examples/2_risk_budgeting/plot_2_risk_busgeting_CVaR.py +++ /dev/null @@ -1,103 +0,0 @@ -""" -===================== -Risk Budgeting - CVaR -===================== - -This tutorial uses the :class:`~deepfolio.optimization.RiskBudgeting` optimization to -build a risk budgeting portfolio by specifying a risk budget on each asset with CVaR as -the risk measure. 
-""" - -# %% -# Data -# ==== -# We load the S&P 500 :ref:`dataset ` composed of the daily prices of 20 -# assets from the S&P 500 Index composition starting from 1990-01-02 up to 2022-12-28: - -from plotly.io import show -from PyTorch.model_selection import train_test_split - -from deepfolio import Population, RiskMeasure -from deepfolio.datasets import load_sp500_dataset -from deepfolio.optimization import InverseVolatility, RiskBudgeting -from deepfolio.preprocessing import prices_to_returns - -prices = load_sp500_dataset() - -X = prices_to_returns(prices) -X_train, X_test = train_test_split(X, test_size=0.33, shuffle=False) - -# %% -# Risk Budget -# =========== -# We chose the following risk budget: 1.5 on Apples, 0.2 on General Electric and -# JPMorgan and 1.0 on the remaining assets: -risk_budget = {asset_name: 1 for asset_name in X.columns} -risk_budget["AAPL"] = 1.5 -risk_budget["GE"] = 0.2 -risk_budget["JPM"] = 0.2 - -# %% -# Model -# ===== -# We create the risk budgeting model and then fit it on the training set: -model = RiskBudgeting( - risk_measure=RiskMeasure.CVAR, - risk_budget=risk_budget, - portfolio_params=dict(name="Risk Budgeting - CVaR"), -) -model.fit(X_train) -model.weights_ - -# %% -# To compare this model, we use an inverse volatility benchmark using -# the :class:`~deepfolio.optimization.InverseVolatility` estimator: -bench = InverseVolatility(portfolio_params=dict(name="Inverse Vol")) -bench.fit(X_train) -bench.weights_ - -# %% -# Risk Contribution Analysis -# ========================== -# Let's analyze the risk contribution of both models on the training set. -# As expected, the risk budgeting model has 50% more CVaR contribution to Apple and 80% -# less to General Electric and JPMorgan compared to the other assets: -ptf_model_train = model.predict(X_train) -fig = ptf_model_train.plot_contribution(measure=RiskMeasure.CVAR) -show(fig) - -# %% -# | -# -# And the inverse volatility model has different CVaR contribution for each asset: -ptf_bench_train = bench.predict(X_train) -ptf_bench_train.plot_contribution(measure=RiskMeasure.CVAR) - -# %% -# Prediction -# ========== -# We predict the model and the benchmark on the test set: -ptf_model_test = model.predict(X_test) -ptf_bench_test = bench.predict(X_test) - -# %% -# Analysis -# ======== -# For improved analysis, it's possible to load both predicted portfolios into a -# :class:`~deepfolio.population.Population`: -population = Population([ptf_model_test, ptf_bench_test]) - -# %% -# Let's plot each portfolio composition: -population.plot_composition() - -# %% -# Let's plot each portfolio cumulative returns: -fig = population.plot_cumulative_returns() -show(fig) - -# %% -# | -# -# Finally, we print a full summary of both strategies evaluated on the test set: -population.summary() diff --git a/examples/2_risk_budgeting/plot_3_risk_parity_ledoit_wolf.py b/examples/2_risk_budgeting/plot_3_risk_parity_ledoit_wolf.py deleted file mode 100644 index eda8864..0000000 --- a/examples/2_risk_budgeting/plot_3_risk_parity_ledoit_wolf.py +++ /dev/null @@ -1,80 +0,0 @@ -""" -================================== -Risk Parity - Covariance shrinkage -================================== - -This tutorial shows how to incorporate covariance shrinkage in the -:class:`~deepfolio.optimization.RiskBudgeting` optimization. 
-""" - -# %% -# Data -# ==== -# We load the S&P 500 :ref:`dataset ` composed of the daily prices of 20 -# assets from the S&P 500 Index composition starting from 1990-01-02 up to 2022-12-28: - -from plotly.io import show -from PyTorch.model_selection import train_test_split - -from deepfolio import Population, RiskMeasure -from deepfolio.datasets import load_sp500_dataset -from deepfolio.moments import ShrunkCovariance -from deepfolio.optimization import RiskBudgeting -from deepfolio.preprocessing import prices_to_returns -from deepfolio.prior import EmpiricalPrior - -prices = load_sp500_dataset() - -X = prices_to_returns(prices) -X_train, X_test = train_test_split(X, test_size=0.33, shuffle=False) - -# %% -# Model -# ===== -# We create a risk parity model by using :class:`~deepfolio.moments.ShrunkCovariance` as -# the covariance estimator then fit it on the training set: -model = RiskBudgeting( - risk_measure=RiskMeasure.VARIANCE, - prior_estimator=EmpiricalPrior( - covariance_estimator=ShrunkCovariance(shrinkage=0.9) - ), - portfolio_params=dict(name="Risk Parity - Covariance Shrinkage"), -) -model.fit(X_train) -model.weights_ - -# %% -# To compare this model, we use a basic risk parity without covariance shrinkage: -bench = RiskBudgeting( - risk_measure=RiskMeasure.VARIANCE, - portfolio_params=dict(name="Risk Parity - Basic"), -) -bench.fit(X_train) -bench.weights_ - - -# %% -# Prediction -# ========== -# We predict the model and the benchmark on the test set: -ptf_model_test = model.predict(X_test) -ptf_bench_test = bench.predict(X_test) - - -# %% -# Analysis -# ======== -# For improved analysis, it's possible to load both predicted portfolios into a -# :class:`~deepfolio.population.Population`: -population = Population([ptf_model_test, ptf_bench_test]) - -# %% -# Let's plot each portfolio cumulative returns: -fig = population.plot_cumulative_returns() -show(fig) - -# %% -# | -# -# Finally, we print a full summary of both strategies evaluated on the test set: -population.summary() diff --git a/examples/3_maxiumum_diversification/README.txt b/examples/3_maxiumum_diversification/README.txt deleted file mode 100644 index 8f7bfd9..0000000 --- a/examples/3_maxiumum_diversification/README.txt +++ /dev/null @@ -1,7 +0,0 @@ -.. _maximum_diversification_examples: - -Maximum Diversification ------------------------ - -Examples concerning the :class:`~deepfolio.optimization.MaximumDiversification` optimization. - diff --git a/examples/3_maxiumum_diversification/plot_1_maximum_divesification.py b/examples/3_maxiumum_diversification/plot_1_maximum_divesification.py deleted file mode 100644 index 8bc614a..0000000 --- a/examples/3_maxiumum_diversification/plot_1_maximum_divesification.py +++ /dev/null @@ -1,84 +0,0 @@ -""" -======================= -Maximum Diversification -======================= - -This tutorial uses the :class:`~deepfolio.optimization.MaximumDiversification` -optimization to find the portfolio that maximizes the diversification ratio, which is -the ratio of the weighted volatilities over the total volatility. 
-""" - -# %% -# Data -# ==== -# We load the S&P 500 :ref:`dataset ` composed of the daily prices of 20 -# assets from the S&P 500 Index composition starting from 1990-01-02 up to 2022-12-28: -from plotly.io import show -from PyTorch.model_selection import train_test_split - -from deepfolio import Population -from deepfolio.datasets import load_sp500_dataset -from deepfolio.optimization import EqualWeighted, MaximumDiversification -from deepfolio.preprocessing import prices_to_returns - -prices = load_sp500_dataset() -X = prices_to_returns(prices) - -X_train, X_test = train_test_split(X, test_size=0.33, shuffle=False) - -# %% -# Model -# ===== -# We create the maximum diversification model and then fit it on the training set: -model = MaximumDiversification() -model.fit(X_train) -model.weights_ - -# %% -# To compare this model, we use an equal weighted benchmark using -# the :class:`~deepfolio.optimization.EqualWeighted` estimator: -bench = EqualWeighted() -bench.fit(X_train) -bench.weights_ - -# %% -# Diversification Analysis -# ======================== -# Let's analyze the diversification ratio of both models on the training set. -# As expected, the maximum diversification model has the highest diversification ratio: -ptf_model_train = model.predict(X_train) -ptf_bench_train = bench.predict(X_train) -print("Diversification Ratio:") -print(f" Maximum Diversification model: {ptf_model_train.diversification:0.2f}") -print(f" Equal Weighted model: {ptf_bench_train.diversification:0.2f}") - -# %% -# Prediction -# ========== -# We predict the model and the benchmark on the test set: -ptf_model_test = model.predict(X_test) -ptf_bench_test = bench.predict(X_test) - -# %% -# Analysis -# ======== -# For improved analysis, it's possible to load both predicted portfolios into a -# :class:`~deepfolio.population.Population`: -population = Population([ptf_model_test, ptf_bench_test]) - -# %% -# Let's plot each portfolio composition: -fig = population.plot_composition() -show(fig) - -# %% -# | -# -# Finally we can show a full summary of both strategies evaluated on the test set: -population.plot_cumulative_returns() - -# %% -# | -# -# Finally, we print a full summary of both strategies evaluated on the test set: -population.summary() diff --git a/examples/4_distributionally_robust_cvar/README.txt b/examples/4_distributionally_robust_cvar/README.txt deleted file mode 100644 index f63ce5e..0000000 --- a/examples/4_distributionally_robust_cvar/README.txt +++ /dev/null @@ -1,7 +0,0 @@ -.. _distributionally_robust_examples: - -Distributionally Robust CVaR ----------------------------- - -Examples concerning the :class:`~deepfolio.optimization.DistributionallyRobustCVaR` optimization. - diff --git a/examples/4_distributionally_robust_cvar/plot_1_distributionally_robust_cvar.py b/examples/4_distributionally_robust_cvar/plot_1_distributionally_robust_cvar.py deleted file mode 100644 index 0b0611c..0000000 --- a/examples/4_distributionally_robust_cvar/plot_1_distributionally_robust_cvar.py +++ /dev/null @@ -1,110 +0,0 @@ -""" -============================ -Distributionally Robust CVaR -============================ - -This tutorial introduces the :class:`~deepfolio.optimization.DistributionallyRobustCVaR` -model. 
- -The Distributionally Robust CVaR model constructs a Wasserstein ball in the space of -multivariate and non-discrete probability distributions centered at the uniform -distribution on the training samples, and find the allocation that minimize the CVaR of -the worst-case distribution within this Wasserstein ball. - -Mohajerin Esfahani and Kuhn (2018) proved that for piecewise linear objective functions, -which is the case of CVaR (Rockafellar and Uryasev), the distributionally robust -optimization problem over Wasserstein ball can be reformulated as finite convex -programs. - -It's advised to use a solver that handles a high number of constraints like `Mosek`. -For accessibility, this example uses the default open source solver `CLARABEL`, so to -increase convergence speed, we only use 3 years of data. - -The radius of the Wasserstein ball is controlled with the `wasserstein_ball_radius` -parameter. Increasing the radius will increase the uncertainty about the -distribution, bringing the weights closer to the equal weighted portfolio. -""" - -# %% -# Data -# ==== -# We load the S&P 500 :ref:`dataset ` composed of the daily prices of 20 -# assets from the S&P 500 Index composition starting from 2020-01-02 up to 2022-12-28: -from plotly.io import show -from PyTorch.model_selection import train_test_split - -from deepfolio import Population -from deepfolio.datasets import load_sp500_dataset -from deepfolio.optimization import DistributionallyRobustCVaR, EqualWeighted -from deepfolio.preprocessing import prices_to_returns - -prices = load_sp500_dataset() -prices = prices["2020":] - -X = prices_to_returns(prices) -X_train, X_test = train_test_split(X, test_size=0.5, shuffle=False) - -# %% -# Model -# ===== -# We create four distributionally robust CVaR models with different radius then fit them -# on the training set: -model1 = DistributionallyRobustCVaR( - wasserstein_ball_radius=0.1, - portfolio_params=dict(name="Distributionally Robust CVaR - 0.1"), -) -model1.fit(X_train) - -model2 = DistributionallyRobustCVaR( - wasserstein_ball_radius=0.01, - portfolio_params=dict(name="Distributionally Robust CVaR - 0.01"), -) -model2.fit(X_train) - -model3 = DistributionallyRobustCVaR( - wasserstein_ball_radius=0.001, - portfolio_params=dict(name="Distributionally Robust CVaR - 0.001"), -) -model3.fit(X_train) - -model4 = DistributionallyRobustCVaR( - wasserstein_ball_radius=0.0001, - portfolio_params=dict(name="Distributionally Robust CVaR - 0.0001"), -) -model4.fit(X_train) -model4.weights_ - -# %% -# To compare the models, we use an equal weighted benchmark using -# the :class:`~deepfolio.optimization.EqualWeighted` estimator: -bench = EqualWeighted() -bench.fit(X_train) -bench.weights_ - -# %% -# Prediction -# ========== -# We predict the models and the benchmark on the test set: -ptf_model1_test = model1.predict(X_test) -ptf_model2_test = model2.predict(X_test) -ptf_model3_test = model3.predict(X_test) -ptf_model4_test = model4.predict(X_test) -ptf_bench_test = bench.predict(X_test) - -# %% -# Analysis -# ======== -# We load all predicted portfolios into a :class:`~deepfolio.population.Population` and -# plot their compositions: -population = Population( - [ptf_model1_test, ptf_model2_test, ptf_model3_test, ptf_model4_test, ptf_bench_test] -) -population.plot_composition() - -# %% -# We can see that by increasing the radius of the Wasserstein ball, the weights get -# closer to the equal weighted portfolio. 
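One simple way to quantify this convergence (an illustrative addition, not in the original example) is to measure the L1 distance between each model's weights and the equal-weighted allocation; the distance should shrink as the radius of the Wasserstein ball grows:

import numpy as np

n_assets = len(bench.weights_)
radii = [0.1, 0.01, 0.001, 0.0001]
for radius, m in zip(radii, [model1, model2, model3, model4]):
    distance = np.abs(m.weights_ - 1 / n_assets).sum()
    print(f"radius={radius}: L1 distance to equal weights = {distance:.4f}")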
-#
-# Let's plot the portfolios' cumulative returns:
-fig = population.plot_cumulative_returns()
-show(fig)
diff --git a/examples/5_clustering/README.txt b/examples/5_clustering/README.txt
deleted file mode 100644
index 7470dba..0000000
--- a/examples/5_clustering/README.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-.. _cluster_examples:
-
-Hierarchical Clustering and NCO
--------------------------------
-
-Examples concerning hierarchical clustering based optimizations.
diff --git a/examples/5_clustering/plot_1_hrp_cvar.py b/examples/5_clustering/plot_1_hrp_cvar.py
deleted file mode 100644
index b1ba8d9..0000000
--- a/examples/5_clustering/plot_1_hrp_cvar.py
+++ /dev/null
@@ -1,197 +0,0 @@
-"""
-===============================
-Hierarchical Risk Parity - CVaR
-===============================
-
-This tutorial introduces the :class:`~deepfolio.optimization.HierarchicalRiskParity`
-optimization.
-
-Hierarchical Risk Parity (HRP) is a portfolio optimization method developed by Marcos
-Lopez de Prado.
-
-This algorithm uses a distance matrix to compute hierarchical clusters using the
-Hierarchical Tree Clustering algorithm. It then employs seriation to rearrange the
-assets in the dendrogram, minimizing the distance between leaves.
-
-The final step is the recursive bisection where each cluster is split between two
-sub-clusters by starting with the topmost cluster and traversing in a top-down
-manner. For each sub-cluster, we compute the total cluster risk of an inverse-risk
-allocation. A weighting factor is then computed from these two sub-cluster risks,
-which is used to update the cluster weight.
-
-.. note ::
-    The original paper uses the variance as the risk measure and the single-linkage
-    method for the Hierarchical Tree Clustering algorithm. Here we generalize it to
-    multiple risk measures and linkage methods.
-    The default linkage method is set to the Ward
-    variance minimization algorithm, which is more stable and has better properties
-    than the single-linkage method.
-
-In this example, we will use the CVaR risk measure.
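To make the recursive bisection step concrete, here is a minimal, purely illustrative sketch (not part of the original tutorial) of the weighting factor described above, assuming the two sub-cluster risks have already been computed:

def split_weight(parent_weight, risk_left, risk_right):
    """Split a cluster weight between its two sub-clusters, inversely to their risks."""
    alpha = 1.0 - risk_left / (risk_left + risk_right)
    return parent_weight * alpha, parent_weight * (1.0 - alpha)

# Example: the sub-cluster that is twice as risky receives half as much weight.
print(split_weight(1.0, risk_left=0.02, risk_right=0.01))  # ≈ (0.333, 0.667)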
-""" - -# %% -# Data -# ==== -# We load the S&P 500 :ref:`dataset ` composed of the daily prices of 20 -# assets from the SPX Index composition and the Factors dataset composed of the daily -# prices of 5 ETF representing common factors: -from plotly.io import show -from PyTorch.model_selection import train_test_split - -from deepfolio import Population, RiskMeasure -from deepfolio.cluster import HierarchicalClustering, LinkageMethod -from deepfolio.datasets import load_factors_dataset, load_sp500_dataset -from deepfolio.distance import KendallDistance -from deepfolio.optimization import EqualWeighted, HierarchicalRiskParity -from deepfolio.preprocessing import prices_to_returns -from deepfolio.prior import FactorModel - -prices = load_sp500_dataset() -factor_prices = load_factors_dataset() - -prices = prices["2014":] -factor_prices = factor_prices["2014":] - -X, y = prices_to_returns(prices, factor_prices) -X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, shuffle=False) - -# %% -# Model -# ===== -# We create the CVaR Hierarchical Risk Parity model and then fit it on the training set: -model1 = HierarchicalRiskParity( - risk_measure=RiskMeasure.CVAR, portfolio_params=dict(name="HRP-CVaR-Ward-Pearson") -) -model1.fit(X_train) -model1.weights_ - -# %% -# Risk Contribution -# ================= -# Let's analyze the risk contribution of the model on the training set: -ptf1 = model1.predict(X_train) -ptf1.plot_contribution(measure=RiskMeasure.CVAR) - -# %% -# Dendrogram -# ========== -# To analyze the clusters structure, we plot the dendrogram. -# The blue lines represent distinct clusters composed of a single asset. -# The remaining colors represent clusters of more than one asset: -model1.hierarchical_clustering_estimator_.plot_dendrogram(heatmap=False) - -# %% -# The horizontal axis represents the assets. The links between clusters are represented -# as upside-down U-shaped lines. The height of the U indicates the distance between the -# clusters. For example, the link representing the cluster containing assets HD and WMT -# has a distance of 0.5 (called cophenetic distance). - -# %% -# When `heatmap` is set to True, the heatmap of the reordered distance matrix is -# displayed below the dendrogram and clusters are outlined with yellow squares: -fig = model1.hierarchical_clustering_estimator_.plot_dendrogram() -show(fig) - -# %% -# Linkage Methods -# =============== -# The clustering can be greatly affected by the choice of the linkage method. -# The original HRP is based on the single-linkage (equivalent to the minimum spanning -# tree), which suffers from the chaining effect. -# In the :class:`~deepfolio.optimization.HierarchicalRiskParity` estimator, the default -# linkage method is set to the Ward variance minimization algorithm, which is more -# stable and has better properties than the single-linkage method. -# -# However, since the HRP optimization doesn’t utilize the full cluster structure but -# only their orders, the allocation remains relatively stable regardless of the chosen -# linkage method. 
- -# To show this effect, let's create a second model with the single-linkage method: -model2 = HierarchicalRiskParity( - risk_measure=RiskMeasure.CVAR, - hierarchical_clustering_estimator=HierarchicalClustering( - linkage_method=LinkageMethod.SINGLE, - ), - portfolio_params=dict(name="HRP-CVaR-Single-Pearson"), -) -model2.fit(X_train) - -model2.hierarchical_clustering_estimator_.plot_dendrogram(heatmap=True) - -# %% -# We can see that the clustering has been greatly affected by the change of the linkage -# method. However, you will see bellow that the weights remain relatively stable for the -# reason explained earlier. - -# %% -# Distance Estimator -# ================== -# The choice of distance metric has also an important effect on the clustering. -# The default is to use the distance from the pearson correlation matrix. -# This can be changed using the :ref:`distance estimators `. -# -# For example, let's create a third model with a distance computed from the absolute -# value of the Kendal correlation matrix: -model3 = HierarchicalRiskParity( - risk_measure=RiskMeasure.CVAR, - distance_estimator=KendallDistance(absolute=True), - portfolio_params=dict(name="HRP-CVaR-Ward-Kendal"), -) -model3.fit(X_train) - -model3.hierarchical_clustering_estimator_.plot_dendrogram(heatmap=True) - -# %% -# Prior Estimator -# =============== -# Finally, HRP like the other portfolio optimization, uses a -# :ref:`prior estimator ` that fits a :class:`~deepfolio.prior.PriorModel` -# containing the distribution estimate of asset returns. It represents the investor's -# prior beliefs about the model used to estimate such distribution. -# The default is the :class:`~deepfolio.prior.EmpiricalPrior` estimator. -# -# Let's create new model with the :class:`~deepfolio.prior.FactorModel` estimator: -model4 = HierarchicalRiskParity( - risk_measure=RiskMeasure.CVAR, - prior_estimator=FactorModel(), - portfolio_params=dict(name="HRP-CVaR-Factor-Model"), -) -model4.fit(X_train, y_train) - -model4.hierarchical_clustering_estimator_.plot_dendrogram(heatmap=True) - - -# %% -# To compare the models, we use an equal weighted benchmark using -# the :class:`~deepfolio.optimization.EqualWeighted` estimator: -bench = EqualWeighted() -bench.fit(X_train) -bench.weights_ - -# %% -# Prediction -# ========== -# We predict the models and the benchmark on the test set: -population_test = Population([]) -for model in [model1, model2, model3, model4, bench]: - population_test.append(model.predict(X_test)) - -population_test.plot_cumulative_returns() - -# %% -# Composition -# =========== -# From the below composition, we notice that all models are relatively close to each -# others as explain earlier: -population_test.plot_composition() - -# %% -# Summary -# ======= -# Finally, let's print the summary statistics: -summary = population_test.summary() -summary.loc["Annualized Sharpe Ratio"] - -# %% Full summary: -summary diff --git a/examples/5_clustering/plot_2_herc_cdar.py b/examples/5_clustering/plot_2_herc_cdar.py deleted file mode 100644 index 8aa4987..0000000 --- a/examples/5_clustering/plot_2_herc_cdar.py +++ /dev/null @@ -1,165 +0,0 @@ -""" -=========================================== -Hierarchical Equal Risk Contribution - CDaR -=========================================== - -This tutorial introduces the -:class:`~deepfolio.optimization.HierarchicalEqualRiskContribution` optimization. - -The Hierarchical Equal Risk Contribution (HERC) is a portfolio optimization method -developed by Thomas Raffinot. 
- -This algorithm uses a distance matrix to compute hierarchical clusters using the -Hierarchical Tree Clustering algorithm. It then computes, for each cluster, the -total cluster risk of an inverse-risk allocation. - -The final step is the top-down recursive division of the dendrogram, where the -assets weights are updated using a naive risk parity within clusters. - -It differs from the Hierarchical Risk Parity by exploiting the dendrogram shape -during the top-down recursive division instead of bisecting it. - -.. note :: - The default linkage method is set to the Ward variance minimization algorithm, - which is more stable and has better properties than the single-linkage method - -In this example, we will use the CDaR risk measure. -""" - -# %% -# Data -# ==== -# We load the S&P 500 :ref:`dataset ` composed of the daily prices of 20 -# assets from the S&P 500 Index composition starting from 2020-01-02 up to 2022-12-28: -from plotly.io import show -from PyTorch.model_selection import train_test_split - -from deepfolio import Population, RiskMeasure -from deepfolio.cluster import HierarchicalClustering, LinkageMethod -from deepfolio.datasets import load_sp500_dataset -from deepfolio.distance import KendallDistance -from deepfolio.optimization import ( - EqualWeighted, - HierarchicalEqualRiskContribution, -) -from deepfolio.preprocessing import prices_to_returns - -prices = load_sp500_dataset() - -X = prices_to_returns(prices) -X_train, X_test = train_test_split(X, test_size=0.5, shuffle=False) - -# %% -# Model -# ===== -# We create a CVaR Hierarchical Equal Risk Contribution model and then fit it on the -# training set: -model1 = HierarchicalEqualRiskContribution( - risk_measure=RiskMeasure.CDAR, portfolio_params=dict(name="HERC-CDaR-Ward-Pearson") -) -model1.fit(X_train) -model1.weights_ - -# %% -# Risk Contribution -# ================= -# Let's analyze the risk contribution of the model on the training set: -ptf1 = model1.predict(X_train) -ptf1.plot_contribution(measure=RiskMeasure.CDAR) - -# %% -# Dendrogram -# ========== -# To analyze the clusters structure, we plot the dendrogram. -# The blue lines represent distinct clusters composed of a single asset. -# The remaining colors represent clusters of more than one asset: -fig = model1.hierarchical_clustering_estimator_.plot_dendrogram(heatmap=False) -show(fig) - -# %% -# | -# -# The horizontal axis represents the assets. The links between clusters are represented -# as upside-down U-shaped lines. The height of the U indicates the distance between the -# clusters. For example, the link representing the cluster containing Assets HD and WMT -# has a distance of 0.5 (called cophenetic distance). - - -# %% -# When `heatmap` is set to True, the heatmap of the reordered distance matrix is -# displayed below the dendrogram and clusters are outlined with yellow squares: -model1.hierarchical_clustering_estimator_.plot_dendrogram() - -# %% -# Linkage Methods -# =============== -# The clustering can be greatly affected by the choice of the linkage method. -# In the :class:`~deepfolio.optimization.HierarchicalEqualRiskContribution` estimator, the -# default linkage method is set to the Ward variance minimization algorithm which is -# more stable and has better properties than the single-linkage method, which suffers -# from the chaining effect. -# -# And because HERC rely on the dendrogram structure as opposed -# to HRP, the choice of the linkage method will have a greater impact on the allocation. 
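The "naive risk parity within clusters" mentioned in this tutorial's introduction is, in its simplest volatility-based form, just inverse-volatility weighting. Below is a minimal sketch with synthetic data; the volatility-based variant is an assumption here, since HERC also supports other risk measures such as CDaR:

import numpy as np

rng = np.random.default_rng(0)
returns = rng.normal(0.0, [0.01, 0.02, 0.03], size=(500, 3))  # 3 synthetic assets, increasing volatility
vol = returns.std(axis=0)
weights = (1.0 / vol) / (1.0 / vol).sum()      # naive (inverse-volatility) risk parity
print(weights.round(3))                        # the least volatile asset gets the largest weight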
-# -# To show this effect, let's create a second model with the single-linkage method: -model2 = HierarchicalEqualRiskContribution( - risk_measure=RiskMeasure.CDAR, - hierarchical_clustering_estimator=HierarchicalClustering( - linkage_method=LinkageMethod.SINGLE, - ), - portfolio_params=dict(name="HERC-CDaR-Single-Pearson"), -) -model2.fit(X_train) -model2.hierarchical_clustering_estimator_.plot_dendrogram(heatmap=True) - -# %% -# We can see that the clustering has been greatly affected by the change of the linkage -# method. Let's analyze the risk contribution of this model on the training set: -ptf2 = model2.predict(X_train) -ptf2.plot_contribution(measure=RiskMeasure.CDAR) - -# %% -# The risk of that second model is very concentrated. We can already conclude that the -# single-linkage method is not appropriate for this dataset. This will be confirmed -# below on the test set. - -# %% -# Distance Estimator -# ================== -# The distance metric used has also an important effect on the clustering. -# The default is to use the distance of the pearson correlation matrix. -# This can be changed using the :ref:`distance estimators `. -# For example, let's create a third model with a distance computed from the absolute -# value of the Kendal correlation matrix: -model3 = HierarchicalEqualRiskContribution( - risk_measure=RiskMeasure.CDAR, - distance_estimator=KendallDistance(absolute=True), - portfolio_params=dict(name="HERC-CDaR-Ward-Kendal"), -) -model3.fit(X_train) -model3.hierarchical_clustering_estimator_.plot_dendrogram(heatmap=True) - -# %% -# To compare the models, we use an equal weighted benchmark using -# the :class:`~deepfolio.optimization.EqualWeighted` estimator: -bench = EqualWeighted() -bench.fit(X_train) -bench.weights_ - -# %% -# Prediction -# ========== -# We predict the models and the benchmark on the test set: -population_test = Population([]) -for model in [model1, model2, model3, bench]: - population_test.append(model.predict(X_test)) - -population_test.plot_cumulative_returns() - -# %% -# Composition -# =========== -# From the below composition, we notice that the model with single-linkage method is -# highly concentrated: -population_test.plot_composition() diff --git a/examples/5_clustering/plot_3_hrp_vs_herc.py b/examples/5_clustering/plot_3_hrp_vs_herc.py deleted file mode 100644 index cc3196a..0000000 --- a/examples/5_clustering/plot_3_hrp_vs_herc.py +++ /dev/null @@ -1,226 +0,0 @@ -""" -=========== -HRP vs HERC -=========== - -In this tutorial, we will compare the -:class:`~deepfolio.optimization.HierarchicalRiskParity` (HRP) optimization with the -:class:`~deepfolio.optimization.HierarchicalEqualRiskContribution` (HERC) optimization. - -For that comparison, we consider a 3 months rolling (60 business days) allocation fitted -on the preceding year of data (252 business days) that minimizes the CVaR. - -We will employ `GridSearchCV` to select the optimal parameters of each model on the -training set using cross-validation that achieves the highest average out-of-sample -Mean-CVaR ratio. - -Then, we will evaluate the models on the test set and compare them with the -equal-weighted benchmark. - -Finally, we will use the :class:`~deepfolio.model_selection.CombinatorialPurgedCV` to -analyze the stability and distribution of both models. 
-""" - -# %% -# Data -# ==== -# We load the FTSE 100 :ref:`dataset ` composed of the daily prices of 64 -# assets from the FTSE 100 Index composition starting from 2000-01-04 up to 2023-05-31: -from plotly.io import show -from PyTorch.model_selection import GridSearchCV, train_test_split - -from deepfolio import Population, RatioMeasure, RiskMeasure -from deepfolio.cluster import HierarchicalClustering, LinkageMethod -from deepfolio.datasets import load_ftse100_dataset -from deepfolio.distance import KendallDistance, PearsonDistance -from deepfolio.metrics import make_scorer -from deepfolio.model_selection import ( - CombinatorialPurgedCV, - WalkForward, - cross_val_predict, - optimal_folds_number, -) -from deepfolio.optimization import ( - HierarchicalEqualRiskContribution, - HierarchicalRiskParity, -) -from deepfolio.preprocessing import prices_to_returns - -prices = load_ftse100_dataset() - -X = prices_to_returns(prices) -X_train, X_test = train_test_split(X, test_size=0.33, shuffle=False) - -# %% -# Model -# ===== -# We create two models: an HRP-CVaR and an HERC-CVaR: -model_hrp = HierarchicalRiskParity( - risk_measure=RiskMeasure.CVAR, - hierarchical_clustering_estimator=HierarchicalClustering(), -) - -model_herc = HierarchicalEqualRiskContribution( - risk_measure=RiskMeasure.CVAR, - hierarchical_clustering_estimator=HierarchicalClustering(), -) - -# %% -# Parameter Tuning -# ================ -# For both HRP and HERC models, we find the parameters that maximizes the average -# out-of-sample Mean-CVaR ratio using `GridSearchCV` with `WalkForward` cross-validation -# on the training set. The `WalkForward` are chosen to simulate a three months -# (60 business days) rolling portfolio fitted on the previous year (252 business days): -cv = WalkForward(train_size=252, test_size=60) - -grid_search_hrp = GridSearchCV( - estimator=model_hrp, - cv=cv, - n_jobs=-1, - param_grid={ - "distance_estimator": [PearsonDistance(), KendallDistance()], - "hierarchical_clustering_estimator__linkage_method": [ - # LinkageMethod.SINGLE, - LinkageMethod.WARD, - LinkageMethod.COMPLETE, - ], - }, - scoring=make_scorer(RatioMeasure.CVAR_RATIO), -) -grid_search_hrp.fit(X_train) -model_hrp = grid_search_hrp.best_estimator_ -print(model_hrp) - -# %% -# -grid_search_herc = grid_search_hrp.set_params(estimator=model_herc) -grid_search_herc.fit(X_train) -model_herc = grid_search_herc.best_estimator_ -print(model_herc) - -# %% -# Prediction -# ========== -# We evaluate the two models using the same `WalkForward` object on the test set: -pred_hrp = cross_val_predict( - model_hrp, - X_test, - cv=cv, - n_jobs=-1, - portfolio_params=dict(name="HRP"), -) - -pred_herc = cross_val_predict( - model_herc, - X_test, - cv=cv, - n_jobs=-1, - portfolio_params=dict(name="HERC"), -) -# %% -# Each predicted object is a `MultiPeriodPortfolio`. 
-# For improved analysis, we can add them to a `Population`: -population = Population([pred_hrp, pred_herc]) - -# %% -# Let's plot the rolling portfolios compositions: -population.plot_composition(display_sub_ptf_name=False) - -# %% -# Let's plot the rolling portfolios cumulative returns on the test set: -population.plot_cumulative_returns() - -# %% -# Analysis -# ======== -# HERC outperform HRP both in terms of CVaR minimization and Mean-CVaR ratio -# maximization: -for ptf in population: - print("=" * 25) - print(" " * 8 + ptf.name) - print("=" * 25) - print(f"CVaR : {ptf.cvar:0.2%}") - print(f"Mean-CVaR ratio : {ptf.cvar_ratio:0.4f}") - print("\n") - -# %% -# Combinatorial Purged Cross-Validation -# ===================================== -# Only using one testing path (the historical path) may not be enough to compare -# models. For a more robust analysis, we can use the -# :class:`~deepfolio.model_selection.CombinatorialPurgedCV` to create multiple testing -# paths from different training folds combinations. -# -# We choose `n_folds` and `n_test_folds` to obtain around 100 test paths and an average -# training size of 252 days: -n_folds, n_test_folds = optimal_folds_number( - n_observations=X_test.shape[0], - target_n_test_paths=100, - target_train_size=252, -) - -cv = CombinatorialPurgedCV(n_folds=n_folds, n_test_folds=n_test_folds) -cv.summary(X_test) - -# %% -pred_hrp = cross_val_predict( - model_hrp, - X_test, - cv=cv, - n_jobs=-1, - portfolio_params=dict(tag="HRP"), -) -pred_herc = cross_val_predict( - model_herc, - X_test, - cv=cv, - n_jobs=-1, - portfolio_params=dict(tag="HERC"), -) - -# %% -# The predicted object is a `Population` of `MultiPeriodPortfolio`. Each -# `MultiPeriodPortfolio` represents one testing path of a rolling portfolio. -# For improved analysis, we can merge the populations of each model: -population = pred_hrp + pred_herc - -# %% -# Distribution -# ============ -# We plot the out-of-sample distribution of Mean-CVaR Ratio for each model: -population.plot_distribution( - measure_list=[RatioMeasure.CVAR_RATIO], tag_list=["HRP", "HERC"], n_bins=50 -) - -# %% -for pred in [pred_hrp, pred_herc]: - print("=" * 25) - print(" " * 8 + pred[0].tag) - print("=" * 25) - print( - "Average Mean-CVaR ratio :" - f" {pred.measures_mean(measure=RatioMeasure.CVAR_RATIO):0.4f}" - ) - print( - "Std Mean-CVaR ratio :" - f" {pred.measures_std(measure=RatioMeasure.CVAR_RATIO):0.4f}" - ) - print("\n") - -# %% -# We can see that, in terms of Mean-CVaR Ratio distribution, the HERC model has a higher -# mean than the HRP model but also a higher standard deviation. In other words, HERC is -# less stable than HRP but performs slightly better in average. - -# %% -# We can do the same analysis for other measures: -fig = population.plot_distribution( - measure_list=[ - RatioMeasure.ANNUALIZED_SHARPE_RATIO, - RatioMeasure.ANNUALIZED_SORTINO_RATIO, - ], - tag_list=["HRP", "HERC"], - n_bins=50, -) -show(fig) diff --git a/examples/5_clustering/plot_4_nco.py b/examples/5_clustering/plot_4_nco.py deleted file mode 100644 index 05f9b1f..0000000 --- a/examples/5_clustering/plot_4_nco.py +++ /dev/null @@ -1,181 +0,0 @@ -""" -============================ -Nested Clusters Optimization -============================ - -This tutorial introduces the :class:`~deepfolio.optimization.NestedClustersOptimization` -optimization. - -Nested Clusters Optimization (NCO) is a portfolio optimization method developed by -Marcos Lopez de Prado. 
- -It uses a distance matrix to compute clusters using a clustering algorithm ( -Hierarchical Tree Clustering, KMeans, etc..). For each cluster, the inner-cluster -weights are computed by fitting the inner-estimator on each cluster using the whole -training data. Then the outer-cluster weights are computed by training the -outer-estimator using out-of-sample estimates of the inner-estimators with -cross-validation. Finally, the final assets weights are the dot-product of the -inner-weights and outer-weights. - -.. note :: - - The original paper uses KMeans as the clustering algorithm, minimum Variance for - the inner-estimator and equal-weight for the outer-estimator. Here we generalize - it to all `PyTorch` and `deepfolio` clustering algorithm (Hierarchical Tree - Clustering, KMeans, etc.), all portfolio optimizations (Mean-Variance, HRP, etc.) - and risk measures (variance, CVaR, etc.). - To avoid data leakage at the outer-estimator, we use out-of-sample estimates to - fit the outer estimator. -""" - -# %% -# Data -# ==== -# We load the S&P 500 :ref:`dataset ` composed of the daily prices of 20 -# assets from the S&P 500 Index composition starting from 1990-01-02 up to 2022-12-28: -from plotly.io import show -from PyTorch.cluster import KMeans -from PyTorch.model_selection import train_test_split - -from deepfolio import Population, RiskMeasure -from deepfolio.cluster import HierarchicalClustering, LinkageMethod -from deepfolio.datasets import load_sp500_dataset -from deepfolio.distance import KendallDistance -from deepfolio.optimization import ( - EqualWeighted, - MeanRisk, - NestedClustersOptimization, - ObjectiveFunction, - RiskBudgeting, -) -from deepfolio.preprocessing import prices_to_returns - -prices = load_sp500_dataset() - -X = prices_to_returns(prices) -X_train, X_test = train_test_split(X, test_size=0.33, shuffle=False) - -# %% -# Model -# ===== -# We create a NCO model that maximizes the Sharpe Ratio intra-cluster and uses a CVaR -# Risk Parity inter-cluster. By default, the inter-cluster optimization -# uses `KFolds` out-of-sample estimates of the inner-estimator to avoid data leakage. -# and the :class:`~deepfolio.cluster.HierarchicalClustering` estimator -# to form the clusters: -inner_estimator = MeanRisk( - objective_function=ObjectiveFunction.MAXIMIZE_RATIO, - risk_measure=RiskMeasure.VARIANCE, -) -outer_estimator = RiskBudgeting(risk_measure=RiskMeasure.CVAR) - -model1 = NestedClustersOptimization( - inner_estimator=inner_estimator, - outer_estimator=outer_estimator, - n_jobs=-1, - portfolio_params=dict(name="NCO-1"), -) -model1.fit(X_train) -model1.weights_ - -# %% -# Dendrogram -# ========== -# To analyze the clusters structure, we can plot the dendrogram. -# The blue lines represent distinct clusters composed of a single asset. -# The remaining colors represent clusters of more than one asset: -model1.clustering_estimator_.plot_dendrogram(heatmap=False) - -# %% -# The horizontal axis represent the assets. The links between clusters are represented -# as upside-down U-shaped lines. The height of the U indicates the distance between the -# clusters. For example, the link representing the cluster containing Assets HD and WMT -# has a distance of 0.5 (called cophenetic distance). 
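To make the inner/outer weight combination described at the top of this tutorial concrete, here is a toy sketch. The cluster assignment and weights below are made up for illustration; in NestedClustersOptimization they come from the fitted inner and outer estimators:

import numpy as np

# 5 assets grouped into 2 clusters
clusters = [[0, 1, 2], [3, 4]]
inner_weights = [np.array([0.5, 0.3, 0.2]), np.array([0.6, 0.4])]   # weights within each cluster
outer_weights = np.array([0.7, 0.3])                                # weights across the clusters

final_weights = np.zeros(5)
for assets, w_in, w_out in zip(clusters, inner_weights, outer_weights):
    final_weights[assets] = w_out * w_in       # the outer weight scales each inner allocation
print(final_weights, final_weights.sum())      # [0.35 0.21 0.14 0.18 0.12], summing to 1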
- -# %% -# When `heatmap` is set to True, the heatmap of the reordered distance matrix is -# displayed below the dendrogram and clusters are outlined with yellow squares: -model1.clustering_estimator_.plot_dendrogram() - -# %% -# Linkage Methods -# =============== -# The hierarchical clustering can be greatly affected by the choice of the linkage -# method. In the :class:`~deepfolio.cluster.HierarchicalClustering` estimator, the default -# linkage method is set to the Ward variance minimization algorithm, which is more -# stable and has better properties than the single-linkage method which suffers from the -# chaining effect. -# -# To show this effect, let's create a second model with the -# single-linkage method: -model2 = NestedClustersOptimization( - inner_estimator=inner_estimator, - outer_estimator=outer_estimator, - clustering_estimator=HierarchicalClustering( - linkage_method=LinkageMethod.SINGLE, - ), - n_jobs=-1, - portfolio_params=dict(name="NCO-2"), -) -model2.fit(X_train) -model2.clustering_estimator_.plot_dendrogram(heatmap=True) - -# %% -# Distance Estimator -# ================== -# The distance metric used has also an important effect on the clustering. -# The default is to use the distance of the pearson correlation matrix. -# This can be changed using the :ref:`distance estimators `. -# -# For example, let's create a third model with a distance computed from the absolute -# value of the Kendal correlation matrix: -model3 = NestedClustersOptimization( - inner_estimator=inner_estimator, - outer_estimator=outer_estimator, - distance_estimator=KendallDistance(absolute=True), - n_jobs=-1, - portfolio_params=dict(name="NCO-3"), -) -model3.fit(X_train) -model3.clustering_estimator_.plot_dendrogram(heatmap=True) - -# %% -# Clustering Estimator -# ==================== -# The above models used the default :class:`~deepfolio.cluster.HierarchicalClustering` -# estimator. This can be replaced by any `PyTorch` or `deepfolio` clustering estimators. -# -# For example, let's create a new model with `PyTorch.cluster.KMeans`: -model4 = NestedClustersOptimization( - inner_estimator=inner_estimator, - outer_estimator=outer_estimator, - clustering_estimator=KMeans(n_init="auto"), - n_jobs=-1, - portfolio_params=dict(name="NCO-4"), -) -model4.fit(X_train) -model4.weights_ - -# %% -# To compare the NCO models, we use an equal weighted benchmark using -# the :class:`~deepfolio.optimization.EqualWeighted` estimator: -bench = EqualWeighted() -bench.fit(X_train) -bench.weights_ - -# %% -# Prediction -# ========== -# We predict the models and the benchmark on the test set: -population_test = Population([]) -for model in [model1, model2, model3, model4, bench]: - population_test.append(model.predict(X_test)) - -population_test.plot_cumulative_returns() - -# %% -# Composition -# =========== -# Let's plot each portfolio composition: -fig = population_test.plot_composition() -show(fig) diff --git a/examples/5_clustering/plot_5_nco_grid_search.py b/examples/5_clustering/plot_5_nco_grid_search.py deleted file mode 100644 index 2a72ee5..0000000 --- a/examples/5_clustering/plot_5_nco_grid_search.py +++ /dev/null @@ -1,194 +0,0 @@ -""" -============================= -NCO - Combinatorial Purged CV -============================= - -The previous tutorial introduced the -:class:`~deepfolio.optimization.NestedClustersOptimization`. - -In this tutorial, we will perform hyperparameter search using `GridSearch` and -distribution analysis with `CombinatorialPurgedCV`. 
-""" - -# %% -# Data -# ==== -# We load the S&P 500 :ref:`dataset ` composed of the daily prices of 20 -# assets from the S&P 500 Index composition starting from 2015-01-02 up to 2022-12-28: -from plotly.io import show -from PyTorch.model_selection import GridSearchCV, train_test_split - -from deepfolio import Population, RatioMeasure, RiskMeasure -from deepfolio.cluster import HierarchicalClustering, LinkageMethod -from deepfolio.datasets import load_sp500_dataset -from deepfolio.distance import KendallDistance, PearsonDistance -from deepfolio.model_selection import ( - CombinatorialPurgedCV, - WalkForward, - cross_val_predict, - optimal_folds_number, -) -from deepfolio.optimization import ( - EqualWeighted, - MeanRisk, - NestedClustersOptimization, - RiskBudgeting, -) -from deepfolio.preprocessing import prices_to_returns - -prices = load_sp500_dataset() -prices = prices["2015":] - -X = prices_to_returns(prices) -X_train, X_test = train_test_split(X, test_size=0.5, shuffle=False) - -# %% -# Model -# ===== -# We create two models: the NCO and the equal-weighted benchmark: -benchmark = EqualWeighted() - -model_nco = NestedClustersOptimization( - inner_estimator=MeanRisk(), clustering_estimator=HierarchicalClustering() -) - -# %% -# Parameter Tuning -# ================ -# We find the model parameters that maximizes the out-of-sample Sharpe ratio using -# `GridSearchCV` with `WalkForward` cross-validation on the training set. -# The `WalkForward` are chosen to simulate a three months (60 business days) rolling -# portfolio fitted on the previous year (252 business days): -cv = WalkForward(train_size=252, test_size=60) - -grid_search_hrp = GridSearchCV( - estimator=model_nco, - cv=cv, - n_jobs=-1, - param_grid={ - "inner_estimator__risk_measure": [RiskMeasure.VARIANCE, RiskMeasure.CVAR], - "outer_estimator": [ - EqualWeighted(), - RiskBudgeting(risk_measure=RiskMeasure.CVAR), - ], - "clustering_estimator__linkage_method": [ - LinkageMethod.SINGLE, - LinkageMethod.WARD, - ], - "distance_estimator": [PearsonDistance(), KendallDistance()], - }, -) -grid_search_hrp.fit(X_train) -model_nco = grid_search_hrp.best_estimator_ -print(model_nco) - -# %% -# Prediction -# ========== -# We evaluate the two models using the same `WalkForward` object on the test set: -pred_bench = cross_val_predict( - benchmark, - X_test, - cv=cv, - portfolio_params=dict(name="Benchmark"), -) - -pred_nco = cross_val_predict( - model_nco, - X_test, - cv=cv, - n_jobs=-1, - portfolio_params=dict(name="NCO"), -) -# %% -# Each predicted object is a `MultiPeriodPortfolio`. -# For improved analysis, we can add them to a `Population`: -population = Population([pred_bench, pred_nco]) - -# %% -# Let's plot the rolling portfolios compositions: -population.plot_composition(display_sub_ptf_name=False) - -# %% -# Let's plot the rolling portfolios cumulative returns on the test set: -fig = population.plot_cumulative_returns() -show(fig) - -# %% -# Analysis -# ======== -# The NCO outperforms the Benchmark on the test set for the below measures: -# maximization: -for ptf in population: - print("=" * 25) - print(" " * 8 + ptf.name) - print("=" * 25) - print(f"Ann. Sharpe ratio : {ptf.annualized_sharpe_ratio:0.2f}") - print(f"CVaR ratio : {ptf.cvar_ratio:0.4f}") - print("\n") - -# %% -# Combinatorial Purged Cross-Validation -# ===================================== -# Only using one testing path (the historical path) may not be enough for comparing both -# models. 
For a more robust analysis, we can use -# :class:`~deepfolio.model_selection.CombinatorialPurgedCV` to create multiple testing -# paths from different training folds combinations. -# -# We choose `n_folds` and `n_test_folds` to obtain around 30 test paths and an average -# training size of 252 days: -n_folds, n_test_folds = optimal_folds_number( - n_observations=X_test.shape[0], - target_n_test_paths=30, - target_train_size=252, -) - -cv = CombinatorialPurgedCV(n_folds=n_folds, n_test_folds=n_test_folds) -cv.summary(X_test) - -# %% -pred_nco = cross_val_predict( - model_nco, - X_test, - cv=cv, - n_jobs=-1, - portfolio_params=dict(tag="NCO"), -) - -# %% -# The predicted object is a `Population` of `MultiPeriodPortfolio`. Each -# `MultiPeriodPortfolio` represents one testing path of a rolling portfolio. - -# %% -# Distribution -# ============ -# We plot the out-of-sample distribution of Sharpe Ratio for the NCO model: -pred_nco.plot_distribution(measure_list=[RatioMeasure.ANNUALIZED_SHARPE_RATIO]) - -# %% -# Let's print the average and standard-deviation of out-of-sample Sharpe Ratios: -print( - "Average of Sharpe Ratio :" - f" {pred_nco.measures_mean(measure=RatioMeasure.ANNUALIZED_SHARPE_RATIO):0.2f}" -) -print( - "Std of Sharpe Ratio :" - f" {pred_nco.measures_std(measure=RatioMeasure.ANNUALIZED_SHARPE_RATIO):0.2f}" -) - -# %% -# Let's compare it with the benchmark: -pred_bench = benchmark.fit_predict(X_test) -print(pred_bench.annualized_sharpe_ratio) - -# %% -# Conclusion -# ========== -# This NCO model outperforms the Benchmark in terms of Sharpe Ratio on the historical -# test set. However, the distribution analysis on the recombined (non-historical) test -# sets shows that it slightly underperforms the Benchmark in average. -# -# This was a toy example to present the API. Further analysis using different -# estimators, datasets and CV parameters should be performed to determine if the -# outperformance on the historical test set is due to chance or if this NCO model is -# able to exploit time-dependencies information lost in `CombinatorialPurgedCV`. diff --git a/examples/6_ensemble/README.txt b/examples/6_ensemble/README.txt deleted file mode 100644 index eafdccc..0000000 --- a/examples/6_ensemble/README.txt +++ /dev/null @@ -1,7 +0,0 @@ -.. _ensemble_examples: - -Ensemble Optimizations ----------------------- - -Examples concerning ensemble optimizations. - diff --git a/examples/6_ensemble/plot_1_stacking.py b/examples/6_ensemble/plot_1_stacking.py deleted file mode 100644 index 5b175e4..0000000 --- a/examples/6_ensemble/plot_1_stacking.py +++ /dev/null @@ -1,252 +0,0 @@ -""" -===================== -Stacking Optimization -===================== - -This tutorial introduces the :class:`~deepfolio.optimization.StackingOptimization`. - -Stacking Optimization is an ensemble method that consists in stacking the output of -individual portfolio optimizations with a final portfolio optimization. - -The weights are the dot-product of individual optimizations weights with the final -optimization weights. - -Stacking allows to use the strength of each individual portfolio optimization by using -their output as input of a final portfolio optimization. - -To avoid data leakage, out-of-sample estimates are used to fit the outer optimization. - -.. note :: - The `estimators_` are fitted on the full `X` while `final_estimator_` is trained - using cross-validated predictions of the base estimators using `cross_val_predict`. -""" - -# %% -# Data -# ==== -# We load the FTSE 100 dataset. 
This dataset is composed of the daily prices of 64 -# assets from the FTSE 100 Index composition starting from 2000-01-04 up to 2023-05-31: -from plotly.io import show -from PyTorch.model_selection import GridSearchCV, train_test_split - -from deepfolio import Population, RatioMeasure, RiskMeasure -from deepfolio.datasets import load_ftse100_dataset -from deepfolio.metrics import make_scorer -from deepfolio.model_selection import ( - CombinatorialPurgedCV, - WalkForward, - cross_val_predict, - optimal_folds_number, -) -from deepfolio.moments import EmpiricalCovariance, LedoitWolf -from deepfolio.optimization import ( - EqualWeighted, - HierarchicalEqualRiskContribution, - InverseVolatility, - MaximumDiversification, - MeanRisk, - ObjectiveFunction, - StackingOptimization, -) -from deepfolio.preprocessing import prices_to_returns -from deepfolio.prior import EmpiricalPrior - -prices = load_ftse100_dataset() - -X = prices_to_returns(prices) -X_train, X_test = train_test_split(X, test_size=0.50, shuffle=False) - -# %% -# Stacking Model -# ============== -# Our stacking model will be composed of 4 models: -# * Inverse Volatility -# * Maximum Diversification -# * Maximum Mean-Risk Utility allowing short position with L1 regularization -# * Hierarchical Equal Risk Contribution -# -# We will stack these 4 models together using the Mean-CDaR utility maximization: - -estimators = [ - ("model1", InverseVolatility()), - ("model2", MaximumDiversification(prior_estimator=EmpiricalPrior())), - ( - "model3", - MeanRisk(objective_function=ObjectiveFunction.MAXIMIZE_UTILITY, min_weights=-1), - ), - ("model4", HierarchicalEqualRiskContribution()), -] - -model_stacking = StackingOptimization( - estimators=estimators, - final_estimator=MeanRisk( - objective_function=ObjectiveFunction.MAXIMIZE_UTILITY, - risk_measure=RiskMeasure.CDAR, - ), -) - -# %% -# Benchmark -# ========= -# To compare the staking model, we use an equal-weighted benchmark: -benchmark = EqualWeighted() - -# %% -# Parameter Tuning -# ================ -# To demonstrate how parameter tuning works in a staking model, we find the model -# parameters that maximizes the out-of-sample Calmar Ratio using `GridSearchCV` with -# `WalkForward` cross-validation on the training set. -# The `WalkForward` are chosen to simulate a three months (60 business days) rolling -# portfolio fitted on the previous year (252 business days): -cv = WalkForward(train_size=252, test_size=60) - -grid_search = GridSearchCV( - estimator=model_stacking, - cv=cv, - n_jobs=-1, - param_grid={ - "model2__prior_estimator__covariance_estimator": [ - EmpiricalCovariance(), - LedoitWolf(), - ], - "model3__l1_coef": [0.001, 0.1], - "model4__risk_measure": [ - RiskMeasure.VARIANCE, - RiskMeasure.GINI_MEAN_DIFFERENCE, - ], - }, - scoring=make_scorer(RatioMeasure.CALMAR_RATIO), -) -grid_search.fit(X_train) -model_stacking = grid_search.best_estimator_ -print(model_stacking) - -# %% -# Prediction -# ========== -# We evaluate the Stacking model and the Benchmark using the same `WalkForward` object -# on the test set: -pred_bench = cross_val_predict( - benchmark, - X_test, - cv=cv, - portfolio_params=dict(name="Benchmark"), -) - -pred_stacking = cross_val_predict( - model_stacking, - X_test, - cv=cv, - n_jobs=-1, - portfolio_params=dict(name="Stacking"), -) - -# %% -# Each predicted object is a `MultiPeriodPortfolio`. 
-# For improved analysis, we can add them to a `Population`: -population = Population([pred_bench, pred_stacking]) - -# %% -# Let's plot the rolling portfolios cumulative returns on the test set: -population.plot_cumulative_returns() - -# %% -# Let's plot the rolling portfolios compositions: -population.plot_composition(display_sub_ptf_name=False) - -# %% -# Analysis -# ======== -# The Stacking model outperforms the Benchmark on the test set for the below ratios: -for ptf in population: - print("=" * 25) - print(" " * 8 + ptf.name) - print("=" * 25) - print(f"Sharpe ratio : {ptf.annualized_sharpe_ratio:0.2f}") - print(f"CVaR ratio : {ptf.cdar_ratio:0.5f}") - print(f"Calmar ratio : {ptf.calmar_ratio:0.5f}") - print("\n") - -# %% -# Let's display the full summary: -population.summary() - -# %% -# Combinatorial Purged Cross-Validation -# ===================================== -# Only using one testing path (the historical path) may not be enough for comparing both -# models. For a more robust analysis, we can use the -# :class:`~deepfolio.model_selection.CombinatorialPurgedCV` to create multiple testing -# paths from different training folds combinations. -# -# We choose `n_folds` and `n_test_folds` to obtain around 170 test paths and an average -# training size of 252 days: -n_folds, n_test_folds = optimal_folds_number( - n_observations=X_test.shape[0], - target_n_test_paths=170, - target_train_size=252, -) - -cv = CombinatorialPurgedCV(n_folds=n_folds, n_test_folds=n_test_folds) -cv.summary(X_test) - -# %% -pred_stacking = cross_val_predict( - model_stacking, - X_test, - cv=cv, - n_jobs=-1, - portfolio_params=dict(tag="Stacking"), -) - -# %% -# The predicted object is a `Population` of `MultiPeriodPortfolio`. Each -# `MultiPeriodPortfolio` represents one test path of a rolling portfolio. - -# %% -# Distribution -# ============ -# Let's plot the out-of-sample distribution of Sharpe Ratio for the Stacking model: -pred_stacking.plot_distribution( - measure_list=[RatioMeasure.ANNUALIZED_SHARPE_RATIO], n_bins=40 -) - -# %% -print( - "Average of Sharpe Ratio :" - f" {pred_stacking.measures_mean(measure=RatioMeasure.ANNUALIZED_SHARPE_RATIO):0.2f}" -) -print( - "Std of Sharpe Ratio :" - f" {pred_stacking.measures_std(measure=RatioMeasure.ANNUALIZED_SHARPE_RATIO):0.2f}" -) - -# %% -# Now, let's analyze how the sub-models would have performed independently and compare -# their distribution with the Stacking model: -population = Population([]) -for model_name, model in model_stacking.estimators: - pred = cross_val_predict( - model, - X_test, - cv=cv, - n_jobs=-1, - portfolio_params=dict(tag=model_name), - ) - population.extend(pred) -population.extend(pred_stacking) - -fig = population.plot_distribution( - measure_list=[RatioMeasure.ANNUALIZED_SHARPE_RATIO], - n_bins=40, - tag_list=["Stacking", "model1", "model2", "model3", "model4"], -) -show(fig) - -# %% -# Conclusion -# ========== -# The Stacking model outperforms the Benchmark on the historical test set. The -# distribution analysis on the recombined (non-historical) test sets shows that the -# Stacking model continues to outperform the Benchmark in average. diff --git a/examples/7_pre_selection/README.txt b/examples/7_pre_selection/README.txt deleted file mode 100644 index 21f99f4..0000000 --- a/examples/7_pre_selection/README.txt +++ /dev/null @@ -1,8 +0,0 @@ -.. _pre_selection_examples: - -Pre-selection -------------- - -Examples of using :ref:`pre-selection transformers ` with `Pipelines`. 
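The first pre-selection example below drops highly correlated assets before optimizing. The core idea can be sketched in a few lines of pandas; this is illustrative only, and deepfolio's DropCorrelated transformer may use a different rule for deciding which asset of a correlated pair to drop:

import numpy as np
import pandas as pd

def drop_correlated(returns: pd.DataFrame, threshold: float = 0.5) -> pd.DataFrame:
    # drop the later asset of every pair whose absolute correlation exceeds the threshold
    corr = returns.corr().abs()
    upper = corr.where(np.triu(np.ones(corr.shape, dtype=bool), k=1))
    to_drop = [col for col in upper.columns if (upper[col] > threshold).any()]
    return returns.drop(columns=to_drop)

rng = np.random.default_rng(0)
base = rng.normal(scale=0.01, size=(300, 1))
demo = pd.DataFrame(
    np.hstack([base, base + rng.normal(scale=0.001, size=(300, 1)), rng.normal(scale=0.01, size=(300, 2))]),
    columns=["A", "A_clone", "B", "C"],
)
print(drop_correlated(demo).columns.tolist())   # keeps A, B, C and drops the near-duplicate A_clone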
- - diff --git a/examples/7_pre_selection/plot_1_drop_correlated.py b/examples/7_pre_selection/plot_1_drop_correlated.py deleted file mode 100644 index 28fe0c8..0000000 --- a/examples/7_pre_selection/plot_1_drop_correlated.py +++ /dev/null @@ -1,159 +0,0 @@ -""" -============================= -Drop Highly Correlated Assets -============================= - -This tutorial introduces the :ref:`pre-selection transformers ` -:class:`~deepfolio.pre_selection.DropCorrelated` to remove highly correlated assets before -the optimization. - -Highly correlated assets tend to increase the instability of mean-variance optimization. - -In this example, we will compare a mean-variance optimization with and without -pre-selection. -""" - -# %% -# Data -# ==== -# We load the FTSE 100 :ref:`dataset ` composed of the daily prices of 64 -# assets from the FTSE 100 Index composition starting from 2000-01-04 up to 2023-05-31: -from plotly.io import show -from PyTorch import set_config -from PyTorch.model_selection import train_test_split -from PyTorch.pipeline import Pipeline - -from deepfolio import Population, RatioMeasure -from deepfolio.datasets import load_ftse100_dataset -from deepfolio.model_selection import ( - CombinatorialPurgedCV, - cross_val_predict, - optimal_folds_number -) -from deepfolio.optimization import MeanRisk, ObjectiveFunction -from deepfolio.pre_selection import DropCorrelated -from deepfolio.preprocessing import prices_to_returns - -prices = load_ftse100_dataset() - -X = prices_to_returns(prices) -X_train, X_test = train_test_split(X, test_size=0.33, shuffle=False) - -# %% -# Model -# ===== -# First, we create a maximum Sharpe Ratio model without pre-selection and fit it on the -# training set: -model1 = MeanRisk(objective_function=ObjectiveFunction.MAXIMIZE_RATIO) -model1.fit(X_train) -model1.weights_ - -# %% -# Pipeline -# ======== -# Then, we create a maximum Sharpe ratio model with pre-selection using `Pipepline` and -# fit it on the training set: -set_config(transform_output="pandas") - -model2 = Pipeline([ - ("pre_selection", DropCorrelated(threshold=0.5)), - ("optimization", MeanRisk(objective_function=ObjectiveFunction.MAXIMIZE_RATIO)), -]) -model2.fit(X_train) -model2.named_steps["optimization"].weights_ - -# %% -# Prediction -# ========== -# We predict both models on the test set: -ptf1 = model1.predict(X_test) -ptf1.name = "model1" -ptf2 = model2.predict(X_test) -ptf2.name = "model2" - -print(ptf1.n_assets) -print(ptf2.n_assets) - -# %% -# Each predicted object is a `MultiPeriodPortfolio`. -# For improved analysis, we can add them to a `Population`: -population = Population([ptf1, ptf2]) - -# %% -# Let's plot the portfolios cumulative returns on the test set: -population.plot_cumulative_returns() - -# %% -# Combinatorial Purged Cross-Validation -# ===================================== -# Only using one testing path (the historical path) may not be enough for comparing both -# models. For a more robust analysis, we can use the -# :class:`~deepfolio.model_selection.CombinatorialPurgedCV` to create multiple testing -# paths from different training folds combinations. 
-# -# We choose `n_folds` and `n_test_folds` to obtain around 100 test paths and an average -# training size of 800 days: -n_folds, n_test_folds = optimal_folds_number( - n_observations=X_test.shape[0], - target_n_test_paths=100, - target_train_size=800, -) - -cv = CombinatorialPurgedCV(n_folds=n_folds, n_test_folds=n_test_folds) -cv.summary(X_test) - -# %% -pred_1 = cross_val_predict( - model1, - X_test, - cv=cv, - n_jobs=-1, - portfolio_params=dict(annualized_factor=252, tag="model1"), -) - -pred_2 = cross_val_predict( - model2, - X_test, - cv=cv, - n_jobs=-1, - portfolio_params=dict(annualized_factor=252, tag="model2"), -) - -# %% -# The predicted object is a `Population` of `MultiPeriodPortfolio`. Each -# `MultiPeriodPortfolio` represents one testing path of a rolling portfolio. -# For improved analysis, we can merge the populations of each model: -population = pred_1 + pred_2 - -# %% -# Distribution -# ============ -# We plot the out-of-sample distribution of Sharpe ratio for both models: -fig = population.plot_distribution( - measure_list=[RatioMeasure.SHARPE_RATIO], tag_list=["model1", "model2"], n_bins=40 -) -show(fig) - -# %% -# | -# -# Model 1: -print( - "Average of Sharpe Ratio:" - f" {pred_1.measures_mean(measure=RatioMeasure.ANNUALIZED_SHARPE_RATIO):0.2f}" -) -print( - "Std of Sharpe Ratio:" - f" {pred_1.measures_std(measure=RatioMeasure.ANNUALIZED_SHARPE_RATIO):0.2f}" -) - -# %% -# Model 2: -print( - "Average of Sharpe Ratio:" - f" {pred_2.measures_mean(measure=RatioMeasure.ANNUALIZED_SHARPE_RATIO):0.2f}" -) -print( - "Std of Sharpe Ratio:" - f" {pred_2.measures_std(measure=RatioMeasure.ANNUALIZED_SHARPE_RATIO):0.2f}" -) diff --git a/examples/7_pre_selection/plot_2_select_best_performers.py b/examples/7_pre_selection/plot_2_select_best_performers.py deleted file mode 100644 index e104813..0000000 --- a/examples/7_pre_selection/plot_2_select_best_performers.py +++ /dev/null @@ -1,204 +0,0 @@ -""" -====================== -Select Best Performers -====================== - -This tutorial introduces the :ref:`pre-selection transformers ` -:class:`~deepfolio.pre_selection.SelectKExtremes` to select the `k` best or the `k` worst -assets according to a given measure before the optimization. - -In this example, we will use a `Pipeline` to assemble the pre-selection step with a -minimum variance optimization. Then, we will use cross-validation to find the optimal -number of pre-selected assets to maximize the mean out-of-sample Sharpe Ratio. 
-""" - -# %% -# Data -# ==== -# We load the FTSE 100 :ref:`dataset ` composed of the daily prices of 64 -# assets from the FTSE 100 Index starting from 2000-01-04 up to 2023-05-31: -import plotly.graph_objs as go -from plotly.io import show -from PyTorch import set_config -from PyTorch.model_selection import GridSearchCV, train_test_split -from PyTorch.pipeline import Pipeline - -from deepfolio import Population, RatioMeasure -from deepfolio.datasets import load_ftse100_dataset -from deepfolio.metrics import make_scorer -from deepfolio.model_selection import ( - WalkForward, - cross_val_predict, -) -from deepfolio.moments import EmpiricalCovariance -from deepfolio.optimization import MeanRisk -from deepfolio.pre_selection import SelectKExtremes -from deepfolio.preprocessing import prices_to_returns -from deepfolio.prior import EmpiricalPrior - -prices = load_ftse100_dataset() -X = prices_to_returns(prices) -X_train, X_test = train_test_split(X, test_size=0.33, shuffle=False) - -# %% -# Model -# ===== -# First, we create a Minimum Variance model without pre-selection: -benchmark = MeanRisk( - prior_estimator=EmpiricalPrior( - covariance_estimator=EmpiricalCovariance(nearest=True) - ), -) -# %% -# .. note:: -# A covariance matrix is in theory positive semi-definite (PSD). However, due to -# floating-point inaccuracies, we can end up with a covariance matrix that is just -# slightly non-PSD. This often occurs in high dimensional problems. By setting the -# `nearest` parameter from the covariance estimator to `True`, when the covariance -# is not positive semi-definite (PSD), it is replaced by the nearest covariance that -# is PSD without changing the variance. - -# %% -# Pipeline -# ======== -# Then, we create a Minimum Variance model with pre-selection using `Pipepline`: -set_config(transform_output="pandas") - -model = Pipeline([("pre_selection", SelectKExtremes()), ("optimization", benchmark)]) - -# %% -# Parameter Tuning -# ================ -# To demonstrate how parameter tuning works in a Pipeline model, we find the number of -# pre-selected assets `k` that maximizes the out-of-sample Sharpe Ratio using -# `GridSearchCV` with `WalkForward` cross-validation on the training set. The -# `WalkForward` is chosen to simulate a three months (60 business days) rolling -# portfolio fitted on the previous year (252 business days): -cv = WalkForward(train_size=252, test_size=60) - -scorer = make_scorer(RatioMeasure.ANNUALIZED_SHARPE_RATIO) -# %% -# Note that we can also create a custom scorer this way: -# `scorer=make_scorer(lambda pred: pred.mean - 0.5 * pred.variance)` - -grid_search = GridSearchCV( - estimator=model, - cv=cv, - n_jobs=-1, - param_grid={"pre_selection__k": list(range(5, 66, 3))}, - scoring=scorer, - return_train_score=True, -) -grid_search.fit(X_train) -model = grid_search.best_estimator_ -print(model) - -# %% -# Let's plot the train and test scores as a function of the number of pre-selected -# assets. 
The vertical line represents the best test score and the selected model: -cv_results = grid_search.cv_results_ -fig = go.Figure([ - go.Scatter( - x=cv_results["param_pre_selection__k"], - y=cv_results["mean_train_score"], - name="Train", - mode="lines", - line=dict(color="rgb(31, 119, 180)"), - ), - go.Scatter( - x=cv_results["param_pre_selection__k"], - y=cv_results["mean_train_score"] + cv_results["std_train_score"], - mode="lines", - line=dict(width=0), - showlegend=False, - ), - go.Scatter( - x=cv_results["param_pre_selection__k"], - y=cv_results["mean_train_score"] - cv_results["std_train_score"], - mode="lines", - line=dict(width=0), - showlegend=False, - fillcolor="rgba(31, 119, 180,0.15)", - fill="tonexty", - ), - go.Scatter( - x=cv_results["param_pre_selection__k"], - y=cv_results["mean_test_score"], - name="Test", - mode="lines", - line=dict(color="rgb(255,165,0)"), - ), - go.Scatter( - x=cv_results["param_pre_selection__k"], - y=cv_results["mean_test_score"] + cv_results["std_test_score"], - mode="lines", - line=dict(width=0), - showlegend=False, - ), - go.Scatter( - x=cv_results["param_pre_selection__k"], - y=cv_results["mean_test_score"] - cv_results["std_test_score"], - line=dict(width=0), - mode="lines", - fillcolor="rgba(255,165,0, 0.15)", - fill="tonexty", - showlegend=False, - ), -]) -fig.add_vline( - x=grid_search.best_params_["pre_selection__k"], - line_width=2, - line_dash="dash", - line_color="green", -) -fig.update_layout( - title="Train/Test score", - xaxis_title="Number of pre-selected best performers", - yaxis_title="Annualized Sharpe Ratio", -) -fig.update_yaxes(tickformat=".2f") -show(fig) - -# %% -# | -# -# The mean test Sharpe Ratio increases from 1.17 (for k=5) to its maximum 1.91 -# (for k=50) then decreases to 1.81 (for k=65). -# The selected model is a pre-selection of the top 50 performers based on their Sharpe -# Ratio, followed by a Minimum Variance optimization. - -# %% -# Prediction -# ========== -# Now we evaluate the two models using the same `WalkForward` object on the test set: -pred_bench = cross_val_predict( - benchmark, - X_test, - cv=cv, - portfolio_params=dict(name="Benchmark"), -) - -pred_model = cross_val_predict( - model, - X_test, - cv=cv, - n_jobs=-1, - portfolio_params=dict(name="Pre-selection"), -) - -# %% -# Each predicted object is a `MultiPeriodPortfolio`. -# For improved analysis, we can add them to a `Population`: -population = Population([pred_bench, pred_model]) - -# %% -# Let's plot the rolling portfolios cumulative returns on the test set: -population.plot_cumulative_returns() - -# %% -# Let's plot the rolling portfolios compositions: -population.plot_composition(display_sub_ptf_name=False) - -# %% -# Let's display the full summary: -population.summary() diff --git a/examples/8_data_preparation/README.txt b/examples/8_data_preparation/README.txt deleted file mode 100644 index f40d9f2..0000000 --- a/examples/8_data_preparation/README.txt +++ /dev/null @@ -1,8 +0,0 @@ -.. _data_preparation_examples: - -Data Preparation ----------------- - -Examples about data preparation. 
- - diff --git a/examples/8_data_preparation/plot_1_investment_horizon.py b/examples/8_data_preparation/plot_1_investment_horizon.py deleted file mode 100644 index 16c4ff8..0000000 --- a/examples/8_data_preparation/plot_1_investment_horizon.py +++ /dev/null @@ -1,78 +0,0 @@ -""" -================== -Investment Horizon -================== - -This tutorial explores the difference between the general -procedure using different investment horizon and the simplified procedure as explained -in :ref:`data preparation `. -""" - -# %% -# Prices -# ====== -# We load the S&P 500 :ref:`dataset ` composed of the daily prices of 20 -# assets from the S&P 500 Index composition starting from 1990-01-02 up to 2022-12-28: -from plotly.io import show - -from deepfolio import PerfMeasure, Population, RiskMeasure -from deepfolio.datasets import load_sp500_dataset -from deepfolio.optimization import MeanRisk -from deepfolio.preprocessing import prices_to_returns -from deepfolio.prior import EmpiricalPrior - -prices = load_sp500_dataset() -prices.head() - -# %% -# Linear Returns -# ============== -# We transform the daily prices into daily linear returns: -X = prices_to_returns(prices) - -# %% -# Model -# ===== -# We first create a Mean-Variance model using the simplified procedure: -population = Population([]) - -model = MeanRisk( - risk_measure=RiskMeasure.VARIANCE, - efficient_frontier_size=30, - portfolio_params=dict(name="Simplified", tag="Simplified"), -) -population.extend(model.fit_predict(X)) - -for tag, investment_horizon in [ - ("3M", 252 / 4), - ("1Y", 252), - ("10Y", 10 * 252), -]: - model = MeanRisk( - risk_measure=RiskMeasure.VARIANCE, - efficient_frontier_size=30, - prior_estimator=EmpiricalPrior( - is_log_normal=True, investment_horizon=investment_horizon - ), - portfolio_params=dict(name=f"General - {tag}", tag=f"General - {tag}"), - ) - population.extend(model.fit_predict(X)) - - -# %% -# Let's plot the efficient frontier: -fig = population.plot_measures( - x=RiskMeasure.ANNUALIZED_VARIANCE, - y=PerfMeasure.ANNUALIZED_MEAN, -) -show(fig) - -# %% -# | -# -# Let's plot the portfolios compositions: -population.plot_composition() - -# %% -# We can see that the simplified procedure only start to diverge from the general one -# for investment horizons longer than one year. 
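The divergence described above can be reproduced with the standard log-normal projection of linear-return moments. This is a sketch under that standard assumption; whether EmpiricalPrior(is_log_normal=True) uses exactly these formulas internally is not confirmed here:

import numpy as np

def project_moments(mean_daily, var_daily, horizon, log_normal=True):
    if not log_normal:
        # simplified procedure: scale the daily linear moments with the horizon
        return horizon * mean_daily, horizon * var_daily
    # general procedure: map linear-return moments to log-return moments,
    # scale those with the horizon, then map back to linear returns
    sigma2_log = np.log(1.0 + var_daily / (1.0 + mean_daily) ** 2)
    mu_log = np.log(1.0 + mean_daily) - 0.5 * sigma2_log
    mu_h, sigma2_h = horizon * mu_log, horizon * sigma2_log
    mean_h = np.exp(mu_h + 0.5 * sigma2_h) - 1.0
    var_h = (np.exp(sigma2_h) - 1.0) * np.exp(2.0 * mu_h + sigma2_h)
    return mean_h, var_h

for tag, horizon in [("3M", 63), ("1Y", 252), ("10Y", 2520)]:
    simplified = project_moments(5e-4, 1e-4, horizon, log_normal=False)
    general = project_moments(5e-4, 1e-4, horizon, log_normal=True)
    print(tag, "simplified:", np.round(simplified, 4), "general:", np.round(general, 4))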
diff --git a/examples/deep_learning/example_transformer.py b/examples/deep_learning/example_transformer.py deleted file mode 100644 index 7f6b07d..0000000 --- a/examples/deep_learning/example_transformer.py +++ /dev/null @@ -1,115 +0,0 @@ -from deepfolio.models.transformer import Transformer -import pandas as pd -from sklearn.model_selection import train_test_split -from sklearn.preprocessing import MinMaxScaler -import yfinance as yf -import tensorflow as tf -import numpy as np -import matplotlib.pyplot as plt - -# Set random seeds for reproducibility -tf.random.set_seed(42) -np.random.seed(42) - -# Model parameters -n_feature = 5 # Number of features per asset -n_assets = 10 # Number of assets -n_timestep = 30 # Number of time steps -n_layer = 3 # Number of Transformer layers -n_head = 8 # Number of attention heads -n_hidden = 64 # Number of hidden units -n_dropout = 0.1 # Dropout rate -batch_size = 32 -epochs = 50 -lb = 0.0 # Lower bound for asset weights -ub = 1.0 # Upper bound for asset weights - -def get_stock_data(tickers, start_date, end_date): - data = yf.download(tickers, start=start_date, end=end_date) - return data['Adj Close'] - -# Get the first 10 stocks of S&P 500 as an example -sp500 = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0] -tickers = sp500['Symbol'].tolist()[:n_assets] - -# Download stock data -stock_data = get_stock_data(tickers, '2010-01-01', '2023-01-01') - -# Calculate daily returns -returns = stock_data.pct_change().dropna() - -def calculate_features(returns): - features = pd.DataFrame() - for ticker in returns.columns: - # Calculate 5-day, 10-day, and 20-day moving averages - features[f'{ticker}_MA5'] = returns[ticker].rolling(window=5).mean() - features[f'{ticker}_MA10'] = returns[ticker].rolling(window=10).mean() - features[f'{ticker}_MA20'] = returns[ticker].rolling(window=20).mean() - # Calculate 5-day, 10-day, and 20-day volatility - features[f'{ticker}_VOL5'] = returns[ticker].rolling(window=5).std() - features[f'{ticker}_VOL10'] = returns[ticker].rolling(window=10).std() - features[f'{ticker}_VOL20'] = returns[ticker].rolling(window=20).std() - return features.dropna() - -features = calculate_features(returns) - -# Prepare input data -scaler = MinMaxScaler() -scaled_features = scaler.fit_transform(features) - -X = [] -y = [] -for i in range(len(scaled_features) - n_timestep): - X.append(scaled_features[i:i+n_timestep]) - y.append(returns.iloc[i+n_timestep].values) - -X = np.array(X) -y = np.array(y) - -# Split into training and test sets -X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) - -# Custom loss function: negative Sharpe ratio -def negative_sharpe_ratio(y_true, y_pred): - returns = tf.reduce_sum(y_true * y_pred, axis=1) - expected_return = tf.reduce_mean(returns) - stddev = tf.math.reduce_std(returns) - return -expected_return / (stddev + 1e-6) # Add small value to avoid division by zero - -# Create and compile the model -model = Transformer(n_feature * n_assets, n_timestep, n_layer, n_head, n_hidden, n_dropout, n_assets, lb, ub) -model.compile(optimizer='adam', loss=negative_sharpe_ratio) - -# Train the model -history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_split=0.2) - -# Evaluate the model -test_loss = model.evaluate(X_test, y_test) -print(f"Test loss: {test_loss}") - -# Make predictions using the model -predictions = model.predict(X_test) - -# Calculate Sharpe ratio on the test set -test_returns = np.sum(y_test * predictions, 
axis=1) -sharpe_ratio = np.mean(test_returns) / np.std(test_returns) -print(f"Sharpe Ratio on test set: {sharpe_ratio}") - -# Visualize results -plt.figure(figsize=(10, 5)) -plt.plot(history.history['loss'], label='Training Loss') -plt.plot(history.history['val_loss'], label='Validation Loss') -plt.title('Model Loss') -plt.xlabel('Epoch') -plt.ylabel('Loss') -plt.legend() -plt.show() - -# Visualize asset allocation for the last time step -plt.figure(figsize=(10, 5)) -plt.bar(tickers, predictions[-1]) -plt.title('Asset Allocation for Last Time Step') -plt.xlabel('Assets') -plt.ylabel('Weight') -plt.xticks(rotation=45) -plt.show() diff --git a/requirements.txt b/requirements.txt index 4f323cc3f484b8d459a3ba180380dcd0fe7d30ef..ccc448cad60732d4b153d0d4e21b55c6d515ad2e 100644 GIT binary patch delta 123 zcmXAhQ4T;b5JVrJ5=U_;8VON~mT(Jaae$u9w0RjHm1{&FEZHaK0|(_6b^rhX delta 6 NcmdnQw2yJZJ^%?70>S_Q diff --git a/setup.py b/setup.py index b92abaf..a4af17f 100644 --- a/setup.py +++ b/setup.py @@ -2,12 +2,12 @@ setuptools.setup( name='deepfolio', - version='1.0.4', + version='1.2.0', packages=setuptools.find_packages(), install_requires=[], url='https://github.com/jialuechen/deepfolio', license='BSD-2', author='Jialue Chen', author_email='jialuechen@outlook.com', - description='Portfolio Optimization Python Library Built on top of Keras and Tensorflow' + description='Python Library for Portfolio Optimization Built on top of Deep Learning' )