Add asymptotic analysis part 1 (#37)

Added additional theoretical analysis of the estimator to the paper. Added experiments with asymmetric and increasing data density.
yaniv-shulman · Dec 31, 2024 · e35cf92 · e35cf92
1 parent f4de92b
commit e35cf92
Show file tree

Hide file tree

Showing 21 changed files with 21,598 additions and 918 deletions.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -42,7 +42,7 @@ import numpy as np
 import pandas as pd
 
 from experiments.common import plot_results, ExperimentConfig
-from experiments.data.synthetic_benchmarks import benchmark_curve_1
+from experiments.data.synthetic_normal_benchmarks import benchmark_curve_1
 from rsklpr.rsklpr import Rsklpr
 
 experiment_config: ExperimentConfig = ExperimentConfig(
@@ -89,13 +89,12 @@ plot_results(
 ```
 ![Example usage curve_plot](./example_usage_curve.png)
 
-
 ```python
 import numpy as np
 import pandas as pd
 
 from experiments.common import plot_results, ExperimentConfig
-from experiments.data.synthetic_benchmarks import benchmark_plane_2
+from experiments.data.synthetic_normal_benchmarks import benchmark_plane_2
 from rsklpr.rsklpr import Rsklpr
 
 experiment_config: ExperimentConfig = ExperimentConfig(

diff --git a/paper/graphics/example_regression_1d_1.png b/paper/graphics/example_regression_1d_1.png
diff --git a/paper/graphics/exponential_asymmetrical_increasing_numpoints.png b/paper/graphics/exponential_asymmetrical_increasing_numpoints.png
diff --git a/paper/graphics/gamma_asymmetrical_increasing_numpoints.png b/paper/graphics/gamma_asymmetrical_increasing_numpoints.png
diff --git a/paper/graphics/gaussian_example_regression_1d.png b/paper/graphics/gaussian_example_regression_1d.png
diff --git a/paper/graphics/gaussian_increasing_numpoints.png b/paper/graphics/gaussian_increasing_numpoints.png
diff --git a/paper/graphics/log_normal_asymmetrical_increasing_numpoints.png b/paper/graphics/log_normal_asymmetrical_increasing_numpoints.png
diff --git a/paper/graphics/weibull_asymmetrical_increasing_numpoints.png b/paper/graphics/weibull_asymmetrical_increasing_numpoints.png
diff --git a/paper/rsklpr.pdf b/paper/rsklpr.pdf
diff --git a/paper/rsklpr.tex b/paper/rsklpr.tex
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -42,7 +42,7 @@ chart-studio = "^1.1.0"
 ipywidgets = "^8.1.5"
 localreg = "^0.5.0"
 matplotlib = "^3.9.2"
-notebook = "^7.2.2"
+notebook = "^7.3.1"
 pandas = "^2.2.3"
 plotly = "^5.24.1"
 statsmodels = "^0.14.3"

diff --git a/src/experiments/data/synthetic_asymmetric_benchmarks.py b/src/experiments/data/synthetic_asymmetric_benchmarks.py
@@ -0,0 +1,129 @@
+from typing import Tuple
+
+import numpy as np
+from scipy.special import gamma as gamma_function
+
+
+def benchmark_curve_exponential(num_points: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    """
+    Generates a dataset of points sampled from an exponential distribution whose mean follows a smooth curve.
+
+    Args:
+        num_points: The number of points to generate; x is a jittered, sorted grid built from [0, 1].
+
+    Returns:
+        The sorted predictor x, the sampled response y and the ground truth mean y_true.
+    """
+    generator: np.random.Generator = np.random.default_rng(seed=14)  # Fixed seed, so output is reproducible.
+    x: np.ndarray = np.linspace(start=0.0, stop=1.0, num=num_points)
+    x += generator.normal(scale=1 / np.sqrt(num_points), size=x.shape[0])  # Jitter; x may leave [0, 1].
+    sort_idx: np.ndarray = np.argsort(a=x)
+    x = x[sort_idx]  # Restore ascending order after the jitter.
+
+    y_true: np.ndarray = np.sqrt(np.abs(np.power(x, 3) - 4 * np.power(x, 4) / 3)) + (
+        0.1 * x / np.max(x) * np.sin(x * 3 * np.pi) * np.sin(x * 3 * np.pi)
+    )
+
+    y_true = y_true - y_true.min() + 0.1  # Shift so the minimum is 0.1, keeping the scale strictly positive.
+
+    y: np.ndarray = generator.exponential(scale=y_true)  # An exponential with scale s has mean s.
+
+    return (
+        x,
+        y,
+        y_true,
+    )
+
+
+def benchmark_curve_log_normal(num_points: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    """
+    Generates a dataset of points sampled from a log-normal distribution whose mean matches a smooth curve.
+
+    Args:
+        num_points: The number of points to generate; x is a jittered, clipped and sorted grid built from [0, 1].
+
+    Returns:
+        The sorted non-negative predictor x, the sampled response y and the ground truth mean y_true.
+    """
+    generator: np.random.Generator = np.random.default_rng(seed=14)  # Fixed seed, so output is reproducible.
+    x: np.ndarray = np.linspace(start=0.0, stop=1.0, num=num_points)
+    x += generator.normal(scale=1 / np.sqrt(num_points), size=x.shape[0])  # Jitter; x may leave [0, 1].
+    x = np.maximum(x, 0)  # Clip to non-negative values; np.power(x, 1.5) below is NaN for negative x.
+    sort_idx: np.ndarray = np.argsort(a=x)
+    x = x[sort_idx]  # Restore ascending order after the jitter.
+
+    y_true: np.ndarray = np.abs(np.sin(2 * np.pi * x) + 0.5 * np.power(x, 1.5))
+    y_true = y_true - y_true.min() + 0.1  # Shift so the minimum is 0.1, keeping np.log(y_true) finite.
+
+    sigma = 0.5  # Standard deviation of the underlying normal distribution, i.e. of log(y).
+    mu = np.log(y_true) - sigma**2 / 2  # Log-normal mean is exp(mu + sigma**2 / 2), so this makes it y_true.
+
+    y: np.ndarray = generator.lognormal(mean=mu, sigma=sigma)
+
+    return (
+        x,
+        y,
+        y_true,
+    )
+
+
+def benchmark_curve_gamma(num_points: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    """
+    Generates a dataset of points sampled from a gamma distribution whose mean matches a smooth curve.
+
+    Args:
+        num_points: The number of points to generate; x is a jittered, sorted grid built from [0, 1].
+
+    Returns:
+        The sorted predictor x, the sampled response y and the ground truth mean y_true.
+    """
+    generator: np.random.Generator = np.random.default_rng(seed=14)  # Fixed seed, so output is reproducible.
+    x: np.ndarray = np.linspace(start=0.0, stop=1.0, num=num_points)
+    x += generator.normal(scale=1 / np.sqrt(num_points), size=x.shape[0])  # Jitter; x may leave [0, 1].
+    sort_idx: np.ndarray = np.argsort(a=x)
+    x = x[sort_idx]  # Restore ascending order after the jitter.
+
+    y_true: np.ndarray = np.abs(np.power(x, 2) - 2 * x + 0.5)
+    y_true = y_true - y_true.min() + 0.1  # Shift so the minimum is 0.1, keeping the scale strictly positive.
+
+    shape = 2.0  # Gamma shape parameter
+    scale = y_true / shape  # The mean of a gamma is shape * scale, so the mean equals y_true.
+
+    y: np.ndarray = generator.gamma(shape=shape, scale=scale)
+
+    return (
+        x,
+        y,
+        y_true,
+    )
+
+
+def benchmark_curve_weibull(num_points: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    """
+    Generates a dataset of points sampled from a Weibull distribution whose mean matches a smooth curve.
+
+    Args:
+        num_points: The number of points to generate; x is a jittered, sorted grid built from [0, 1].
+
+    Returns:
+        The sorted predictor x, the response y (drawn by inverse-transform sampling) and the ground truth mean y_true.
+    """
+    generator: np.random.Generator = np.random.default_rng(seed=14)  # Fixed seed, so output is reproducible.
+    x: np.ndarray = np.linspace(start=0.0, stop=1.0, num=num_points)
+    x += generator.normal(scale=1 / np.sqrt(num_points), size=x.shape[0])  # Jitter; x may leave [0, 1].
+    sort_idx: np.ndarray = np.argsort(a=x)
+    x = x[sort_idx]  # Restore ascending order after the jitter.
+
+    y_true: np.ndarray = np.abs(np.cos(np.pi * x) + x * x)
+    y_true = y_true - y_true.min() + 0.1  # Shift so the minimum is 0.1, keeping the scale strictly positive.
+
+    shape = 1.5  # Weibull shape parameter
+    scale = y_true / gamma_function(1 + 1 / shape)  # Weibull mean is scale * Gamma(1 + 1 / shape), i.e. y_true.
+
+    y: np.ndarray = scale * np.power(-np.log(1 - generator.uniform(size=num_points)), 1 / shape)
+
+    return (
+        x,
+        y,
+        y_true,
+    )
diff --git a/src/experiments/data/synthetic_benchmarks.py → ...ments/data/synthetic_normal_benchmarks.py b/src/experiments/data/synthetic_benchmarks.py → ...ments/data/synthetic_normal_benchmarks.py
diff --git a/...roscedastic_dense_increasing_window.ipynb → ...roscedastic_dense_increasing_window.ipynb b/...roscedastic_dense_increasing_window.ipynb → ...roscedastic_dense_increasing_window.ipynb
diff --git a/...oscedastic_sparse_increasing_window.ipynb → ...oscedastic_sparse_increasing_window.ipynb b/...oscedastic_sparse_increasing_window.ipynb → ...oscedastic_sparse_increasing_window.ipynb
diff --git a/...moscedastic_dense_increasing_window.ipynb → ...moscedastic_dense_increasing_window.ipynb b/...moscedastic_dense_increasing_window.ipynb → ...moscedastic_dense_increasing_window.ipynb
diff --git a/...oscedastic_sparse_increasing_window.ipynb → ...oscedastic_sparse_increasing_window.ipynb b/...oscedastic_sparse_increasing_window.ipynb → ...oscedastic_sparse_increasing_window.ipynb
diff --git a/src/experiments/increasing_data_density.ipynb b/src/experiments/increasing_data_density.ipynb
diff --git a/src/experiments/multivar_heteroscedastic_dense_increasing_window.ipynb b/src/experiments/multivar_heteroscedastic_dense_increasing_window.ipynb
diff --git a/src/experiments/multivar_heteroscedastic_sparse_increasing_window.ipynb b/src/experiments/multivar_heteroscedastic_sparse_increasing_window.ipynb