Skip to content

Commit

Permalink
Add asymptotic analysis part 1 (#37)
Browse files Browse the repository at this point in the history
Added additional theoretical analysis of the estimator to the paper. Added experiments with asymmetric and increasing data density.
  • Loading branch information
yaniv-shulman authored Dec 31, 2024
1 parent f4de92b commit e35cf92
Show file tree
Hide file tree
Showing 21 changed files with 21,598 additions and 918 deletions.
5 changes: 2 additions & 3 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ import numpy as np
import pandas as pd

from experiments.common import plot_results, ExperimentConfig
from experiments.data.synthetic_benchmarks import benchmark_curve_1
from experiments.data.synthetic_normal_benchmarks import benchmark_curve_1
from rsklpr.rsklpr import Rsklpr

experiment_config: ExperimentConfig = ExperimentConfig(
Expand Down Expand Up @@ -89,13 +89,12 @@ plot_results(
```
![Example usage curve_plot](./example_usage_curve.png)


```python
import numpy as np
import pandas as pd

from experiments.common import plot_results, ExperimentConfig
from experiments.data.synthetic_benchmarks import benchmark_plane_2
from experiments.data.synthetic_normal_benchmarks import benchmark_plane_2
from rsklpr.rsklpr import Rsklpr

experiment_config: ExperimentConfig = ExperimentConfig(
Expand Down
Binary file removed paper/graphics/example_regression_1d_1.png
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added paper/graphics/gaussian_example_regression_1d.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added paper/graphics/gaussian_increasing_numpoints.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified paper/rsklpr.pdf
Binary file not shown.
324 changes: 285 additions & 39 deletions paper/rsklpr.tex

Large diffs are not rendered by default.

1,166 changes: 604 additions & 562 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ chart-studio = "^1.1.0"
ipywidgets = "^8.1.5"
localreg = "^0.5.0"
matplotlib = "^3.9.2"
notebook = "^7.2.2"
notebook = "^7.3.1"
pandas = "^2.2.3"
plotly = "^5.24.1"
statsmodels = "^0.14.3"
Expand Down
129 changes: 129 additions & 0 deletions src/experiments/data/synthetic_asymmetric_benchmarks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
from typing import Tuple

import numpy as np
from scipy.special import gamma as gamma_function


def benchmark_curve_exponential(num_points: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Builds a 1D benchmark whose response is exponentially distributed around a smooth mean curve.

    The predictor is an evenly spaced grid on [0, 1] perturbed by Gaussian jitter and then sorted. The response is
    drawn from an exponential distribution whose scale, and therefore mean, is the ground truth curve at each point.
    A fixed seed is used, so repeated calls with the same argument return the same data.

    Args:
        num_points: How many samples to generate.

    Returns:
        The sorted predictor values, the noisy response and the ground truth mean curve.
    """
    rng: np.random.Generator = np.random.default_rng(seed=14)

    # Jittered, sorted predictor locations.
    grid: np.ndarray = np.linspace(start=0.0, stop=1.0, num=num_points)
    jitter: np.ndarray = rng.normal(scale=1 / np.sqrt(num_points), size=grid.shape[0])
    x: np.ndarray = np.sort(grid + jitter)

    # Smooth mean curve: a square-root polynomial term plus a squared-sine ripple that grows with x.
    root_term: np.ndarray = np.sqrt(np.abs(np.power(x, 3) - 4 * np.power(x, 4) / 3))
    wave: np.ndarray = np.sin(x * 3 * np.pi)
    ripple: np.ndarray = 0.1 * x / np.max(x) * wave * wave
    curve: np.ndarray = root_term + ripple

    # Shift so the smallest mean is 0.1; the exponential scale must be strictly positive.
    y_true: np.ndarray = curve - curve.min() + 0.1

    y: np.ndarray = rng.exponential(scale=y_true)

    return x, y, y_true


def benchmark_curve_log_normal(num_points: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Builds a 1D benchmark whose response is log-normally distributed around a smooth mean curve.

    The predictor is an evenly spaced grid on [0, 1] perturbed by Gaussian jitter, clipped at zero and sorted. The
    parameters of the underlying normal are chosen so that the mean of the log-normal response equals the ground
    truth curve. A fixed seed is used, so repeated calls with the same argument return the same data.

    Args:
        num_points: How many samples to generate.

    Returns:
        The sorted predictor values, the noisy response and the ground truth mean curve.
    """
    rng: np.random.Generator = np.random.default_rng(seed=14)

    grid: np.ndarray = np.linspace(start=0.0, stop=1.0, num=num_points)
    jitter: np.ndarray = rng.normal(scale=1 / np.sqrt(num_points), size=grid.shape[0])
    # Clip at zero before sorting: the fractional power below is undefined for negative x.
    x: np.ndarray = np.sort(np.maximum(grid + jitter, 0))

    curve: np.ndarray = np.abs(np.sin(2 * np.pi * x) + 0.5 * np.power(x, 1.5))
    # Shift so the smallest mean is 0.1, keeping the logarithm below finite.
    y_true: np.ndarray = curve - curve.min() + 0.1

    sigma = 0.5  # Standard deviation of the underlying normal distribution
    # A log-normal has mean exp(mu + sigma^2 / 2), so subtract sigma^2 / 2 for the mean to equal y_true.
    mu = np.log(y_true) - sigma**2 / 2

    y: np.ndarray = rng.lognormal(mean=mu, sigma=sigma)

    return x, y, y_true


def benchmark_curve_gamma(num_points: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Builds a 1D benchmark whose response is gamma distributed around a smooth mean curve.

    The predictor is an evenly spaced grid on [0, 1] perturbed by Gaussian jitter and then sorted. The gamma shape is
    fixed and the scale varies per point so that the mean of the response equals the ground truth curve. A fixed seed
    is used, so repeated calls with the same argument return the same data.

    Args:
        num_points: How many samples to generate.

    Returns:
        The sorted predictor values, the noisy response and the ground truth mean curve.
    """
    rng: np.random.Generator = np.random.default_rng(seed=14)

    grid: np.ndarray = np.linspace(start=0.0, stop=1.0, num=num_points)
    jitter: np.ndarray = rng.normal(scale=1 / np.sqrt(num_points), size=grid.shape[0])
    x: np.ndarray = np.sort(grid + jitter)

    curve: np.ndarray = np.abs(np.power(x, 2) - 2 * x + 0.5)
    # Shift so the smallest mean is 0.1; the gamma scale must be strictly positive.
    y_true: np.ndarray = curve - curve.min() + 0.1

    shape = 2.0  # Gamma shape parameter
    # The gamma mean is shape * scale, so this per-point scale makes the mean equal y_true.
    scale = y_true / shape

    y: np.ndarray = rng.gamma(shape=shape, scale=scale)

    return x, y, y_true


def benchmark_curve_weibull(num_points: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Builds a 1D benchmark whose response is Weibull distributed around a smooth mean curve.

    The predictor is an evenly spaced grid on [0, 1] perturbed by Gaussian jitter and then sorted. The Weibull shape
    is fixed and the scale varies per point so that the mean of the response equals the ground truth curve. Samples
    are drawn by inverting the Weibull CDF on uniform draws. A fixed seed is used, so repeated calls with the same
    argument return the same data.

    Args:
        num_points: How many samples to generate.

    Returns:
        The sorted predictor values, the noisy response and the ground truth mean curve.
    """
    rng: np.random.Generator = np.random.default_rng(seed=14)

    grid: np.ndarray = np.linspace(start=0.0, stop=1.0, num=num_points)
    jitter: np.ndarray = rng.normal(scale=1 / np.sqrt(num_points), size=grid.shape[0])
    x: np.ndarray = np.sort(grid + jitter)

    curve: np.ndarray = np.abs(np.cos(np.pi * x) + x * x)
    # Shift so the smallest mean is 0.1; the Weibull scale must be strictly positive.
    y_true: np.ndarray = curve - curve.min() + 0.1

    shape = 1.5  # Weibull shape parameter
    # The Weibull mean is scale * Gamma(1 + 1 / shape), so divide it out for the mean to equal y_true.
    scale = y_true / gamma_function(1 + 1 / shape)

    # Inverse transform sampling: (-log(1 - U))^(1 / shape) is a unit-scale Weibull draw for uniform U.
    uniform_draws: np.ndarray = rng.uniform(size=num_points)
    unit_weibull: np.ndarray = np.power(-np.log(1 - uniform_draws), 1 / shape)
    y: np.ndarray = scale * unit_weibull

    return x, y, y_true

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

20,264 changes: 20,264 additions & 0 deletions src/experiments/increasing_data_density.ipynb

Large diffs are not rendered by default.

112 changes: 56 additions & 56 deletions src/experiments/multivar_heteroscedastic_dense_increasing_window.ipynb

Large diffs are not rendered by default.

Large diffs are not rendered by default.

0 comments on commit e35cf92

Please sign in to comment.