diff --git a/tutorials/getting_started.py b/tutorials/getting_started.py new file mode 100644 index 00000000..8faefa6d --- /dev/null +++ b/tutorials/getting_started.py @@ -0,0 +1,985 @@ +# %% [markdown] +# ## Getting started with Axtreme +# +# This notebook introduces the key concepts of **Axtreme** using a simple toy problem. +# We demonstrate how to define a simulation problem, build a surrogate model, estimate +# a Quantity of Interest (QoI), and use Design of Experiments (DoE) to efficiently +# reduce uncertainty. +# +# The imports below set up: +# +# - **Numerics & plotting**: `numpy`, `torch`, `matplotlib` +# - **Ax / BoTorch**: experiment definition and Gaussian-process modelling +# - **Axtreme**: helper functions for QoI estimation and DoE tailored to extreme responses +# - **Toy example code**: a small Gumbel-based simulator and environment data used only for this tutorial + +# %% +import sys +from collections.abc import Callable +from pathlib import Path + +import matplotlib.pyplot as plt +import numpy as np +import scipy +import torch +from ax import Experiment, SearchSpace +from ax.core import GeneratorRun, ObservationFeatures, ParameterType, RangeParameter +from ax.core.trial import Trial +from ax.modelbridge.registry import Models +from matplotlib.axes import Axes +from scipy.stats import gumbel_r +from torch.distributions import Normal +from torch.utils.data import DataLoader + +from axtreme import sampling +from axtreme.acquisition import QoILookAhead +from axtreme.data import FixedRandomSampler, MinimalDataset +from axtreme.experiment import add_sobol_points_to_experiment, make_experiment +from axtreme.metrics import QoIMetric +from axtreme.plotting.doe import plot_qoi_estimates_from_experiment +from axtreme.plotting.histogram3d import histogram_surface3d +from axtreme.qoi import MarginalCDFExtrapolation +from axtreme.sampling.ut_sampler import UTSampler +from axtreme.utils import population_estimators, transforms + +# Configure torch +torch.set_default_dtype(torch.float64) + +# Load the toy problem +root_dir = Path("../") +sys.path.append(str(root_dir)) +from examples.basic_example_usecase.problem.brute_force import collect_or_calculate_results +from examples.basic_example_usecase.problem.env_data import collect_data +from examples.basic_example_usecase.problem.simulator import DummySimulatorSeeded + +print("✓ Setup complete") + +# %% [markdown] +# ### Axtreme Workflow Overview +# +# The following diagram illustrates the general Axtreme process for extreme response estimation: +# +# **Key Steps:** +# +# 1. **Problem Definition**: Specify the simulator and environment distribution +# 2. **Surrogate Modeling**: Build a GP-based approximation using Ax/BoTorch +# 3. **QoI Estimation**: Efficiently estimate the Quantity of Interest using the surrogate +# 4. **Uncertainty Check**: Assess if the QoI uncertainty is acceptable +# 5. **Design of Experiments**: Iteratively refine the surrogate by intelligently selecting new evaluation points + +# %% +from IPython.display import Image + +Image(filename="img/axtreme_workflow.png", width=800) + +# %% [markdown] +# ### Tutorial Note: +# This tutorial also computes a **brute-force reference** QoI using extensive simulation +# runs. This is done purely for validation purposes to demonstrate that Axtreme converges +# to the correct answer. In real applications, such brute-force computation is typically +# too expensive or infeasible. 
+ +# %% [markdown] +# ## Step 1: Problem Inputs +# +# The `axtreme` package needs two core ingredients: +# +# 1. A **simulator** +# 2. A set of **environment samples** +# +# These two objects completely define the *probabilistic problem* we want to solve. +# +# ### 1.1 Simulator +# +# The simulator is a function +# +# $$ +# x \mapsto y +# $$ +# +# where: +# +# - $x$ is a vector of input / environment variables (here a 2D point: `x = [x1, x2]` in $[0, 1]^2$). +# - $y$ is a **random** output because the simulator includes noise. +# +# In this toy problem the noise model is a **Gumbel distribution**: +# +# - The simulator is parameterised by: +# - a **location** function `loc(x)` and +# - a **scale** function `scale(x)` +# - For each input $x$ the output is: +# +# $$ +# y \sim \text{Gumbel}(\text{loc}(x), \text{scale}(x)) +# $$ +# +# In a real application, we **don't know** the true `loc(x)` and `scale(x)`; we only see +# noisy simulator outputs. Here we cheat a little: we have access to the "true" functions +# so we can visualise what is going on and check that our methods behave sensibly. +# +# **Visualizing the stochastic nature of the simulator:** +# +# Below we run the simulator 500 times at the same input point $x = [0.5, 0.5]$. The +# histogram shows the characteristic right-skewed shape of a Gumbel distribution — this +# is the noise model built into our toy simulator. + +# %% +# Use the seeded simulator for reproducible results +sim = DummySimulatorSeeded() + +# Run the simulator multiple times at the same point to visualize the response distribution +test_point = np.array([[0.5, 0.5]]) +n_samples = 500 +response = sim(test_point, n_simulations_per_point=n_samples) + +# Visualize the distribution of simulator responses +plt.figure(figsize=(10, 4)) +plt.hist(response.flatten(), bins=40, density=True, alpha=0.7, edgecolor="black") +plt.xlabel("Response value") +plt.ylabel("Density") +plt.title("Simulator Response Distribution at x = [0.5, 0.5]") +plt.grid(alpha=0.3) +plt.show() + +print("The histogram shows the Gumbel-shaped distribution of simulator responses at a fixed input point.") + +# %% [markdown] +# ### 1.2 Environment Data +# +# The second ingredient is a set of **environment samples**: +# +# - Each sample is a 2D point $x = [x1, x2]$ +# - Together they represent the **conditions your system actually experiences** in practice: +# - These might come from historical data +# - Or from a probabilistic model of the environment +# +# In this toy example we load a pre-generated dataset: +# +# - We treat it purely as **input samples** — no simulator has been run on them yet. +# - We'll later use them to: +# - Estimate our **Extreme Response Distribution (ERD)** by brute force, and +# - Feed them into Axtreme's QoI estimators so we can reuse them efficiently with the surrogate. + +# %% +# Load environment data +env_data = collect_data().to_numpy() +print(f"Environment data shape: {env_data.shape}") +print(f"First 5 samples:\n{env_data[:5]}") + +# Visualize the environment distribution +fig = histogram_surface3d(env_data) +fig.update_layout(title_text="Environment Distribution", scene_aspectmode="cube", height=500) +fig.show() + +# %% [markdown] +# ## Step 2: Brute-force Extreme Response and QoI +# +# Before we introduce any surrogate modelling, we first compute a **reference answer** using raw simulation. +# +# ### 2.1 Extreme Response Distribution (ERD) +# +# A **period** represents a time window over which we want to find the extreme (maximum) response. 
+# For example, in offshore engineering this might represent a return period of 25 years, containing +# many individual sea states. The number of environment samples per period (`N_ENV_SAMPLES_PER_PERIOD`) +# determines how many conditions we observe within that time window. +# +# For each period we: +# +# 1. Draw `N_ENV_SAMPLES_PER_PERIOD` inputs from the environment data +# 2. Run the simulator on each input +# 3. Record the **maximum** response over that period +# +# Each period gives one sample from the **Extreme Response Distribution (ERD)**: +# +# $$ +# \text{ERD sample} = \max_{t \in \text{period}} y(t) +# $$ +# +# Repeating this many times gives us an empirical ERD, which we visualise as a histogram. +# +# ### 2.2 Quantity of Interest (QoI) +# +# In this tutorial our **Quantity of Interest (QoI)** is: +# +# > The **median** of the Extreme Response Distribution. +# +# Intuitively: +# +# - We're asking: "What is a *typical* extreme response over a period?" +# - This is a robust measure of tail behaviour that is easier to estimate than, say, a 1-in-1000-year extreme. +# +# We compute: +# +# - A **brute-force estimate** of the ERD median using many simulator calls +# - An associated **estimated uncertainty** using Axtreme's `population_estimators` helpers +# +# This brute-force QoI estimate will serve as: +# +# - A **ground-truth** comparison for the surrogate-based estimates we build later. + +# %% +# Define the period length (number of environment samples per period) +N_ENV_SAMPLES_PER_PERIOD = 1000 + +# Load pre-calculated brute force results (300,000 ERD samples) +precalced_erd_samples, precalced_erd_x = collect_or_calculate_results(N_ENV_SAMPLES_PER_PERIOD, 300_000) +brute_force_qoi_estimate = np.median(precalced_erd_samples) + +# Calculate QoI uncertainty using population estimators +population_median_est_dist = population_estimators.sample_median_se(torch.tensor(precalced_erd_samples)) + +print(f"Brute force QoI (median ERD): {brute_force_qoi_estimate:.4f}") +print(f"This required 300,000 × {N_ENV_SAMPLES_PER_PERIOD} = 300M simulator calls!") + +# Visualize the ERD with QoI uncertainty +fig, axes = plt.subplots(ncols=2, figsize=(14, 4)) + +# Left plot: ERD histogram with QoI uncertainty overlay +axes[0].hist(precalced_erd_samples, bins=100, density=True, alpha=0.7, edgecolor="black", label="ERD") +axes[0].set_xlabel("Extreme Response") +axes[0].set_ylabel("ERD Density") +axes[0].set_title("Extreme Response Distribution (Brute Force)") +axes[0].grid(alpha=0.3) + +# Add QoI uncertainty on secondary y-axis +ax_twin = axes[0].twinx() +population_estimators.plot_dist(population_median_est_dist, ax=ax_twin, c="red", label="QoI estimate") +ax_twin.set_ylabel("QoI Estimate PDF") +ax_twin.legend(loc="upper right") + +# Add median line +axes[0].axvline(brute_force_qoi_estimate, color="red", linestyle="--", linewidth=2, alpha=0.5) +axes[0].legend(loc="upper left") + +# Right plot: Zoomed-in view of QoI uncertainty only +population_estimators.plot_dist(population_median_est_dist, ax=axes[1], c="red") +axes[1].axvline( + brute_force_qoi_estimate, + color="red", + linestyle="--", + linewidth=2, + label=f"Median ERD = {brute_force_qoi_estimate:.4f}", +) +axes[1].set_xlabel("QoI Value (Median ERD)") +axes[1].set_ylabel("Density") +axes[1].set_title("QoI Estimate Distribution (Zoomed)") +axes[1].legend() +axes[1].grid(alpha=0.3) + +plt.tight_layout() +plt.show() + +# %% [markdown] +# ## Step 3: Build a Surrogate Model with Ax + BoTorch +# +# Running the simulator many times is expensive. 
+# Instead, we build a **statistical surrogate** (a Gaussian Process) that learns:
+#
+# - The mapping from inputs $(x1, x2)$ to the **location** parameter of the Gumbel noise
+# - The mapping from inputs $(x1, x2)$ to the **scale** parameter of the Gumbel noise
+#
+# ### 3.1 Configuration
+#
+# Before creating the Ax experiment, we set up the core components:
+#
+# 1. **Search space**: Define the valid input domain — here, `x1` and `x2` both in
+#    $[0, 1]$
+# 2. **Noise distribution**: Specify the distribution family for the simulator noise
+#    (Gumbel)
+# 3. **Simulations per point**: How many simulator runs to perform at each training point
+#    to estimate the distribution parameters (`loc` and `scale`). Higher values lead to less
+#    uncertainty in the GP fit, but increase computation time. Axtreme is designed to work well
+#    with few simulations per point, but higher values can be useful for debugging and testing
+#    purposes.
+#
+# These configuration choices affect:
+#
+# - How much information each training point provides (more simulations → better estimates)
+# - The computational cost per training point
+# - The smoothness of the resulting surrogate model
+
+# %%
+# 1. Define the search space (2D: x1 and x2 both in [0, 1])
+search_space = SearchSpace(
+    parameters=[
+        RangeParameter(name="x1", parameter_type=ParameterType.FLOAT, lower=0, upper=1),
+        RangeParameter(name="x2", parameter_type=ParameterType.FLOAT, lower=0, upper=1),
+    ]
+)
+
+# 2. Choose distribution for noise model
+dist = gumbel_r
+
+# 3. Number of simulations per point
+N_SIMULATIONS_PER_POINT = 200
+
+print("✓ Search space: 2D unit square")
+print("✓ Distribution: Gumbel")
+print(f"✓ Simulations per point: {N_SIMULATIONS_PER_POINT}")
+
+# %% [markdown]
+# ### 3.2 Initial training data (Sobol design)
+#
+# We start by generating an initial, **space-filling** design using a Sobol sequence:
+#
+# - Sobol points cover the space well without any modelling assumptions.
+# - At each Sobol point `x`, we run the simulator multiple times to estimate:
+#   - the **location** and
+#   - the **scale** of the response distribution.
+#
+# These initial points are used to fit the first GP surrogate using `Models.BOTORCH_MODULAR`.
+#
+# **What the GP actually learns:**
+#
+# The `make_experiment()` function is a helper that configures an Ax experiment with
+# the appropriate settings. When trials are later added and run, the experiment will:
+#
+# 1. Run the simulator `N_SIMULATIONS_PER_POINT` times at each training point `x`
+# 2. Fit the Gumbel distribution to those samples to estimate `loc(x)` and `scale(x)`
+# 3. Provide these fitted parameters to the GP
+#
+# So the GP learns **two separate mappings**:
+#
+# - `x → loc`: How the location parameter varies across the input space
+# - `x → scale`: How the scale parameter varies across the input space
+#
+# The GP is **not** trained on raw `(x, y)` pairs. Instead, it learns the underlying
+# distribution parameters from multiple noisy observations at each `x`.
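+# %% [markdown]
+# As a quick aside, the fitting step described above can be reproduced directly
+# with `scipy.stats`. The cell below is purely illustrative (the exact estimator
+# `make_experiment` uses internally may differ): it condenses many noisy responses
+# at one input into a single (`loc`, `scale`) pair, which is the kind of training
+# data the GP receives.
+
+# %%
+# Illustration only: fit a Gumbel to repeated simulator runs at a single input point.
+samples_at_x = sim(np.array([[0.3, 0.7]]), n_simulations_per_point=N_SIMULATIONS_PER_POINT).flatten()
+loc_hat, scale_hat = gumbel_r.fit(samples_at_x)
+print(f"Fitted Gumbel at x=[0.3, 0.7]: loc={loc_hat:.3f}, scale={scale_hat:.3f}")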
+ + +# %% +# Helper function to create experiments with consistent settings +def make_exp() -> Experiment: + return make_experiment(sim, search_space, dist, n_simulations_per_point=N_SIMULATIONS_PER_POINT) + + +# Create experiment and add 30 initial training points +exp = make_exp() +add_sobol_points_to_experiment(exp, n_iter=30, seed=8) + +# Train the surrogate model +botorch_model_bridge = Models.BOTORCH_MODULAR( + experiment=exp, + data=exp.fetch_data(), +) + +print(f"✓ Trained GP with {len(exp.trials)} training points") +print(f"✓ Total simulator calls: {len(exp.trials) * N_SIMULATIONS_PER_POINT}") + +# %% [markdown] +# ### 3.3 Comparing surrogate vs simulator +# +# To build intuition, we pick a single test point `x = [0.5, 0.5]` and: +# +# - Sample many responses from the **true simulator** +# - Predict the mean Gumbel distribution parameters from the **surrogate** +# - Sample from the GP posterior to visualize uncertainty +# +# The plot shows: +# +# - **Histogram**: 200 simulator runs at the test point, showing the true response distribution +# - **Red line**: The surrogate's mean predicted Gumbel distribution +# - **Grey lines**: 10 sampled Gumbel distributions from the GP posterior +# +# The GP posterior captures uncertainty about the true `loc` and `scale` parameters at +# this location. The grey lines show the range of plausible response distributions +# consistent with the training data. When the surrogate has high confidence, these lines +# cluster tightly around the mean; when uncertain, they spread out. + +# %% +# Predict at x = [0.5, 0.5] +test_x = {"x1": 0.5, "x2": 0.5} +pred_mean, pred_covariance = botorch_model_bridge.predict([ObservationFeatures(parameters=test_x)]) + +# Get simulator samples at the same point +simulator_samples = sim(np.array([[0.5, 0.5]]), n_simulations_per_point=200).flatten() + +# Mean prediction +pred_dist_mean = dist(loc=pred_mean["loc"], scale=pred_mean["scale"]) +x_points = np.linspace(simulator_samples.min(), simulator_samples.max(), 100) + +# Sample from GP posterior to show uncertainty +mean = np.array([pred_mean["loc"], pred_mean["scale"]]).flatten() +covariance = np.array( + [ + [pred_covariance["loc"]["loc"], pred_covariance["loc"]["scale"]], + [pred_covariance["scale"]["loc"], pred_covariance["scale"]["scale"]], + ] +).reshape(2, 2) + +surrogate_distribution = scipy.stats.multivariate_normal(mean, covariance) +posterior_samples = surrogate_distribution.rvs(size=10, random_state=42) + +# Plot +plt.figure(figsize=(10, 4)) +plt.hist(simulator_samples, bins=20, density=True, alpha=0.6, label="Simulator", edgecolor="black") + +# Plot posterior samples with transparency +for i, sample in enumerate(posterior_samples): + sample_dist = dist(loc=sample[0], scale=sample[1]) + if i == 0: + plt.plot( + x_points, + sample_dist.pdf(x_points), # type: ignore[union-attr] + "grey", + linewidth=1, + alpha=0.5, + label="GP Surrogate posterior samples", + ) + else: + plt.plot(x_points, sample_dist.pdf(x_points), "grey", linewidth=1, alpha=0.5) # type: ignore[union-attr] + +# Plot mean prediction on top +plt.plot(x_points, pred_dist_mean.pdf(x_points), "r-", linewidth=2, label="GP Surrogate (mean)") # type: ignore[union-attr] + +plt.xlabel("Response value") +plt.ylabel("Density") +plt.title("Surrogate vs Simulator at x = [0.5, 0.5]") +plt.legend() +plt.grid(alpha=0.3) +plt.show() + +print(f"Surrogate prediction: loc={pred_mean['loc'][0]:.3f}, scale={pred_mean['scale'][0]:.3f}") + +# %% [markdown] +# ## Step 4: Estimate the QoI from the Surrogate +# +# Now that 
we have a surrogate model, we can use it to estimate our **Quantity of
+# Interest** (the median of the ERD) **without repeatedly calling the expensive
+# simulator**.
+#
+# The key idea:
+#
+# - The surrogate is fast to evaluate.
+# - We use it to *emulate* what the simulator would do over many periods of environment samples.
+# - We propagate both:
+#   - **environment randomness**, and
+#   - **surrogate uncertainty**
+#   into our QoI estimate.
+#
+# ### 4.1 Environment sampling via DataLoader
+#
+# We wrap our environment samples in a small `Dataset` and `DataLoader`:
+#
+# - This lets the QoI estimator iterate over batches of environment points.
+# - We can easily control how many total environment samples we use for the QoI calculation.
+# - Different runs (with different seeds) can use different resampled subsets if desired.
+
+# %%
+# Set up environment data loader
+n_env_samples = 4_000
+dataset = MinimalDataset(env_data)
+sampler = FixedRandomSampler(dataset, num_samples=n_env_samples, seed=10, replacement=True)
+dataloader = DataLoader(dataset, sampler=sampler, batch_size=256)
+
+# %% [markdown]
+# ### 4.2 QoI estimator: MarginalCDFExtrapolation
+#
+# We use Axtreme's `MarginalCDFExtrapolation` as a ready-made QoI estimator that efficiently
+# computes the QoI from the surrogate model while propagating uncertainty from the GP posterior.
+#
+# The result is a **distribution over the QoI**, not just a single number.
+
+# %%
+# Create QoI estimator
+qoi_estimator = MarginalCDFExtrapolation(
+    env_iterable=dataloader,
+    period_len=N_ENV_SAMPLES_PER_PERIOD,
+    quantile=torch.tensor(0.5),  # Median
+    quantile_accuracy=torch.tensor(0.01),
+    posterior_sampler=UTSampler(),
+)
+
+print("✓ QoI estimator configured")
+
+# %% [markdown]
+# ### 4.3 Effect of training data size
+#
+# To see the impact of more simulator data, we repeat:
+#
+# - Use `N` training points to fit a surrogate
+# - Use that surrogate to estimate the QoI distribution
+#
+# for several values of `N` (30, 64, 128).
+#
+# For each `N` we:
+#
+# - Approximate the QoI distribution with a Normal using the sampler's `mean` and `var` methods
+# - Plot the corresponding PDF
+# - Overlay the **brute-force QoI** as a vertical line
+#
+# Results:
+#
+# - With **few** training points, the QoI distribution is **wide** (high uncertainty).
+# - As we add more training points, the distribution becomes **sharper** (narrower),
+#   and its mean stays close to the brute-force value.
+#
+# This confirms that:
+#
+# > The surrogate-based QoI estimator converges towards the brute-force answer as more simulator data is added.
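+# %% [markdown]
+# Before running the comparison, it helps to see the extrapolation idea at the heart
+# of `MarginalCDFExtrapolation` in isolation. If a single environment event has CDF
+# $F(y)$, the maximum of $N$ independent events has CDF $F(y)^N$, so the median of the
+# ERD solves $F(y)^N = 0.5$, i.e. $y = F^{-1}(0.5^{1/N})$. The cell below is a
+# deliberately simplified, single-distribution sketch of this identity (the Gumbel
+# parameters are arbitrary; the actual estimator additionally marginalises over the
+# environment samples and propagates GP uncertainty):
+
+# %%
+# Sketch: extrapolate the median of the period maximum from the per-event CDF.
+single_event = gumbel_r(loc=0.0, scale=1.0)  # arbitrary per-event distribution
+n_events = N_ENV_SAMPLES_PER_PERIOD
+
+# Extrapolation: invert the per-event CDF at 0.5 ** (1 / n_events)
+median_extrapolated = single_event.ppf(0.5 ** (1 / n_events))
+
+# Direct check: sample many periods and take the empirical median of the maxima
+period_maxima = single_event.rvs(size=(5_000, n_events), random_state=11).max(axis=1)
+print(f"Extrapolated median: {median_extrapolated:.3f}, sampled median: {np.median(period_maxima):.3f}")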
+ +# %% +# Train surrogates with different numbers of training points +n_training_points = [30, 64, 128] +results = [] + +for n_points in n_training_points: + # Create and train experiment + exp_temp = make_exp() + add_sobol_points_to_experiment(exp_temp, n_iter=n_points, seed=8) + + model_bridge = Models.BOTORCH_MODULAR(experiment=exp_temp, data=exp_temp.fetch_data()) + + # Set up transforms + input_transform, outcome_transform = transforms.ax_to_botorch_transform_input_output( + transforms=list(model_bridge.transforms.values()), outcome_names=model_bridge.outcomes + ) + qoi_estimator.input_transform = input_transform + qoi_estimator.outcome_transform = outcome_transform + + # Estimate QoI + model = model_bridge.model.surrogate.model + result = qoi_estimator(model) + results.append(result) + + print(f"✓ Computed QoI with {n_points} training points") + +# %% [markdown] +# Let's see how the QoI estimate improves with more training data: + +# %% +# Visualize QoI estimates +fig, axes = plt.subplots(nrows=len(n_training_points), figsize=(10, 3 * len(n_training_points)), sharex=True) + +for ax, estimate, n_points in zip(axes, results, n_training_points, strict=True): + # Extract mean and variance from the estimate + # Cast to UTSampler to access mean/var methods (not part of PosteriorSampler protocol) + ut_sampler = qoi_estimator.posterior_sampler + assert isinstance(ut_sampler, UTSampler) + qoi_mean = ut_sampler.mean(torch.tensor(estimate), -1) + qoi_var = ut_sampler.var(torch.tensor(estimate), -1) + qoi_dist = Normal(qoi_mean, qoi_var**0.5) + + # Plot QoI distribution + x_range = torch.linspace(float(qoi_mean - 3 * qoi_var**0.5), float(qoi_mean + 3 * qoi_var**0.5), 200) + ax.plot(x_range.numpy(), torch.exp(qoi_dist.log_prob(x_range)).numpy(), "b-", linewidth=2, label="QoI estimate") + ax.fill_between(x_range.numpy(), 0, torch.exp(qoi_dist.log_prob(x_range)).numpy(), alpha=0.3) + + # Add brute force reference + ax.axvline(brute_force_qoi_estimate, color="red", linestyle="--", linewidth=2, label="Brute force") + + ax.set_title(f"QoI Estimate with {n_points} Training Points") + ax.set_ylabel("Density") + ax.legend() + ax.grid(alpha=0.3) + +axes[-1].set_xlabel("Response") +plt.tight_layout() +plt.show() + +# %% [markdown] +# ## 5. Design of Experiments (DoE) +# +# In the earlier sections, we saw that adding more simulator training data improves our +# surrogate model and reduces the uncertainty of our **Quantity of Interest (QoI)** +# estimate. However, **where** we choose to evaluate the simulator matters just as much +# as **how many** points we evaluate. +# +# Design of Experiments (DoE) helps us choose simulator inputs intelligently, so that +# each new simulation gives us the **maximum possible reduction in QoI uncertainty**. +# +# In this section we compare: +# +# - a baseline approach (random Sobol sampling), and +# - a QoI-aware approach that uses Axtreme's `QoILookAhead` acquisition function to pick the most informative points. +# +# We will track the QoI after each iteration to see which strategy converges faster. +# +# ### 5.1 QoI tracking metric and stopping criteria +# +# To monitor the progress of the DoE, we attach a special metric — `QoIMetric` — to +# the Ax experiment. After each iteration: +# +# - The surrogate model is updated. +# - The QoI is re-estimated using the updated surrogate. +# - The QoI estimate (mean and standard error) is stored inside the experiment's data. 
+#
+# This allows us to plot the QoI over time and optionally stop early when the QoI
+# uncertainty becomes sufficiently small.
+#
+# We implement:
+#
+# 1. **QoI tracking metric**: Automatically computes and logs the QoI after each
+#    surrogate update
+# 2. **Stopping criterion** (`sem_stopping_criteria`): Checks if the standard error of
+#    the QoI drops below a threshold (e.g., 0.02)
+# 3. **Trial runner** (`run_trials`): A helper function that:
+#    - Runs warm-up trials using a space-filling design (e.g., Sobol)
+#    - Then runs DoE trials using a specified acquisition strategy
+#    - Checks the stopping criterion after each DoE iteration
+#    - Returns early if the QoI uncertainty is sufficiently small
+#
+# This approach avoids unnecessary simulator calls once the QoI estimate is sufficiently accurate.
+
+# %%
+# Define QoI tracking metric
+QOI_METRIC = QoIMetric(
+    name="QoIMetric",
+    qoi_estimator=qoi_estimator,
+    minimum_data_points=3,  # don't compute QoI until some data exists
+    attach_transforms=True,
+)
+
+
+# Define stopping criteria based on standard error of the mean (SEM)
+def sem_stopping_criteria(experiment: Experiment, sem_threshold: float = 0.02, metric_name: str = "QoIMetric") -> bool:
+    """Stop when the standard error of the QoI estimate drops below a threshold."""
+    metrics = experiment.fetch_data()
+    df = metrics.df
+    qoi_rows = df[df["metric_name"] == metric_name]
+
+    if len(qoi_rows) == 0:
+        # No QoI yet
+        return False
+
+    latest = qoi_rows.iloc[-1]
+    sem = latest["sem"]
+
+    # Stop if SEM is finite and sufficiently small
+    return (sem is not None) and (not np.isnan(sem)) and (sem <= sem_threshold)
+
+
+# Define function to run trials with progress output
+def run_trials(
+    experiment: Experiment,
+    warm_up_generator: Callable[[Experiment], GeneratorRun],
+    doe_generator: Callable[[Experiment], GeneratorRun],
+    warm_up_runs: int = 3,
+    doe_runs: int = 15,
+    stopping_criteria: Callable[[Experiment], bool] | None = None,
+) -> int:
+    """Run warm-up + DoE trials with simple progress output."""
+
+    total_iters = warm_up_runs + doe_runs
+    print(f"Starting DoE run: {warm_up_runs} warm-up + {doe_runs} DoE iterations")
+    print("-" * 60)
+
+    for i in range(total_iters):
+        phase = "Warm-up" if i < warm_up_runs else "DoE"
+
+        # Select generator
+        gen_fn = warm_up_generator if i < warm_up_runs else doe_generator
+        gen = gen_fn(experiment)
+
+        # Run trial
+        trial = experiment.new_trial(gen)
+        trial.run()
+        trial.mark_completed()
+
+        # Print progress
+        print(f"[{i + 1}/{total_iters}] {phase} iteration {i + 1} completed")
+
+        # Stopping criteria only valid during DoE
+        if (i >= warm_up_runs) and (stopping_criteria is not None) and stopping_criteria(experiment):
+            print(f"✓ Stopping criterion met after {i - warm_up_runs + 1} DoE iterations.")
+            print("-" * 60)
+            return i + 1
+
+    print("✓ DoE run completed.")
+    print("-" * 60)
+    return total_iters
+
+
+# %% [markdown]
+# ### 5.2 Baseline: Sobol-only DoE
+#
+# As a baseline, we perform DoE using only Sobol points:
+#
+# - Sobol sequences are space-filling and require no modelling assumptions.
+# - They cover the full input domain evenly.
+# - They do not focus sampling effort on regions that matter most for the QoI.
+#
+# The Sobol-only experiment therefore provides a fair reference point to compare
+# against more advanced acquisition strategies.
+#
+# We:
+#
+# 1. Create a new Ax experiment with the same simulator, search space, and noise model as before.
+# 2. Attach the QoI tracking metric.
+# 3.
Run: +# - a few warm-up Sobol points, followed by +# - many DoE Sobol points. +# 4. Record the number of iterations performed (possibly fewer if the stopping criterion triggers early). +# +# This gives us a curve describing how the QoI estimate improves when sampling is uniform and naive. + +# %% +# Baseline: Sobol-only DoE +exp_sobol = make_exp() +exp_sobol.add_tracking_metric(QOI_METRIC) + +# Sobol generator (kept outside the loop so its internal state persists) +sobol = Models.SOBOL(search_space=exp_sobol.search_space, seed=5) + + +# Sobol generator function +def sobol_generator_run(_: Experiment) -> GeneratorRun: + return sobol.gen(1) + + +print("Running Sobol-only experiment...") +n_sobol_iters = run_trials( + experiment=exp_sobol, + warm_up_generator=sobol_generator_run, + doe_generator=sobol_generator_run, + warm_up_runs=3, + doe_runs=100, + stopping_criteria=sem_stopping_criteria, +) + +# %% [markdown] +# ### 5.3 QoI-aware DoE with `QoILookAhead` +# +# Randomly exploring the search space is wasteful when our goal is to reduce QoI +# uncertainty, not to approximate the simulator everywhere equally. +# +# To address this, Axtreme provides a custom acquisition function, `QoILookAhead`. +# +# This acquisition function: +# +# - Uses the current GP surrogate. +# - Anticipates how much adding a new training point to the surrogate will reduce the QoI uncertainty. +# - Selects the input location where this reduction is expected to be largest. +# +# Conceptually, it prioritizes simulator points that are most influential in shaping +# the extreme response distribution. +# +# This is significantly more efficient than Sobol sampling, especially when the QoI +# depends heavily on only a subset of the input space. +# +# To run QoI-aware DoE: +# +# 1. For each DoE iteration, we build a BoTorch model bridge equipped with the +# `QoILookAhead` acquisition. +# 2. We use Ax's built-in acquisition optimizer (for example, Nelder-Mead for +# robustness in non-smooth cases). +# 3. The best acquisition point becomes the next simulator evaluation. +# 4. After each point, the QoI is re-estimated and logged by the tracking metric. +# +# This creates a feedback loop where the model learns faster in QoI-critical regions. 
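+# %% [markdown]
+# For intuition about *where* such a criterion tends to sample, the cell below scores
+# a coarse grid of candidates with a deliberately simplified heuristic: the GP's
+# predictive variance of `loc` (from the Step 3 surrogate) weighted by the local
+# environment density. This is **not** the `QoILookAhead` criterion, which instead
+# anticipates the drop in QoI uncertainty from adding each candidate, but it captures
+# the same trade-off: prefer points that are both uncertain and relevant.
+
+# %%
+# Heuristic sketch: GP variance of `loc` times environment density on a 21 x 21 grid.
+bin_edges = np.linspace(0, 1, 22)
+centers = (bin_edges[:-1] + bin_edges[1:]) / 2
+grid_features = [
+    ObservationFeatures(parameters={"x1": float(a), "x2": float(b)}) for a in centers for b in centers
+]
+_, grid_cov = botorch_model_bridge.predict(grid_features)
+loc_var = np.array(grid_cov["loc"]["loc"]).reshape(21, 21)  # x1-major, matching histogram2d
+env_density, _, _ = np.histogram2d(env_data[:, 0], env_data[:, 1], bins=bin_edges, density=True)
+heuristic_score = loc_var * env_density
+best_i, best_j = np.unravel_index(np.argmax(heuristic_score), heuristic_score.shape)
+print(f"Simple heuristic would sample near x1={centers[best_i]:.2f}, x2={centers[best_j]:.2f}")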
+ +# %% +# QoI-aware DoE with QoILookAhead acquisition + +# Choose the acquisition function +acquisition_function_class = QoILookAhead + + +# Define QoILookAhead DoE generator function +def look_ahead_generator_run(experiment: Experiment) -> GeneratorRun: + """Generate a new point by optimising the QoILookAhead acquisition.""" + + # First: build a model bridge to recover the Ax→BoTorch transforms + assert experiment.optimization_config is not None + model_bridge_for_transforms = Models.BOTORCH_MODULAR( + experiment=experiment, + data=experiment.fetch_data(metrics=list(experiment.optimization_config.metrics.values())), + fit_tracking_metrics=False, + ) + + input_transform, outcome_transform = transforms.ax_to_botorch_transform_input_output( + transforms=list(model_bridge_for_transforms.transforms.values()), + outcome_names=model_bridge_for_transforms.outcomes, + ) + + # Feed the transforms into the QoI estimator so it interprets inputs/outputs correctly + qoi_estimator.input_transform = input_transform + qoi_estimator.outcome_transform = outcome_transform + + # Build a model bridge with a custom acquisition function + model_bridge_with_acq = Models.BOTORCH_MODULAR( + experiment=experiment, + data=experiment.fetch_data(), + botorch_acqf_class=acquisition_function_class, + fit_tracking_metrics=False, + acquisition_options={ + "qoi_estimator": qoi_estimator, + "sampler": sampling.MeanSampler(), + }, + ) + + # Optimise the acquisition to find the next candidate + return model_bridge_with_acq.gen( + 1, + model_gen_options={ + "optimizer_kwargs": { + "num_restarts": 20, + "raw_samples": 50, + "options": { + "with_grad": False, # QoILookAhead may not be smooth + "method": "Nelder-Mead", + "maxfev": 5, + }, + "retry_on_optimization_warning": False, + } + }, + ) + + +# Run the QoI-aware DoE +exp_look_ahead = make_exp() +exp_look_ahead.add_tracking_metric(QOI_METRIC) + +# Sobol generator for warm-up (kept outside the loop so its internal state persists) +sobol_for_warmup = Models.SOBOL(search_space=exp_look_ahead.search_space, seed=5) + + +# Sobol generator function for warm-up +def sobol_warmup_run(_: Experiment) -> GeneratorRun: + return sobol_for_warmup.gen(1) + + +print("Running QoI-aware look-ahead experiment...") +n_lookahead_iters = run_trials( + experiment=exp_look_ahead, + warm_up_generator=sobol_warmup_run, + doe_generator=look_ahead_generator_run, + warm_up_runs=3, + doe_runs=40, + stopping_criteria=sem_stopping_criteria, +) + +# %% [markdown] +# ### 5.4 Comparing Sobol and QoI-aware DoE +# +# Finally, we compare results from both strategies. +# +# Using Axtreme's `plot_qoi_estimates_from_experiment`, we visualize: +# +# - The QoI estimate at each iteration. +# - The uncertainty bands (SEM). +# - The brute-force QoI as a reference line. +# - Curves for both: +# - Sobol-only DoE, and +# - QoILookAhead-based DoE. +# +# This makes it easy to see: +# +# - How quickly each method reduces QoI uncertainty. +# - Whether either method shows bias. +# - How many simulator evaluations are required to reach a target confidence level. +# +# In typical problems, we expect: +# +# - Sobol: uncertainty decreases steadily but relatively slowly. +# - QoILookAhead: uncertainty drops much faster because it samples only where the QoI +# is sensitive. +# +# In real applications, this difference can translate to far fewer simulator evaluations +# for similar or better QoI accuracy. 
+#
+# This completes the end-to-end Axtreme workflow: from defining a simulation problem,
+# through surrogate modeling and QoI estimation, to intelligent experiment design.
+
+# %%
+# 5.4 Compare QoI evolution
+
+ax = plot_qoi_estimates_from_experiment(exp_sobol, name="Sobol")
+ax = plot_qoi_estimates_from_experiment(
+    exp_look_ahead,
+    ax=ax,
+    color="green",
+    name="QoI look-ahead",
+)
+
+ax.axhline(float(brute_force_qoi_estimate), color="black", linestyle="--", label="Brute-force QoI")
+
+ax.set_xlabel("Number of DoE iterations")
+ax.set_ylabel("QoI (median extreme response)")
+ax.legend()
+ax.grid(alpha=0.3)
+
+plt.show()
+
+# %% [markdown]
+# ### 5.5 Spatial Distribution of Selected Points
+#
+# To understand **where** each DoE strategy chooses to evaluate the simulator, we
+# visualize the selected points in the 2D input space overlaid on:
+#
+# 1. **Environment density**: Shows the distribution of environment samples
+#    (grey histogram)
+# 2. **Extreme response density**: Shows where extreme responses originated during
+#    brute-force simulation (purple histogram)
+# 3. **Combined view**: Both densities overlaid to reveal the relationship
+#    (extreme responses shown in red here)
+#
+# Key insights:
+#
+# - **Sobol points (blue circles)**: Distributed uniformly across the entire input
+#   space, regardless of where extremes occur
+# - **QoI-aware points (red triangles)**: Concentrated in regions that contribute most
+#   to extreme responses
+#
+# The point labels indicate the order in which they were selected during DoE.
+#
+# This visualization reveals why QoI-aware DoE is more efficient: it focuses simulator
+# evaluations on the critical regions of the input space where the QoI is most
+# sensitive.
+
+
+# %%
+def plot_2dtrials(
+    exp: Experiment, ax: Axes | None = None, colour: str = "blue", marker: str = "o", label: str | None = None
+) -> Axes:
+    """Plot the points and number the datapoints added over DoE."""
+    if ax is None:
+        _, ax = plt.subplots(figsize=(8, 6))
+
+    trials = []
+    trial_indices = []
+
+    for trial_idx, trial in exp.trials.items():
+        if isinstance(trial, Trial) and trial.arm:
+            params = trial.arm.parameters.values()
+            trials.append(list(params))
+            trial_indices.append(trial_idx)
+
+    points = np.array(trials)
+    ax.scatter(points[:, 0], points[:, 1], alpha=0.8, s=50, label=label, c=colour, marker=marker)
+
+    for i, (x, y) in enumerate(points):
+        ax.annotate(str(trial_indices[i]), (x, y), xytext=(2, 2), textcoords="offset points", fontsize=8, color=colour)
+
+    return ax
+
+
+# Create three-panel visualization
+fig, axes = plt.subplots(1, 3, figsize=(15, 5))
+
+# Panel 1: Points vs Environment Density
+axes[0].set_title("Points vs Environment Density")
+axes[0].hist2d(env_data[:, 0], env_data[:, 1], bins=20, alpha=0.6, cmap="Greys", zorder=-1)
+
+# Panel 2: Points vs Extreme Response Density
+axes[1].set_title("Points vs Extreme Response Density")
+axes[1].hist2d(precalced_erd_x[:, 0], precalced_erd_x[:, 1], bins=40, alpha=0.6, cmap="Purples", zorder=-1)
+
+# Panel 3: Points vs Combined Density
+axes[2].set_title("Points vs Env and Extreme Response Density")
+axes[2].hist2d(env_data[:, 0], env_data[:, 1], bins=20, alpha=0.6, cmap="Greys", zorder=-1)
+axes[2].hist2d(precalced_erd_x[:, 0], precalced_erd_x[:, 1], bins=40, alpha=0.5, cmap="Reds", zorder=-1)
+
+# Add DoE points to all panels
+for ax in axes:
+    ax.set_xlabel("x1")
+    ax.set_ylabel("x2")
+    plot_2dtrials(exp_sobol, colour="blue", ax=ax, label="Sobol")
+    plot_2dtrials(exp_look_ahead, colour="red", ax=ax, label="QoI look-ahead", marker="^")
+    ax.legend()
+    ax.grid(visible=True, alpha=0.3)
+
+plt.tight_layout()
+plt.show()
diff --git a/tutorials/img/axtreme_workflow.png b/tutorials/img/axtreme_workflow.png
new file mode 100644
index 00000000..795830a2
Binary files /dev/null and b/tutorials/img/axtreme_workflow.png differ