From da4f376e20c64fa338f6e28564ee17d12cf73ce2 Mon Sep 17 00:00:00 2001
From: eddiebergman
Date: Thu, 29 Aug 2024 19:40:59 +0200
Subject: [PATCH] optim: Switch to just additive kernel

---
 .../bayesian_optimization/models/gp.py | 48 +++++++++----------
 1 file changed, 23 insertions(+), 25 deletions(-)

diff --git a/neps/optimizers/bayesian_optimization/models/gp.py b/neps/optimizers/bayesian_optimization/models/gp.py
index 307f806b..d8709c2a 100644
--- a/neps/optimizers/bayesian_optimization/models/gp.py
+++ b/neps/optimizers/bayesian_optimization/models/gp.py
@@ -5,12 +5,12 @@
 from functools import reduce
 from typing import TYPE_CHECKING, Any, Mapping, TypeVar

+from botorch.models import MultiTaskGP
 import gpytorch
 import gpytorch.constraints
 import torch
 from botorch.acquisition.analytic import SingleTaskGP
-from botorch.models import MixedSingleTaskGP
-from botorch.models.gp_regression_mixed import CategoricalKernel, Likelihood
+from botorch.models.gp_regression_mixed import CategoricalKernel, Likelihood, MixedSingleTaskGP
 from botorch.models.transforms.outcome import Standardize
 from botorch.optim import optimize_acqf, optimize_acqf_mixed
 from gpytorch.kernels import MaternKernel, ScaleKernel
@@ -190,31 +190,29 @@ def default_single_obj_gp(
         return gp, likelihood

     # Mixed
-    def cont_kernel_factory(
-        batch_shape: torch.Size,
-        ard_num_dims: int,
-        active_dims: list[int],
-    ) -> ScaleKernel:
-        lengthscale_prior, lengthscale_constraint = default_lengthscale_prior(
-            ard_num_dims
-        )
-        return ScaleKernel(
-            MaternKernel(
-                nu=2.5,
-                batch_shape=batch_shape,
-                ard_num_dims=ard_num_dims,
-                active_dims=active_dims,
-                lengthscale_prior=lengthscale_prior,
-                lengthscale_constraint=lengthscale_constraint,
-            ),
-        )
+    numeric_kernel = default_matern_kernel(len(numerics), active_dims=tuple(numerics))
+    cat_kernel = default_categorical_kernel(
+        len(categoricals), active_dims=tuple(categoricals)
+    )
+
+    # WARNING: I previously tried MixedSingleTaskGP, which uses the kernel:
+    #
+    #   K((x1, c1), (x2, c2)) =
+    #       K_cont_1(x1, x2) + K_cat_1(c1, c2)
+    #       + K_cont_2(x1, x2) * K_cat_2(c1, c2)
+    #
+    # In a toy example with a single binary categorical, which acted like F * {0, 1},
+    # the model collapsed to always predicting `0`, causing all parameters defining F
+    # to essentially be guessed at random. The additive kernel is a lot more stable in testing...
+    # TODO: Figure out why...
+    kernel = numeric_kernel + cat_kernel

-    gp = MixedSingleTaskGP(
+    gp = SingleTaskGP(
         train_X=x.tensor,
         train_Y=y,
-        cat_dims=categoricals,
+        mean_module=default_mean(),
         likelihood=likelihood,
-        cont_kernel_factory=cont_kernel_factory,
+        covar_module=kernel,
         outcome_transform=Standardize(m=1),
     )
     return gp, likelihood
@@ -232,8 +230,8 @@ def optimize_acq(
 ) -> tuple[torch.Tensor, torch.Tensor]:
     acq_options = acq_options or {}

-    lower = [domain.lower for domain in encoder.domains.values()]
-    upper = [domain.upper for domain in encoder.domains.values()]
+    lower = [domain.lower for domain in encoder.domains]
+    upper = [domain.upper for domain in encoder.domains]
     bounds = torch.tensor([lower, upper], dtype=torch.float)

     cat_transformers = {
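
Note (not part of the patch): below is a minimal, self-contained sketch of the additive-kernel model this change constructs, useful for reading the diff without the surrounding file. It replaces the repository helpers (default_matern_kernel, default_categorical_kernel, default_mean, the encoder, and its likelihood setup) with plain gpytorch/botorch equivalents, and the column layout (two numeric columns, one integer-encoded categorical column) and toy data are assumptions for illustration only.

import torch
from botorch.fit import fit_gpytorch_mll
from botorch.models import SingleTaskGP
from botorch.models.gp_regression_mixed import CategoricalKernel
from botorch.models.transforms.outcome import Standardize
from gpytorch.kernels import MaternKernel, ScaleKernel
from gpytorch.mlls import ExactMarginalLogLikelihood

# Assumed column layout: in the patch these index lists come from the encoder.
numerics = [0, 1]      # columns holding numeric parameters
categoricals = [2]     # columns holding integer-encoded categoricals

# Matern kernel over the numeric columns only.
numeric_kernel = ScaleKernel(
    MaternKernel(nu=2.5, ard_num_dims=len(numerics), active_dims=tuple(numerics))
)
# Hamming-distance-based kernel over the categorical columns only.
cat_kernel = ScaleKernel(
    CategoricalKernel(ard_num_dims=len(categoricals), active_dims=tuple(categoricals))
)

# Additive structure used by the patch:
#   K(x, x') = K_matern(x_num, x'_num) + K_cat(x_cat, x'_cat)
kernel = numeric_kernel + cat_kernel

# Toy training data: 10 points, two numeric dims in [0, 1], one binary categorical.
train_X = torch.cat(
    [torch.rand(10, 2, dtype=torch.float64),
     torch.randint(0, 2, (10, 1)).to(torch.float64)],
    dim=-1,
)
train_Y = torch.rand(10, 1, dtype=torch.float64)

# A plain SingleTaskGP with the additive kernel; the patch additionally passes its
# own mean module and likelihood, omitted here.
gp = SingleTaskGP(
    train_X=train_X,
    train_Y=train_Y,
    covar_module=kernel,
    outcome_transform=Standardize(m=1),
)
fit_gpytorch_mll(ExactMarginalLogLikelihood(gp.likelihood, gp))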