diff --git a/docs/source/variational.rst b/docs/source/variational.rst index 73354370a..cef92fd92 100644 --- a/docs/source/variational.rst +++ b/docs/source/variational.rst @@ -98,19 +98,23 @@ These are special :obj:`~gpytorch.variational._VariationalStrategy` objects that :obj:`~gpytorch.distributions.MultitaskMultivariateNormal` distributions. Each of these objects acts on a batch of approximate GPs. - -:hidden:`IndependentMultitaskVariationalStrategy` +:hidden:`LMCVariationalStrategy` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. autoclass:: IndependentMultitaskVariationalStrategy +.. autoclass:: LMCVariationalStrategy :members: -:hidden:`LMCVariationalStrategy` + .. automethod:: __call__ + + +:hidden:`IndependentMultitaskVariationalStrategy` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. autoclass:: LMCVariationalStrategy +.. autoclass:: IndependentMultitaskVariationalStrategy :members: + .. automethod:: __call__ + Variational Distributions ----------------------------- diff --git a/examples/04_Variational_and_Approximate_GPs/SVGP_Multitask_GP_Regression.ipynb b/examples/04_Variational_and_Approximate_GPs/SVGP_Multitask_GP_Regression.ipynb index 1eabd15d5..fd5e2433c 100644 --- a/examples/04_Variational_and_Approximate_GPs/SVGP_Multitask_GP_Regression.ipynb +++ b/examples/04_Variational_and_Approximate_GPs/SVGP_Multitask_GP_Regression.ipynb @@ -176,7 +176,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -223,6 +223,64 @@ "Note that all the batch sizes for `IndependentMultitaskVariationalStrategy` are now `num_tasks` rather than `num_latents`." ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Output modes\n", + "\n", + "By default, `LMCVariationalStrategy` and `IndependentMultitaskVariationalStrategy` produce vector-valued outputs. 
In other words, they return a `MultitaskMultivariateNormal` distribution -- containing all task values for each input.\n", + "\n", + "This is similar to the ExactGP model described in the [multitask GP regression tutorial](../03_Multitask_Exact_GPs/Multitask_GP_Regression.ipynb)." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MultitaskMultivariateNormal torch.Size([100, 4])\n" + ] + } + ], + "source": [ + "output = model(train_x)\n", + "print(output.__class__.__name__, output.event_shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alternatively, if each input is only associated **with a single task**, passing in the `task_indices` argument will specify which task to return for each input. The result will be a standard `MultivariateNormal` distribution -- where each output corresponds to each input's specified task.\n", + "\n", + "This is similar to the ExactGP model described in the [Hadamard multitask GP regression tutorial](../03_Multitask_Exact_GPs/Hadamard_Multitask_GP_Regression.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MultivariateNormal torch.Size([5])\n" + ] + } + ], + "source": [ + "x = train_x[..., :5]\n", + "task_indices = torch.LongTensor([0, 1, 3, 2, 2])\n", + "output = model(x, task_indices=task_indices)\n", + "print(output.__class__.__name__, output.event_shape)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -234,20 +292,28 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 8, "metadata": { "scrolled": false }, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/gpleiss/miniconda3/envs/gpytorch/lib/python3.7/site-packages/ipykernel_launcher.py:20: TqdmDeprecationWarning: This function will be removed in tqdm==5.0.0\n", + 
"Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`\n" + ] + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "4b82e029261e40d58aafeeb640063467", + "model_id": "fffd43a1fd554e129b68581b060b0755", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "HBox(children=(IntProgress(value=0, description='Epoch', max=500, style=ProgressStyle(description_width='initiā€¦" + "HBox(children=(HTML(value='Epoch'), FloatProgress(value=0.0, max=500.0), HTML(value='')))" ] }, "metadata": {}, @@ -301,14 +367,14 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 9, "metadata": { "scrolled": true }, "outputs": [ { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] diff --git a/gpytorch/variational/independent_multitask_variational_strategy.py b/gpytorch/variational/independent_multitask_variational_strategy.py index 1c4edf861..9fbb461c3 100644 --- a/gpytorch/variational/independent_multitask_variational_strategy.py +++ b/gpytorch/variational/independent_multitask_variational_strategy.py @@ -2,7 +2,10 @@ import warnings -from ..distributions import MultitaskMultivariateNormal +import torch + +from ..distributions import MultitaskMultivariateNormal, MultivariateNormal +from ..lazy import RootLazyTensor from ..module import Module from ._variational_strategy import _VariationalStrategy @@ -10,9 +13,12 @@ class IndependentMultitaskVariationalStrategy(_VariationalStrategy): """ IndependentMultitaskVariationalStrategy wraps an existing - :obj:`~gpytorch.variational.VariationalStrategy` - to produce a :obj:`~gpytorch.variational.MultitaskMultivariateNormal` distribution. - All outputs will be independent of one another. + :obj:`~gpytorch.variational.VariationalStrategy` to produce vector-valued (multi-task) + output distributions. Each task will be independent of one another. + + The output will either be a :obj:`~gpytorch.distributions.MultitaskMultivariateNormal` distribution + (if we wish to evaluate all tasks for each input) or a :obj:`~gpytorch.distributions.MultivariateNormal` + (if we wish to evaluate a single task for each input). The base variational strategy is assumed to operate on a batch of GPs. One of the batch dimensions corresponds to the multiple tasks. @@ -43,19 +49,46 @@ def variational_params_initialized(self): def kl_divergence(self): return super().kl_divergence().sum(dim=-1) - def __call__(self, x, prior=False, **kwargs): + def __call__(self, x, task_indices=None, prior=False, **kwargs): + r""" + See :class:`LMCVariationalStrategy`. 
+ """ function_dist = self.base_variational_strategy(x, prior=prior, **kwargs) - if ( - self.task_dim > 0 - and self.task_dim > len(function_dist.batch_shape) - or self.task_dim < 0 - and self.task_dim + len(function_dist.batch_shape) < 0 - ): - return MultitaskMultivariateNormal.from_repeated_mvn(function_dist, num_tasks=self.num_tasks) + + if task_indices is None: + # Every data point will get an output for each task + if ( + self.task_dim > 0 + and self.task_dim > len(function_dist.batch_shape) + or self.task_dim < 0 + and self.task_dim + len(function_dist.batch_shape) < 0 + ): + return MultitaskMultivariateNormal.from_repeated_mvn(function_dist, num_tasks=self.num_tasks) + else: + function_dist = MultitaskMultivariateNormal.from_batch_mvn(function_dist, task_dim=self.task_dim) + assert function_dist.event_shape[-1] == self.num_tasks + return function_dist + else: - function_dist = MultitaskMultivariateNormal.from_batch_mvn(function_dist, task_dim=self.task_dim) - assert function_dist.event_shape[-1] == self.num_tasks - return function_dist + # Each data point will get a single output corresponding to a single task + + if self.task_dim > 0: + raise RuntimeError(f"task_dim must be a negative indexed batch dimension: got {self.task_dim}.") + num_batch = len(function_dist.batch_shape) + task_dim = num_batch + self.task_dim + + # Expand task_indices to the batch + event shape (task dim set to 1), then squeeze out the task dim + shape = list(function_dist.batch_shape + function_dist.event_shape) + shape[task_dim] = 1 + task_indices = task_indices.expand(shape).squeeze(task_dim) + + # Create a mask to choose specific task assignment + task_mask = torch.nn.functional.one_hot(task_indices, num_classes=self.num_tasks) + task_mask = task_mask.permute(*range(0, task_dim), *range(task_dim + 1, num_batch + 1), task_dim) + + mean = (function_dist.mean * task_mask).sum(task_dim) + covar = (function_dist.lazy_covariance_matrix * RootLazyTensor(task_mask[..., None])).sum(task_dim) + return MultivariateNormal(mean, covar) class 
MultitaskVariationalStrategy(IndependentMultitaskVariationalStrategy): diff --git a/gpytorch/variational/lmc_variational_strategy.py b/gpytorch/variational/lmc_variational_strategy.py index b3c43d25e..1e215affb 100644 --- a/gpytorch/variational/lmc_variational_strategy.py +++ b/gpytorch/variational/lmc_variational_strategy.py @@ -2,12 +2,34 @@ import torch -from ..distributions import MultitaskMultivariateNormal -from ..lazy import KroneckerProductLazyTensor, MatmulLazyTensor +from .. import settings +from ..distributions import MultitaskMultivariateNormal, MultivariateNormal +from ..lazy import KroneckerProductLazyTensor, RootLazyTensor from ..module import Module +from ..utils.broadcasting import _mul_broadcast_shape +from ..utils.interpolation import left_interp from ._variational_strategy import _VariationalStrategy +def _select_lmc_coefficients(lmc_coefficients: torch.Tensor, indices: torch.LongTensor) -> torch.Tensor: + """ + Given a list of indices for ... x N datapoints, + select the row from lmc_coefficients that corresponds to each datapoint + + lmc_coefficients: torch.Tensor ... x num_latents x ... x num_tasks + indices: torch.Tensor ... 
x N + """ + batch_shape = _mul_broadcast_shape(lmc_coefficients.shape[:-1], indices.shape[:-1]) + + # We will use the left_interp helper to do the indexing + lmc_coefficients = lmc_coefficients.expand(*batch_shape, lmc_coefficients.shape[-1])[..., None] + indices = indices.expand(*batch_shape, indices.shape[-1])[..., None] + res = left_interp( + indices, torch.ones(indices.shape, dtype=torch.long, device=indices.device), lmc_coefficients, + ).squeeze(-1) + return res + + class LMCVariationalStrategy(_VariationalStrategy): r""" LMCVariationalStrategy is an implementation of the "Linear Model of Coregionalization" @@ -20,8 +42,11 @@ class LMCVariationalStrategy(_VariationalStrategy): f_{\text{task } i}( \mathbf x) = \sum_{q=1}^Q a_i^{(q)} g^{(q)} ( \mathbf x ) - LMCVariationalStrategy wraps an existing :obj:`~gpytorch.variational.VariationalStrategy` - to produce a :obj:`~gpytorch.variational.MultitaskMultivariateNormal` distribution. + LMCVariationalStrategy wraps an existing :obj:`~gpytorch.variational.VariationalStrategy`. + The output will either be a :obj:`~gpytorch.distributions.MultitaskMultivariateNormal` distribution + (if we wish to evaluate all tasks for each input) or a :obj:`~gpytorch.distributions.MultivariateNormal` + (if we wish to evaluate a single task for each input). + The base variational strategy is assumed to operate on a multi-batch of GPs, where one of the batch dimensions corresponds to the latent function dimension. @@ -35,13 +60,6 @@ class LMCVariationalStrategy(_VariationalStrategy): batch shape. This would correspond to each of the latent functions having different kernels or the same kernel, respectivly. 
- :param ~gpytorch.variational.VariationalStrategy base_variational_strategy: Base variational strategy - :param int num_tasks: The total number of tasks (output functions) - :param int num_latents: The total number of latent functions in each group - :param latent_dim: (Default: -1) Which batch dimension corresponds to the latent function batch. - **Must be negative indexed** - :type latent_dim: `int` < 0 - Example: >>> class LMCMultitaskGP(gpytorch.models.ApproximateGP): >>> ''' @@ -74,7 +92,13 @@ class LMCVariationalStrategy(_VariationalStrategy): >>> batch_shape=torch.Size([3]), >>> ) >>> - >>> # Model output: n x 5 + + :param ~gpytorch.variational.VariationalStrategy base_variational_strategy: Base variational strategy + :param int num_tasks: The total number of tasks (output functions) + :param int num_latents: The total number of latent functions in each group + :param latent_dim: (Default: -1) Which batch dimension corresponds to the latent function batch. + **Must be negative indexed** + :type latent_dim: `int` < 0 """ def __init__( @@ -120,28 +144,84 @@ def variational_params_initialized(self): def kl_divergence(self): return super().kl_divergence().sum(dim=self.latent_dim) - def __call__(self, x, prior=False, **kwargs): - function_dist = self.base_variational_strategy(x, prior=prior, **kwargs) - lmc_coefficients = self.lmc_coefficients.expand(*function_dist.batch_shape, self.lmc_coefficients.size(-1)) - num_batch = len(function_dist.batch_shape) - num_dim = num_batch + len(function_dist.event_shape) - latent_dim = num_batch + self.latent_dim if self.latent_dim is not None else None - - # Mean - mean = function_dist.mean.permute(*range(0, latent_dim), *range(latent_dim + 1, num_dim), latent_dim) - mean = mean @ lmc_coefficients.permute( - *range(0, latent_dim), *range(latent_dim + 1, num_dim - 1), latent_dim, -1 - ) - - # Covar - covar = function_dist.lazy_covariance_matrix - lmc_factor = MatmulLazyTensor(lmc_coefficients.unsqueeze(-1), 
lmc_coefficients.unsqueeze(-2)) - covar = KroneckerProductLazyTensor(covar, lmc_factor) - covar = covar.sum(latent_dim) - - # Add a bit of jitter to make the covar PD - covar = covar.add_jitter(1e-6) - - # Done! - function_dist = MultitaskMultivariateNormal(mean, covar) + def __call__(self, x, task_indices=None, prior=False, **kwargs): + r""" + Computes the variational (or prior) distribution + :math:`q( \mathbf f \mid \mathbf X)` (or :math:`p( \mathbf f \mid \mathbf X)`). + There are two modes: + + 1. Compute **all tasks** for all inputs. + If this is the case, the :attr:`task_indices` argument should be None. + The return type will be a (... x N x num_tasks) + :class:`~gpytorch.distributions.MultitaskMultivariateNormal`. + 2. Compute **one task** per input. + If this is the case, the (... x N) :attr:`task_indices` tensor should contain + the indices of each input's assigned task. + The return type will be a (... x N) + :class:`~gpytorch.distributions.MultivariateNormal`. + + :param x: Input locations to evaluate variational strategy + :type x: torch.Tensor (... x N x D) + :param task_indices: (Default: None) Task index associated with each input. + If this **is not** provided, then the returned distribution evaluates every input on every task + (returns :class:`~gpytorch.distributions.MultitaskMultivariateNormal`). + If this **is** provided, then the returned distribution evaluates each input only on its assigned task + (returns :class:`~gpytorch.distributions.MultivariateNormal`). + :type task_indices: torch.Tensor (... x N), optional + :param prior: (Default: False) If False, returns the variational distribution + :math:`q( \mathbf f \mid \mathbf X)`. + If True, returns the prior distribution + :math:`p( \mathbf f \mid \mathbf X)`. + :type prior: bool + :return: :math:`q( \mathbf f \mid \mathbf X)` (or the prior), + either for all tasks (if `task_indices is None`) + or for a specific task (if `task_indices is not None`). 
+ :rtype: ~gpytorch.distributions.MultitaskMultivariateNormal (... x N x num_tasks) + or ~gpytorch.distributions.MultivariateNormal (... x N) + """ + latent_dist = self.base_variational_strategy(x, prior=prior, **kwargs) + num_batch = len(latent_dist.batch_shape) + latent_dim = num_batch + self.latent_dim + + if task_indices is None: + num_dim = num_batch + len(latent_dist.event_shape) + + # Every data point will get an output for each task + # Therefore, we will set up the lmc_coefficients shape for a matmul + lmc_coefficients = self.lmc_coefficients.expand(*latent_dist.batch_shape, self.lmc_coefficients.size(-1)) + + # Mean: ... x N x num_tasks + latent_mean = latent_dist.mean.permute(*range(0, latent_dim), *range(latent_dim + 1, num_dim), latent_dim) + mean = latent_mean @ lmc_coefficients.permute( + *range(0, latent_dim), *range(latent_dim + 1, num_dim - 1), latent_dim, -1 + ) + + # Covar: ... x (N x num_tasks) x (N x num_tasks) + latent_covar = latent_dist.lazy_covariance_matrix + lmc_factor = RootLazyTensor(lmc_coefficients.unsqueeze(-1)) + covar = KroneckerProductLazyTensor(latent_covar, lmc_factor).sum(latent_dim) + # Add a bit of jitter to make the covar PD + covar = covar.add_jitter(settings.cholesky_jitter.value(dtype=mean.dtype)) + + # Done! + function_dist = MultitaskMultivariateNormal(mean, covar) + + else: + # Each data point will get a single output corresponding to a single task + # Therefore, we will select the appropriate lmc coefficients for each task + lmc_coefficients = _select_lmc_coefficients(self.lmc_coefficients, task_indices) + + # Mean: ... x N + mean = (latent_dist.mean * lmc_coefficients).sum(latent_dim) + + # Covar: ... x N x N + latent_covar = latent_dist.lazy_covariance_matrix + lmc_factor = RootLazyTensor(lmc_coefficients.unsqueeze(-1)) + covar = (latent_covar * lmc_factor).sum(latent_dim) + # Add a bit of jitter to make the covar PD + covar = covar.add_jitter(settings.cholesky_jitter.value(dtype=mean.dtype)) + + # Done! 
+ function_dist = MultivariateNormal(mean, covar) + return function_dist diff --git a/test/examples/test_lmc_svgp_regression.py b/test/examples/test_lmc_svgp_regression.py index c2958e3bc..822bbecb0 100644 --- a/test/examples/test_lmc_svgp_regression.py +++ b/test/examples/test_lmc_svgp_regression.py @@ -7,7 +7,7 @@ import gpytorch import torch -from gpytorch.likelihoods import MultitaskGaussianLikelihood +from gpytorch.likelihoods import GaussianLikelihood, MultitaskGaussianLikelihood # Batch training test: Let's learn hyperparameters on a sine dataset, but test on a sine dataset and a cosine dataset @@ -75,7 +75,6 @@ def tearDown(self): torch.set_rng_state(self.rng_state) def test_train_and_eval(self): - # We're manually going to set the hyperparameters to something they shouldn't be likelihood = MultitaskGaussianLikelihood(num_tasks=4) model = LMCModel() @@ -132,6 +131,57 @@ def test_train_and_eval(self): self.assertEqual(lower.shape, train_y.shape) self.assertEqual(upper.shape, train_y.shape) + def test_indexed_train_and_eval(self): + likelihood = GaussianLikelihood() + model = LMCModel() + + # Find optimal model hyperparameters + model.train() + likelihood.train() + optimizer = torch.optim.Adam([ + {'params': model.parameters()}, + {'params': likelihood.parameters()}, + ], lr=0.01) + + # Our loss object. We're using the VariationalELBO, which essentially just computes the ELBO + mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=train_y.size(0)) + + # Create some task indices + arange = torch.arange(train_x.size(0)) + train_i = torch.rand(train_x.size(0)).mul(4).floor().long() + + # We use more CG iterations here because the preconditioner introduced in the NeurIPS paper seems to be less + # effective for VI. 
+ for i in range(400): + # Within each iteration, we will go over each minibatch of data + optimizer.zero_grad() + output = model(train_x, task_indices=train_i) + loss = -mll(output, train_y[arange, train_i]) + loss.backward() + optimizer.step() + + for param in model.parameters(): + self.assertTrue(param.grad is not None) + self.assertGreater(param.grad.norm().item(), 0) + for param in likelihood.parameters(): + self.assertTrue(param.grad is not None) + self.assertGreater(param.grad.norm().item(), 0) + + # Test the model + model.eval() + likelihood.eval() + + # Make predictions for both sets of test points, and check MAEs. + with torch.no_grad(), gpytorch.settings.max_eager_kernel_size(1): + predictions = likelihood(model(train_x, task_indices=train_i)) + mean_abs_error = torch.mean(torch.abs(train_y[arange, train_i] - predictions.mean)) + self.assertLess(mean_abs_error.squeeze().item(), 0.15) + + # Smoke test for getting predictive uncertainties + lower, upper = predictions.confidence_region() + self.assertEqual(lower.shape, train_i.shape) + self.assertEqual(upper.shape, train_i.shape) + if __name__ == "__main__": unittest.main() diff --git a/test/variational/test_independent_multitask_variational_strategy.py b/test/variational/test_independent_multitask_variational_strategy.py index c04e6e018..ab88f52ed 100644 --- a/test/variational/test_independent_multitask_variational_strategy.py +++ b/test/variational/test_independent_multitask_variational_strategy.py @@ -8,10 +8,14 @@ from gpytorch.test.variational_test_case import VariationalTestCase -def likelihood_cls(): +def multitask_likelihood_cls(): return gpytorch.likelihoods.MultitaskGaussianLikelihood(num_tasks=2) +def singletask_likelihood_cls(): + return gpytorch.likelihoods.GaussianLikelihood() + + def strategy_cls(model, inducing_points, variational_distribution, learn_inducing_locations): return gpytorch.variational.IndependentMultitaskVariationalStrategy( gpytorch.variational.VariationalStrategy( @@ -36,7 
+40,7 @@ def distribution_cls(self): @property def likelihood_cls(self): - return likelihood_cls + return multitask_likelihood_cls @property def mll_cls(self): @@ -49,22 +53,12 @@ def strategy_cls(self): def test_training_iteration(self, *args, expected_batch_shape=None, **kwargs): expected_batch_shape = expected_batch_shape or self.batch_shape expected_batch_shape = expected_batch_shape[:-1] - cg_mock, cholesky_mock, ciq_mock = super().test_training_iteration( - *args, expected_batch_shape=expected_batch_shape, **kwargs - ) - self.assertFalse(cg_mock.called) - self.assertFalse(ciq_mock.called) - self.assertEqual(cholesky_mock.call_count, 2) # One for each forward pass + super().test_training_iteration(*args, expected_batch_shape=expected_batch_shape, **kwargs) def test_eval_iteration(self, *args, expected_batch_shape=None, **kwargs): expected_batch_shape = expected_batch_shape or self.batch_shape expected_batch_shape = expected_batch_shape[:-1] - cg_mock, cholesky_mock, ciq_mock = super().test_eval_iteration( - *args, expected_batch_shape=expected_batch_shape, **kwargs - ) - self.assertFalse(cg_mock.called) - self.assertFalse(ciq_mock.called) - self.assertEqual(cholesky_mock.call_count, 1) # One to compute cache, that's it! 
+ super().test_eval_iteration(*args, expected_batch_shape=expected_batch_shape, **kwargs) class TestMultitaskPredictiveGP(TestMultitaskVariationalGP): @@ -115,5 +109,115 @@ def distribution_cls(self): return gpytorch.variational.DeltaVariationalDistribution +class TestIndexedMultitaskVariationalGP(TestMultitaskVariationalGP, unittest.TestCase): + def _training_iter( + self, model, likelihood, batch_shape=torch.Size([]), mll_cls=gpytorch.mlls.VariationalELBO, cuda=False + ): + batch_shape = list(batch_shape) + batch_shape[-1] = 1 + train_x = torch.randn(*batch_shape, 32, 2).clamp(-2.5, 2.5) + train_i = torch.rand(*batch_shape, 32).round().long() + train_y = torch.linspace(-1, 1, self.event_shape[0]) + train_y = train_y.view(self.event_shape[0], *([1] * (len(self.event_shape) - 1))) + train_y = train_y.expand(*self.event_shape) + mll = mll_cls(likelihood, model, num_data=train_x.size(-2)) + if cuda: + train_x = train_x.cuda() + train_i = train_i.cuda() + train_y = train_y.cuda() + model = model.cuda() + likelihood = likelihood.cuda() + + # Single optimization iteration + model.train() + likelihood.train() + output = model(train_x, task_indices=train_i) + loss = -mll(output, train_y) + loss.sum().backward() + + # Make sure we have gradients for all parameters + for _, param in model.named_parameters(): + self.assertTrue(param.grad is not None) + self.assertGreater(param.grad.norm().item(), 0) + for _, param in likelihood.named_parameters(): + self.assertTrue(param.grad is not None) + self.assertGreater(param.grad.norm().item(), 0) + + return output, loss + + def _eval_iter(self, model, batch_shape=torch.Size([]), cuda=False): + batch_shape = list(batch_shape) + batch_shape[-1] = 1 + test_x = torch.randn(*batch_shape, 32, 2).clamp(-2.5, 2.5) + test_i = torch.rand(*batch_shape, 32).round().long() + if cuda: + test_x = test_x.cuda() + test_i = test_i.cuda() + model = model.cuda() + + # Single optimization iteration + model.eval() + with torch.no_grad(): + output = 
model(test_x, task_indices=test_i) + + return output + + @property + def event_shape(self): + return torch.Size([32]) + + @property + def likelihood_cls(self): + return singletask_likelihood_cls + + +class TestIndexedMultitaskPredictiveGP(TestIndexedMultitaskVariationalGP): + @property + def mll_cls(self): + return gpytorch.mlls.PredictiveLogLikelihood + + +class TestIndexedMultitaskRobustVGP(TestIndexedMultitaskVariationalGP): + @property + def mll_cls(self): + return gpytorch.mlls.GammaRobustVariationalELBO + + +class TestMeanFieldIndexedMultitaskVariationalGP(TestIndexedMultitaskVariationalGP): + @property + def distribution_cls(self): + return gpytorch.variational.MeanFieldVariationalDistribution + + +class TestMeanFieldIndexedMultitaskPredictiveGP(TestIndexedMultitaskPredictiveGP): + @property + def distribution_cls(self): + return gpytorch.variational.MeanFieldVariationalDistribution + + +class TestMeanFieldIndexedMultitaskRobustVGP(TestIndexedMultitaskRobustVGP): + @property + def distribution_cls(self): + return gpytorch.variational.MeanFieldVariationalDistribution + + +class TestDeltaIndexedMultitaskVariationalGP(TestIndexedMultitaskVariationalGP): + @property + def distribution_cls(self): + return gpytorch.variational.DeltaVariationalDistribution + + +class TestDeltaIndexedMultitaskPredictiveGP(TestIndexedMultitaskPredictiveGP): + @property + def distribution_cls(self): + return gpytorch.variational.DeltaVariationalDistribution + + +class TestDeltaIndexedMultitaskRobustVGP(TestIndexedMultitaskRobustVGP): + @property + def distribution_cls(self): + return gpytorch.variational.DeltaVariationalDistribution + + if __name__ == "__main__": unittest.main() diff --git a/test/variational/test_lmc_variational_strategy.py b/test/variational/test_lmc_variational_strategy.py index e1d9ad10a..2e5255200 100644 --- a/test/variational/test_lmc_variational_strategy.py +++ b/test/variational/test_lmc_variational_strategy.py @@ -8,10 +8,14 @@ from 
gpytorch.test.variational_test_case import VariationalTestCase -def likelihood_cls(): +def multitask_likelihood_cls(): return gpytorch.likelihoods.MultitaskGaussianLikelihood(num_tasks=4) +def singletask_likelihood_cls(): + return gpytorch.likelihoods.GaussianLikelihood() + + def strategy_cls(model, inducing_points, variational_distribution, learn_inducing_locations): return gpytorch.variational.LMCVariationalStrategy( gpytorch.variational.VariationalStrategy( @@ -38,7 +42,7 @@ def distribution_cls(self): @property def likelihood_cls(self): - return likelihood_cls + return multitask_likelihood_cls @property def mll_cls(self): @@ -167,5 +171,111 @@ def distribution_cls(self): return gpytorch.variational.DeltaVariationalDistribution +class TestIndexedLMCVariationalGP(TestLMCVariationalGP, unittest.TestCase): + def _training_iter( + self, model, likelihood, batch_shape=torch.Size([]), mll_cls=gpytorch.mlls.VariationalELBO, cuda=False + ): + train_x = torch.randn(*batch_shape, 32, 2).clamp(-2.5, 2.5) + train_i = torch.rand(*batch_shape, 32).round().long() + train_y = torch.linspace(-1, 1, self.event_shape[0]) + train_y = train_y.view(self.event_shape[0], *([1] * (len(self.event_shape) - 1))) + train_y = train_y.expand(*self.event_shape) + mll = mll_cls(likelihood, model, num_data=train_x.size(-2)) + if cuda: + train_x = train_x.cuda() + train_i = train_i.cuda() + train_y = train_y.cuda() + model = model.cuda() + likelihood = likelihood.cuda() + + # Single optimization iteration + model.train() + likelihood.train() + output = model(train_x, task_indices=train_i) + loss = -mll(output, train_y) + loss.sum().backward() + + # Make sure we have gradients for all parameters + for _, param in model.named_parameters(): + self.assertTrue(param.grad is not None) + self.assertGreater(param.grad.norm().item(), 0) + for _, param in likelihood.named_parameters(): + self.assertTrue(param.grad is not None) + self.assertGreater(param.grad.norm().item(), 0) + + return output, loss + + 
def _eval_iter(self, model, batch_shape=torch.Size([]), cuda=False): + test_x = torch.randn(*batch_shape, 32, 2).clamp(-2.5, 2.5) + test_i = torch.rand(*batch_shape, 32).round().long() + if cuda: + test_x = test_x.cuda() + test_i = test_i.cuda() + model = model.cuda() + + # Single optimization iteration + model.eval() + with torch.no_grad(): + output = model(test_x, task_indices=test_i) + + return output + + @property + def event_shape(self): + return torch.Size([32]) + + @property + def likelihood_cls(self): + return singletask_likelihood_cls + + +class TestIndexedLMCPredictiveGP(TestIndexedLMCVariationalGP): + @property + def mll_cls(self): + return gpytorch.mlls.PredictiveLogLikelihood + + +class TestIndexedLMCRobustVGP(TestIndexedLMCVariationalGP): + @property + def mll_cls(self): + return gpytorch.mlls.GammaRobustVariationalELBO + + +class TestMeanFieldIndexedLMCVariationalGP(TestIndexedLMCVariationalGP): + @property + def distribution_cls(self): + return gpytorch.variational.MeanFieldVariationalDistribution + + +class TestMeanFieldIndexedLMCPredictiveGP(TestIndexedLMCPredictiveGP): + @property + def distribution_cls(self): + return gpytorch.variational.MeanFieldVariationalDistribution + + +class TestMeanFieldIndexedLMCRobustVGP(TestIndexedLMCRobustVGP): + @property + def distribution_cls(self): + return gpytorch.variational.MeanFieldVariationalDistribution + + +class TestDeltaIndexedLMCVariationalGP(TestIndexedLMCVariationalGP): + @property + def distribution_cls(self): + return gpytorch.variational.DeltaVariationalDistribution + + +class TestDeltaIndexedLMCPredictiveGP(TestIndexedLMCPredictiveGP): + @property + def distribution_cls(self): + return gpytorch.variational.DeltaVariationalDistribution + + +class TestDeltaIndexedLMCRobustVGP(TestIndexedLMCRobustVGP): + @property + def distribution_cls(self): + return gpytorch.variational.DeltaVariationalDistribution + + if __name__ == "__main__": unittest.main()