From 3078fbc8d7ed60e1d296b67437703e81c4793311 Mon Sep 17 00:00:00 2001 From: Eddie Bergman Date: Mon, 21 Oct 2024 09:47:05 +0200 Subject: [PATCH] refactor(SearchSpace): removes a lot of methods from `SearchSpace` (#150) --- .../acquisition_functions/ei.py | 2 +- neps/optimizers/grid_search/optimizer.py | 5 +- neps/optimizers/multi_fidelity/mf_bo.py | 11 +- .../multi_fidelity/sampling_policy.py | 50 ++++--- .../multi_fidelity/successive_halving.py | 30 ++-- .../multi_fidelity_prior/priorband.py | 4 +- neps/optimizers/multi_fidelity_prior/utils.py | 25 ++-- neps/optimizers/random_search/optimizer.py | 20 +-- neps/sampling/priors.py | 15 +- neps/sampling/samplers.py | 25 +++- neps/search_spaces/encoding.py | 8 +- neps/search_spaces/functions.py | 133 ++++++++++++++++++ .../search_spaces/hyperparameters/constant.py | 14 -- neps/search_spaces/search_space.py | 119 ++++------------ tests/test_config_encoder.py | 6 +- tests/test_search_space_functions.py | 74 ++++++++++ 16 files changed, 361 insertions(+), 180 deletions(-) create mode 100644 neps/search_spaces/functions.py create mode 100644 tests/test_search_space_functions.py diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/ei.py b/neps/optimizers/bayesian_optimization/acquisition_functions/ei.py index 265e73bb..b8ee5f75 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_functions/ei.py +++ b/neps/optimizers/bayesian_optimization/acquisition_functions/ei.py @@ -69,7 +69,7 @@ def eval( if len(space.fidelities) > 0 and self.optimize_on_max_fidelity: assert len(space.fidelities) == 1 fid_name, fid = next(iter(space.fidelities.items())) - _x = [space.from_dict({**e.hp_values(), fid_name: fid.upper}) for e in x] + _x = [space.from_dict({**e._values, fid_name: fid.upper}) for e in x] else: _x = list(x) diff --git a/neps/optimizers/grid_search/optimizer.py b/neps/optimizers/grid_search/optimizer.py index 9b3a3809..1da004e7 100644 --- a/neps/optimizers/grid_search/optimizer.py +++ b/neps/optimizers/grid_search/optimizer.py @@ -9,12 +9,9 @@ import torch from neps.optimizers.base_optimizer import BaseOptimizer, SampledConfig +from neps.search_spaces import Categorical, Constant, Float, Integer from neps.search_spaces.architecture.graph_grammar import GraphParameter from neps.search_spaces.domain import UNIT_FLOAT_DOMAIN -from neps.search_spaces.hyperparameters.categorical import Categorical -from neps.search_spaces.hyperparameters.constant import Constant -from neps.search_spaces.hyperparameters.float import Float -from neps.search_spaces.hyperparameters.integer import Integer if TYPE_CHECKING: from neps.search_spaces.search_space import SearchSpace diff --git a/neps/optimizers/multi_fidelity/mf_bo.py b/neps/optimizers/multi_fidelity/mf_bo.py index 6686a847..f4355585 100755 --- a/neps/optimizers/multi_fidelity/mf_bo.py +++ b/neps/optimizers/multi_fidelity/mf_bo.py @@ -4,10 +4,16 @@ from copy import deepcopy from typing import TYPE_CHECKING, Any, Literal +from neps.search_spaces.functions import sample_one_old + def update_fidelity(config: SearchSpace, fidelity: int | float) -> SearchSpace: assert config.fidelity is not None config.fidelity.set_value(fidelity) + # TODO: Place holder until we can get rid of passing around search spaces + # as configurations + assert config.fidelity_name is not None + config._values[config.fidelity_name] = fidelity return config @@ -93,7 +99,7 @@ def _fit_models(self) -> None: train_y = deepcopy(self.rung_histories[rung]["perf"]) # extract only the pending configurations that are at 
`rung` pending_df = pending_df[pending_df.rung == rung] - pending_x = deepcopy(pending_df.config.values.tolist()) + pending_x = deepcopy(pending_df["config"].values.tolist()) # update fidelity fidelities = [self.rung_map[rung]] * len(pending_x) pending_x = list(map(update_fidelity, pending_x, fidelities)) @@ -196,7 +202,8 @@ def sample_new_config( elif self.sampling_policy is not None: config = self.sampling_policy.sample(**self.sampling_args) else: - config = self.pipeline_space.sample( + config = sample_one_old( + self.pipeline_space, patience=self.patience, user_priors=self.use_priors, ignore_fidelity=True, diff --git a/neps/optimizers/multi_fidelity/sampling_policy.py b/neps/optimizers/multi_fidelity/sampling_policy.py index 39acabaa..7e883e69 100644 --- a/neps/optimizers/multi_fidelity/sampling_policy.py +++ b/neps/optimizers/multi_fidelity/sampling_policy.py @@ -22,6 +22,7 @@ from neps.sampling.priors import Prior from neps.sampling.samplers import Sampler from neps.search_spaces.encoding import ConfigEncoder +from neps.search_spaces.functions import sample_one_old if TYPE_CHECKING: from botorch.acquisition.analytic import SingleTaskGP @@ -64,8 +65,11 @@ def __init__(self, pipeline_space: SearchSpace): super().__init__(pipeline_space=pipeline_space) def sample(self, *args: Any, **kwargs: Any) -> SearchSpace: - return self.pipeline_space.sample( - patience=self.patience, user_priors=False, ignore_fidelity=True + return sample_one_old( + self.pipeline_space, + patience=self.patience, + user_priors=False, + ignore_fidelity=True, ) @@ -88,8 +92,12 @@ def sample(self, *args: Any, **kwargs: Any) -> SearchSpace: user_priors = False if np.random.uniform() < self.fraction_from_prior: user_priors = True - return self.pipeline_space.sample( - patience=self.patience, user_priors=user_priors, ignore_fidelity=True + + return sample_one_old( + self.pipeline_space, + patience=self.patience, + user_priors=user_priors, + ignore_fidelity=True, ) @@ -140,9 +148,11 @@ def sample_neighbour( ) while True: - # sampling a config - config = self.pipeline_space.sample( - patience=self.patience, user_priors=False, ignore_fidelity=False + config = sample_one_old( + self.pipeline_space, + patience=self.patience, + user_priors=False, + ignore_fidelity=False, ) # computing distance from incumbent d = compute_config_dist(config, incumbent) @@ -188,8 +198,11 @@ def sample( # noqa: PLR0912, C901, PLR0915 logger.info(f"Sampling from {policy} with weights (i, p, r)={prob_weights}") if policy == "prior": - config = self.pipeline_space.sample( - patience=self.patience, user_priors=True, ignore_fidelity=True + config = sample_one_old( + self.pipeline_space, + patience=self.patience, + user_priors=True, + ignore_fidelity=True, ) elif policy == "inc": if ( @@ -201,7 +214,7 @@ def sample( # noqa: PLR0912, C901, PLR0915 user_priors = False if inc is None: - inc = self.pipeline_space.sample_default_configuration().clone() + inc = self.pipeline_space.from_dict(self.pipeline_space.default_config) logger.warning( "No incumbent config found, using default as the incumbent." 
) @@ -251,7 +264,8 @@ def sample( # noqa: PLR0912, C901, PLR0915 f"Crossing over with user_priors={user_priors} with p={probs}" ) # sampling a configuration either randomly or from a prior - _config = self.pipeline_space.sample( + _config = sample_one_old( + self.pipeline_space, patience=self.patience, user_priors=user_priors, ignore_fidelity=True, @@ -274,9 +288,11 @@ def sample( # noqa: PLR0912, C901, PLR0915 f"{{'mutation', 'crossover', 'hypersphere', 'gaussian'}}" ) else: - # random - config = self.pipeline_space.sample( - patience=self.patience, user_priors=False, ignore_fidelity=True + config = sample_one_old( + self.pipeline_space, + patience=self.patience, + user_priors=False, + ignore_fidelity=True, ) return config @@ -316,8 +332,8 @@ def update_model( pending_x: list[SearchSpace], decay_t: float | None = None, ) -> None: - x_train = self._encoder.encode([config.hp_values() for config in train_x]) - x_pending = self._encoder.encode([config.hp_values() for config in pending_x]) + x_train = self._encoder.encode([config._values for config in train_x]) + x_pending = self._encoder.encode([config._values for config in pending_x]) y_train = torch.tensor(train_y, dtype=torch.float64, device=self.device) # TODO: Most of this just copies BO and the duplication can be replaced @@ -377,7 +393,7 @@ def sample( """ # sampling random configurations samples = [ - self.pipeline_space.sample(user_priors=False, ignore_fidelity=True) + sample_one_old(self.pipeline_space, user_priors=False, ignore_fidelity=True) for _ in range(SAMPLE_THRESHOLD) ] diff --git a/neps/optimizers/multi_fidelity/successive_halving.py b/neps/optimizers/multi_fidelity/successive_halving.py index 8882b9cf..3d27b061 100644 --- a/neps/optimizers/multi_fidelity/successive_halving.py +++ b/neps/optimizers/multi_fidelity/successive_halving.py @@ -26,6 +26,7 @@ Integer, SearchSpace, ) +from neps.search_spaces.functions import sample_one_old if TYPE_CHECKING: from neps.state.optimizer import BudgetInfo @@ -371,14 +372,14 @@ def sample_new_config( ) -> SearchSpace: # Samples configuration from policy or random if self.sampling_policy is None: - config = self.pipeline_space.sample( + return sample_one_old( + self.pipeline_space, patience=self.patience, user_priors=self.use_priors, ignore_fidelity=True, ) - else: - config = self.sampling_policy.sample(**self.sampling_args) - return config + + return self.sampling_policy.sample(**self.sampling_args) def _generate_new_config_id(self) -> int: if len(self.observed_configs) == 0: @@ -405,6 +406,9 @@ def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]: Returns: [type]: [description] """ + fidelity_name = self.pipeline_space.fidelity_name + assert fidelity_name is not None + rung_to_promote = self.is_promotable() if rung_to_promote is not None: # promotes the first recorded promotable config in the argsort-ed rung @@ -412,7 +416,10 @@ def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]: config = row["config"].clone() rung = rung_to_promote + 1 # assigning the fidelity to evaluate the config at - config.fidelity.set_value(self.rung_map[rung]) + + config_values = config._values + config_values[fidelity_name] = self.rung_map[rung] + # updating config IDs previous_config_id = f"{row.name}_{rung_to_promote}" config_id = f"{row.name}_{rung}" @@ -420,7 +427,6 @@ def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]: rung_id = self.min_rung # using random instead of np.random to be consistent with NePS BO rng = random.Random(None) # TODO: Seeding - if ( 
self.use_priors and self.sample_default_first @@ -431,10 +437,10 @@ def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]: rung_id = self.max_rung logger.info("Next config will be evaluated at target fidelity.") logger.info("Sampling the default configuration...") - config = self.pipeline_space.sample_default_configuration() - + config = self.pipeline_space.from_dict(self.pipeline_space.default_config) elif rng.random() < self.random_interleave_prob: - config = self.pipeline_space.sample( + config = sample_one_old( + self.pipeline_space, patience=self.patience, user_priors=False, # sample uniformly random ignore_fidelity=True, @@ -443,13 +449,13 @@ def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]: config = self.sample_new_config(rung=rung_id) fidelity_value = self.rung_map[rung_id] - assert config.fidelity is not None - config.fidelity.set_value(fidelity_value) + config_values = config._values + config_values[fidelity_name] = fidelity_value previous_config_id = None config_id = f"{self._generate_new_config_id()}_{rung_id}" - return config.hp_values(), config_id, previous_config_id + return config_values, config_id, previous_config_id def _enhance_priors(self, confidence_score: dict[str, float] | None = None) -> None: """Only applicable when priors are given along with a confidence. diff --git a/neps/optimizers/multi_fidelity_prior/priorband.py b/neps/optimizers/multi_fidelity_prior/priorband.py index 22fc979b..ef9c6002 100644 --- a/neps/optimizers/multi_fidelity_prior/priorband.py +++ b/neps/optimizers/multi_fidelity_prior/priorband.py @@ -91,7 +91,7 @@ def find_incumbent(self, rung: int | None = None) -> SearchSpace: else: # THIS block should not ever execute, but for runtime anomalies, if no # incumbent can be extracted, the prior is treated as the incumbent - inc = self.pipeline_space.sample_default_configuration() + inc = self.pipeline_space.from_dict(self.pipeline_space.default_config) logger.warning( "Treating the prior as the incumbent. " "Please check if this should not happen." 
@@ -259,7 +259,7 @@ def _prior_to_incumbent_ratio_dynamic(self, rung: int) -> tuple[float, float]: # requires at least eta completed configurations to begin computing scores if len(self.rung_histories[rung]["config"]) >= self.eta: # retrieve the prior - prior = self.pipeline_space.sample_default_configuration() + prior = self.pipeline_space.from_dict(self.pipeline_space.default_config) # retrieve the global incumbent inc = self.find_incumbent() # subsetting the top 1/eta configs from the rung diff --git a/neps/optimizers/multi_fidelity_prior/utils.py b/neps/optimizers/multi_fidelity_prior/utils.py index b0e164ef..c8a8c7c7 100644 --- a/neps/optimizers/multi_fidelity_prior/utils.py +++ b/neps/optimizers/multi_fidelity_prior/utils.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any +from typing import Any import numpy as np import torch @@ -10,14 +10,12 @@ Categorical, Constant, GraphParameter, + Float, + Integer, SearchSpace, ) from neps.search_spaces.encoding import ConfigEncoder -from neps.search_spaces.hyperparameters.float import Float -from neps.search_spaces.hyperparameters.integer import Integer - -if TYPE_CHECKING: - import pandas as pd +from neps.search_spaces.functions import sample_one_old, pairwise_dist def update_fidelity(config: SearchSpace, fidelity: int | float) -> SearchSpace: @@ -98,7 +96,7 @@ def custom_crossover( Returns a configuration where each HP in config1 has `crossover_prob`% chance of getting config2's value of the corresponding HP. By default, crossover rate is 50%. """ - _existing = config1.hp_values() + _existing = config1._values for _ in range(patience): child_config = {} @@ -114,7 +112,8 @@ def custom_crossover( # fail safe check to handle edge cases where config1=config2 or # config1 extremely local to config2 such that crossover fails to # generate new config in a discrete (sub-)space - return config1.sample( + return sample_one_old( + config1, patience=patience, user_priors=False, ignore_fidelity=True, @@ -130,8 +129,8 @@ def compute_config_dist(config1: SearchSpace, config2: SearchSpace) -> float: the Hamming distance of the categorical subspace. """ encoder = ConfigEncoder.from_parameters({**config1.numerical, **config1.categoricals}) - configs = encoder.encode([config1.hp_values(), config2.hp_values()]) - dist = encoder.pdist(configs, square_form=False) + configs = encoder.encode([config1._values, config2._values]) + dist = pairwise_dist(configs, encoder, square_form=False) return float(dist.item()) @@ -146,16 +145,16 @@ def compute_scores( # TODO: This could lifted up and just done in the class itself # in a vectorized form. 
encoder = ConfigEncoder.from_space(config, include_fidelity=include_fidelity) - encoded_config = encoder.encode([config.hp_values()]) + encoded_config = encoder.encode([config._values]) prior_dist = Prior.from_space( prior, - center_values=prior.hp_values(), + center_values=prior._values, include_fidelity=include_fidelity, ) inc_dist = Prior.from_space( inc, - center_values=inc.hp_values(), + center_values=inc._values, include_fidelity=include_fidelity, ) diff --git a/neps/optimizers/random_search/optimizer.py b/neps/optimizers/random_search/optimizer.py index 131ca6e4..8bcc8178 100644 --- a/neps/optimizers/random_search/optimizer.py +++ b/neps/optimizers/random_search/optimizer.py @@ -7,6 +7,8 @@ from typing_extensions import override from neps.optimizers.base_optimizer import BaseOptimizer, SampledConfig +from neps.sampling.priors import UniformPrior +from neps.search_spaces.encoding import ConfigEncoder if TYPE_CHECKING: from neps.search_spaces.search_space import SearchSpace @@ -42,6 +44,12 @@ def __init__( raise NotImplementedError("Seed is not implemented yet for RandomSearch") self.seed = seed + self.encoder = ConfigEncoder.from_space( + pipeline_space, + include_fidelity=False, + include_constants_when_decoding=True, + ) + self.sampler = UniformPrior(ndim=self.encoder.ncols) @override def ask( @@ -49,14 +57,8 @@ def ask( trials: Mapping[str, Trial], budget_info: BudgetInfo | None, ) -> SampledConfig: - # TODO: Replace with sampler objects and not `pipeline_space.sample` n_trials = len(trials) - config = self.pipeline_space.sample( - patience=self.patience, - user_priors=self.use_priors, - ignore_fidelity=self.ignore_fidelity, - ) + config = self.sampler.sample_one(to=self.encoder.domains) + config_dict = self.encoder.decode_one(config) config_id = str(n_trials + 1) - return SampledConfig( - config=config.hp_values(), id=config_id, previous_config_id=None - ) + return SampledConfig(config=config_dict, id=config_id, previous_config_id=None) diff --git a/neps/sampling/priors.py b/neps/sampling/priors.py index 22e3d5d2..550470b8 100644 --- a/neps/sampling/priors.py +++ b/neps/sampling/priors.py @@ -9,9 +9,10 @@ from __future__ import annotations +from abc import abstractmethod from collections.abc import Iterable, Mapping, Sequence from dataclasses import dataclass, field -from typing import TYPE_CHECKING, Any, Protocol +from typing import TYPE_CHECKING, Any from typing_extensions import override import torch @@ -29,11 +30,10 @@ if TYPE_CHECKING: from torch.distributions import Distribution - from neps.search_spaces import Float, Integer - from neps.search_spaces.search_space import SearchSpace + from neps.search_spaces import Float, Integer, SearchSpace -class Prior(Sampler, Protocol): +class Prior(Sampler): """A protocol for priors over search spaces. Extends from the [`Sampler`][neps.samplers.Sampler] protocol. @@ -64,6 +64,7 @@ class Prior(Sampler, Protocol): actually be `1` (1 / 1) for any value inside the domain. """ + @abstractmethod def log_pdf( self, x: torch.Tensor, @@ -94,7 +95,6 @@ def log_pdf( case that only single dimensional tensor is passed, the returns value is a scalar. """ - ... 
def pdf( self, x: torch.Tensor, *, frm: ConfigEncoder | Domain | list[Domain] @@ -117,10 +117,7 @@ def uniform(cls, ncols: int) -> UniformPrior: @classmethod def from_parameters( cls, - parameters: Mapping[ - str, - Categorical | Float | Integer, - ], + parameters: Mapping[str, Categorical | Float | Integer], *, center_values: Mapping[str, Any] | None = None, confidence_values: Mapping[str, float] | None = None, diff --git a/neps/sampling/samplers.py b/neps/sampling/samplers.py index bd115e74..43558eff 100644 --- a/neps/sampling/samplers.py +++ b/neps/sampling/samplers.py @@ -6,10 +6,11 @@ from __future__ import annotations +from abc import ABC, abstractmethod from collections.abc import Sequence from dataclasses import dataclass, field from functools import reduce -from typing import TYPE_CHECKING, Protocol +from typing import TYPE_CHECKING from typing_extensions import override import torch @@ -22,14 +23,16 @@ from neps.sampling.priors import UniformPrior -class Sampler(Protocol): +class Sampler(ABC): """A protocol for sampling tensors and vonerting them to a given domain.""" @property + @abstractmethod def ncols(self) -> int: """The number of columns in the samples produced by this sampler.""" ... + @abstractmethod def sample( self, n: int | torch.Size, @@ -56,7 +59,23 @@ def sample( Returns: A tensor of (n, ndim) points sampled cast to the given domain. """ - ... + + def sample_one( + self, + *, + to: Domain | list[Domain] | ConfigEncoder, + seed: torch.Generator | None = None, + device: torch.device | None = None, + dtype: torch.dtype | None = None, + ) -> torch.Tensor: + """Sample a single point and convert it to the given domain. + + The configuration will be a single dimensional tensor of shape + `(ncols,)`. + + Please see [`sample`][neps.samplers.Sampler.sample] for more details. + """ + return self.sample(1, to=to, seed=seed, device=device, dtype=dtype).squeeze(0) @classmethod def sobol(cls, ndim: int, *, scramble: bool = True) -> Sobol: diff --git a/neps/search_spaces/encoding.py b/neps/search_spaces/encoding.py index 3de6a3f1..d9546499 100644 --- a/neps/search_spaces/encoding.py +++ b/neps/search_spaces/encoding.py @@ -429,6 +429,12 @@ def encode( return buffer + def decode_one(self, x: torch.Tensor) -> dict[str, Any]: + """Decode a tensor representing one configuration into a dict.""" + if x.ndim == 1: + x = x.unsqueeze(0) + return self.decode(x)[0] + def decode(self, x: torch.Tensor) -> list[dict[str, Any]]: """Decode a tensor of hyperparameter configurations into a list of configurations. @@ -557,4 +563,4 @@ def from_parameters( " please provide it as `constants=`." 
) - return ConfigEncoder(transformers, constants=constants) + return cls(transformers, constants=constants) diff --git a/neps/search_spaces/functions.py b/neps/search_spaces/functions.py new file mode 100644 index 00000000..ba8b6bc9 --- /dev/null +++ b/neps/search_spaces/functions.py @@ -0,0 +1,133 @@ +"""Functions for working with search spaces.""" + +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING + +import torch + +from neps.search_spaces.domain import UNIT_FLOAT_DOMAIN, Domain +from neps.search_spaces.parameter import Parameter, ParameterWithPrior +from neps.search_spaces.search_space import SearchSpace + +if TYPE_CHECKING: + from neps.search_spaces.encoding import ConfigEncoder + +logger = logging.getLogger(__name__) + + +def pairwise_dist( + x: torch.Tensor, + encoder: ConfigEncoder, + *, + numerical_ord: int = 2, + categorical_ord: int = 0, + dtype: torch.dtype = torch.float64, + square_form: bool = False, +) -> torch.Tensor: + """Compute the pairwise distance between rows of a tensor. + + Will sum the results of the numerical and categorical distances. + The encoding will be normalized such that all numericals lie within the unit + cube, and categoricals will by default, have a `p=0` norm, which is equivalent + to the Hamming distance. + + Args: + x: A tensor of shape `(N, ncols)`. + encoder: The encoder used to encode the configs into the tensor. + numerical_ord: The order of the norm to use for the numerical columns. + categorical_ord: The order of the norm to use for the categorical columns. + dtype: The dtype of the output tensor. + square_form: If `True`, the output will be a square matrix of shape + `(N, N)`. If `False`, the output will be a single dim tensor of shape + `1/2 * N * (N - 1)`. + + Returns: + The distances, shaped according to `square_form`. + """ + categoricals = encoder.select_categorical(x) + numericals = encoder.select_numerical(x) + + dists: torch.Tensor | None = None + if numericals is not None: + # Ensure they are all within the unit cube + numericals = Domain.translate( + numericals, + frm=encoder.numerical_domains, + to=UNIT_FLOAT_DOMAIN, + ) + + dists = torch.nn.functional.pdist(numericals, p=numerical_ord) + + if categoricals is not None: + # Does Hamming distance + cat_dists = torch.nn.functional.pdist(categoricals, p=categorical_ord) + if dists is None: + dists = cat_dists + else: + dists += cat_dists + + if dists is None: + raise ValueError("No columns to compute distances on.") + + if not square_form: + return dists + + # Turn the single dimensional vector into a square matrix + N = len(x) + sq = torch.zeros((N, N), dtype=dtype) + row_ix, col_ix = torch.triu_indices(N, N, offset=1) + sq[row_ix, col_ix] = dists + sq[col_ix, row_ix] = dists + return sq + + +def sample_one_old( + space: SearchSpace, + *, + user_priors: bool = False, + patience: int = 1, + ignore_fidelity: bool = True, +) -> SearchSpace: + """Sample a configuration from the search space. + + Args: + space: The search space to sample from. + user_priors: Whether to use user priors when sampling. + patience: The number of times to try to sample a valid value for a + hyperparameter. + ignore_fidelity: Whether to ignore the fidelity parameter when sampling. + + Returns: + A sampled configuration from the search space. 
+ """ + sampled_hps: dict[str, Parameter] = {} + + for name, hp in space.hyperparameters.items(): + if hp.is_fidelity and ignore_fidelity: + sampled_hps[name] = hp.clone() + continue + + for attempt in range(patience): + try: + if user_priors and isinstance(hp, ParameterWithPrior): + sampled_hps[name] = hp.sample(user_priors=user_priors) + else: + sampled_hps[name] = hp.sample() + break + except Exception as e: # noqa: BLE001 + logger.warning( + f"Attempt {attempt + 1}/{patience} failed for" + f" sampling {name}: {e!s}" + ) + else: + logger.error( + f"Failed to sample valid value for {name} after {patience} attempts" + ) + raise ValueError( + f"Could not sample valid value for hyperparameter {name}" + f" in {patience} tries!" + ) + + return SearchSpace(**sampled_hps) diff --git a/neps/search_spaces/hyperparameters/constant.py b/neps/search_spaces/hyperparameters/constant.py index 8dfbfdd1..2dab7bfd 100644 --- a/neps/search_spaces/hyperparameters/constant.py +++ b/neps/search_spaces/hyperparameters/constant.py @@ -37,7 +37,6 @@ class Constant(Parameter[T, T]): Please use [`.set_constant_value()`][neps.search_spaces.hyperparameters.constant.Constant.set_constant_value] if you need to change the value of the constant parameter. - """ def __init__(self, value: T): @@ -99,19 +98,6 @@ def set_value(self, value: T | None) -> None: f"Tried to set value to {value}, but it is already {self.value}" ) - def set_constant_value(self, value: T) -> None: - """Set the value of the constant parameter. - - !!! note - - This method is used to set the - [`.value`][neps.search_spaces.parameter.Parameter.value] - including the [`.default`][neps.search_spaces.parameter.Parameter.default] - It is used internally and should not be used by the user. - """ - self._value = value - self.default = value - @override def value_to_normalized(self, value: T) -> float: return 1.0 if value == self._value else 0.0 diff --git a/neps/search_spaces/search_space.py b/neps/search_spaces/search_space.py index 2573167e..a1566733 100644 --- a/neps/search_spaces/search_space.py +++ b/neps/search_spaces/search_space.py @@ -14,6 +14,7 @@ import yaml from neps.search_spaces.architecture.graph_grammar import GraphParameter +from neps.search_spaces.domain import UNIT_FLOAT_DOMAIN from neps.search_spaces.hyperparameters import ( Categorical, Constant, @@ -181,7 +182,7 @@ class SearchSpace(Mapping[str, Any]): know better what to document. """ - def __init__(self, **hyperparameters: Parameter): + def __init__(self, **hyperparameters: Parameter): # noqa: C901, PLR0912 """Initialize the SearchSpace with hyperparameters. 
Args: @@ -218,6 +219,30 @@ def __init__(self, **hyperparameters: Parameter): self.fidelity_name: str | None = _fidelity_name self.has_prior: bool = _has_prior + self.default_config = {} + for name, hp in _hyperparameters: + if hp.default is not None: + self.default_config[name] = hp.default + continue + + match hp: + case Categorical(): + first_choice = hp.choices[0] + self.default_config[name] = first_choice + case Integer() | Float(): + if hp.is_fidelity: + self.default_config[name] = hp.upper + continue + + midpoint = hp.domain.cast_one(0.5, frm=UNIT_FLOAT_DOMAIN) + self.default_config[name] = midpoint + case Constant(): + self.default_config[name] = hp.value + case GraphParameter(): + self.default_config[name] = hp.default + case _: + raise TypeError(f"Unknown hyperparameter type {hp}") + self.categoricals: Mapping[str, Categorical] = { k: hp for k, hp in _hyperparameters if isinstance(hp, Categorical) } @@ -238,61 +263,14 @@ def __init__(self, **hyperparameters: Parameter): assert isinstance(_fidelity_param, Integer | Float) self.fidelities = {_fidelity_name: _fidelity_param} - def sample( - self, - *, - user_priors: bool = False, - patience: int = 1, - ignore_fidelity: bool = True, - ) -> SearchSpace: - """Sample a configuration from the search space. - - Args: - user_priors: Whether to use user priors when sampling. - patience: The number of times to try to sample a valid value for a - hyperparameter. - ignore_fidelity: Whether to ignore the fidelity parameter when sampling. - - Returns: - A sampled configuration from the search space. - """ - sampled_hps: dict[str, Parameter] = {} - - for name, hp in self.hyperparameters.items(): - if hp.is_fidelity and ignore_fidelity: - sampled_hps[name] = hp.clone() - continue - - for attempt in range(patience): - try: - if user_priors and isinstance(hp, ParameterWithPrior): - sampled_hps[name] = hp.sample(user_priors=user_priors) - else: - sampled_hps[name] = hp.sample() - break - except Exception as e: # noqa: BLE001 - logger.warning( - f"Attempt {attempt + 1}/{patience} failed for" - f" sampling {name}: {e!s}" - ) - else: - logger.error( - f"Failed to sample valid value for {name} after {patience} attempts" - ) - raise ValueError( - f"Could not sample valid value for hyperparameter {name}" - f" in {patience} tries!" - ) - - return SearchSpace(**sampled_hps) - - def hp_values(self) -> dict[str, Any]: - """Get the values for each hyperparameter in this configuration.""" - return { + # TODO: Deprecate out, ideally configs are just dictionaries, + # not attached to this space object + self._values = { hp_name: hp if isinstance(hp, GraphParameter) else hp.value for hp_name, hp in self.hyperparameters.items() } + # TODO: Deprecate and remove def from_dict(self, config: Mapping[str, Any | GraphParameter]) -> SearchSpace: """Create a new instance of this search space with parameters set from the config. @@ -302,6 +280,7 @@ def from_dict(self, config: Mapping[str, Any | GraphParameter]) -> SearchSpace: new = self.clone() for name, val in config.items(): new.hyperparameters[name].load_from(val) + new._values[name] = new.hyperparameters[name].value return new @@ -309,42 +288,6 @@ def clone(self) -> SearchSpace: """Create a copy of the search space.""" return self.__class__(**{k: v.clone() for k, v in self.hyperparameters.items()}) - def sample_default_configuration( - self, - *, - patience: int = 1, - ignore_fidelity: bool = True, - ) -> SearchSpace: - """Sample the default configuration from the search space. 
- - By default, if there is no default set for a hyperparameter, an error will be - raised. If `ignore_missing_defaults=True`, then a sampled value will be used - instead. - - Args: - patience: The number of times to try to sample a valid value for a - hyperparameter. - ignore_fidelity: Whether to ignore the fidelity parameter when sampling. - ignore_missing_defaults: Whether to ignore missing defaults when setting - the default configuration. - - Returns: - The default configuration. - """ - # Sample a random config and then set the defaults if there are any - config = self.sample(patience=patience, ignore_fidelity=ignore_fidelity) - for hp_name, hp in self.hyperparameters.items(): - if hp.is_fidelity and ignore_fidelity: - continue - - if hp.default is None: - raise ValueError(f"No defaults specified for {hp} in the space.") - - # Use the sampled value instead - config[hp_name].set_value(hp.default) - - return config - def __getitem__(self, key: str) -> Parameter: return self.hyperparameters[key] diff --git a/tests/test_config_encoder.py b/tests/test_config_encoder.py index 25c44165..f73419bc 100644 --- a/tests/test_config_encoder.py +++ b/tests/test_config_encoder.py @@ -6,11 +6,7 @@ ConfigEncoder, MinMaxNormalizer, ) -from neps.search_spaces.hyperparameters import ( - Categorical, - Float, - Integer, -) +from neps.search_spaces.hyperparameters import Categorical, Float, Integer def test_config_encoder_default() -> None: diff --git a/tests/test_search_space_functions.py b/tests/test_search_space_functions.py new file mode 100644 index 00000000..7327544f --- /dev/null +++ b/tests/test_search_space_functions.py @@ -0,0 +1,74 @@ +import torch +from neps.search_spaces.encoding import ConfigEncoder +from neps.search_spaces.hyperparameters import Categorical, Float, Integer +from neps.search_spaces.functions import pairwise_dist + + +def test_config_encoder_pdist_calculation() -> None: + parameters = { + "a": Categorical(["cat", "mouse", "dog"]), + "b": Integer(1, 10), + "c": Float(1, 10), + } + encoder = ConfigEncoder.from_parameters(parameters) + config1 = {"a": "cat", "b": 1, "c": 1.0} + config2 = {"a": "mouse", "b": 10, "c": 10.0} + + # Same config, no distance + x = encoder.encode([config1, config1]) + dist = pairwise_dist(x, encoder=encoder, square_form=False) + assert dist.item() == 0.0 + + # Opposite configs, max distance + x = encoder.encode([config1, config2]) + dist = pairwise_dist(x, encoder=encoder, square_form=False) + + # The first config should have it's p2 euclidean distance as the norm + # of the distances between these two configs, i.e. 
the distance along the
+    # diagonal of the unit square they belong to
+    _first_config_numerical_encoding = torch.tensor([[0.0, 0.0]], dtype=torch.float64)
+    _second_config_numerical_encoding = torch.tensor([[1.0, 1.0]], dtype=torch.float64)
+    _expected_numerical_dist = torch.linalg.norm(
+        _first_config_numerical_encoding - _second_config_numerical_encoding,
+        ord=2,
+    )
+
+    # The categorical distance should just be one, as they are different
+    _expected_categorical_dist = 1.0
+
+    _expected_dist = _expected_numerical_dist + _expected_categorical_dist
+    assert torch.isclose(dist, _expected_dist)
+
+
+def test_config_encoder_pdist_squareform() -> None:
+    parameters = {
+        "a": Categorical(["cat", "mouse", "dog"]),
+        "b": Integer(1, 10),
+        "c": Float(1, 10),
+    }
+    encoder = ConfigEncoder.from_parameters(parameters)
+    config1 = {"a": "cat", "b": 1, "c": 1.0}
+    config2 = {"a": "dog", "b": 5, "c": 5}
+    config3 = {"a": "mouse", "b": 10, "c": 10.0}
+
+    # Three different configs, three pairwise distances
+    x = encoder.encode([config1, config2, config3])
+    dist = pairwise_dist(x, encoder=encoder, square_form=False)
+
+    # 3 possible distances
+    assert dist.shape == (3,)
+    torch.testing.assert_close(
+        dist,
+        torch.tensor([1.6285, 2.4142, 1.7857], dtype=torch.float64),
+        atol=1e-4,
+        rtol=1e-4,
+    )
+
+    dist_sq = pairwise_dist(x, encoder=encoder, square_form=True)
+    assert dist_sq.shape == (3, 3)
+
+    # Distance to self along diagonal should be 0
+    torch.testing.assert_close(dist_sq.diagonal(), torch.zeros(3, dtype=torch.float64))
+
+    # Should be symmetric
+    torch.testing.assert_close(dist_sq, dist_sq.T)
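
Note (illustrative sketch, not part of the patch): the snippet below shows how the encoder-based sampling path that `RandomSearch.ask` now uses fits together. The toy space and decoded values are hypothetical; only calls that appear in this diff (`ConfigEncoder.from_space`, `UniformPrior`, `Sampler.sample_one`, `ConfigEncoder.decode_one`) are assumed.

# Illustrative sketch only, assuming a small made-up space.
from neps.sampling.priors import UniformPrior
from neps.search_spaces.encoding import ConfigEncoder
from neps.search_spaces.hyperparameters import Categorical, Float, Integer
from neps.search_spaces.search_space import SearchSpace

space = SearchSpace(
    a=Categorical(["cat", "mouse", "dog"]),
    b=Integer(1, 10),
    c=Float(1, 10),
)

# Encode the space once, then sample tensors and decode them back to dicts,
# instead of calling the removed SearchSpace.sample().
encoder = ConfigEncoder.from_space(space, include_fidelity=False)
sampler = UniformPrior(ndim=encoder.ncols)

x = sampler.sample_one(to=encoder.domains)  # tensor of shape (ncols,)
config = encoder.decode_one(x)              # e.g. {"a": "mouse", "b": 7, "c": 3.2}

This mirrors the change to `RandomSearch.ask` above, where `pipeline_space.sample(...)` is replaced by a `ConfigEncoder` plus `UniformPrior` pair and the sampled config is returned as a plain dict.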