Merge DyHPO #60

Open · wants to merge 8 commits into `master`
159 changes: 159 additions & 0 deletions docs/analyse.md
@@ -166,6 +166,165 @@ The scatter plot matrix view provides an in-depth analysis of pairwise relations

![hparam_loggings3](doc_images/tensorboard/tblogger_hparam3.jpg)

## Summary CSV

The `post_run_summary` argument of `neps.run` enables the automatic generation of CSV files once a run is complete. With this argument set, the root directory will look like the following:

```
ROOT_DIRECTORY
├── results
│ └── config_1
│ ├── config.yaml
│ ├── metadata.yaml
│ └── result.yaml
├── summary_csv
│ ├── config_data.csv
│ └── run_status.csv
├── all_losses_and_configs.txt
├── best_loss_trajectory.txt
└── best_loss_with_config_trajectory.txt
```

- *`config_data.csv`*: Contains all configuration details in CSV format, ordered by ascending `loss`. Details include configuration hyperparameters, any returned result from the `run_pipeline` function, and metadata information.

- *`run_status.csv`*: Provides general run details, such as the number of sampled configs, best configs, number of failed configs, best loss, etc.
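Both files are plain CSV, so they can be inspected with standard tooling. Below is a minimal sketch using pandas; the helper name `best_config` is ours, and we only assume the file contains a `loss` column, as implied by the ascending-`loss` ordering described above (the exact column layout depends on your pipeline's return values):

```python
import pandas as pd


def best_config(summary_dir: str) -> pd.Series:
    """Return the row with the lowest loss from config_data.csv.

    Assumes the CSV produced by `post_run_summary` contains a `loss`
    column; the file is documented to be ordered by ascending loss.
    """
    df = pd.read_csv(f"{summary_dir}/config_data.csv")
    # The file should already be sorted by ascending loss, but we sort
    # defensively in case the ordering convention changes.
    return df.sort_values("loss").iloc[0]
```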

## TensorBoard Integration

### Introduction

[TensorBoard](https://www.tensorflow.org/tensorboard) is a valuable tool for visualizing machine learning experiments, making it possible to observe losses and metrics throughout model training. NePS uses it to display per-configuration metrics during training and to compare the hyperparameters explored during the search, for better diagnosis of the model.

### The Logging Function

The `tblogger.log` function is invoked within the model's training loop to facilitate logging of key metrics.

!!! tip

    The logger function is primarily designed for use within the `run_pipeline` function during the training of the neural network.

- **Signature:**

```python
tblogger.log(
loss: float,
current_epoch: int,
write_config_scalar: bool = False,
write_config_hparam: bool = True,
write_summary_incumbent: bool = False,
extra_data: dict | None = None
)
```

- **Parameters:**
- `loss` (float): The loss value to be logged.
- `current_epoch` (int): The current epoch or iteration number.
- `write_config_scalar` (bool, optional): Set to `True` for a live loss trajectory for each configuration.
- `write_config_hparam` (bool, optional): Set to `True` for live parallel coordinate, scatter plot matrix, and table view.
- `write_summary_incumbent` (bool, optional): Set to `True` for a live incumbent trajectory.
- `extra_data` (dict, optional): Additional data to be logged, provided as a dictionary.
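To make the call pattern concrete, here is a runnable sketch of a training loop that logs its loss each epoch. Because this snippet must stand alone, `tblogger` is replaced by a minimal stub that records the calls; in a real `run_pipeline` you would use the `tblogger` shipped with NePS instead, and the halving "loss" is a placeholder for a real training epoch:

```python
# Stand-in for NePS's tblogger so the sketch runs standalone;
# in a real run_pipeline, use the tblogger shipped with NePS.
class _TBLoggerStub:
    def __init__(self):
        self.records = []

    def log(self, loss, current_epoch, write_config_scalar=False,
            write_config_hparam=True, write_summary_incumbent=False,
            extra_data=None):
        self.records.append((current_epoch, loss))


tblogger = _TBLoggerStub()


def run_pipeline(max_epochs: int = 3) -> float:
    loss = 1.0
    for epoch in range(max_epochs):
        loss *= 0.5  # placeholder for one real training epoch
        # Log the epoch's loss; enable the live loss trajectory view.
        tblogger.log(
            loss=loss,
            current_epoch=epoch,
            write_config_scalar=True,
        )
    return loss


final_loss = run_pipeline()
```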

### Extra Custom Logging

NePS provides dedicated functions for customized logging using the `extra_data` argument.

!!! note "Custom Logging Instructions"

    Use the dictionary keys as the names of the values you want to log, and pass one of the following functions as the corresponding values.

#### 1- Extra Scalar Logging

Logs new scalar data during training. Uses `current_epoch` from the log function as its `global_step`.

- **Signature:**

```python
tblogger.scalar_logging(value: float)
```

- **Parameters:**
- `value` (float): Any scalar value to be logged at the current epoch of `tblogger.log` function.
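For example, `extra_data` pairs a display name with a logging helper. The sketch below stubs the two tblogger functions so it runs standalone; `miss_rate` is an invented metric name used purely for illustration:

```python
# Minimal stubs standing in for NePS's tblogger helpers.
logged = {}


def scalar_logging(value):
    # In NePS this wraps the value for TensorBoard; here we just tag it.
    return ("scalar", value)


def log(loss, current_epoch, extra_data=None):
    logged[current_epoch] = {"loss": loss, **(extra_data or {})}


# Inside the training loop, extra scalars ride along with the loss:
log(
    loss=0.42,
    current_epoch=0,
    extra_data={"miss_rate": scalar_logging(0.07)},
)
```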

#### 2- Extra Image Logging

Logs images during training. Images can be resized, randomly selected, and a specified number can be logged at specified intervals. Uses `current_epoch` from the log function as its `global_step`.

- **Signature:**

```python
tblogger.image_logging(
image: torch.Tensor,
counter: int = 1,
resize_images: list[None | int] | None = None,
random_images: bool = True,
num_images: int = 20,
seed: int | np.random.RandomState | None = None,
)
```

- **Parameters:**
- `image` (torch.Tensor): Image tensor to be logged.
- `counter` (int): Log images every `counter` epochs (i.e., when `current_epoch % counter == 0`).
- `resize_images` (list of int, optional): Image sizes after resizing (default: `[32, 32]`).
- `random_images` (bool, optional): Images are randomly selected if True (default: True).
- `num_images` (int, optional): Number of images to log (default: 20).
- `seed` (int or np.random.RandomState or None, optional): Seed value or RandomState instance to control randomness and reproducibility (default: None).
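The `counter`, `num_images`, `random_images`, and `seed` parameters govern when and which images are logged. The selection logic can be sketched as follows; this is a standalone illustration of the documented semantics, not NePS's actual implementation:

```python
import numpy as np


def select_images(batch_size, current_epoch, counter=1,
                  random_images=True, num_images=20, seed=None):
    """Return indices of images to log this epoch, or None to skip.

    Mirrors the documented behaviour: log only when
    current_epoch % counter == 0, pick at most num_images images,
    and use seed for reproducible random selection.
    """
    if current_epoch % counter != 0:
        return None
    # seed may be an int, None, or an existing RandomState instance.
    rng = seed if isinstance(seed, np.random.RandomState) \
        else np.random.RandomState(seed)
    n = min(num_images, batch_size)
    if random_images:
        return rng.choice(batch_size, size=n, replace=False)
    return np.arange(n)
```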

### Logging Example

For illustration, we tune the hyperparameters of a simple model used to classify the MNIST dataset provided by [torchvision](https://pytorch.org/vision/main/generated/torchvision.datasets.MNIST.html).

You can find this example [here](https://github.com/automl/neps/blob/master/neps_examples/convenience/neps_tblogger_tutorial.py).

!!! info "Important"

    We have optimized the example for computational efficiency. If you wish to replicate the exact results showcased in the following section, we recommend the following modifications:

    1. Increase the maximum epochs from 2 to 10.
    2. Set the `write_summary_incumbent` argument to `True`.
    3. Change the searcher from `random_search` to `bayesian_optimization`.
    4. Increase the maximum evaluations before disabling `tblogger` from 2 to 14.
    5. Increase the maximum evaluations after disabling `tblogger` from 3 to 15.

### Visualization Results

The following command will open a local host for TensorBoard visualizations, allowing you to view them either in real-time or after the run is complete.

```bash
tensorboard --logdir path/to/root_directory
```

This image shows visualizations related to scalar values logged during training. Scalars typically include metrics such as loss, incumbent trajectory, a summary of losses for all configurations, and any additional data provided via the `extra_data` argument in the `tblogger.log` function.

![scalar_loggings](doc_images/tensorboard/tblogger_scalar.jpg)

This image represents visualizations related to logged images during training. It could include snapshots of input data, model predictions, or any other image-related information. In our case, we use images to depict instances of incorrect predictions made by the model.

![image_loggings](doc_images/tensorboard/tblogger_image.jpg)

The following images showcase visualizations related to hyperparameter logging in TensorBoard. These plots include three different views, providing insights into the relationship between different hyperparameters and their impact on the model.

In the table view, you can explore hyperparameter configurations across five different trials. The table displays various hyperparameter values alongside corresponding evaluation metrics.

![hparam_loggings1](doc_images/tensorboard/tblogger_hparam1.jpg)

The parallel coordinate plot offers a holistic perspective on hyperparameter configurations. By presenting multiple hyperparameters simultaneously, this view allows you to observe the interactions between variables, providing insights into their combined influence on the model.

![hparam_loggings2](doc_images/tensorboard/tblogger_hparam2.jpg)

The scatter plot matrix view provides an in-depth analysis of pairwise relationships between different hyperparameters. By visualizing correlations and patterns, this view aids in identifying key interactions that may influence the model's performance.

![hparam_loggings3](doc_images/tensorboard/tblogger_hparam3.jpg)

## Status

To show status information about a neural pipeline search run, use
Expand Down
2 changes: 1 addition & 1 deletion neps/metahyper/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
from .api import ConfigResult, Sampler, read, metahyper_run
from .api import ConfigResult, Sampler, metahyper_run, read
from .utils import instance_from_map
9 changes: 9 additions & 0 deletions neps/metahyper/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import inspect
import logging
import os
import shutil
import time
import warnings
Expand Down Expand Up @@ -273,6 +274,7 @@ def _check_max_evaluations(
continue_until_max_evaluation_completed,
):
logger.debug("Checking if max evaluations is reached")
# TODO: maybe not read everything again?
previous_results, pending_configs, pending_configs_free = read(
optimization_dir, serializer, logger
)
Expand All @@ -292,6 +294,9 @@ def _sample_config(optimization_dir, sampler, serializer, logger, pre_load_hooks
)

base_result_directory = optimization_dir / "results"
logger.debug(f"Previous results: {previous_results}")
logger.debug(f"Pending configs: {pending_configs}")
logger.debug(f"Pending configs free: {pending_configs_free}")

logger.debug("Sampling a new configuration")

Expand All @@ -315,6 +320,9 @@ def _sample_config(optimization_dir, sampler, serializer, logger, pre_load_hooks
"communication, but most likely some configs crashed during their execution "
"or a jobtime-limit was reached."
)
# write some extra data per configuration if the optimizer has any
# if hasattr(sampler, "evaluation_data"):
# sampler.evaluation_data.write_all(pipeline_directory)

if previous_config_id is not None:
previous_config_id_file = pipeline_directory / "previous_config.id"
Expand Down Expand Up @@ -452,6 +460,7 @@ def metahyper_run(
)

evaluations_in_this_run = 0

while True:
if max_evaluations_total is not None and _check_max_evaluations(
optimization_dir,
Expand Down
2 changes: 1 addition & 1 deletion neps/optimizers/base_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def load_state(self, state: Any): # pylint: disable=no-self-use
super().load_state(state)

def load_config(self, config_dict):
config = deepcopy(self.pipeline_space)
config = self.pipeline_space.copy()
config.load_from(config_dict)
return config

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
from typing import Callable

from .ei import ComprehensiveExpectedImprovement
from .mf_ei import MFEI
from .ucb import UpperConfidenceBound, MF_UCB

from .mf_ei import MFEI, MFEI_AtMax, MFEI_Dyna, MFEI_Random
from .mf_pi import MFPI, MFPI_AtMax, MFPI_Dyna, MFPI_Random, MFPI_Random_HiT
from .mf_two_step import MF_TwoStep
from .mf_ucb import MF_UCB, MF_UCB_AtMax, MF_UCB_Dyna
from .ucb import UpperConfidenceBound

AcquisitionMapping: dict[str, Callable] = {
"EI": partial(
Expand Down Expand Up @@ -40,4 +42,73 @@
MF_UCB,
maximize=False,
),
"MFEI-max": partial(
MFEI_AtMax,
in_fill="best",
augmented_ei=False,
),
"MFEI-dyna": partial(
MFEI_Dyna,
in_fill="best",
augmented_ei=False,
),
"MFEI-random": partial(
MFEI_Random,
in_fill="best",
augmented_ei=False,
),
"MF-UCB-max": partial(
MF_UCB_AtMax,
maximize=False,
),
"MF-UCB-dyna": partial(
MF_UCB_Dyna,
maximize=False,
),
"MF_TwoStep": partial(
MF_TwoStep,
maximize=False,
),
"MFPI": partial(
MFPI,
in_fill="best",
augmented_ei=False,
),
"MFPI-max": partial(
MFPI_AtMax,
in_fill="best",
augmented_ei=False,
),
"MFPI-random-horizon": partial(
MFPI_Random,
in_fill="best",
augmented_ei=False,
horizon="random",
threshold="0.0",
),
"MFPI-dyna": partial(
MFPI_Dyna,
in_fill="best",
augmented_ei=False,
),
"MFPI-random": partial(
MFPI_Random,
in_fill="best",
augmented_ei=False,
),

# Further Potential Acquisition Functions

# "MFPI-thresh-max": partial(
# MFPI_Random,
# in_fill="best",
# augmented_ei=False,
# horizon="max",
# threshold="random",
# ),
# "MFPI-random-hit": partial(
# MFPI_Random_HiT,
# in_fill="best",
# augmented_ei=False,
# ),
}
Contributor:
For this file, not sure we should be bloating what is exposed to the main user with all possible acquisition functions exposed. What do you think?

Collaborator (Author):
Agree with the idea, but currently, I don't see any easy solutions without bloating the file. I commented out a few unnecessary ones. A few more can be commented out, but currently our main form of usage explicitly refers to this dict.
