@@ -53,83 +53,84 @@ def fit(
     best_loss = np.inf
     early_stopping_counter = 0
     epoch_loss = {}
-    with trange(
+    epoch_pbar = trange(
         start_epoch, n_epochs, desc="Epochs", ncols=100, disable=disable_pbar, leave=True
-    ) as epoch_pbar:
-        for epoch in range(start_epoch, n_epochs):
-            epoch_start_time = time.time()
-            callbacks.on_epoch_begin(epoch=epoch + 1)
-
-            epoch_loss.update({"train_loss": 0.0})
-            train_batch_metrics = Metrics.empty()
-
-            for batch_idx in range(train_steps_per_epoch):
-                callbacks.on_train_batch_begin(batch=batch_idx)
-
-                inputs, labels = next(batch_train_ds)
-                train_batch_metrics, batch_loss, state = train_step(
-                    state, inputs, labels, train_batch_metrics
-                )
-
-                epoch_loss["train_loss"] += batch_loss
-                callbacks.on_train_batch_end(batch=batch_idx)
-
-            epoch_loss["train_loss"] /= train_steps_per_epoch
-            epoch_loss["train_loss"] = float(epoch_loss["train_loss"])
-
-            epoch_metrics = {
-                f"train_{key}": float(val)
-                for key, val in train_batch_metrics.compute().items()
-            }
-
-            if val_ds is not None:
-                epoch_loss.update({"val_loss": 0.0})
-                val_batch_metrics = Metrics.empty()
-                for batch_idx in range(val_steps_per_epoch):
-                    inputs, labels = next(batch_val_ds)
-
-                    val_batch_metrics, batch_loss = val_step(
-                        state.params, inputs, labels, val_batch_metrics
-                    )
-                    epoch_loss["val_loss"] += batch_loss
-
-                epoch_loss["val_loss"] /= val_steps_per_epoch
-                epoch_loss["val_loss"] = float(epoch_loss["val_loss"])
+    )
+    for epoch in range(start_epoch, n_epochs):
+        epoch_start_time = time.time()
+        callbacks.on_epoch_begin(epoch=epoch + 1)
 
-                epoch_metrics.update(
-                    {
-                        f"val_{key}": float(val)
-                        for key, val in val_batch_metrics.compute().items()
-                    }
-                )
+        epoch_loss.update({"train_loss": 0.0})
+        train_batch_metrics = Metrics.empty()
 
-            epoch_metrics.update({**epoch_loss})
+        for batch_idx in range(train_steps_per_epoch):
+            callbacks.on_train_batch_begin(batch=batch_idx)
 
-            epoch_end_time = time.time()
-            epoch_metrics.update({"epoch_time": epoch_end_time - epoch_start_time})
+            inputs, labels = next(batch_train_ds)
+            train_batch_metrics, batch_loss, state = train_step(
+                state, inputs, labels, train_batch_metrics
+            )
 
-            ckpt = {"model": state, "epoch": epoch}
-            if epoch % ckpt_interval == 0:
-                ckpt_manager.save_checkpoint(ckpt, epoch, latest_dir)
+            epoch_loss["train_loss"] += batch_loss
+            callbacks.on_train_batch_end(batch=batch_idx)
 
-            if epoch_metrics["val_loss"] < best_loss:
-                best_loss = epoch_metrics["val_loss"]
-                ckpt_manager.save_checkpoint(ckpt, epoch, best_dir)
-                early_stopping_counter = 0
-            else:
-                early_stopping_counter += 1
+        epoch_loss["train_loss"] /= train_steps_per_epoch
+        epoch_loss["train_loss"] = float(epoch_loss["train_loss"])
 
-            callbacks.on_epoch_end(epoch=epoch, logs=epoch_metrics)
+        epoch_metrics = {
+            f"train_{key}": float(val)
+            for key, val in train_batch_metrics.compute().items()
+        }
 
-            epoch_pbar.set_postfix(val_loss=epoch_metrics["val_loss"])
-            epoch_pbar.update()
+        if val_ds is not None:
+            epoch_loss.update({"val_loss": 0.0})
+            val_batch_metrics = Metrics.empty()
+            for batch_idx in range(val_steps_per_epoch):
+                inputs, labels = next(batch_val_ds)
 
-            if patience is not None and early_stopping_counter >= patience:
-                log.info(
-                    "Early stopping patience exceeded. Stopping training after"
-                    f" {epoch} epochs."
+                val_batch_metrics, batch_loss = val_step(
+                    state.params, inputs, labels, val_batch_metrics
                 )
-                break
+                epoch_loss["val_loss"] += batch_loss
+
+            epoch_loss["val_loss"] /= val_steps_per_epoch
+            epoch_loss["val_loss"] = float(epoch_loss["val_loss"])
+
+            epoch_metrics.update(
+                {
+                    f"val_{key}": float(val)
+                    for key, val in val_batch_metrics.compute().items()
+                }
+            )
+
+        epoch_metrics.update({**epoch_loss})
+
+        epoch_end_time = time.time()
+        epoch_metrics.update({"epoch_time": epoch_end_time - epoch_start_time})
+
+        ckpt = {"model": state, "epoch": epoch}
+        if epoch % ckpt_interval == 0:
+            ckpt_manager.save_checkpoint(ckpt, epoch, latest_dir)
+
+        if epoch_metrics["val_loss"] < best_loss:
+            best_loss = epoch_metrics["val_loss"]
+            ckpt_manager.save_checkpoint(ckpt, epoch, best_dir)
+            early_stopping_counter = 0
+        else:
+            early_stopping_counter += 1
+
+        callbacks.on_epoch_end(epoch=epoch, logs=epoch_metrics)
+
+        epoch_pbar.set_postfix(val_loss=epoch_metrics["val_loss"])
+        epoch_pbar.update()
+
+        if patience is not None and early_stopping_counter >= patience:
+            log.info(
+                "Early stopping patience exceeded. Stopping training after"
+                f" {epoch} epochs."
+            )
+            break
+    epoch_pbar.close()
     callbacks.on_train_end()
 
 
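The diff drops the `with trange(...)` context manager in favour of an explicitly created progress bar, which removes one indentation level from the epoch loop and requires the `epoch_pbar.close()` call added after the loop. Below is a minimal, self-contained sketch of that tqdm pattern; the function name, the early-stopping counter, and the placeholder metric are illustrative only and are not taken from the repository.

from typing import Optional

from tqdm import trange

def run_epochs(n_epochs: int, patience: Optional[int] = None) -> None:
    # Create the bar explicitly instead of `with trange(...) as pbar:` so the
    # loop body sits one level shallower; the bar must then be closed manually.
    pbar = trange(n_epochs, desc="Epochs", ncols=100, leave=True)
    stale_epochs = 0  # hypothetical early-stopping counter for this sketch
    for epoch in range(n_epochs):
        val_loss = 1.0 / (epoch + 1)  # placeholder metric for illustration
        pbar.set_postfix(val_loss=val_loss)
        pbar.update()
        if patience is not None and stale_epochs >= patience:
            break  # leaving the loop early still reaches pbar.close() below
    pbar.close()  # mirrors the epoch_pbar.close() added at the end of fit()

run_epochs(5)

Functionally the two forms are equivalent as long as `close()` is reached; the explicit form mainly trades the context manager's automatic cleanup for a flatter loop body.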