Merge branch '15-regression-tests' into 130-unit-conversion-is-broken

apax-hub · Oct 3, 2023 · 74a9901 · 74a9901
2 parents 39c34dc + b01a277
commit 74a9901
Show file tree

Hide file tree

Showing 8 changed files with 2,073 additions and 1,989 deletions.
diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml
@@ -28,7 +28,7 @@ jobs:
 
     - name: Unit Tests
       run: |
-        poetry run coverage run -m pytest tests
+        poetry run coverage run -m pytest -k "not slow"
         poetry run coverage report
 
     - name: Coverage Report

diff --git a/apax/utils/convert.py b/apax/utils/convert.py
@@ -109,7 +109,6 @@ def atoms_to_arrays(
 
         inputs["ragged"]["numbers"].append(atoms.numbers)
         inputs["fixed"]["n_atoms"].append(len(atoms))
-
         for key, val in atoms.calc.results.items():
             if key == "forces":
                 labels["ragged"][key].append(

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -43,6 +43,7 @@ pre-commit = "^2.20.0"
 isort = "^5.10.1"
 flake8 = "^5.0.4"
 sphinx-autodoc-typehints = "^1.21.8"
+pandas = "^2.1.1"
 
 [build-system]
 requires = ["poetry-core"]

diff --git a/pytest.ini b/pytest.ini
@@ -0,0 +1,3 @@
+[pytest]
+markers =
+    slow: mark a test as slow; it should only be run explicitly
diff --git a/tests/regression_tests/__init__.py b/tests/regression_tests/__init__.py
diff --git a/tests/regression_tests/apax_config.yaml b/tests/regression_tests/apax_config.yaml
@@ -0,0 +1,89 @@
+n_epochs: 1000
+seed: 0
+
+data:
+  directory: models/
+  experiment: test
+
+  data_path: <PATH>
+
+  n_train: 1000
+  n_valid: 100
+
+  batch_size: 8
+  valid_batch_size: 100
+
+  shift_method: "per_element_regression_shift"
+  shift_options: {"energy_regularisation": 1.0}
+  shuffle_buffer_size: 1000
+
+  pos_unit: Ang
+  energy_unit: eV
+
+model:
+  n_basis: 7
+  n_radial: 5
+  nn: [512, 512]
+
+  r_max: 6.5
+  r_min: 0.5
+
+  calc_stress: false
+  use_zbl: true
+
+  b_init: normal
+  descriptor_dtype: fp32
+  readout_dtype: fp32
+  scale_shift_dtype: fp32
+
+metrics:
+  - name: energy
+    reductions:
+    - mae
+  - name: forces
+    reductions:
+    - mae
+    - mse
+  # - name: stress
+  #   reductions:
+  #   - mae
+  #   - mse
+
+loss:
+  - loss_type: structures
+    name: energy
+    weight: 1.0
+  - loss_type: structures
+    name: forces
+    weight: 8.0
+  - loss_type: cosine_sim
+    name: forces
+    weight: 0.1
+  # - loss_type: structures
+  #   name: stress
+  #   weight: 1.0
+
+optimizer:
+  opt_name: adam
+  opt_kwargs: {}
+  emb_lr: 0.003
+  nn_lr: 0.002
+  scale_lr: 0.0005
+  shift_lr: 0.025
+  zbl_lr: 0.001
+  transition_begin: 0
+
+callbacks:
+- name: csv
+
+checkpoints:
+  ckpt_interval: 1
+  # The options below are used for transfer learning
+  # base_model_checkpoint: null
+  # reset_layers: []
+
+progress_bar:
+  disable_epoch_pbar: false
+  disable_nl_pbar: false
+
+maximize_l2_cache: false
diff --git a/tests/regression_tests/test_apax_training.py b/tests/regression_tests/test_apax_training.py
@@ -0,0 +1,83 @@
+import os
+import pathlib
+import urllib
+import zipfile
+
+import numpy as np
+import pandas as pd
+import pytest
+import yaml
+
+from apax.train.run import run
+
+TEST_PATH = pathlib.Path(__file__).parent.resolve()
+MD22_STACHYOSE_URL = "http://www.quantum-machine.org/gdml/repo/static/md22_stachyose.zip"
+
+
+def download_and_extract_data(data_path, filename, url, file_format):
+    """Download a zip archive and unpack it next to the downloaded file.
+
+    Parameters
+    ----------
+    data_path : pathlib.Path
+        Directory that receives the archive and its extracted contents.
+        It is created if it does not exist yet.
+    filename : str
+        Name under which the downloaded archive is stored in ``data_path``.
+    url : str
+        Location the archive is fetched from.
+    file_format : str
+        Suffix (without the leading dot) of the data file of interest.
+
+    Returns
+    -------
+    pathlib.Path
+        Path of the archive with its suffix replaced by ``file_format``.
+        Nothing here checks that the archive really contains such a file.
+    """
+    # A bare ``import urllib`` does not load the ``request`` submodule, so
+    # import it explicitly instead of relying on some other module having
+    # already done so.
+    import urllib.request
+
+    file_path = data_path / filename
+
+    os.makedirs(data_path, exist_ok=True)
+    urllib.request.urlretrieve(url, file_path)
+
+    with zipfile.ZipFile(file_path, "r") as zip_ref:
+        zip_ref.extractall(data_path)
+
+    return file_path.with_suffix(f".{file_format}")
+
+
+def modify_xyz_file(file_path, target_string, replacement_string):
+    """Write a copy of ``file_path`` with a substring replaced everywhere.
+
+    The copy is stored beside the original as ``<stem>_mod<suffix>``; the
+    original file is only read.
+
+    Parameters
+    ----------
+    file_path : pathlib.Path
+        File to read.
+    target_string : str
+        Text to search for in every line.
+    replacement_string : str
+        Text written in place of each occurrence of ``target_string``.
+
+    Returns
+    -------
+    pathlib.Path
+        Path of the newly written file.
+    """
+    modified_path = file_path.with_name(f"{file_path.stem}_mod{file_path.suffix}")
+
+    with open(file_path, "r") as source, open(modified_path, "w") as sink:
+        # Stream line by line so the whole file is never held in memory.
+        sink.writelines(
+            row.replace(target_string, replacement_string) for row in source
+        )
+
+    return modified_path
+
+
+def load_config_and_run_training(
+    config_path, file_path, working_dir, energy_unit="eV", pos_unit="Ang"
+):
+    """Load a YAML training config, point it at the test data and train.
+
+    Parameters
+    ----------
+    config_path : pathlib.Path
+        YAML file holding the base configuration.
+    file_path : pathlib.Path
+        Dataset file; stored under ``data.data_path``.
+    working_dir : pathlib.Path
+        Output directory; stored under ``data.directory``.
+    energy_unit : str
+        Value stored under ``data.energy_unit``.
+    pos_unit : str
+        Value stored under ``data.pos_unit``.
+    """
+    with open(config_path.as_posix(), "r") as config_file:
+        settings = yaml.safe_load(config_file)
+
+    # Override only the entries that depend on the test environment; every
+    # other setting is taken from the YAML file unchanged.
+    settings["data"].update(
+        directory=working_dir.as_posix(),
+        data_path=file_path.as_posix(),
+        energy_unit=energy_unit,
+        pos_unit=pos_unit,
+    )
+
+    run(settings)
+
+
+@pytest.mark.slow
+def test_regression_model_training(get_tmp_path):
+    """Train on MD22 stachyose and compare final metrics to stored values.
+
+    The dataset is downloaded into the temporary directory, training runs
+    with the config stored beside this module, and the last row of the CSV
+    log has to match each reference metric to a relative tolerance of 1e-4.
+    """
+    working_dir = get_tmp_path
+    config_path = TEST_PATH / "apax_config.yaml"
+
+    xyz_path = download_and_extract_data(
+        working_dir / "data", "md22_stachyose.zip", MD22_STACHYOSE_URL, "xyz"
+    )
+    # NOTE(review): presumably the reader expects a lowercase "energy" key
+    # in the xyz file - confirm.
+    xyz_path = modify_xyz_file(
+        xyz_path, target_string="Energy", replacement_string="energy"
+    )
+
+    load_config_and_run_training(config_path, xyz_path, working_dir, "kcal/mol")
+
+    current_metrics = pd.read_csv(working_dir / "test/log.csv")
+
+    reference_metrics = {
+        "val_energy_mae": 0.2048215700433502,
+        "val_forces_mae": 0.054957914591049,
+        "val_forces_mse": 0.0056583952479869,
+        "val_loss": 0.1395589689994847,
+    }
+
+    for name, expected in reference_metrics.items():
+        # Only the last logged row is compared.
+        final_value = np.array(current_metrics[name])[-1]
+        relative_deviation = abs((final_value / expected) - 1)
+        assert relative_deviation < 1e-4