Update job_name and out_dir to match actual new pred_file locations in `test_<model>_discovery.py` scripts (#204)

* rename Model.chgnet to Model.chgnet_030 and Model.m3gnet to Model.m3gnet_ms

- update job_name and out_dir to match actual new pred_file locations in test_discovery.py scripts
- restore and test __repr__ and __str__ methods on Files enum

* fix tests/test_cli.py and add site/tests/table-column-toggle-menu.test.ts

delete accidentally committed site/src/lib/ScatterPlot.svelte (originally from elementari)

* fix MetricsTable.svelte rendering missing metrics as `undefined` when it should show 'n/a'
janosh authored Feb 9, 2025
1 parent 2f7ea53 commit 9fa5baa
Showing 39 changed files with 269 additions and 377 deletions.
18 changes: 12 additions & 6 deletions matbench_discovery/enums.py
@@ -216,6 +216,14 @@ def __new__(cls, val: str, file_path: str) -> Self:
obj.__dict__ |= dict(file_path=file_path)
return obj

def __repr__(self) -> str:
"""String representation of the file."""
return f"{type(self).__name__}.{self.name}"

def __str__(self) -> str:
"""String representation of the file."""
return self.name

@property
@abc.abstractmethod
def url(self) -> str:
@@ -252,15 +260,13 @@ class Model(Files, base_dir=f"{ROOT}/models"):
"""

alignn = auto(), "alignn/alignn.yml"
# alignn_pretrained = auto(), "alignn/alignn.yml"
# alignn_ff = auto(), "alignn/alignn-ff.yml"

# BOWSR optimizer coupled with original megnet
bowsr_megnet = auto(), "bowsr/bowsr.yml"

# default CHGNet model from publication with 400,438 params
chgnet = auto(), "chgnet/chgnet.yml"
# chgnet_no_relax = auto(), None, "CHGNet No Relax"
chgnet_030 = auto(), "chgnet/chgnet-0.3.0.yml"

# CGCNN 10-member ensemble
cgcnn = auto(), "cgcnn/cgcnn.yml"
@@ -273,9 +279,9 @@ class Model(Files, base_dir=f"{ROOT}/models"):
dpa3_v1_openlam = auto(), "deepmd/dpa3-v1-openlam.yml"

# original M3GNet straight from publication, not re-trained
m3gnet = auto(), "m3gnet/m3gnet.yml"
# m3gnet_direct = auto(), None, "M3GNet DIRECT"
# m3gnet_ms = auto(), None, "M3GNet MS"
m3gnet_ms = auto(), "m3gnet/m3gnet.yml"
# m3gnet_direct = auto(), "M3GNet DIRECT"
# m3gnet_ms = auto(), "M3GNet MS"

# MACE-MP-0 medium as published in https://arxiv.org/abs/2401.00096 trained on MPtrj
mace_mp_0 = auto(), "mace/mace-mp-0.yml"
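Note: the restored dunder methods make enum members render as their bare member name in f-strings, which the updated `job_name` definitions in the scripts below rely on. A minimal sketch of the resulting behavior (inferred from the two methods above, not part of the diff):

from matbench_discovery.enums import Model

print(repr(Model.alignn))  # "Model.alignn" -- __repr__ is f"{type(self).__name__}.{self.name}"
print(str(Model.alignn))  # "alignn" -- __str__ returns self.name
print(f"{Model.alignn}-mp-e_form")  # "alignn-mp-e_form" -- f-strings call __str__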
Binary file removed models/alignn/2023-06-02-alignn-wbm-IS2RE.csv.gz
54 changes: 0 additions & 54 deletions models/alignn/alignn-config.json

This file was deleted.

File renamed without changes.
8 changes: 4 additions & 4 deletions models/alignn/alignn.yml
@@ -43,14 +43,14 @@ model_params: 4_026_753 # pre-trained 'mp_e_form_alignn' and our custom MBD chec
n_estimators: 1
pr_url: https://github.com/janosh/matbench-discovery/pull/85

# model trained from specifically for MBD
# model trained on Materials Project energies specifically for this submission
training_set: [MP 2022]

metrics:
phonons: not available # model doesn't predict forces
geo_opt: not available
phonons: not applicable # reason: ALIGNN does not predict forces
geo_opt: not applicable # reason: ALIGNN does not predict forces
discovery:
pred_file: models/alignn/2023-06-02-alignn-wbm-IS2RE.csv.gz
pred_file: models/alignn/alignn-mp22/2023-06-02-wbm-IS2RE.csv.gz
pred_file_url: https://figshare.com/ndownloader/files/51607262
pred_col: e_form_per_atom_alignn
full_test_set:
2 changes: 1 addition & 1 deletion models/alignn/readme.md
@@ -21,4 +21,4 @@ Replace `/path/to/` with the actual path to the patch file.
The directory contains the following files, which must be executed in the given order to reproduce the results:

1. [`train_alignn.py`](train_alignn.py): Train an ALIGNN model on all 154k MP computed structure entries. The resulting model checkpoint is saved to the `out_dir` variable in that script and also uploaded to `wandb` from where it is publicly available for 3rd party reproducibility.
1. [`test_alignn.py`](test_alignn.py): Test a trained ALIGNN model on the WBM data. Generated `2023-06-03-mp-e-form-alignn-wbm-IS2RE.csv.gz`.
1. [`test_alignn_discovery.py`](test_alignn_discovery.py): Test a trained ALIGNN model on the WBM data. Generated `2023-06-03-mp-e-form-alignn-wbm-IS2RE.csv.gz`.
8 changes: 4 additions & 4 deletions models/alignn/test_alignn_discovery.py
@@ -18,7 +18,7 @@

from matbench_discovery import today
from matbench_discovery.data import df_wbm
from matbench_discovery.enums import DataFiles, MbdKey, Task
from matbench_discovery.enums import DataFiles, MbdKey, Model, Task
from matbench_discovery.hpc import slurm_submit
from matbench_discovery.plots import wandb_scatter

@@ -30,13 +30,13 @@

# %%
# model_name = "mp_e_form_alignn" # pre-trained by NIST (not used for MBD submission)
model_name = DataFiles.alignn_checkpoint.path # trained by Philipp Benner
model_name = Model.alignn # trained by Philipp Benner
task_type = Task.IS2RE
target_col = MbdKey.e_form_dft
input_col = Key.init_struct
device = "cuda" if torch.cuda.is_available() else "cpu"
job_name = f"{model_name}-wbm-{task_type}"
out_dir = os.getenv("SBATCH_OUTPUT", f"{module_dir}/{today}-{job_name}")
job_name = f"{model_name}/{today}-wbm-{task_type}"
out_dir = os.getenv("SBATCH_OUTPUT", f"{module_dir}/{job_name}")


if model_name in all_models: # load pre-trained model
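Note: with the new `__str__`, the reworked `job_name`/`out_dir` put predictions in a per-model subdirectory whose name matches the updated `pred_file` entries in the model YAML files. A short illustration (the date and `module_dir` values are placeholders; the real script uses `matbench_discovery.today` and `os.path.dirname(__file__)`):

import os

from matbench_discovery.enums import Model, Task

today = "2023-06-02"  # placeholder date
module_dir = "models/alignn"  # placeholder for os.path.dirname(__file__)

job_name = f"{Model.alignn}/{today}-wbm-{Task.IS2RE}"  # -> "alignn/2023-06-02-wbm-IS2RE"
out_dir = os.getenv("SBATCH_OUTPUT", f"{module_dir}/{job_name}")
print(out_dir)  # models/alignn/alignn/2023-06-02-wbm-IS2RE, unless SBATCH_OUTPUT is set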
10 changes: 4 additions & 6 deletions models/alignn/train_alignn.py
@@ -18,7 +18,7 @@
from tqdm import tqdm

from matbench_discovery import today
from matbench_discovery.enums import DataFiles
from matbench_discovery.enums import DataFiles, Model
from matbench_discovery.hpc import slurm_submit

__author__ = "Philipp Benner, Janosh Riebesell"
@@ -28,20 +28,18 @@


# %%
model_name = "alignn-mp-e_form"
model_name = f"{Model.alignn}-mp-e_form"
target_col = Key.form_energy
input_col = "atoms"
device = "cuda" if torch.cuda.is_available() else "cpu"
job_name = f"train-{model_name}"
job_name = f"{today}-train-{model_name}"


pred_col = "e_form_per_atom_alignn"
with open(f"{module_dir}/alignn-config.json") as file:
config = TrainingConfig(**json.load(file))

config.output_dir = out_dir = os.getenv(
"SBATCH_OUTPUT", f"{module_dir}/{today}-{job_name}"
)
config.output_dir = out_dir = os.getenv("SBATCH_OUTPUT", f"{module_dir}/{job_name}")

slurm_vars = slurm_submit(
job_name=job_name,
4 changes: 2 additions & 2 deletions models/alignn_ff/readme.md
@@ -35,5 +35,5 @@ OSError: [Errno 24] Too many open files

## Scripts

1. `alignn_ff_relax.py`: Relax WBM test set structures. Set the variable `n_splits` to the number of GPU compute nodes. On each compute node, set the environment variable `TASK_ID` to a value in the range 1-`n_splits`. Set the variable `n_processes_per_task` to the number of processes on a single node. For 48 CPU cores with 4 GPUs a good setting is to use 10 processes.
2. `test_alignn_ff.py`: Read the relaxed structures from `alignn_ff_relax.py` and make formation energy predictions. Set the variable `n_splits` accordingly.
1. [`alignn_ff_relax.py`](alignn_ff_relax.py): Relax WBM test set structures. Set the variable `n_splits` to the number of GPU compute nodes. On each compute node, set the environment variable `TASK_ID` to a value in the range 1-`n_splits`. Set the variable `n_processes_per_task` to the number of processes on a single node. For 48 CPU cores with 4 GPUs a good setting is to use 10 processes.
2. [`test_alignn_ff_discovery.py`](test_alignn_ff_discovery.py): Read the relaxed structures from `alignn_ff_relax.py` and make formation energy predictions. Set the variable `n_splits` accordingly.
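A hypothetical sketch of the per-node split selection described above -- `TASK_ID`, `n_splits`, and the process count come from the readme, but the selection logic shown is assumed, not taken from the scripts:

import os

n_splits = 4  # number of GPU compute nodes, per the readme
task_id = int(os.environ["TASK_ID"])  # set to a value in 1..n_splits on each node
assert 1 <= task_id <= n_splits, f"{task_id=} out of range 1..{n_splits}"

structures = list(range(1000))  # stand-in for the WBM test set structures
node_share = structures[task_id - 1 :: n_splits]  # one plausible way to shard work across nodes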
18 changes: 5 additions & 13 deletions models/alignn_ff/test_alignn_ff_discovery.py
@@ -19,7 +19,7 @@

from matbench_discovery import today
from matbench_discovery.data import df_wbm
from matbench_discovery.enums import DataFiles, MbdKey, Task
from matbench_discovery.enums import DataFiles, MbdKey, Model, Task
from matbench_discovery.plots import wandb_scatter

__author__ = "Philipp Benner, Janosh Riebesell"
@@ -33,10 +33,9 @@
# model_name = "mp_e_form_alignnn" # pre-trained by NIST
task_type = Task.IS2RE
device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = f"alignn-ff-wbm-{task_type}"
job_name = f"{model_name}-relaxed-wbm-{task_type}"
out_dir = os.getenv("SBATCH_OUTPUT", f"{module_dir}/{today}-{job_name}")
in_dir = os.getenv("SBATCH_OUTPUT", f"{module_dir}/{today}-{job_name}")
model_name = f"{Model.alignn}-ff"
job_name = f"{model_name}/{today}-wbm-{task_type}"
out_dir = os.getenv("SBATCH_OUTPUT", f"{module_dir}/{job_name}")


if model_name in all_models: # load pre-trained model
@@ -101,14 +100,7 @@
df_wbm[pred_col] -= df_wbm.e_correction_per_atom_mp_legacy
df_wbm[pred_col] += df_wbm.e_correction_per_atom_mp2020

if model_name in all_models:
df_wbm[pred_col].round(4).to_csv(
f"{module_dir}/{today}-{model_name}-relaxed-wbm-IS2RE.csv.gz"
)
else:
df_wbm[pred_col].round(4).to_csv(
f"{module_dir}/{today}-alignn-relaxed-wbm-IS2RE.csv.gz"
)
df_wbm[pred_col].round(4).to_csv(f"{out_dir}/wbm-IS2RE.csv.gz")


# %%
16 changes: 8 additions & 8 deletions models/bowsr/test_bowsr_discovery.py
@@ -1,3 +1,8 @@
"""To slurm submit this file: python path/to/file.py slurm-submit
Requires MEGNet and MAML installation: pip install megnet maml
https://github.com/materialsvirtuallab/maml
"""

# %%
import contextlib
import os
@@ -21,11 +26,6 @@
__author__ = "Janosh Riebesell"
__date__ = "2022-08-15"

"""
To slurm submit this file: python path/to/file.py slurm-submit
Requires MEGNet and MAML installation: pip install megnet maml
https://github.com/materialsvirtuallab/maml
"""

task_type = Task.IS2RE
module_dir = os.path.dirname(__file__)
@@ -34,9 +34,9 @@
# see https://stackoverflow.com/a/55431306 for how to change array throttling
# post submission
slurm_max_parallel = 100
energy_model = Model.megnet.label.lower()
job_name = f"bowsr-{energy_model}-wbm-{task_type}"
out_dir = os.getenv("SBATCH_OUTPUT", f"{module_dir}/{today}-{job_name}")
energy_model = Model.megnet
job_name = f"bowsr-{energy_model}/{today}-wbm-{task_type}"
out_dir = os.getenv("SBATCH_OUTPUT", f"{module_dir}/{job_name}")

data_path = {
Task.IS2RE: DataFiles.wbm_initial_structures.path,
18 changes: 10 additions & 8 deletions models/cgcnn/test_cgcnn_discovery.py
@@ -1,3 +1,9 @@
"""
Download WandB checkpoints for an ensemble of CGCNN models trained on all MP
formation energies, then make predictions on some dataset, prints ensemble metrics and
saves predictions to CSV.
"""

# %%
import os
from importlib.metadata import version
@@ -15,24 +21,20 @@

from matbench_discovery import CHECKPOINT_DIR, WANDB_PATH, WBM_DIR, today
from matbench_discovery.data import df_wbm
from matbench_discovery.enums import DataFiles, MbdKey, Task
from matbench_discovery.enums import DataFiles, MbdKey, Model, Task
from matbench_discovery.hpc import slurm_submit
from matbench_discovery.plots import wandb_scatter

__author__ = "Janosh Riebesell"
__date__ = "2022-08-15"

"""
Download WandB checkpoints for an ensemble of CGCNN models trained on all MP
formation energies, then make predictions on some dataset, prints ensemble metrics and
saves predictions to CSV.
"""

task_type = Task.IS2RE
debug = False
job_name = f"test-cgcnn-wbm-{task_type}"
model_name = Model.cgcnn # or Model.cgcnn_p
job_name = f"{model_name}/{today}-wbm-{task_type}"
module_dir = os.path.dirname(__file__)
out_dir = os.getenv("SBATCH_OUTPUT", f"{module_dir}/{today}-{job_name}")
out_dir = os.getenv("SBATCH_OUTPUT", f"{module_dir}/{job_name}")

slurm_vars = slurm_submit(
job_name=job_name,
10 changes: 4 additions & 6 deletions models/cgcnn/train_cgcnn.py
@@ -1,3 +1,5 @@
"""Train a CGCNN ensemble on target_col of data_path."""

# %%
import os
from importlib.metadata import version
@@ -17,10 +19,6 @@
from matbench_discovery.hpc import slurm_submit
from matbench_discovery.structure import perturb_structure

"""
Train a CGCNN ensemble on target_col of data_path.
"""

__author__ = "Janosh Riebesell"
__date__ = "2022-06-13"

@@ -32,12 +30,12 @@
# 0 for no perturbation, n>1 means train on n perturbations of each crystal
# in the training set all assigned the same original target energy
n_perturb = 0
job_name = f"train-cgcnn-robust-{n_perturb=}"
job_name = f"{today}-train-cgcnn-robust-{n_perturb=}"
print(f"{job_name=}")
robust = "robust" in job_name.lower()
ensemble_size = 10
module_dir = os.path.dirname(__file__)
out_dir = os.getenv("SBATCH_OUTPUT", f"{module_dir}/{today}-{job_name}")
out_dir = os.getenv("SBATCH_OUTPUT", f"{module_dir}/{job_name}")

slurm_vars = slurm_submit(
job_name=job_name,
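For context on the `n_perturb` flag above, a hypothetical sketch of the augmentation it controls -- `perturb_structure` is the real import from `matbench_discovery.structure`, but its signature, the loop, and whether the unperturbed original is kept are all assumed:

from matbench_discovery.structure import perturb_structure

structures, targets = [], []  # placeholders for training crystals and their energies
n_perturb = 2  # each crystal contributes n_perturb perturbed copies

train_samples = []
for struct, target in zip(structures, targets):
    train_samples.append((struct, target))  # keep the original (assumption)
    for _ in range(n_perturb):
        # perturbed copies are assigned the same original target energy, per the comment above
        train_samples.append((perturb_structure(struct), target))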
2 changes: 1 addition & 1 deletion models/chgnet/analyze_chgnet.py
@@ -20,7 +20,7 @@


# %%
df_chgnet = df_chgnet_v030 = pd.read_csv(Model.chgnet.discovery_path)
df_chgnet = df_chgnet_v030 = pd.read_csv(Model.chgnet_030.discovery_path)
df_chgnet_v020 = pd.read_csv(
f"{module_dir}/2023-03-06-chgnet-0.2.0-wbm-IS2RE.csv.gz", index_col=Key.mat_id
)
File renamed without changes.
6 changes: 3 additions & 3 deletions models/chgnet/ctk_structure_viewer.py
@@ -18,14 +18,14 @@
e_form_2000 = "e_form_per_atom_chgnet_2000"
e_form_500 = "e_form_per_atom_chgnet_500"

df_chgnet = pd.read_json(Model.chgnet.geo_opt_path)
df_chgnet = pd.read_json(Model.chgnet_030.geo_opt_path)
df_chgnet = df_chgnet.set_index(Key.mat_id)

df_chgnet_2000 = pd.read_csv(Model.chgnet.discovery_path)
df_chgnet_2000 = pd.read_csv(Model.chgnet_030.discovery_path)
df_chgnet_2000 = df_chgnet_2000.set_index(Key.mat_id).add_suffix("_2000")
df_chgnet[list(df_chgnet_2000)] = df_chgnet_2000

df_chgnet_500 = pd.read_csv(Model.chgnet.discovery_path.replace("-06", "-04"))
df_chgnet_500 = pd.read_csv(Model.chgnet_030.discovery_path.replace("-06", "-04"))
df_chgnet_500 = df_chgnet_500.set_index(Key.mat_id).add_suffix("_500")
df_chgnet[list(df_chgnet_500)] = df_chgnet_500

8 changes: 4 additions & 4 deletions models/chgnet/test_chgnet_discovery.py
@@ -21,7 +21,7 @@

from matbench_discovery import timestamp, today
from matbench_discovery.data import as_dict_handler, df_wbm
from matbench_discovery.enums import DataFiles, MbdKey, Task
from matbench_discovery.enums import DataFiles, MbdKey, Model, Task
from matbench_discovery.hpc import slurm_submit
from matbench_discovery.plots import wandb_scatter

@@ -33,9 +33,9 @@
# set large job array size for smaller data splits and faster testing/debugging
slurm_array_task_count = 50
device = "cuda" if torch.cuda.is_available() else "cpu"
chgnet = StructOptimizer(use_device=device) # load default pre-trained CHGNnet model
job_name = f"chgnet-{chgnet.version}-wbm-{task_type}"
out_dir = os.getenv("SBATCH_OUTPUT", f"{module_dir}/{today}-{job_name}")
chgnet = StructOptimizer(use_device=device) # load default pre-trained CHGNet model
job_name = f"{Model.chgnet_030}/{today}-wbm-{task_type}"
out_dir = os.getenv("SBATCH_OUTPUT", f"{module_dir}/{job_name}")

slurm_vars = slurm_submit(
job_name=job_name,
4 changes: 2 additions & 2 deletions models/deepmd/join_dpa3_preds.py
@@ -16,11 +16,11 @@

from matbench_discovery.data import df_wbm
from matbench_discovery.energy import get_e_form_per_atom, mp_elemental_ref_energies
from matbench_discovery.enums import DataFiles
from matbench_discovery.enums import DataFiles, Model

e_form_dp_col = "e_form_per_atom_dp"
results = "./results"
model_name = "dpa3"
model_name = Model.dpa3_v1_mptrj # or Model.dpa3_v1_openlam
module_dir = os.path.dirname(__file__)
out_path = f"{module_dir}/{model_name}"
files = sorted(glob(f"{results}/{model_name}-*.json.gz"))
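Since `model_name` is now an enum member, the glob pattern above resolves through the new `__str__` to the bare member name. A small illustration (the per-split file naming scheme is assumed):

from glob import glob

from matbench_discovery.enums import Model

model_name = Model.dpa3_v1_mptrj  # renders as "dpa3_v1_mptrj" in f-strings
files = sorted(glob(f"./results/{model_name}-*.json.gz"))
# would match files like ./results/dpa3_v1_mptrj-000.json.gz (naming assumed)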
File renamed without changes.