janosh · janosh · Dec 16, 2024 · Dec 14, 2024 · Dec 14, 2024 · Dec 14, 2024
diff --git a/matbench_discovery/data.py b/matbench_discovery/data.py
@@ -391,7 +391,8 @@ class Model(Files, base_dir=f"{ROOT}/models"):
     # m3gnet_ms = None, "M3GNet MS"
 
     # MACE-MP-0 medium as published in https://arxiv.org/abs/2401.00096 trained on MPtrj
-    mace = "mace/mace.yml"
+    mace = "mace/mace-mp-0.yml"
+    mace_mpa_0 = "mace/mace-mpa-0.yml"  # trained on MPtrj and Alexandria
 
     # original MEGNet straight from publication, not re-trained
     megnet = "megnet/megnet.yml"

diff --git a/models/mace/2023-07-22-mace-wbm-IS2RE.csv.gz b/models/mace/2023-07-22-mace-wbm-IS2RE.csv.gz
diff --git a/models/mace/2023-08-14-mace-wbm-IS2RE-FIRE.csv.gz b/models/mace/2023-08-14-mace-wbm-IS2RE-FIRE.csv.gz
diff --git a/models/mace/2023-09-02-mace-wbm-IS2RE-FIRE.csv.gz b/models/mace/2023-09-02-mace-wbm-IS2RE-FIRE.csv.gz
diff --git a/models/mace/2023-11-02-mace-wbm-IS2RE-FIRE.csv.gz b/models/mace/2023-11-02-mace-wbm-IS2RE-FIRE.csv.gz
diff --git a/models/mace/join_mace_preds.py b/models/mace/join_mace_preds.py
@@ -14,7 +14,7 @@
 from tqdm import tqdm
 
 from matbench_discovery.data import DataFiles, as_dict_handler, df_wbm
-from matbench_discovery.energy import get_e_form_per_atom
+from matbench_discovery.energy import calc_energy_from_e_refs, mp_elemental_ref_energies
 from matbench_discovery.enums import MbdKey, Task
 
 __author__ = "Janosh Riebesell"
@@ -24,11 +24,12 @@
 # %%
 module_dir = os.path.dirname(__file__)
 task_type = Task.IS2RE
-e_form_mace_col = "e_form_per_atom_mace"
-date = "2024-07-20"
+date = "2024-12-09"
 glob_pattern = f"{date}-mace-wbm-{task_type}*/*.json.gz"
 file_paths = sorted(glob(f"{module_dir}/{glob_pattern}"))
 print(f"Found {len(file_paths):,} files for {glob_pattern = }")
+
+e_form_mace_col = "e_form_per_atom_mace"
 struct_col = "mace_structure"
 
 dfs: dict[str, pd.DataFrame] = {}
@@ -78,13 +79,18 @@
 
 # %% compute corrected formation energies
 df_mace[Key.formula] = df_wbm[Key.formula]
-df_mace[e_form_mace_col] = [
-    get_e_form_per_atom(dict(energy=cse.energy, composition=formula))
-    for formula, cse in tqdm(
-        df_mace.set_index(Key.formula)[Key.computed_structure_entry].items(),
-        total=len(df_mace),
+
+print("Calculating formation energies")
+e_form_list: dict[str, float] = {}  # NOTE: a dict despite the name; keyed by the df_mace index labels from iterrows()
+for mat_id, row in tqdm(df_mace.iterrows(), total=len(df_mace)):
+    e_form = calc_energy_from_e_refs(
+        row["formula"],  # NOTE(review): column is written via Key.formula above; presumably the same label — confirm
+        ref_energies=mp_elemental_ref_energies,
+        total_energy=row[Key.computed_structure_entry].energy,
     )
-]
+    e_form_list[mat_id] = e_form
+
+df_mace[e_form_mace_col] = e_form_list  # presumably pandas aligns the dict on the index — TODO confirm
 df_wbm[[*df_mace]] = df_mace
 
 

diff --git a/models/mace/mace.yml → models/mace/mace-mp-0.yml b/models/mace/mace.yml → models/mace/mace-mp-0.yml
@@ -1,5 +1,5 @@
-model_name: MACE
-model_key: mace
+model_name: MACE-MP 0
+model_key: mace-mp-0
 model_version: v0.3.5
 matbench_discovery_version: 1.0.0
 date_added: "2023-07-14"
@@ -64,13 +64,13 @@ hyperparams:
 notes:
   Description: |
     MACE is a higher-order equivariant message-passing neural network for fast and accurate force fields.
-  Training: Using pre-trained 'medium' model released with paper.
+  Training: The Matbench Discovery submission uses the same MPtrj-trained 'medium' checkpoint used for all analysis in the MACE-MP preprint.
 
 metrics:
   phonons:
     κ_SRME: 0.647
   geo_opt:
-    pred_file: models/mace/2023-12-11-mace-wbm-IS2RE-FIRE.json.gz
+    pred_file: models/mace/mace-mp-0/2023-12-11-mace-mp-0-wbm-IS2RE-FIRE.json.gz
     pred_col: mace_structure
     symprec=1e-5:
       rmsd: 0.0194 # Å
@@ -87,7 +87,7 @@ metrics:
       symmetry_increase: 0.1353 # fraction
       n_structures: 249034 # count
   discovery:
-    pred_file: models/mace/2023-12-11-mace-wbm-IS2RE-FIRE.csv.gz
+    pred_file: models/mace/mace-mp-0/2023-12-11-mace-mp-0-wbm-IS2RE-FIRE.csv.gz
     pred_col: e_form_per_atom_mace
     full_test_set:
       F1: 0.668 # fraction

diff --git a/...ace/2023-12-11-mace-wbm-IS2RE-FIRE.csv.gz → ...023-12-11-mace-mp-0-wbm-IS2RE-FIRE.csv.gz b/...ace/2023-12-11-mace-wbm-IS2RE-FIRE.csv.gz → ...023-12-11-mace-mp-0-wbm-IS2RE-FIRE.csv.gz
diff --git a/models/mace/mace-mpa-0.yml b/models/mace/mace-mpa-0.yml
@@ -0,0 +1,142 @@
+model_name: MACE-MPA 0
+model_key: mace-mpa-0
+model_version: v0.3.9
+matbench_discovery_version: 1.3.1
+date_added: "2024-12-09"
+date_published: "2024-12-09"
+authors:
+  - name: Ilyes Batatia
+    affiliation: University of Cambridge
+    email: ilyes.batatia@ens-paris-saclay.fr
+    orcid: https://orcid.org/0000-0001-6915-9851
+  - name: David P Kovacs
+    affiliation: University of Cambridge
+    orcid: https://orcid.org/0000-0002-0854-2635
+  - name: Gregor Simm
+    affiliation: University of Cambridge
+    orcid: https://orcid.org/0000-0001-6815-352X
+  - name: Christoph Ortner
+    affiliation: University of Cambridge
+    orcid: https://orcid.org/0000-0003-1498-8120
+  - name: Gabor Csanyi
+    affiliation: University of Cambridge
+    orcid: https://orcid.org/0000-0002-8180-2034
+trained_by:
+  - name: Ilyes Batatia
+    affiliation: University of Cambridge
+    email: ilyes.batatia@ens-paris-saclay.fr
+    orcid: https://orcid.org/0000-0001-6915-9851
+repo: https://github.com/ACEsuit/mace
+doi: https://doi.org/10.48550/arXiv.2401.00096
+paper: https://arxiv.org/abs/2401.00096
+pypi: https://pypi.org/project/mace-torch
+
+requirements:
+  mace-torch: 0.3.9
+  torch: 2.5.1
+  ase: 3.23.0
+  pymatgen: 2024.11.13
+  numpy: 2.2.0
+
+openness: OSCD
+trained_for_benchmark: false
+train_task: S2EFS
+test_task: IS2RE-SR
+targets: EFS_G
+model_type: UIP
+model_params: 9_063_204
+n_estimators: 1
+
+training_set: [MPtrj, Alex]
+
+hyperparams:
+  max_force: 0.05
+  max_steps: 500
+  ase_optimizer: FIRE
+  radial_cutoff: 6.0
+
+notes:
+  Description: |
+    MACE is a higher-order equivariant message-passing neural network for fast and accurate force fields.
+  Training: Using model pre-trained on MPtrj and Alexandria.
+
+metrics:
+  phonons:
+    κ_SRME: 0.412
+  geo_opt:
+    pred_file: models/mace/mace-mpa-0/2024-12-09-mace-mpa-0-wbm-IS2RE-FIRE.json.gz
+    pred_col: mace_structure
+    symprec=1e-2:
+      rmsd: 0.0142 # Å
+      n_sym_ops_mae: 1.807 # unitless
+      symmetry_decrease: 0.0555 # fraction
+      symmetry_match: 0.8144 # fraction
+      symmetry_increase: 0.1231 # fraction
+      n_structures: 256963 # count
+    symprec=1e-5:
+      rmsd: 0.0142 # Å
+      n_sym_ops_mae: 1.8808 # unitless
+      symmetry_decrease: 0.0328 # fraction
+      symmetry_match: 0.7324 # fraction
+      symmetry_increase: 0.2306 # fraction
+      n_structures: 256963 # count
+  discovery:
+    pred_file: models/mace/mace-mpa-0/2024-12-09-mace-mpa-0-wbm-IS2RE-FIRE.csv.gz
+    pred_col: e_form_per_atom_mace
+    full_test_set:
+      F1: 0.836 # fraction
+      DAF: 4.869 # dimensionless
+      Precision: 0.836 # fraction
+      Recall: 0.836 # fraction
+      Accuracy: 0.944 # fraction
+      TPR: 0.836 # fraction
+      FPR: 0.034 # fraction
+      TNR: 0.966 # fraction
+      FNR: 0.164 # fraction
+      TP: 36844.0 # count
+      FP: 7253.0 # count
+      TN: 205618.0 # count
+      FN: 7248.0 # count
+      MAE: 0.028 # eV/atom
+      RMSE: 0.073 # eV/atom
+      R2: 0.837 # dimensionless
+      missing_preds: 4 # count
+      missing_percent: 0.00% # fraction
+    most_stable_10k:
+      F1: 0.978 # fraction
+      DAF: 6.258 # dimensionless
+      Precision: 0.957 # fraction
+      Recall: 1.0 # fraction
+      Accuracy: 0.957 # fraction
+      TPR: 1.0 # fraction
+      FPR: 1.0 # fraction
+      TNR: 0.0 # fraction
+      FNR: 0.0 # fraction
+      TP: 9566.0 # count
+      FP: 434.0 # count
+      TN: 0.0 # count
+      FN: 0.0 # count
+      MAE: 0.032 # eV/atom
+      RMSE: 0.105 # eV/atom
+      R2: 0.776 # dimensionless
+      missing_preds: 0 # count
+      missing_percent: 0.00% # fraction
+    unique_prototypes:
+      F1: 0.852 # fraction
+      DAF: 5.582 # dimensionless
+      Precision: 0.853 # fraction
+      Recall: 0.851 # fraction
+      Accuracy: 0.954 # fraction
+      TPR: 0.851 # fraction
+      FPR: 0.027 # fraction
+      TNR: 0.973 # fraction
+      FNR: 0.149 # fraction
+      TP: 28417.0 # count
+      FP: 4886.0 # count
+      TN: 177228.0 # count
+      FN: 4957.0 # count
+      MAE: 0.028 # eV/atom
+      RMSE: 0.073 # eV/atom
+      R2: 0.842 # dimensionless
+      missing_preds: 2 # count
+      missing_percent: 0.00% # fraction
diff --git a/models/mace/mace-mpa-0/2024-12-09-mace-mpa-0-wbm-IS2RE-FIRE.csv.gz b/models/mace/mace-mpa-0/2024-12-09-mace-mpa-0-wbm-IS2RE-FIRE.csv.gz
diff --git a/models/mace/test_mace.py b/models/mace/test_mace.py
@@ -33,7 +33,7 @@
 from matbench_discovery.slurm import slurm_submit
 
 __author__ = "Janosh Riebesell"
-__date__ = "2023-03-01"
+__date__ = "2024-12-09"
 
 
 # %%
@@ -48,8 +48,11 @@
 device = "cuda" if torch.cuda.is_available() else "cpu"
 # whether to record intermediate structures into pymatgen Trajectory
 record_traj = True  # has no effect if relax_cell is False
-model_name = "https://github.com/ACEsuit/mace-mp/releases/download/mace_mp_0b/mace_agnesi_medium.model"
-# model_name = "https://tinyurl.com/5yyxdm76"
+model_name = {  # label -> checkpoint identifier; the subscript after the dict picks the active entry
+    "MACE-MPAlex 0 (2024-12-09)": "mace-alex-main-branch",
+    "MACE MP 0 medium (2023-03-01)": "https://tinyurl.com/5yyxdm76",  # NOTE(review): a URL; looks like it would yield a malformed local path when interpolated into the checkpoint f-string below — confirm
+}["MACE-MPAlex 0 (2024-12-09)"]
+
 ase_filter: Literal["frechet", "exp"] = "frechet"
 
 slurm_vars = slurm_submit(
@@ -81,10 +84,10 @@
 force_max = 0.05  # Run until the forces are smaller than this in eV/A
 checkpoint = f"{ROOT}/models/mace/checkpoints/{model_name}.model"
 dtype = "float64"
-mace_calc = mace_mp(model=model_name, device=device, default_dtype=dtype)
+mace_calc = mace_mp(model=checkpoint, device=device, default_dtype=dtype)
 
 print(f"Read data from {data_path}")
-atoms_list: list[Atoms] = ase_atoms_from_zip(data_path)
+atoms_list: list[Atoms] = np.array(ase_atoms_from_zip(data_path), dtype=object)  # NOTE(review): value is an object-dtype ndarray, not a list — annotation looks stale
 
 if slurm_array_job_id == "debug":
     if smoke_test:

diff --git a/...s/energy-parity/e-form-parity-mace.svelte → ...rgy-parity/e-form-parity-mace-mp-0.svelte b/...s/energy-parity/e-form-parity-mace.svelte → ...rgy-parity/e-form-parity-mace-mp-0.svelte
diff --git a/site/src/figs/energy-parity/e-form-parity-mace-mpa-0.svelte b/site/src/figs/energy-parity/e-form-parity-mace-mpa-0.svelte
diff --git a/...igs/energy-parity/each-parity-mace.svelte → ...nergy-parity/each-parity-mace-mp-0.svelte b/...igs/energy-parity/each-parity-mace.svelte → ...nergy-parity/each-parity-mace-mp-0.svelte
diff --git a/site/src/figs/energy-parity/each-parity-mace-mpa-0.svelte b/site/src/figs/energy-parity/each-parity-mace-mpa-0.svelte
diff --git a/site/src/figs/per-element-each-errors.json b/site/src/figs/per-element-each-errors.json
diff --git a/site/src/lib/types.ts b/site/src/lib/types.ts
@@ -22,7 +22,7 @@ export type ModelStats = {
   GPUs: number // number of GPUs used
   CPUs: number // number of CPUs used
   slurm_jobs: number // number of SLURM jobs used
-  κ_SRME: number // symmetric relative mean error for thermal conductivity
+  'κ<sub>SRME</sub>': number // symmetric relative mean error for thermal conductivity
 }
 
 // how to pretty print a model stat key on the website