-
Notifications
You must be signed in to change notification settings - Fork 0
/
all_formula.py
130 lines (110 loc) · 3.74 KB
/
all_formula.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
"""
A simple example that uses the NOMAD client library to access the archive.
Modified from source: https://nomad-lab.eu/prod/rae/docs/archive.html#first-example
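to download the chemical formula (`formula`) and calculation id (`calc_id`) from
`section_metadata` for every matching entry and save them to `all-formula.csv`.
Retrieval of `chemical_composition_reduced` and `energy_total` is currently commented out.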
"""
import pandas as pd
from tqdm import tqdm
from nomad.client import ArchiveQuery
# Elements to exclude from the query: noble gases and certain radioactive elements.
# Source: https://github.com/sparks-baird/mat_discover/blob/4e65b710b948c7ce269cc1741c12e219507aa2dd/mat_discover/utils/generate_elasticity_data.py#L74-L76
# NOTE(review): "Rn" was listed twice; the duplicate has been dropped. The second
# entry may have been meant as a different element (e.g. "Ra") - confirm against
# the source linked above before adding anything.
# fmt: off
excluded_elements = [
    "He", "Ne", "Ar", "Kr", "Xe", "Rn", "U", "Th", "Tc", "Po", "Pu", "Pa",
]
# fmt: on
# %% query NOMAD database
# Build the archive query: entries with domain "dft", combined with a "$not"
# clause on the excluded elements. Only `calc_id` and `formula` from
# `section_metadata` are requested.
# NOTE(review): confirm whether a list under "atoms" is matched as ALL or ANY of
# the listed elements - that decides which entries the "$not" clause removes.
query = ArchiveQuery(
    # url="http://nomad-lab.eu/prod/rae/api",
    query={
        "$and": {
            "domain": "dft",
            "$not": {"atoms": excluded_elements},
        }
    },
    required={
        # "section_run": {
        #     "section_single_configuration_calculation[-1]": {"energy_total": "*",},
        #     "section_system": {"chemical_composition_reduced": "*"},
        # },
        "section_metadata": {
            "calc_id": "*",
            "formula": "*",
        },
    },
    max=None,  # presumably "no cap on the number of entries" - confirm in the client docs
    per_page=3000,
)
# Show the query object's printed representation before iterating over it.
print(query)
# %% extract values
# Walk the query results (with a tqdm progress bar) and collect `calc_id` and
# `formula` from each result's `section_metadata`. The two lists are filled in
# lockstep so that index k in both refers to the same result.
calc_ids = []
formulas = []
for result in tqdm(query):
    # Look the section up once; it may be None for a given result.
    # Checking if nested attribute exists https://stackoverflow.com/a/29855744/13697228
    metadata = result.section_metadata
    if metadata is None:
        # Keep a placeholder row so both lists stay the same length.
        calc_ids.append(None)
        formulas.append(None)
        continue
    calc_ids.append(metadata.calc_id)
    formulas.append(metadata.formula)
# %% combine and save
# Assemble the collected columns into a DataFrame (one row per collected result)
# and write it to "all-formula.csv" without the index column.
df = pd.DataFrame(
    data={
        "calc_id": calc_ids,
        "formula": formulas,
        # "hartree_total_energy": hartree_total_energies,
    }
)
df.to_csv(path_or_buf="all-formula.csv", index=False)

# No-op statement kept as a convenient line to set a debugger breakpoint on.
1 + 1  # breakpoint
# %% Code Graveyard
# required={
# "section_run": {
# "energy_total": "*",
# "section_single_configuration_calculation": {
# "single_configuration_calculation_to_system_ref": "*"
# },
# }
# },
# required={
# "section_run": {
# "section_single_configuration_calculation": "*",
# "section_system": "*",
# }
# },
# required={
# "section_run": {"section_single_configuration_calculation": "*"},
# "section_system": "*",
# },
# "single_configuration_calculation_to_system_ref": {
# "chemical_composition_reduced": "*"
# },
# query={"domain": "dft", "atoms": ["Po"]},
# formulas = [
# result.section_run[0]
# .section_single_configuration_calculation[-1]
# .single_configuration_calculation_to_system_ref.chemical_composition_reduced
# for result in query
# ]
# from mendeleev.fetch import fetch_table
# all_elements = fetch_table("elements")["symbol"]
# elements = list(set(all_elements) - set(excluded_elements))
# "dft.optimade": [
# f"NOT (elements HAS ANY {excluded_elements})".replace("'", '"')
# ],
# calc_ids = [
# result.section_metadata.calc_id if result.section_metadata is not None else None
# for result in query
# ]
# formulas = [
# result.section_metadata.formula if result.section_metadata is not None else None
# for result in query
# ]
# formulas = [
# result.section_run[0].section_system[0].chemical_composition_reduced
# for result in query
# ]
# total_energies = [
# result.section_run[0].section_single_configuration_calculation[-1].energy_total
# if len(result.section_run) > 1
# and len(result.section_run.section_single_configuration_calculation) > 1
# else None
# for result in query
# ]
# hartree_total_energies = [
# total_energy.to(units.hartree).m if total_energy is not None else None
# for total_energy in total_energies
# ]
# from nomad.metainfo import units