Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Explicit UTF-8 encoding for VASP input files with zopen, and open for other text files #4218

Open
wants to merge 27 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
597ab65
explicit utf-8 encoding for kpoints from file
DanielYang59 Dec 6, 2024
5e41f1a
explicit utf-8 elsewhere
DanielYang59 Dec 6, 2024
1767195
fix root level and dev_scripts
DanielYang59 Dec 6, 2024
f575e74
simplify PMG PKG path
DanielYang59 Dec 6, 2024
052e949
fix analysis, cli, command_line
DanielYang59 Dec 6, 2024
9d09765
fix electronic_structure, entries and ext
DanielYang59 Dec 6, 2024
3f7b180
fix io, phonon and symmetry
DanielYang59 Dec 6, 2024
bd90e90
fix alchemy and analysis tests
DanielYang59 Dec 6, 2024
5b8ced4
fix apps, command_line, core, elec_struct, entries, ext and vis tests
DanielYang59 Dec 6, 2024
b8d3b75
finish io and phonon tests
DanielYang59 Dec 6, 2024
c54d772
remove unnecessary seek
DanielYang59 Dec 6, 2024
bea91bd
revert encoding for json dump
DanielYang59 Dec 6, 2024
e58a4ed
type custom paths
DanielYang59 Dec 6, 2024
8a0490c
revert another json dump
DanielYang59 Dec 6, 2024
0d9de77
ignore userwarning by default
DanielYang59 Dec 7, 2024
5af79f7
relocate test-only env var
DanielYang59 Dec 7, 2024
308597a
remove unneeded default tag for non-userwarning
DanielYang59 Dec 7, 2024
1cd1aac
also explicit utf-8 for json dump though forced ASCII
DanielYang59 Dec 7, 2024
4206b7d
utf8 is an alias for utf-8 in codecs, but maybe prefer the standard name
DanielYang59 Dec 8, 2024
6a90d2d
fix missing encoding in comment
DanielYang59 Dec 10, 2024
436356f
add test for Γ decoding
DanielYang59 Dec 10, 2024
2608e8a
better error message
DanielYang59 Dec 10, 2024
9259f13
Merge branch 'master' into kpoints-encoding
shyuep Dec 11, 2024
ff46384
Merge branch 'master' into kpoints-encoding
DanielYang59 Dec 11, 2024
59148a0
Merge branch 'master' into kpoints-encoding
DanielYang59 Dec 11, 2024
140b8b1
Merge branch 'master' into kpoints-encoding
shyuep Dec 12, 2024
25e5a38
Merge remote-tracking branch 'upstream/master' into kpoints-encoding
DanielYang59 Jan 2, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions dev_scripts/regen_libxcfunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@
from __future__ import annotations

import json
import os
import sys
from copy import deepcopy

from pymatgen.core import PKG_DIR


def parse_libxc_docs(path):
"""Parse libxc_docs.txt file, return dictionary {libxc_id: info_dict}."""
Expand All @@ -27,7 +28,7 @@ def parse_section(section):
return int(dct["Number"]), dct

dct = {}
with open(path) as file:
with open(path, encoding="utf-8") as file:
section = []
for line in file:
if not line.startswith("-"):
Expand Down Expand Up @@ -85,8 +86,7 @@ def main():
xc_funcs = parse_libxc_docs(path)

# Generate new JSON file in pycore
pmg_core = os.path.abspath("../pymatgen/core/")
json_path = f"{pmg_core}/libxc_docs.json"
json_path = f"{PKG_DIR}/core/libxc_docs.json"
write_libxc_docs_json(xc_funcs, json_path)

# Build new enum list.
Expand All @@ -99,8 +99,8 @@ def main():

# Re-generate enumerations.
# [0] read py module.
xc_funcpy_path = f"{pmg_core}/libxcfunc.py"
with open(xc_funcpy_path) as file:
xc_funcpy_path = f"{PKG_DIR}/core/libxcfunc.py"
with open(xc_funcpy_path, encoding="utf-8") as file:
lines = file.readlines()

# [1] insert new enum values in list
Expand Down
28 changes: 14 additions & 14 deletions dev_scripts/update_pt_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from monty.serialization import dumpfn, loadfn
from ruamel import yaml

from pymatgen.core import Element, get_el_sp
from pymatgen.core import PKG_DIR, Element, get_el_sp

try:
from bs4 import BeautifulSoup
Expand All @@ -25,7 +25,7 @@

def parse_oxi_state():
data = loadfn(PTABLE_YAML_PATH)
with open("oxidation_states.txt") as file:
with open("oxidation_states.txt", encoding="utf-8") as file:
oxi_data = file.read()
oxi_data = re.sub("[\n\r]", "", oxi_data)
patt = re.compile("<tr>(.*?)</tr>", re.MULTILINE)
Expand Down Expand Up @@ -63,7 +63,7 @@ def parse_oxi_state():

def parse_ionic_radii():
data = loadfn(PTABLE_YAML_PATH)
with open("ionic_radii.csv") as file:
with open("ionic_radii.csv", encoding="utf-8") as file:
radii_data = file.read()
radii_data = radii_data.split("\r")
header = radii_data[0].split(",")
Expand Down Expand Up @@ -93,7 +93,7 @@ def parse_ionic_radii():

def parse_radii():
data = loadfn(PTABLE_YAML_PATH)
with open("radii.csv") as file:
with open("radii.csv", encoding="utf-8") as file:
radii_data = file.read()
radii_data = radii_data.split("\r")

Expand Down Expand Up @@ -123,7 +123,7 @@ def parse_radii():
print(el)
with open("periodic_table2.yaml", mode="w") as file:
yaml.dump(data, file)
with open("../pymatgen/core/periodic_table.json", mode="w") as file:
with open(f"{PKG_DIR}/core/periodic_table.json", mode="w") as file:
json.dump(data, file)
DanielYang59 marked this conversation as resolved.
Show resolved Hide resolved


Expand All @@ -142,7 +142,7 @@ def update_ionic_radii():
del dct["Ionic_radii_ls"]
with open("periodic_table2.yaml", mode="w") as file:
yaml.dump(data, file)
with open("../pymatgen/core/periodic_table.json", mode="w") as file:
with open(f"{PKG_DIR}/core/periodic_table.json", mode="w") as file:
json.dump(data, file)


Expand Down Expand Up @@ -180,19 +180,19 @@ def parse_shannon_radii():
data[el]["Shannon radii"] = dict(radii[el])

dumpfn(data, PTABLE_YAML_PATH)
with open("../pymatgen/core/periodic_table.json", mode="w") as file:
with open(f"{PKG_DIR}/core/periodic_table.json", mode="w") as file:
json.dump(data, file)


def gen_periodic_table():
data = loadfn(PTABLE_YAML_PATH)

with open("../pymatgen/core/periodic_table.json", mode="w") as file:
with open(f"{PKG_DIR}/core/periodic_table.json", mode="w") as file:
json.dump(data, file)


def gen_iupac_ordering():
periodic_table = loadfn("../pymatgen/core/periodic_table.json")
periodic_table = loadfn(f"{PKG_DIR}/core/periodic_table.json")
order = [
([18], range(6, 0, -1)), # noble gasses
([1], range(7, 1, -1)), # alkali metals
Expand Down Expand Up @@ -274,16 +274,16 @@ def add_electron_affinities():
missing_electron_affinities = set(range(1, 93)) - Z_set
raise ValueError(f"{missing_electron_affinities=}")
print(element_electron_affinities)
pt = loadfn("../pymatgen/core/periodic_table.json")
pt = loadfn(f"{PKG_DIR}/core/periodic_table.json")
for key, val in pt.items():
val["Electron affinity"] = element_electron_affinities.get(Element(key).long_name)
dumpfn(pt, "../pymatgen/core/periodic_table.json")
dumpfn(pt, f"{PKG_DIR}/core/periodic_table.json")


def add_ionization_energies():
"""Update the periodic table data file with ground level and ionization energies from NIST."""

with open("NIST Atomic Ionization Energies Output.html") as file:
with open("NIST Atomic Ionization Energies Output.html", encoding="utf-8") as file:
soup = BeautifulSoup(file.read(), "html.parser")
table = None
for table in soup.find_all("table"):
Expand All @@ -302,11 +302,11 @@ def add_ionization_energies():
if not set(data).issuperset(range(1, 93)):
raise RuntimeError("Failed to get data up to Uranium")

pt = loadfn("../pymatgen/core/periodic_table.json")
pt = loadfn(f"{PKG_DIR}/core/periodic_table.json")
for key, val in pt.items():
del val["Ionization energy"]
val["Ionization energies"] = data.get(Element(key).long_name, [])
dumpfn(pt, "../pymatgen/core/periodic_table.json")
dumpfn(pt, f"{PKG_DIR}/core/periodic_table.json")


if __name__ == "__main__":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -861,18 +861,18 @@ def __init__(self, permutations_safe_override=False, only_symbols=None):
dict.__init__(self)
self.cg_list: list[CoordinationGeometry] = []
if only_symbols is None:
with open(f"{MODULE_DIR}/coordination_geometries_files/allcg.txt") as file:
with open(f"{MODULE_DIR}/coordination_geometries_files/allcg.txt", encoding="utf-8") as file:
data = file.readlines()
for line in data:
cg_file = f"{MODULE_DIR}/{line.strip()}"
with open(cg_file) as file:
with open(cg_file, encoding="utf-8") as file:
dd = json.load(file)
self.cg_list.append(CoordinationGeometry.from_dict(dd))
else:
for symbol in only_symbols:
fsymbol = symbol.replace(":", "#")
cg_file = f"{MODULE_DIR}/coordination_geometries_files/{fsymbol}.json"
with open(cg_file) as file:
with open(cg_file, encoding="utf-8") as file:
dd = json.load(file)
self.cg_list.append(CoordinationGeometry.from_dict(dd))

Expand Down
4 changes: 2 additions & 2 deletions src/pymatgen/analysis/chemenv/utils/chemenv_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def save(self, root_dir=None):
if test != "Y":
print("Configuration not saved")
return config_file
with open(config_file, mode="w") as file:
with open(config_file, mode="w", encoding="utf-8") as file:
json.dump(config_dict, file)
print("Configuration saved")
return config_file
Expand All @@ -171,7 +171,7 @@ def auto_load(cls, root_dir=None):
root_dir = f"{home}/.chemenv"
config_file = f"{root_dir}/config.json"
try:
with open(config_file) as file:
with open(config_file, encoding="utf-8") as file:
config_dict = json.load(file)
return ChemEnvConfig(package_options=config_dict["package_options"])

Expand Down
4 changes: 2 additions & 2 deletions src/pymatgen/analysis/chempot_diagram.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
from __future__ import annotations

import json
import os
import warnings
from functools import lru_cache
from itertools import groupby
Expand All @@ -36,6 +35,7 @@
from scipy.spatial import ConvexHull, HalfspaceIntersection

from pymatgen.analysis.phase_diagram import PDEntry, PhaseDiagram
from pymatgen.core import PKG_DIR
from pymatgen.core.composition import Composition, Element
from pymatgen.util.coord import Simplex
from pymatgen.util.due import Doi, due
Expand All @@ -44,7 +44,7 @@
if TYPE_CHECKING:
from pymatgen.entries.computed_entries import ComputedEntry

with open(f"{os.path.dirname(__file__)}/../util/plotly_chempot_layouts.json") as file:
with open(f"{PKG_DIR}/util/plotly_chempot_layouts.json", encoding="utf-8") as file:
plotly_layouts = json.load(file)


Expand Down
2 changes: 1 addition & 1 deletion src/pymatgen/analysis/cost.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def __init__(self, filename):
# read in data from file
self._chemsys_entries = defaultdict(list)
filename = os.path.join(os.path.dirname(__file__), filename)
with open(filename) as file:
with open(filename, encoding="utf-8") as file:
reader = csv.reader(file, quotechar="|")
for row in reader:
comp = Composition(row[0])
Expand Down
4 changes: 2 additions & 2 deletions src/pymatgen/analysis/graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -975,7 +975,7 @@ def draw_graph_to_file(

write_dot(g, f"{basename}.dot")

with open(filename, mode="w") as file:
with open(filename, mode="w", encoding="utf-8") as file:
args = [algo, "-T", extension, f"{basename}.dot"]
with subprocess.Popen(args, stdout=file, stdin=subprocess.PIPE, close_fds=True) as rs:
rs.communicate()
Expand Down Expand Up @@ -2643,7 +2643,7 @@ def draw_graph_to_file(

write_dot(g, f"{basename}.dot")

with open(filename, mode="w") as file:
with open(filename, mode="w", encoding="utf-8") as file:
args = [algo, "-T", extension, f"{basename}.dot"]
with subprocess.Popen(args, stdout=file, stdin=subprocess.PIPE, close_fds=True) as rs:
rs.communicate()
Expand Down
2 changes: 1 addition & 1 deletion src/pymatgen/analysis/hhi.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def __init__(self):
"""Init for HHIModel."""
self.symbol_hhip_hhir = {} # symbol->(HHI_production, HHI reserve)

with open(HHI_CSV_PATH) as file:
with open(HHI_CSV_PATH, encoding="utf-8") as file:
for line in file:
if line[0] != "#":
symbol, hhi_production, hhi_reserve = line.split(",")
Expand Down
4 changes: 2 additions & 2 deletions src/pymatgen/analysis/interface_reactions.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from __future__ import annotations

import json
import os
import warnings
from typing import TYPE_CHECKING

Expand All @@ -18,6 +17,7 @@

from pymatgen.analysis.phase_diagram import GrandPotentialPhaseDiagram, PhaseDiagram
from pymatgen.analysis.reaction_calculator import Reaction
from pymatgen.core import PKG_DIR
from pymatgen.core.composition import Composition
from pymatgen.util.due import Doi, due
from pymatgen.util.plotting import pretty_plot
Expand All @@ -31,7 +31,7 @@
__email__ = "mcdermott@lbl.gov"
__date__ = "Sep 1, 2021"

with open(os.path.join(os.path.dirname(__file__), "..", "util", "plotly_interface_rxn_layouts.json")) as file:
with open(f"{PKG_DIR}/util/plotly_interface_rxn_layouts.json", encoding="utf-8") as file:
plotly_layouts = json.load(file)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def __init__(self, lambda_table=None, alpha=-5):
else:
module_dir = os.path.dirname(__file__)
json_file = f"{module_dir}/data/lambda.json"
with open(json_file) as file:
with open(json_file, encoding="utf-8") as file:
self._lambda_table = json.load(file)

# build map of specie pairs to lambdas
Expand Down
2 changes: 1 addition & 1 deletion src/pymatgen/cli/pmg_potcar.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def gen_potcar(dirname, filename):
"""
if filename == "POTCAR.spec":
fullpath = os.path.join(dirname, filename)
with open(fullpath) as file:
with open(fullpath, encoding="utf-8") as file:
elements = file.readlines()
symbols = [el.strip() for el in elements if el.strip() != ""]
potcar = Potcar(symbols)
Expand Down
8 changes: 4 additions & 4 deletions src/pymatgen/command_line/chargemol_caller.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,7 @@ def _write_jobscript_for_chargemol(
bo = ".true." if compute_bond_orders else ".false."
lines += f"\n<compute BOs>\n{bo}\n</compute BOs>\n"

with open("job_control.txt", mode="w") as file:
with open("job_control.txt", mode="w", encoding="utf-8") as file:
file.write(lines)

@staticmethod
Expand All @@ -422,7 +422,7 @@ def _get_dipole_info(filepath):
idx = 0
start = False
dipoles = []
with open(filepath) as file:
with open(filepath, encoding="utf-8") as file:
for line in file:
if "The following XYZ" in line:
start = True
Expand Down Expand Up @@ -549,7 +549,7 @@ def _get_data_from_xyz(xyz_path) -> list[float]:
"""
props = []
if os.path.isfile(xyz_path):
with open(xyz_path) as file:
with open(xyz_path, encoding="utf-8") as file:
for idx, line in enumerate(file):
if idx <= 1:
continue
Expand All @@ -574,7 +574,7 @@ def _get_cm5_data_from_output(ddec_analysis_path) -> list[float]:
props = []
if os.path.isfile(ddec_analysis_path):
start = False
with open(ddec_analysis_path) as file:
with open(ddec_analysis_path, encoding="utf-8") as file:
for line in file:
if "computed CM5" in line:
start = True
Expand Down
4 changes: 2 additions & 2 deletions src/pymatgen/command_line/enumlib_caller.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ def get_sg_info(ss):
output.append(f"{min_conc - 1} {min_conc + 1} {base}")
output.append("")
logger.debug("Generated input file:\n" + "\n".join(output))
with open("struct_enum.in", mode="w") as file:
with open("struct_enum.in", mode="w", encoding="utf-8") as file:
file.write("\n".join(output))

def _run_multienum(self):
Expand Down Expand Up @@ -357,7 +357,7 @@ def _get_structures(self, num_structs):
ordered_structure = inv_org_latt = None

for file in glob("vasp.*"):
with open(file) as file:
with open(file, encoding="utf-8") as file:
data = file.read()
data = re.sub(r"scale factor", "1", data)
data = re.sub(r"(\d+)-(\d+)", r"\1 -\2", data)
Expand Down
4 changes: 2 additions & 2 deletions src/pymatgen/command_line/gulp_caller.py
Original file line number Diff line number Diff line change
Expand Up @@ -802,7 +802,7 @@ def __init__(self, bush_lewis_flag):
if bush_lewis_flag not in {"bush", "lewis"}:
raise ValueError(f"bush_lewis_flag should be bush or lewis, got {bush_lewis_flag}")
pot_file = "bush.lib" if bush_lewis_flag == "bush" else "lewis.lib"
with open(os.path.join(os.environ["GULP_LIB"], pot_file)) as file:
with open(os.path.join(os.environ["GULP_LIB"], pot_file), encoding="utf-8") as file:
# In lewis.lib there is no shell for cation
species_dict, pot_dict, spring_dict = {}, {}, {}
sp_flg, pot_flg, spring_flg = False, False, False
Expand Down Expand Up @@ -869,7 +869,7 @@ class TersoffPotential:

def __init__(self):
"""Init TersoffPotential."""
with open(f"{MODULE_DIR}/OxideTersoffPotentials") as file:
with open(f"{MODULE_DIR}/OxideTersoffPotentials", encoding="utf-8") as file:
data = {}
for row in file:
metaloxi = row.split()[0]
Expand Down
Loading
Loading