Skip to content

Commit

Permalink
Also allow aliases in CSV parser
Browse files: browse the repository at this point in the history
  • Loading branch information
ml-evs committed Dec 1, 2023
1 parent 058b9e0 commit 8bf3b28
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 2 deletions.
2 changes: 1 addition & 1 deletion src/mc_optimade/mc_optimade/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ def _parse_and_assign_properties(
file_ext = _path.suffix
for parser in PROPERTY_PARSERS[file_ext]:
try:
properties = parser(_path)
properties = parser(_path, property_definitions)
for id in properties:
parsed_properties[id].update(properties[id])
all_property_fields |= set(properties[id].keys())
Expand Down
17 changes: 16 additions & 1 deletion src/mc_optimade/mc_optimade/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,20 @@ def pybtex_to_optimade(bib_entry: Any, properties=None) -> EntryResource:
raise NotImplementedError


def load_csv_file(p: Path, id_key: str = "id") -> dict[str, dict[str, Any]]:
def load_csv_file(
p: Path,
properties: list[PropertyDefinition] | None = None,
) -> dict[str, dict[str, Any]]:
"""Parses a CSV file found at path `p` and returns a dictionary
of properties keyed by ID.
Will use the first column that contains the substring "id", which will
be matched with the generated IDs.
Parameters:
p: Path to the CSV file.
properties: List of property definitions to extract from the CSV file.
Returns:
A dictionary of ID -> properties.
Expand All @@ -42,6 +49,14 @@ def load_csv_file(p: Path, id_key: str = "id") -> dict[str, dict[str, Any]]:
df["id"] = df[id_key]
df = df.set_index("id")

for prop in properties or []:
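# Loop through the property's aliases: the first alias that is present as a column
# is copied into a column under the property's real name; if no alias is present,
# the data is left unchanged (so any existing real-name column is kept as is).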
for alias in prop.aliases or []:
if alias in df:
df[prop.name] = df[alias]
break

return df.to_dict(orient="index")


Expand Down

0 comments on commit 8bf3b28

Please sign in to comment.