Skip to content

Commit a529899

Browse files
committed
added from excel to codelists class
1 parent 2c3c716 commit a529899

File tree

1 file changed

+40
-0
lines changed

1 file changed

+40
-0
lines changed

phenex/codelists/codelists.py

+40
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,46 @@ def from_yaml(cls, path: str) -> "Codelist":
8585
data, name=os.path.basename(path.replace(".yaml", "").replace(".yml", ""))
8686
)
8787

88+
@classmethod
def from_excel(
    cls,
    path: str,
    sheet_name: Optional[str] = None,
    code_column: Optional[str] = "code",
    code_type_column: Optional[str] = "code_type",
    codelist_column: Optional[str] = None,
    codelist_name: Optional[str] = None,
) -> "Codelist":
    """
    Load a codelist from an Excel file.

    The sheet is expected to hold one code per row. Rows are grouped by
    ``code_type_column`` and the values of ``code_column`` in each group
    become that code type's list of codes.

    Args:
        path: Path to the Excel file.
        sheet_name: Sheet to read. If None, ``pd.read_excel`` is called
            without a sheet name (pandas reads its default sheet).
        code_column: Name of the column containing the codes.
        code_type_column: Name of the column containing the code types.
        codelist_column: Name of the column identifying which codelist a
            row belongs to. Required when ``codelist_name`` is given.
        codelist_name: If given, only rows whose ``codelist_column`` value
            equals this name are kept, and it is used as the name of the
            returned codelist.

    Returns:
        A codelist built from a ``{code_type: [codes]}`` dictionary. Its
        name is ``codelist_name`` if given, otherwise ``sheet_name`` if
        given, otherwise the file name without its extension.

    Raises:
        ValueError: If ``sheet_name`` is not a sheet of the Excel file, or
            if ``codelist_name`` is given without ``codelist_column``.
    """
    import pandas as pd

    # Fail early with a clear message instead of an opaque KeyError from
    # indexing the DataFrame with a None column.
    if codelist_name is not None and codelist_column is None:
        raise ValueError(
            "codelist_column must be provided when codelist_name is given."
        )

    if sheet_name is None:
        _df = pd.read_excel(path)
    else:
        # The context manager closes the underlying file handle even when
        # the sheet is missing and we raise.
        with pd.ExcelFile(path) as xl:
            if sheet_name not in xl.sheet_names:
                raise ValueError(
                    f"Sheet name {sheet_name} not found in the Excel file."
                )
            _df = xl.parse(sheet_name)

    if codelist_name is not None:
        # codelist name is not none, therefore we subset the table to the current codelist
        _df = _df[_df[codelist_column] == codelist_name]

    code_dict = _df.groupby(code_type_column)[code_column].apply(list).to_dict()

    # Most specific identifier wins: explicit codelist name, then sheet
    # name, then the file name stripped of its extension.
    if codelist_name is not None:
        name = codelist_name
    elif sheet_name is not None:
        name = sheet_name
    else:
        name = os.path.splitext(os.path.basename(path))[0]

    return cls(code_dict, name=name)
127+
88128
def to_tuples(self) -> List[tuple]:
89129
"""
90130
Convert the codelist to a list of tuples, where each tuple is of the form

0 commit comments

Comments
 (0)