Skip to content

Commit 97551d7

Browse files
authored
Merge pull request #5 from Bayer-Group/without_cc_pt
Without cc pt
2 parents 2c3c716 + cbd072f commit 97551d7

28 files changed

+842
-69
lines changed

docs/api/codelists/codelists.md

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Codelist
2+
3+
::: phenex.codelists.codelists
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# ArithmeticPhenotype
2+
3+
::: phenex.phenotypes.arithmetic_phenotype
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# CategoricalPhenotype
2+
3+
::: phenex.phenotypes.categorical_phenotype
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# ContinuousCoveragePhenotype
2+
3+
::: phenex.phenotypes.continuous_coverage_phenotype
+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# DeathPhenotype
2+
3+
::: phenex.phenotypes.death_phenotype
+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# ScorePhenotype
2+
3+
::: phenex.phenotypes.score_phenotype

docs/api/phenotypes/sex_phenotype.md

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# SexPhenotype
2+
3+
::: phenex.phenotypes.sex_phenotype

mkdocs.yml

+7
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,15 @@ nav:
1818
- CodelistPhenotype: api/phenotypes/codelist_phenotype.md
1919
- MeasurementPhenotype: api/phenotypes/measurement_phenotype.md
2020
- AgePhenotype: api/phenotypes/age_phenotype.md
21+
- SexPhenotype: api/phenotypes/sex_phenotype.md
22+
- DeathPhenotype: api/phenotypes/death_phenotype.md
23+
- AgePhenotype: api/phenotypes/age_phenotype.md
24+
- ArithmeticPhenotype: api/phenotypes/arithmetic_phenotype.md
2125
- LogicPhenotype: api/phenotypes/logic_phenotype.md
26+
- ScorePhenotype: api/phenotypes/score_phenotype.md
2227
- Cohort: api/phenotypes/cohort.md
28+
- Codelists:
29+
- Codelist: api/codelists/codelists.md
2330
- License: LICENSE.md
2431

2532
plugins:

phenex/codelists/codelists.py

+57-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import os
22
from typing import Dict, List, Union, Optional
3+
import pandas as pd
34

45

56
class Codelist:
@@ -85,6 +86,54 @@ def from_yaml(cls, path: str) -> "Codelist":
8586
data, name=os.path.basename(path.replace(".yaml", "").replace(".yml", ""))
8687
)
8788

89+
@classmethod
def from_excel(
    cls,
    path: str,
    sheet_name: Optional[str] = None,
    codelist_name: Optional[str] = None,
    code_column: Optional[str] = "code",
    code_type_column: Optional[str] = "code_type",
    codelist_column: Optional[str] = "codelist",
) -> "Codelist":
    """
    Load a codelist from an Excel file.

    The resulting codelist is named after ``codelist_name`` when given,
    otherwise after ``sheet_name`` when given, otherwise after the file
    name with its extension removed.

    Parameters:
        path: path to the excel file.
        sheet_name: an optional label for the sheet to read from. If defined, the codelist will be taken from that sheet. If no sheet_name is defined, the first sheet is taken.
        codelist_name: an optional name of the codelist which to extract. If defined, codelist_column must be present and the codelist_name must occur within the codelist_column.
        code_column: the name of the column containing the codes.
        code_type_column: the name of the column containing the code types.
        codelist_column: the name of the column containing the codelist names.

    Returns:
        A Codelist whose codes are grouped by code type.

    Raises:
        ValueError: if sheet_name is given but does not exist in the workbook.
    """
    if sheet_name is None:
        _df = pd.read_excel(path)
    else:
        # The context manager makes sure the workbook handle is released,
        # including when the sheet is missing and we raise.
        with pd.ExcelFile(path) as workbook:
            if sheet_name not in workbook.sheet_names:
                raise ValueError(
                    f"Sheet name {sheet_name} not found in the Excel file."
                )
            _df = workbook.parse(sheet_name)

    if codelist_name is not None:
        # codelist name is not none, therefore we subset the table to the current codelist
        _df = _df[_df[codelist_column] == codelist_name]

    code_dict = _df.groupby(code_type_column)[code_column].apply(list).to_dict()

    # Most specific label wins: explicit codelist, then sheet, then file stem.
    if codelist_name is not None:
        name = codelist_name
    elif sheet_name is not None:
        name = sheet_name
    else:
        name = os.path.splitext(os.path.basename(path))[0]

    return cls(code_dict, name=name)
136+
88137
def to_tuples(self) -> List[tuple]:
89138
"""
90139
Convert the codelist to a list of tuples, where each tuple is of the form
@@ -101,8 +150,15 @@ def __repr__(self):
101150
codelist={self.codelist}
102151
)"""
103152

153+
def to_pandas(self) -> pd.DataFrame:
    """
    Return the codelist as a pandas DataFrame.

    The frame has one row per (code_type, code) pair produced by
    ``to_tuples`` and a third column, ``codelist``, holding this
    codelist's name on every row.
    """
    pairs = pd.DataFrame(self.to_tuples(), columns=["code_type", "code"])
    return pairs.assign(codelist=self.name)
104161

105-
import pandas as pd
106162

107163

108164
class LocalCSVCodelistFactory:

phenex/filters/aggregator.py

+13-3
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,12 @@ def __init__(
88
aggregation_index=["PERSON_ID"],
99
aggregation_function="sum",
1010
event_date_column="EVENT_DATE",
11+
reduce=False
1112
):
1213
self.aggregation_index = aggregation_index
1314
self.aggregation_function = aggregation_function
1415
self.event_date_column = event_date_column
16+
self.reduce = reduce
1517

1618
def aggregate(self, input_table: Table):
1719
# Define the window specification
@@ -35,12 +37,20 @@ def aggregate(self, input_table: Table):
3537
)
3638

3739
# Add the aggregated date as a new column
38-
table = input_table.mutate(aggregated_date=aggregated_date)
40+
input_table = input_table.mutate(aggregated_date=aggregated_date)
3941

4042
# Filter rows where the original date matches the aggregated date
41-
result = table.filter(table[self.event_date_column] == table.aggregated_date)
42-
return result
43+
input_table = input_table.filter(input_table[self.event_date_column] == input_table.aggregated_date)
44+
45+
# Select the necessary columns
46+
47+
# Apply the distinct reduction if required
48+
if self.reduce:
49+
selected_columns = self.aggregation_index + [self.event_date_column]
50+
input_table = input_table.select(selected_columns).distinct()
51+
input_table = input_table.mutate(VALUE=ibis.null())
4352

53+
return input_table
4454

4555
class Nearest(VerticalDateAggregator):
4656
def __init__(self, **kwargs):

phenex/filters/categorical_filter.py

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
from phenex.filters.filter import Filter
2+
from typing import List, Optional, Union
3+
from ibis.expr.types.relations import Table
4+
5+
class CategoricalFilter(Filter):
    """
    Keep only the rows of a table whose value in a given column is one of a
    set of allowed values.

    Attributes:
        column_name (str): Name of the column whose values are tested.
        allowed_values (List[Union[str, int]]): Values a row may hold in
            ``column_name`` in order to be kept.
        domain (Optional[str]): Stored on the instance as given; it is not
            read anywhere in this class.

    Methods:
        _filter(table: Table) -> Table:
            Returns the rows of ``table`` whose ``column_name`` value is in
            ``allowed_values``.
    """

    def __init__(
        self,
        column_name: str,
        allowed_values: List[Union[str, int]],
        domain: Optional[str] = None,
    ):
        self.column_name = column_name
        self.allowed_values = allowed_values
        self.domain = domain
        super().__init__()

    def _filter(self, table: Table):
        """
        Restrict ``table`` to rows whose ``column_name`` value is allowed.

        Parameters:
            table (Table): The table to filter; it must contain ``column_name``.

        Returns:
            Table: The filtered table.
        """
        is_allowed = table[self.column_name].isin(self.allowed_values)
        return table.filter(is_allowed)

phenex/filters/codelist_filter.py

+2
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ def __init__(self, codelist: Codelist, name=None, use_code_type: bool = True):
2626

2727
def _convert_codelist_to_tuples(self) -> List[Tuple[str, str]]:
2828
if self.codelist is not None:
29+
if not isinstance(self.codelist, Codelist):
30+
raise ValueError("Codelist must be an instance of Codelist")
2931
return [
3032
(ct, c) for ct, codes in self.codelist.codelist.items() for c in codes
3133
]

0 commit comments

Comments
 (0)