Skip to content

Commit

Permalink
replace threshold with "how" option to make NA filtering more intuitive
Browse files Browse the repository at this point in the history
  • Loading branch information
PhilWun committed Oct 14, 2024
1 parent 9e56e76 commit 78d458f
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,14 @@ def get(self):
return self.value


class HowEnum(Enum):
    """Choices for when a row or column containing NA values is dropped."""

    # Drop the row / column if any NA is present.
    any = "any"
    # Drop the row / column only if all of its values are NA.
    all = "all"

    def get(self):
        """Return the plain string value of this member."""
        return self.value


class PositionEnum(Enum):
front = "front"
end = "end"
Expand Down Expand Up @@ -105,7 +113,7 @@ def process_subset(subset: Optional[str], is_int: bool = False) -> Optional[List
def drop_missing_value(
df: DataFrame,
axis: int = 0,
threshold: int = no_default,
how: str = "any",
subset: str = None,
**kwargs,
) -> DataFrame:
Expand All @@ -115,13 +123,13 @@ def drop_missing_value(
returns the new dataframe.
:param df: DataFrame
:param axis: int
:param threshold: int
:param how: str, either "any" or "all" (the values of HowEnum)
:param subset: str containing the columns or rows separated by commas
:return: DataFrame
"""
subset = process_subset(subset, is_int=(axis == 1))

df.dropna(axis=axis, thresh=threshold, subset=subset, inplace=True)
df.dropna(axis=axis, how=how, subset=subset, inplace=True)

return df

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
)

from . import PDPreprocessing, PDPreprocessing_BLP
from .backend.pandas_preprocessing import HowEnum
from .schemas import (
FirstInputParametersSchema,
SecondInputParametersSchema,
Expand Down Expand Up @@ -244,7 +245,7 @@ def render(self, data: Mapping, db_id: int, step_id: int, errors: dict):
fields = schema.fields
# define default values
default_values = {
fields["threshold"].data_key: 0,
fields["how"].data_key: HowEnum.any,
}

# overwrite default values with other values if possible
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
KeepEnum,
PositionEnum,
CaseEnum,
HowEnum,
)

from celery.utils.log import get_task_logger
Expand Down Expand Up @@ -69,7 +70,7 @@ def make_input_params(self, data, **kwargs) -> FirstInputParameters:
class SecondInputParameters:
preprocessing_enum: PreprocessingEnum
axis: AxisEnum
threshold: int
how: HowEnum
subset: str
fill_value: str
keep: KeepEnum
Expand Down Expand Up @@ -119,15 +120,15 @@ class SecondInputParametersSchema(FrontendFormBaseSchema):
"input_type": "select",
},
)
threshold = ma.fields.Integer(
how = EnumField(
HowEnum,
required=True,
allow_none=False,
metadata={
"label": "Threshold",
"description": "Requires that many non-NA values. Cannot be combined with how. If left empty, then all values may not be NA.",
"input_type": "number",
"label": "How",
"description": "Select when a row / column is dropped. Any: if any NA is present, it will be dropped. All: if all values are NA, it will be dropped.",
"input_type": "select",
},
validate=validate.Range(min=0, min_inclusive=True),
)
subset = ma.fields.String(
required=False,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@
const column_val = document.getElementById("column");

const axis_dis = axis_val.parentNode.parentNode;
const threshold_dis = document.getElementById("threshold").parentNode.parentNode;
const how_dis = document.getElementById("how").parentNode.parentNode;
const subset_dis = subset_val.parentNode.parentNode;
const fill_value_dis = document.getElementById("fill_value").parentNode.parentNode;
const keep_dis = document.getElementById("keep").parentNode.parentNode;
Expand Down Expand Up @@ -207,7 +207,7 @@

function preprocessing_enum_change() {
axis_dis.style.display = "none";
threshold_dis.style.display = "none";
how_dis.style.display = "none";
subset_dis.style.display = "none";
fill_value_dis.style.display = "none";
keep_dis.style.display = "none";
Expand All @@ -225,7 +225,7 @@

if (preprocessing_enum_value.value === "drop_na") {
axis_dis.style.display = "block";
threshold_dis.style.display = "block";
how_dis.style.display = "block";
subset_dis.style.display = "block";
change_parameter(subset_dis, null, "Labels along other axis to consider, e.g. if you are dropping rows these would be a list of columns to include.");
axis_change();
Expand Down

0 comments on commit 78d458f

Please sign in to comment.