Decouple models from evaluate package #516

Merged · 2 commits · Nov 28, 2023
28 changes: 12 additions & 16 deletions cyclops/data/utils.py
@@ -149,22 +149,18 @@
         If a required column is not present in the dataset.
 
     """
-    required_columns_ = []
-    for required_column in required_columns:
-        if required_column is None:
-            continue
-        if isinstance(required_column, str):
-            required_columns_.append(required_column)
-        else:
-            required_columns_.extend(required_column)
-
-    for column in required_columns_:
-        if column is not None and column not in dataset_column_names:
-            raise ValueError(
-                f"Column {column} is not present in the dataset. Please "
-                "specify a valid column. The following columns are present "
-                f"in the dataset: {dataset_column_names}.",
-            )
+    required_columns_ = [
+        column
+        for column in required_columns
+        if column is not None
+        for column in (column if isinstance(column, list) else [column])
+        if column is not None
+    ]
+    missing_columns = set(required_columns_) - set(dataset_column_names)
+    if missing_columns:
+        raise ValueError(
+            f"Dataset is missing the following required columns: {missing_columns}.",
+        )


 def feature_is_numeric(feature: FEATURE_TYPES) -> bool:
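For illustration, here is a standalone sketch (not part of the diff) of how the new flattening logic behaves; the sample column names are invented:

```python
# `required_columns` may mix plain strings, lists of strings, and None,
# mirroring the comprehension in the hunk above.
required_columns = ["target", None, ["preds.model_a", "preds.model_b"]]
dataset_column_names = ["target", "preds.model_a"]

required_columns_ = [
    column
    for column in required_columns
    if column is not None
    for column in (column if isinstance(column, list) else [column])
    if column is not None
]
# required_columns_ == ["target", "preds.model_a", "preds.model_b"]

missing_columns = set(required_columns_) - set(dataset_column_names)
# missing_columns == {"preds.model_b"}, so a ValueError is raised
```

One behavioral note: the old loop raised on the first missing column, while the new set difference reports all missing columns in a single error.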
178 changes: 35 additions & 143 deletions cyclops/evaluate/evaluator.py
@@ -3,11 +3,10 @@
 import logging
 import warnings
 from dataclasses import asdict
-from typing import Any, Callable, Dict, List, Optional, Sequence, Union, get_args
+from typing import Any, Dict, List, Optional, Sequence, Union
 
 from datasets import Dataset, DatasetDict, config, load_dataset
 from datasets.splits import Split
-from sklearn.compose import ColumnTransformer
 
 from cyclops.data.slicer import SliceSpec
 from cyclops.data.utils import (
@@ -18,8 +17,7 @@
 from cyclops.evaluate.fairness.config import FairnessConfig
 from cyclops.evaluate.fairness.evaluator import evaluate_fairness
 from cyclops.evaluate.metrics.metric import Metric, MetricCollection
-from cyclops.evaluate.utils import choose_split
-from cyclops.models.wrappers import WrappedModel
+from cyclops.evaluate.utils import _format_column_names, choose_split
 from cyclops.utils.log import setup_logging


@@ -31,13 +29,8 @@
     dataset: Union[str, Dataset, DatasetDict],
     metrics: Union[Metric, Sequence[Metric], Dict[str, Metric], MetricCollection],
     target_columns: Union[str, List[str]],
-    feature_columns: Optional[Union[str, List[str]]] = None,
-    prediction_column_prefix: str = "predictions",
-    remove_columns: Optional[Union[str, List[str]]] = None,
-    models: Optional[
-        Union[WrappedModel, Sequence[WrappedModel], Dict[str, WrappedModel]]
-    ] = None,
-    transforms: Optional[Union[Callable[..., Any], ColumnTransformer]] = None,
+    prediction_columns: Union[str, List[str]],
+    ignore_columns: Optional[Union[str, List[str]]] = None,
     slice_spec: Optional[SliceSpec] = None,
     split: Optional[Union[str, Split]] = None,
     batch_size: Optional[int] = config.DEFAULT_MAX_BATCH_SIZE,
@@ -57,30 +50,19 @@
     metrics : Union[Metric, Sequence[Metric], Dict[str, Metric], MetricCollection]
         The metrics to compute.
     target_columns : Union[str, List[str]]
-        The name of the column(s) containing the target values.
-    feature_columns : Union[str, List[str]], optional
-        The name of the column(s) containing the feature values. This must be provided
-        if `models` is not None.
-    prediction_column_prefix : str, optional
-        The prefix of the column(s) containing the predictions. If `models` is not
-        None, the predictions will be added to the dataset and the column names will
-        be `{prediction_column_prefix}.{model_name}`. If `models` is None, the
-        predictions will be read from the dataset and the column names must start
-        with `prediction_column_prefix`.
-    remove_columns : Union[str, List[str]], optional
-        The name of the column(s) to remove from the dataset before filtering
-        and computing metrics. This is useful if the dataset contains columns
-        that are not needed for computing metrics but may be expensive to
-        keep in memory (e.g. image columns).
-    models : Union[WrappedModel, Sequence[WrappedModel], Dict[str, WrappedModel]]
-        The model(s) to evaluate. If a `Sequence` of `WrappedModel`, each model will
-        be evaluated on the entire dataset and the model class name will be used as
-        the model name. If a `Dict` of `WrappedModel`, each model will be evaluated
-        on the entire dataset and the keys will be used as the model names.
-    transforms : Callable, optional
-        A function that transforms the dataset before doing inference. This is
-        useful if the dataset needs to be transformed before being passed to
-        the model.
+        The name of the column(s) containing the target values. A string value
+        indicates a single column. A list of strings indicates a multi-label
+        task - the target values will be the union of the columns.
+    prediction_columns : Union[str, List[str]]
+        The names of the prediction columns used to compute metrics. If a string, it
+        should be the name of a column in the dataset. If a list, it should be a list
+        of column names in the dataset. Lists allow for evaluating multiple models
+        on the same dataset.
+    ignore_columns : Union[str, List[str]], optional
+        The name of the column(s) to ignore while filtering the dataset and computing
+        metrics. This is useful if the dataset contains columns that are not needed
+        for computing metrics but may be expensive to keep in memory
+        (e.g. image columns).
     slice_spec : SliceSpec, optional
         The slice specification to use for computing metrics. If None, no slices
         will be computed - the metrics will be computed on the entire dataset.
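To make the new surface concrete, here is a hypothetical call under the refactored signature (the dataset, metric object, and column names are illustrative, not from this PR):

```python
from cyclops.evaluate.evaluator import evaluate

# `ds` and `my_metric` are placeholders: evaluate() no longer runs inference,
# so `ds` must already contain the prediction columns.
results = evaluate(
    dataset=ds,  # a datasets.Dataset, or a DatasetDict together with `split=`
    metrics=my_metric,  # Metric, Sequence[Metric], Dict[str, Metric], or MetricCollection
    target_columns="label",
    prediction_columns=["preds.model_a", "preds.model_b"],  # one column per model
    ignore_columns="image",  # dropped before slicing to save memory
)
```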
@@ -123,84 +105,33 @@
     ------
     ValueError
         - If `dataset` is a `DatasetDict` and `split` is None.
-        - If `models` is None and `dataset` does not have a column that starts
-          with `prediction_column_prefix`.
-        - If `models` is not None and `feature_columns` is None.
-        - If multiple models are provided and only one set of results is found
-          after computing metrics.
 
     """
     dataset = _load_data(dataset, split, **(load_dataset_kwargs or {}))
+    metrics = _prepare_metrics(metrics)
 
-    column_names: List[str] = dataset.column_names
     check_required_columns(
-        column_names,
+        dataset.column_names,
         target_columns,
-        feature_columns,
-        remove_columns,
+        prediction_columns,
+        ignore_columns,
     )
 
-    metrics = _prepare_metrics(metrics)
-
-    if models is None and not any(
-        col.startswith(prediction_column_prefix) for col in column_names
-    ):
-        raise ValueError(
-            "Got `model=None` but `dataset` does not have a column that "
-            f"starts with `{prediction_column_prefix}`. Please specify a "
-            f"model or add a column that starts with `{prediction_column_prefix}` "
-            "to the dataset.",
-        )
-
-    if models is not None:
-        if feature_columns is None:
-            raise ValueError(
-                "Got `models` but `feature_columns` is None. Please specify "
-                "`feature_columns` argument.",
-            )
-        models = _prepare_models(models)
-        for model_name, model in models.items():
-            dataset = model.predict_proba(
-                dataset,
-                feature_columns=feature_columns,
-                prediction_column_prefix=prediction_column_prefix,
-                model_name=model_name,
-                transforms=transforms,
-                only_predictions=False,
-            )
-
-    # compute metrics for each model
-    results = {}
-
     if slice_spec is None:
         slice_spec = SliceSpec()
 
     metric_results = _compute_metrics(
-        dataset,
-        metrics,
-        slice_spec,
+        dataset=dataset,
+        metrics=metrics,
+        slice_spec=slice_spec,
         target_columns=target_columns,
-        prediction_column_prefix=prediction_column_prefix,
-        remove_columns=remove_columns,
+        prediction_columns=prediction_columns,
+        ignore_columns=ignore_columns,
         batch_size=batch_size,
         raise_on_empty_slice=raise_on_empty_slice,
     )
-    if "default" in metric_results:
-        if models is not None and len(models) > 1:
-            raise ValueError(
-                "Got multiple models but only one set of predictions. "
-                "Please make sure that the predictions for each model "
-                f"starts with `{prediction_column_prefix}` followed by "
-                "the model name. For example, if the model name is "
-                "`my_model`, the predictions should be in a column "
-                f"called `{prediction_column_prefix}.my_model`.",
-            )
-        if models is not None:  # only one model; replace "default" with model name
-            model_name = list(models.keys())[0]
-            metric_results[model_name] = metric_results.pop("default")
-        else:  # no models; don't name the results
-            metric_results = metric_results.pop("default")
 
+    results = {}
     results.update(metric_results)
 
     if fairness_config is not None:
@@ -209,13 +140,9 @@

         fairness_config.dataset = dataset
         fairness_config.target_columns = target_columns
-        fairness_config.prediction_columns = [
-            col
-            for col in dataset.column_names
-            if col.startswith(prediction_column_prefix)
-        ]
+        fairness_config.prediction_columns = prediction_columns
         fairness_config.batch_size = batch_size
-        fairness_config.remove_columns = remove_columns
+        fairness_config.remove_columns = ignore_columns
 
         fairness_results = evaluate_fairness(**asdict(fairness_config))
         results["fairness"] = fairness_results
@@ -294,47 +221,19 @@
     )
 
 
-def _prepare_models(
-    model: Union[WrappedModel, Sequence[WrappedModel], Dict[str, WrappedModel]],
-) -> Dict[str, WrappedModel]:
-    """Prepare models for evaluation."""
-    if isinstance(model, get_args(WrappedModel)):
-        model_name: str = model.model_.__class__.__name__
-        return {model_name: model}
-    if isinstance(model, (list, tuple)):
-        assert all(isinstance(m, get_args(WrappedModel)) for m in model)
-        return {m.getattr("model_").__class__.__name__: m for m in model}
-    if isinstance(model, dict):
-        assert all(isinstance(m, get_args(WrappedModel)) for m in model.values())
-        return model
-
-    raise TypeError(
-        f"Invalid type for `model`: {type(model)}. "
-        "Expected one of: WrappedModel, Sequence[WrappedModel], "
-        "Dict[str, WrappedModel].",
-    )
-
-
 def _compute_metrics(
     dataset: Dataset,
     metrics: MetricCollection,
     slice_spec: SliceSpec,
     target_columns: Union[str, List[str]],
-    prediction_column_prefix: str = "predictions",
-    remove_columns: Optional[Union[str, List[str]]] = None,
+    prediction_columns: Union[str, List[str]],
+    ignore_columns: Optional[Union[str, List[str]]] = None,
     batch_size: Optional[int] = config.DEFAULT_MAX_BATCH_SIZE,
     raise_on_empty_slice: bool = False,
 ) -> Dict[str, Dict[str, Any]]:
     """Compute metrics for a dataset."""
-    if isinstance(target_columns, str):
-        target_columns = [target_columns]
-
-    # get the predictions (there could be multiple)
-    # any column starting with `prediction_column_prefix` is considered a
-    # prediction column, for a single model
-    prediction_columns = [
-        col for col in dataset.column_names if col.startswith(prediction_column_prefix)
-    ]
+    target_columns = _format_column_names(target_columns)
+    prediction_columns = _format_column_names(prediction_columns)
 
     # temporarily stop decoding features to save memory
     set_decode(dataset, False, exclude=target_columns + prediction_columns)
@@ -345,9 +244,8 @@
         output_all_columns=True,
     ):
         results: Dict[str, Dict[str, Any]] = {}
-
         for slice_name, slice_fn in slice_spec.slices():
-            sliced_dataset = dataset.remove_columns(remove_columns or []).filter(
+            sliced_dataset = dataset.remove_columns(ignore_columns or []).filter(
                 slice_fn,
                 batched=True,
                 batch_size=batch_size,
@@ -400,13 +298,7 @@
                 metric_output = metrics.compute()
                 metrics.reset_state()
 
-                # get the model name from the prediction column name
-                # model name is everything after the first `prediction_column_prefix.`
-                model_name: str = "default"
-                pred_col_split = prediction_column.split(".", 1)
-                if len(pred_col_split) == 2:
-                    model_name = pred_col_split[1]
-
+                model_name: str = "model_for_%s" % prediction_column
                 results.setdefault(model_name, {})
                 results[model_name][slice_name] = metric_output
 
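With the prefix parsing gone, result keys are derived mechanically from each prediction column. A sketch of the mapping `_compute_metrics` now produces (slice names, metric names, and values are invented for illustration):

```python
# For prediction_columns=["preds.model_a", "preds.model_b"], results are keyed
# by "model_for_<prediction_column>" instead of a name parsed from a prefix.
results = {
    "model_for_preds.model_a": {"overall": {"binary_f1_score": 0.83}},
    "model_for_preds.model_b": {"overall": {"binary_f1_score": 0.79}},
}
```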
35 changes: 3 additions & 32 deletions cyclops/evaluate/fairness/evaluator.py
@@ -30,6 +30,7 @@
     _check_thresholds,
     _get_value_if_singleton_array,
 )
+from cyclops.evaluate.utils import _format_column_names
 from cyclops.utils.log import setup_logging


@@ -42,7 +43,7 @@ def evaluate_fairness(
     dataset: Dataset,
     groups: Union[str, List[str]],
     target_columns: Union[str, List[str]],
-    prediction_columns: Union[str, List[str]] = "predictions",
+    prediction_columns: Union[str, List[str]],
     group_values: Optional[Dict[str, Any]] = None,
     group_bins: Optional[Dict[str, Union[int, List[Any]]]] = None,
     group_base_values: Optional[Dict[str, Any]] = None,
@@ -77,7 +78,7 @@
         The target or targets columns used to compute metrics. If a string, it should
         be the name of a column in the dataset. If a list, it should be a list of
         column names in the dataset. Lists will be treated as multilabel targets.
-    prediction_columns : Union[str, List[str]], default="predictions"
+    prediction_columns : Union[str, List[str]]
         The names of the prediction columns used to compute metrics. If a string, it
         should be the name of a column in the dataset. If a list, it should be a list
         of column names in the dataset. Lists allow for evaluating multiple models
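Because the "predictions" default is removed, callers must now name the prediction column explicitly. A minimal sketch (dataset, group, and column names are made up):

```python
from cyclops.evaluate.fairness.evaluator import evaluate_fairness

fairness_results = evaluate_fairness(
    dataset=ds,  # placeholder datasets.Dataset
    groups="gender",
    target_columns="label",
    prediction_columns="preds.model_a",  # now required; no implicit "predictions"
)
```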
@@ -411,36 +412,6 @@ def _format_metrics(
     )
 
 
-def _format_column_names(column_names: Union[str, List[str]]) -> List[str]:
-    """Format the column names to list of strings if not already a list.
-
-    Parameters
-    ----------
-    column_names : Union[str, List[str]]
-        The column names to format.
-
-    Returns
-    -------
-    List[str]
-        The formatted column names.
-
-    Raises
-    ------
-    TypeError
-        If any of the column names are not strings or list of strings.
-
-    """
-    if isinstance(column_names, str):
-        return [column_names]
-    if isinstance(column_names, list):
-        return column_names
-
-    raise TypeError(
-        f"Expected column name {column_names} to be a string or "
-        f"list of strings, but got {type(column_names)}.",
-    )
-
-
 def _get_unique_values(
     dataset: Dataset,
     groups: List[str],
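Per the body shown in the removed hunk above, the helper's contract is unchanged by the move to `cyclops.evaluate.utils`; a quick sketch of its behavior:

```python
from cyclops.evaluate.utils import _format_column_names

_format_column_names("label")                 # -> ["label"]
_format_column_names(["label_a", "label_b"])  # -> ["label_a", "label_b"], unchanged
_format_column_names(42)                      # raises TypeError
```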