From 82986bc3ce627ba30c42be24597522ada941c204 Mon Sep 17 00:00:00 2001 From: iamheinrich <76793837+iamheinrich@users.noreply.github.com> Date: Thu, 15 May 2025 10:20:39 +0200 Subject: [PATCH 1/6] refactor: Status Quo --- doleus/datasets/base.py | 19 +-- doleus/storage/prediction_store.py | 110 ++++++++++--- tests/test_metric_classification.py | 235 +++++++++++++++------------- tests/test_metric_detection.py | 218 ++++++++++++-------------- 4 files changed, 317 insertions(+), 265 deletions(-) diff --git a/doleus/datasets/base.py b/doleus/datasets/base.py index fc48cd3..0404bd3 100644 --- a/doleus/datasets/base.py +++ b/doleus/datasets/base.py @@ -107,6 +107,7 @@ def add_model_predictions( self.prediction_store.add_predictions( predictions=predictions, model_id=model_id, + task=self.task, ) # ------------------------------------------------------------------------- @@ -161,27 +162,23 @@ def add_metadata_from_list(self, metadata_list: List[Dict[str, Any]]): for key, value in md_dict.items(): self.metadata_store.add_metadata(i, key, value) - def add_predefined_metadata(self, keys: Union[str, List[str]]): + def add_predefined_metadata(self, attribute: str) -> None: """Add predefined metadata using functions from ATTRIBUTE_FUNCTIONS. Parameters ---------- - keys : Union[str, List[str]] - Name(s) of predefined metadata function(s) to compute and add. + attribute : str + Name of predefined metadata function to compute and add. Available keys are defined in ATTRIBUTE_FUNCTIONS. Raises ------ ValueError - If any key is not found in ATTRIBUTE_FUNCTIONS. + If attribute is not found in ATTRIBUTE_FUNCTIONS. 
""" - if isinstance(keys, str): - keys = [keys] - - for key in keys: - if key not in ATTRIBUTE_FUNCTIONS: - raise ValueError(f"Unknown predefined metadata key: {key}") - self.add_metadata(key, ATTRIBUTE_FUNCTIONS[key]) + if attribute not in ATTRIBUTE_FUNCTIONS: + raise ValueError(f"Unknown predefined metadata attribute: {attribute}") + self.add_metadata(attribute, ATTRIBUTE_FUNCTIONS[attribute]) def add_metadata_from_dataframe(self, df): """Add metadata from a pandas DataFrame. diff --git a/doleus/storage/prediction_store.py b/doleus/storage/prediction_store.py index 31c5a3d..d4dedaf 100644 --- a/doleus/storage/prediction_store.py +++ b/doleus/storage/prediction_store.py @@ -1,9 +1,9 @@ -from typing import Any, Dict, List, Union +from typing import Any, Dict, List, Optional, Union import torch from doleus.annotations import Annotations, BoundingBoxes, Labels -from doleus.utils import TaskType +from doleus.utils import Task, TaskType class PredictionStore: @@ -28,6 +28,7 @@ def add_predictions( self, predictions: Union[torch.Tensor, List[Dict[str, Any]]], model_id: str, + task: Optional[str] = None, ) -> None: """ Store predictions for a model. @@ -41,12 +42,16 @@ def add_predictions( with 'boxes', 'labels', and 'scores' keys. model_id : str Identifier of the specified model. + task : Optional[str], optional + The specific task (e.g., "multilabel", "multiclass"), by default None. """ - processed_predictions = self._process_predictions(predictions) + processed_predictions = self._process_predictions(predictions, task) self.predictions[model_id] = processed_predictions def _process_predictions( - self, predictions: Union[torch.Tensor, List[Dict[str, Any]], Annotations] + self, + predictions: Union[torch.Tensor, List[Dict[str, Any]], Annotations], + task: Optional[str] = None, ) -> Annotations: """Process raw predictions into the standard annotation format. 
@@ -57,14 +62,18 @@ def _process_predictions( - A torch.Tensor for classification tasks - A list of dictionaries for detection tasks - An already processed Annotations object + task : Optional[str], optional + The specific task (e.g., "multilabel", "multiclass"), by default None. + Returns ------- Annotations Processed predictions in standard annotation format. """ - if isinstance(predictions[0], Labels) or isinstance( - predictions[0], BoundingBoxes + if isinstance(predictions, Annotations) and ( + isinstance(predictions[0], Labels) + or isinstance(predictions[0], BoundingBoxes) ): return predictions @@ -78,43 +87,89 @@ def _process_predictions( num_samples = predictions.shape[0] - # If shape is [N], assume these are predicted labels (class IDs) - # If shape is [N, C], assume these are logits or probabilities if predictions.dim() == 1: + # Assume these are predicted labels (class IDs) for single-label tasks for i in range(num_samples): - label_val = predictions[i].unsqueeze(0) + label_val = predictions[i].unsqueeze(0) # Ensure [1] shape ann = Labels(datapoint_number=i, labels=label_val, scores=None) processed.add(ann) - elif predictions.dim() == 2: - # logits or probabilities of shape [N, C] - # currently we always interpret them as logits, with an argmax + elif predictions.dim() == 2: # Shape [N, C] for i in range(num_samples): - logit_row = predictions[i] - # "labels" is the top-1 predicted label - pred_label = logit_row.argmax(dim=0).unsqueeze(0) - scores = torch.softmax(logit_row, dim=0) - ann = Labels( - datapoint_number=i, - labels=pred_label, # shape [1] - scores=scores, # shape [self.num_classes] - ) + prediction_row = predictions[i] # This is the [C] tensor for the i-th sample + current_labels: torch.Tensor + current_scores: Optional[torch.Tensor] + + if task == Task.MULTILABEL.value: + if prediction_row.dtype in (torch.long, torch.int, torch.bool): + # Input is integer multi-hot + current_labels = prediction_row + current_scores = None + else: # Float 
input, assumed to be logits or probabilities + # Apply sigmoid if not already probabilities in [0,1] + if not (prediction_row.min() >= 0 and prediction_row.max() <= 1): + processed_scores_for_row = torch.sigmoid(prediction_row) + else: # Already probabilities + processed_scores_for_row = prediction_row + + current_labels = (processed_scores_for_row >= 0.5).long() # Default threshold 0.5 + current_scores = processed_scores_for_row + + ann = Labels( + datapoint_number=i, + labels=current_labels, + scores=current_scores, + ) + else: # Binary, Multiclass, or task is None (default to old behavior) + current_labels = prediction_row.argmax(dim=0).unsqueeze(0) # [1] tensor + + if prediction_row.dtype == torch.float: + current_scores = torch.softmax(prediction_row, dim=0) # [C] tensor + else: # Integer input + current_scores = None + + ann = Labels( + datapoint_number=i, + labels=current_labels, + scores=current_scores, + ) processed.add(ann) - else: - raise ValueError("Classification predictions must be 1D or 2D tensor.") + raise ValueError( + "Classification predictions must be a 1D or 2D tensor." + ) elif self.task_type == TaskType.DETECTION.value: if not isinstance(predictions, list): raise TypeError( "For detection, predictions must be a list of length N." ) + if not all(isinstance(p, dict) for p in predictions): + raise TypeError( + "Each item in detection predictions list must be a dictionary." + ) + # Each element should look like {"boxes": (M,4), "labels": (M,), "scores": (M,)} for i, pred_dict in enumerate(predictions): - boxes_xyxy = torch.tensor(pred_dict["boxes"], dtype=torch.float32) - labels = torch.tensor(pred_dict["labels"], dtype=torch.long) - scores = torch.tensor(pred_dict["scores"], dtype=torch.float32) + # Validate keys + required_keys = {"boxes", "labels", "scores"} + if not required_keys.issubset(pred_dict.keys()): + raise ValueError(f"Detection prediction dict for sample {i} missing keys. 
Required: {required_keys}") + + boxes_xyxy = torch.as_tensor(pred_dict["boxes"], dtype=torch.float32) + labels = torch.as_tensor(pred_dict["labels"], dtype=torch.long) + scores = torch.as_tensor(pred_dict["scores"], dtype=torch.float32) + + # Validate shapes + num_detections = boxes_xyxy.shape[0] + if not (boxes_xyxy.ndim == 2 and boxes_xyxy.shape[1] == 4): + raise ValueError(f"boxes for sample {i} must have shape (M,4)") + if not (labels.ndim == 1 and labels.shape[0] == num_detections): + raise ValueError(f"labels for sample {i} must have shape (M,)") + if not (scores.ndim == 1 and scores.shape[0] == num_detections): + raise ValueError(f"scores for sample {i} must have shape (M,)") + ann = BoundingBoxes( datapoint_number=i, @@ -123,6 +178,9 @@ def _process_predictions( scores=scores, ) processed.add(ann) + else: + raise ValueError(f"Unsupported task type: {self.task_type}") + return processed diff --git a/tests/test_metric_classification.py b/tests/test_metric_classification.py index 95113c1..d567910 100644 --- a/tests/test_metric_classification.py +++ b/tests/test_metric_classification.py @@ -1,146 +1,161 @@ import pytest import torch -from moonwatcher.dataset.dataset import MoonwatcherDataset -from moonwatcher.metric import calculate_metric_internal +from torch.utils.data import Dataset as TorchDataset + +from doleus.datasets.classification import DoleusClassification +from doleus.metric import calculate_metric_internal +from doleus.utils.data import Task class MockModel: - def __init__(self, name, task, device): + def __init__(self, name: str, task_type: str): self.name = name - self.task_type = task - self.device = device + self.task_type = task_type -class MockDataset(MoonwatcherDataset): - def __init__(self, name, labels, label_to_name): - self.name = name - self.labels = labels - self.label_to_name = label_to_name - self.dataset = labels +class MockTorchDataset(TorchDataset): + def __init__(self, img_labels: list): + self.img_labels = img_labels + def 
__len__(self): + return len(self.img_labels) -class MockLabel: - def __init__(self, label): - self.labels = torch.tensor([label]) + def __getitem__(self, idx): + return torch.empty(0), self.img_labels[idx] -def mock_do_predictions_exist(dataset_name, model_name): - return True +class MockLabel: + def __init__(self, labels): + if not isinstance(labels, torch.Tensor): + self.labels = torch.tensor(labels) + else: + self.labels = labels + +def test_multilabel_accuracy(): + model = MockModel(name="mock_multilabel_model", task_type=Task.MULTILABEL.value) + assert True == True + +def test_calculate_accuracy_binary(): + model = MockModel(name="mock_binary_model", task_type=Task.BINARY.value) + + groundtruths_labels = [MockLabel([0]), MockLabel([1]), MockLabel([0]), MockLabel([1])] + predictions_labels = [MockLabel([0]), MockLabel([1]), MockLabel([1]), MockLabel([1])] + + dataset_tensor_labels = [gt.labels for gt in groundtruths_labels] + mock_torch_dataset = MockTorchDataset(dataset_tensor_labels) + + doleus_dataset = DoleusClassification( + dataset=mock_torch_dataset, + name="mock_binary_dataset", + task=Task.BINARY.value, + num_classes=2, + label_to_name={0: "class0", 1: "class1"} + ) + relevant_ids = list(range(len(doleus_dataset))) + result = calculate_metric_internal( + model, relevant_ids, doleus_dataset, groundtruths_labels, predictions_labels, "Accuracy" + ) + assert result == 0.75, f"Expected Accuracy to be 0.75 but got {result}" -def mock_load_groundtruths(dataset_name): - return [MockLabel(0), MockLabel(1), MockLabel(0), MockLabel(1)] +def test_calculate_precision_binary(): + model = MockModel(name="mock_binary_model", task_type=Task.BINARY.value) + groundtruths_labels = [MockLabel([0]), MockLabel([1]), MockLabel([0]), MockLabel([1])] + predictions_labels = [MockLabel([0]), MockLabel([1]), MockLabel([1]), MockLabel([1])] + + dataset_tensor_labels = [gt.labels for gt in groundtruths_labels] + mock_torch_dataset = MockTorchDataset(dataset_tensor_labels) + + 
doleus_dataset = DoleusClassification( + dataset=mock_torch_dataset, + name="mock_binary_dataset", + task=Task.BINARY.value, + num_classes=2, + label_to_name={0: "class0", 1: "class1"} + ) + relevant_ids = list(range(len(doleus_dataset))) -def mock_load_predictions(dataset_name, model_name): - return [MockLabel(0), MockLabel(1), MockLabel(1), MockLabel(1)] + result = calculate_metric_internal( + model, relevant_ids, doleus_dataset, groundtruths_labels, predictions_labels, "Precision" + ) + assert result == pytest.approx(2/3), f"Expected Precision to be {2/3} but got {result}" -def mock_inference(model, dataset, device): - pass +def test_calculate_recall_binary(): + model = MockModel(name="mock_binary_model", task_type=Task.BINARY.value) + groundtruths_labels = [MockLabel([0]), MockLabel([1]), MockLabel([0]), MockLabel([1])] + predictions_labels = [MockLabel([0]), MockLabel([1]), MockLabel([1]), MockLabel([1])] + dataset_tensor_labels = [gt.labels for gt in groundtruths_labels] + mock_torch_dataset = MockTorchDataset(dataset_tensor_labels) -@pytest.fixture(autouse=True) -def patch_functions(monkeypatch): - monkeypatch.setattr( - "moonwatcher.utils.data_storage.do_predictions_exist", mock_do_predictions_exist + doleus_dataset = DoleusClassification( + dataset=mock_torch_dataset, + name="mock_binary_dataset", + task=Task.BINARY.value, + num_classes=2, + label_to_name={0: "class0", 1: "class1"} ) - monkeypatch.setattr( - "moonwatcher.utils.data_storage.load_groundtruths", mock_load_groundtruths - ) - monkeypatch.setattr( - "moonwatcher.utils.data_storage.load_predictions", mock_load_predictions - ) - monkeypatch.setattr("moonwatcher.inference.inference", mock_inference) - + relevant_ids = list(range(len(doleus_dataset))) -def load_data_for_testing(): - model = MockModel( - name="mock_model", task_type=TaskType.CLASSIFICATION.value, device="cpu" - ) - dataset = MockDataset( - name="mock_dataset", - labels=[0, 1, 0, 1], - label_to_name={0: "class0", 1: "class1"}, - ) - 
relevant_ids = list(range(len(dataset.labels))) - groundtruths_loaded = mock_load_groundtruths(dataset.name) - predictions_loaded = mock_load_predictions(dataset.name, model.name) - return model, relevant_ids, dataset, groundtruths_loaded, predictions_loaded - - -def test_calculate_accuracy(): - ( - model, - relevant_ids, - dataset, - groundtruths_loaded, - predictions_loaded, - ) = load_data_for_testing() result = calculate_metric_internal( - model, - relevant_ids, - dataset, - groundtruths_loaded, - predictions_loaded, - "Accuracy", + model, relevant_ids, doleus_dataset, groundtruths_labels, predictions_labels, "Recall" ) - assert result == 0.75, f"Expected Accuracy to be 0.75 but got {result}" + assert result == 1.0, f"Expected Recall to be 1.0 but got {result}" -def test_calculate_precision(): - ( - model, - relevant_ids, - dataset, - groundtruths_loaded, - predictions_loaded, - ) = load_data_for_testing() - result = calculate_metric_internal( - model, - relevant_ids, - dataset, - groundtruths_loaded, - predictions_loaded, - "Precision", +def test_calculate_f1_binary(): + model = MockModel(name="mock_binary_model", task_type=Task.BINARY.value) + groundtruths_labels = [MockLabel([0]), MockLabel([1]), MockLabel([0]), MockLabel([1])] + predictions_labels = [MockLabel([0]), MockLabel([1]), MockLabel([1]), MockLabel([1])] + + dataset_tensor_labels = [gt.labels for gt in groundtruths_labels] + mock_torch_dataset = MockTorchDataset(dataset_tensor_labels) + + doleus_dataset = DoleusClassification( + dataset=mock_torch_dataset, + name="mock_binary_dataset", + task=Task.BINARY.value, + num_classes=2, + label_to_name={0: "class0", 1: "class1"} ) - assert ( - result == 0.66667 - ), f"Expected Precision to be 0.66667 but got { - result}" - - -def test_calculate_recall(): - ( - model, - relevant_ids, - dataset, - groundtruths_loaded, - predictions_loaded, - ) = load_data_for_testing() + relevant_ids = list(range(len(doleus_dataset))) + result = calculate_metric_internal( - 
model, relevant_ids, dataset, groundtruths_loaded, predictions_loaded, "Recall" + model, relevant_ids, doleus_dataset, groundtruths_labels, predictions_labels, "F1_Score" ) - assert result == 1.0, f"Expected Recall to be 1.0 but got {result}" + assert result == 0.8, f"Expected F1_Score to be 0.8 but got {result}" -def test_calculate_f1(): - ( - model, - relevant_ids, - dataset, - groundtruths_loaded, - predictions_loaded, - ) = load_data_for_testing() +def test_calculate_accuracy_multilabel(): + model = MockModel(name="mock_multilabel_model", task_type=Task.MULTILABEL.value) + + groundtruths_labels = [ + MockLabel([1, 0, 1]), MockLabel([0, 1, 1]), + MockLabel([1, 1, 0]), MockLabel([0, 0, 1]) + ] + predictions_labels = [ + MockLabel([1, 0, 1]), MockLabel([0, 1, 0]), + MockLabel([1, 1, 0]), MockLabel([0, 1, 1]) + ] + + dataset_tensor_labels = [gt.labels for gt in groundtruths_labels] + mock_torch_dataset = MockTorchDataset(dataset_tensor_labels) + + doleus_dataset = DoleusClassification( + dataset=mock_torch_dataset, + name="mock_multilabel_dataset", + task=Task.MULTILABEL.value, + num_classes=3, + label_to_name={0: "classA", 1: "classB", 2: "classC"} + ) + relevant_ids = list(range(len(doleus_dataset))) + result = calculate_metric_internal( - model, - relevant_ids, - dataset, - groundtruths_loaded, - predictions_loaded, - "F1_Score", + model, relevant_ids, doleus_dataset, groundtruths_labels, predictions_labels, "Accuracy" ) - assert result == 0.8, f"Expected F1_Score to be 0.8 but got {result}" + assert result == 0.5, f"Expected Multilabel Accuracy to be 0.5 but got {result}" if __name__ == "__main__": diff --git a/tests/test_metric_detection.py b/tests/test_metric_detection.py index 0989f6d..e5271f0 100644 --- a/tests/test_metric_detection.py +++ b/tests/test_metric_detection.py @@ -1,129 +1,111 @@ import pytest import torch -from moonwatcher.dataset.dataset import MoonwatcherDataset -from moonwatcher.metric import calculate_metric - - -class MockModel: - def 
__init__(self, name, task_type, device): - self.name = name - self.task_type = task_type - self.device = device - - -class MockDataset(MoonwatcherDataset): - def __init__(self, name): - self.name = name - self.dataset = [] - - -class MockDetection: - def __init__(self, boxes, labels, scores=None): - self.boxes = torch.tensor(boxes) - self.labels = torch.tensor(labels) - self.scores = torch.tensor(scores) if scores else None - - def to_dict(self): - result = {"boxes": self.boxes, "labels": self.labels} - if self.scores is not None: - result["scores"] = self.scores - return result - - -def mock_do_predictions_exist(dataset_name, model_name): - return True - - -def mock_load_groundtruths(dataset_name): - return [ - MockDetection([[50, 50, 150, 150]], [1]), - MockDetection([[30, 30, 120, 120]], [0]), - MockDetection([[10, 10, 100, 100]], [1]), - MockDetection([[40, 40, 140, 140]], [1]), - ] - - -def mock_load_predictions(dataset_name, model_name): - return [ - MockDetection([[50, 50, 150, 150]], [1], [0.9]), - MockDetection([[35, 35, 115, 115]], [0], [0.8]), - MockDetection([[15, 15, 105, 105]], [1], [0.75]), - MockDetection([[45, 45, 145, 145]], [1], [0.85]), - ] - - -def mock_inference(model, dataset, device): - pass - - -@pytest.fixture(autouse=True) -def patch_functions(monkeypatch): - monkeypatch.setattr( - "moonwatcher.utils.data_storage.do_predictions_exist", mock_do_predictions_exist - ) - monkeypatch.setattr( - "moonwatcher.utils.data_storage.load_groundtruths", mock_load_groundtruths +from torch.utils.data import Dataset as TorchDataset +from doleus.datasets.detection import DoleusDetection +from doleus.annotations import Annotations, BoundingBoxes +from doleus.utils.data import TaskType +from doleus.metrics.calculator import calculate_metric + +# Define a simple dummy dataset for detection +class DummyDetectionDataset(TorchDataset): + def __init__(self, num_samples=4): + self.num_samples = num_samples + # Predefined data (image placeholder, boxes, labels) + 
self.data = [ + ( + torch.randn(3, 100, 100), + torch.tensor([[10, 10, 50, 50], [60, 60, 90, 90]]), + torch.tensor([0, 1]) + ), + ( + torch.randn(3, 100, 100), + torch.tensor([[20, 20, 70, 70]]), + torch.tensor([2]) + ), + ( + torch.randn(3, 100, 100), + torch.tensor([[30, 30, 80, 80], [50, 50, 95, 95]]), + torch.tensor([1, 3]) + ), + ( + torch.randn(3, 100, 100), + torch.tensor([[40, 40, 90, 90]]), + torch.tensor([0]) + ), + ] + + def __len__(self): + return self.num_samples + + def __getitem__(self, idx): + if idx >= self.num_samples: + raise IndexError("Index out of bounds") + return self.data[idx] + + +# --- Test Setup --- + +@pytest.fixture(scope="module") +def detection_data(): + """Prepares a DoleusDetection dataset with sample ground truths and predictions.""" + + # 1. Create DoleusDetection dataset + doleus_dataset = DoleusDetection( + dataset=DummyDetectionDataset(), + name="test_detection_dataset", ) - monkeypatch.setattr( - "moonwatcher.utils.data_storage.load_predictions", mock_load_predictions - ) - monkeypatch.setattr("moonwatcher.inference.inference", mock_inference) - -def load_data_for_testing(): - model = MockModel( - name="mock_model", task_type=TaskType.DETECTION.value, device="cpu" - ) - dataset = MockDataset(name="mock_dataset") - relevant_ids = list(range(4)) - groundtruths_loaded = mock_load_groundtruths(dataset.name) - predictions_loaded = mock_load_predictions(dataset.name, model.name) - return model, relevant_ids, dataset, groundtruths_loaded, predictions_loaded + # 2. Define sample predictions. Datapoint number corresponds to the index of the datapoint in the underlying dataset. 
+ predictions = [ + BoundingBoxes( + datapoint_number=0, + boxes_xyxy=torch.tensor([[12, 12, 48, 48], [65, 65, 88, 88]], dtype=torch.float32), + labels=torch.tensor([0, 1]), # Correct labels predicted + scores=torch.tensor([0.9, 0.85]), + ), + BoundingBoxes( + datapoint_number=1, + boxes_xyxy=torch.tensor([[25, 25, 75, 75], [5, 5, 15, 15]], dtype=torch.float32), + labels=torch.tensor([2, 0]), # Correct label + a false positive (class 0) + scores=torch.tensor([0.8, 0.5]), + ), + BoundingBoxes( + datapoint_number=2, + boxes_xyxy=torch.tensor([[30, 30, 80, 80]], dtype=torch.float32), + labels=torch.tensor([1]), # Predicts only label 1 (misses label 3) + scores=torch.tensor([0.75]), + ), + BoundingBoxes( + datapoint_number=3, + boxes_xyxy=torch.tensor([[42, 42, 88, 88], [10, 60, 30, 80]], dtype=torch.float32), + labels=torch.tensor([3, 2]), # Incorrect labels predicted (FP) + scores=torch.tensor([0.85, 0.6]), + ), + ] + # 3. Add predictions + doleus_dataset.add_model_predictions(predictions, model_id="test_model") + return doleus_dataset -def test_calculate_iou(): - ( - model, - relevant_ids, - dataset, - groundtruths_loaded, - predictions_loaded, - ) = load_data_for_testing() - result = calculate_metric( - model, - relevant_ids, - dataset, - groundtruths_loaded, - predictions_loaded, - "IntersectionOverUnion", - ) - assert ( - result > 0.75 - ), f"Expected IoU to be greater than 0.75 but got { - result}" +# --- Placeholder for Actual Tests --- +# (Tests will be added in the next step) -def test_calculate_map(): - ( - model, - relevant_ids, - dataset, - groundtruths_loaded, - predictions_loaded, - ) = load_data_for_testing() - result = calculate_metric( - model, - relevant_ids, - dataset, - groundtruths_loaded, - predictions_loaded, - "mAP", - ) - assert ( - result > 0.7 - ), f"Expected mAP to be greater than 0.7 but got { - result}" +# Example of how to use the fixture (will be replaced by actual tests): +# def test_setup_works(detection_data): +# doleus_dataset, 
relevant_ids = detection_data +# assert doleus_dataset.name == "test_detection_dataset" +# assert len(relevant_ids) == 4 +# assert len(doleus_dataset.groundtruths) == 4 +# assert len(doleus_dataset.predictions) == 4 +# assert doleus_dataset.task_type == TaskType.DETECTION.value +# print("\nSetup seems okay.") if __name__ == "__main__": - pytest.main() + # You can run pytest directly: + pytest.main([__file__]) + # Or manually invoke the fixture for debugging: + # print("Debugging fixture setup:") + # data, ids = detection_data() + # test_setup_works((data, ids)) From 08ba4d4897cc5bb8db89d45fedac9b2d6ef08532 Mon Sep 17 00:00:00 2001 From: iamheinrich <76793837+iamheinrich@users.noreply.github.com> Date: Fri, 23 May 2025 19:13:44 +0200 Subject: [PATCH 2/6] refactor: refactored cumulated prediction store into base, classification and detection store. added multilabel support for predictions. --- doleus/annotations/classification.py | 19 +- doleus/datasets/base.py | 54 ++++-- doleus/storage/__init__.py | 14 +- doleus/storage/base_store.py | 122 +++++++++++++ doleus/storage/classification_store.py | 162 +++++++++++++++++ doleus/storage/detection_store.py | 85 +++++++++ doleus/storage/prediction_store.py | 238 ------------------------- 7 files changed, 437 insertions(+), 257 deletions(-) create mode 100644 doleus/storage/base_store.py create mode 100644 doleus/storage/classification_store.py create mode 100644 doleus/storage/detection_store.py delete mode 100644 doleus/storage/prediction_store.py diff --git a/doleus/annotations/classification.py b/doleus/annotations/classification.py index bb4441d..cb69381 100644 --- a/doleus/annotations/classification.py +++ b/doleus/annotations/classification.py @@ -13,7 +13,7 @@ class Labels(Annotation): """ def __init__( - self, datapoint_number: int, labels: Tensor, scores: Optional[Tensor] = None + self, datapoint_number: int, labels: Optional[Tensor], scores: Optional[Tensor] = None ): """Initialize a Labels instance. 
@@ -21,11 +21,18 @@ def __init__( ---------- datapoint_number : int Index for the corresponding data point. - labels : Tensor - A 1D integer tensor representing the label(s). + labels : Optional[Tensor] + A 1D integer tensor. For single-label tasks, this typically contains one class index + (e.g., `tensor([2])`). For multilabel tasks, this is typically a multi-hot encoded + tensor (e.g., `tensor([1, 0, 1])`). Can be `None` if only `scores` are provided. scores : Optional[Tensor], optional - A float tensor containing predicted probability scores (optional). + A 1D float tensor. For single-label tasks (e.g. multiclass), this usually contains + probabilities for each class (e.g., `tensor([0.1, 0.2, 0.7])`). For multilabel + tasks, this contains independent probabilities for each label (e.g., + `tensor([0.8, 0.1, 0.9])`). Optional. """ + if labels is None and scores is None: + raise ValueError("Either 'labels' or 'scores' must be provided but both are None.") super().__init__(datapoint_number) self.labels = labels self.scores = scores @@ -38,7 +45,9 @@ def to_dict(self) -> dict: dict Dictionary with keys 'labels' and optionally 'scores'. 
""" - output = {"labels": self.labels} + output = {} + if self.labels is not None: + output["labels"] = self.labels if self.scores is not None: output["scores"] = self.scores return output diff --git a/doleus/datasets/base.py b/doleus/datasets/base.py index 66ba1bf..4dd8fa4 100644 --- a/doleus/datasets/base.py +++ b/doleus/datasets/base.py @@ -7,10 +7,16 @@ from tqdm import tqdm from doleus.annotations import BoundingBoxes, Labels -from doleus.storage import GroundTruthStore, MetadataStore, PredictionStore +from doleus.storage import ( + ClassificationPredictionStore, + DetectionPredictionStore, + GroundTruthStore, + MetadataStore, +) from doleus.utils import ( ATTRIBUTE_FUNCTIONS, OPERATOR_DICT, + TaskType, get_current_timestamp, to_numpy_image, create_filename, @@ -71,7 +77,18 @@ def __init__( self.metadata["_timestamp"] = get_current_timestamp() self.groundtruth_store = GroundTruthStore(task_type=task_type, dataset=dataset) - self.prediction_store = PredictionStore(task_type=task_type) + + if self.task_type == TaskType.CLASSIFICATION.value: + if not self.task: + raise ValueError( + "For classification task_type, a specific 'task' (e.g., binary, multiclass, multilabel) must be provided." 
+ ) + self.prediction_store = ClassificationPredictionStore() + elif self.task_type == TaskType.DETECTION.value: + self.prediction_store = DetectionPredictionStore() + else: + raise ValueError(f"Unsupported task_type: {self.task_type} for PredictionStore assignment") + self.metadata_store = MetadataStore( num_datapoints=len(dataset), metadata=per_datapoint_metadata ) @@ -86,7 +103,7 @@ def __getattr__(self, attr): return getattr(self.dataset, attr) @abstractmethod - def _create_new_instance(self, dataset, indices): + def _create_new_instance(self, dataset, indices, slice_name): pass def add_model_predictions( @@ -106,10 +123,21 @@ def add_model_predictions( model_id : str Name of the model that generated these predictions """ + kwargs = {} + if self.task_type == TaskType.CLASSIFICATION.value: + kwargs['task'] = self.task + # Ensure predictions is a Tensor for classification + if not isinstance(predictions, torch.Tensor): + raise TypeError("For classification tasks, predictions must be a torch.Tensor.") + elif self.task_type == TaskType.DETECTION.value: + # Ensure predictions is a List[Dict] for detection + if not isinstance(predictions, list) or not all(isinstance(p, dict) for p in predictions): + raise TypeError("For detection tasks, predictions must be a list of dictionaries.") + self.prediction_store.add_predictions( predictions=predictions, model_id=model_id, - task=self.task, + **kwargs, ) # ------------------------------------------------------------------------- @@ -165,23 +193,27 @@ def add_metadata_from_list(self, metadata_list: List[Dict[str, Any]]): for key, value in md_dict.items(): self.metadata_store.add_metadata(i, key, value) - def add_predefined_metadata(self, attribute: str) -> None: + def add_predefined_metadata(self, keys: Union[str, List[str]]): """Add predefined metadata using functions from ATTRIBUTE_FUNCTIONS. Parameters ---------- - attribute : str - Name of predefined metadata function to compute and add. 
+ keys : Union[str, List[str]] + Name(s) of predefined metadata function(s) to compute and add. Available keys are defined in ATTRIBUTE_FUNCTIONS. Raises ------ ValueError - If attribute is not found in ATTRIBUTE_FUNCTIONS. + If any key is not found in ATTRIBUTE_FUNCTIONS. """ - if attribute not in ATTRIBUTE_FUNCTIONS: - raise ValueError(f"Unknown predefined metadata attribute: {attribute}") - self.add_metadata(attribute, ATTRIBUTE_FUNCTIONS[attribute]) + if isinstance(keys, str): + keys = [keys] + + for key in keys: + if key not in ATTRIBUTE_FUNCTIONS: + raise ValueError(f"Unknown predefined metadata key: {key}") + self.add_metadata(key, ATTRIBUTE_FUNCTIONS[key]) def add_metadata_from_dataframe(self, df): """Add metadata from a pandas DataFrame. diff --git a/doleus/storage/__init__.py b/doleus/storage/__init__.py index 0e1ded7..e958b4f 100644 --- a/doleus/storage/__init__.py +++ b/doleus/storage/__init__.py @@ -1,5 +1,13 @@ -from doleus.storage.ground_truth_store import GroundTruthStore +from doleus.storage.base_store import BasePredictionStore +from doleus.storage.classification_store import ClassificationPredictionStore +from doleus.storage.detection_store import DetectionPredictionStore +from doleus.storage.groundtruth_store import GroundTruthStore from doleus.storage.metadata_store import MetadataStore -from doleus.storage.prediction_store import PredictionStore -__all__ = ["MetadataStore", "PredictionStore", "GroundTruthStore"] +__all__ = [ + "BasePredictionStore", + "ClassificationPredictionStore", + "DetectionPredictionStore", + "GroundTruthStore", + "MetadataStore", +] diff --git a/doleus/storage/base_store.py b/doleus/storage/base_store.py new file mode 100644 index 0000000..eae633a --- /dev/null +++ b/doleus/storage/base_store.py @@ -0,0 +1,122 @@ +from abc import ABC, abstractmethod +from typing import Any, Dict, List, Union + +import torch + +from doleus.annotations import Annotation, Annotations + + +class BasePredictionStore(ABC): + """Base storage 
for model predictions for a specific dataset instance.""" + + def __init__(self): + """Initialize the prediction store.""" + self.predictions: Dict[str, Annotations] = {} + + @abstractmethod + def add_predictions( + self, + predictions: Union[torch.Tensor, List[Dict[str, Any]]], + model_id: str, + **kwargs, + ) -> None: + """ + Store predictions for a model. + Actual implementation will depend on the task type (classification, detection). + + Parameters + ---------- + predictions : Union[torch.Tensor, List[Dict[str, Any]]] + Model predictions to store. For classification, this should be a + tensor of shape [N, C] where N is the number of samples and C is the + number of classes. For detection, this should be a list of dictionaries + with 'boxes', 'labels', and 'scores' keys. + model_id : str + Identifier of the specified model. + kwargs : dict + Additional arguments specific to the subclass implementation (e.g., 'task' for classification). + """ + pass + + @abstractmethod + def _process_predictions( + self, + predictions: Union[torch.Tensor, List[Dict[str, Any]], Annotations], + **kwargs, + ) -> Annotations: + """ + Process raw predictions into the standard annotation format. + Actual implementation will depend on the task type. + + Parameters + ---------- + predictions : Union[torch.Tensor, List[Dict[str, Any]], Annotations] + Raw predictions to process. + kwargs : dict + Additional arguments specific to the subclass implementation (e.g., 'task' for classification). + + Returns + ------- + Annotations + Processed predictions in standard annotation format. + """ + pass + + def get(self, model_id: str, datapoint_number: int) -> Annotation: + """Get a single annotation object by datapoint number. + + Parameters + ---------- + model_id : str + Identifier of the model to get predictions for. + datapoint_number : int + The ID of the sample in the dataset. + + Returns + ------- + Annotation + The specific Annotation object (e.g., Labels, BoundingBoxes) for the datapoint. 
+ """ + if model_id not in self.predictions: + raise KeyError(f"No predictions found for model: {model_id}") + return self.predictions[model_id][datapoint_number] + + def get_subset(self, model_id: str, indices: List[int]) -> Annotations: + """Get a subset of predictions for a specific model based on indices. + + Parameters + ---------- + model_id : str + Identifier of the model to get predictions for. + indices : List[int] + List of indices to get predictions for. + + Returns + ------- + Annotations + An Annotations object containing predictions for the specified indices. + """ + if model_id not in self.predictions: + raise KeyError(f"No predictions found for model: {model_id}") + + subset_annotations = Annotations() + for i in indices: + subset_annotations.add(self.predictions[model_id][i]) + return subset_annotations + + def get_predictions(self, model_id: str) -> Annotations: + """Get all predictions for a specific model. + + Parameters + ---------- + model_id : str + Identifier of the model to get predictions for. + + Returns + ------- + Annotations + An Annotations object containing all predictions for the specified model. 
+ """ + if model_id not in self.predictions: + raise KeyError(f"No predictions found for model: {model_id}") + return self.predictions[model_id] \ No newline at end of file diff --git a/doleus/storage/classification_store.py b/doleus/storage/classification_store.py new file mode 100644 index 0000000..003ee13 --- /dev/null +++ b/doleus/storage/classification_store.py @@ -0,0 +1,162 @@ +from typing import Any, Dict, List, Optional, Union + +import torch +from torch import Tensor + +from doleus.annotations import Annotations, Labels +from doleus.storage.base_store import BasePredictionStore +from doleus.utils import Task + + +class ClassificationPredictionStore(BasePredictionStore): + """Storage for classification model predictions.""" + + def add_predictions( + self, + predictions: torch.Tensor, + model_id: str, + task: str, + ) -> None: + """ + Store predictions for a classification model. + + Parameters + ---------- + predictions : torch.Tensor + Model predictions to store. + model_id : str + Identifier of the specified model. + task : str + The specific classification task (e.g., "multilabel", "multiclass", "binary"). + """ + if not isinstance(predictions, torch.Tensor): + raise TypeError("For classification, predictions must be a torch.Tensor.") + + processed_predictions = self._process_predictions(predictions, task=task) + self.predictions[model_id] = processed_predictions + + def _process_predictions( + self, + predictions: torch.Tensor, + task: str, + ) -> Annotations: + """Process raw classification predictions into the standard annotation format. + + The behavior depends on the `task` and the shape/dtype of `predictions`: + + - **Task.BINARY.value**: + - If `predictions` is 1D and `dtype` is float (scores for the positive class): + - `Labels.scores` will store the raw float score. + - `Labels.labels` will be `None`. + - If `predictions` is 1D and `dtype` is int (0 or 1): + - `Labels.labels` will store the integer label. + - `Labels.scores` will be `None`. 
+ - 2D predictions currently raise a ValueError. + + - **Task.MULTICLASS.value**: + - If `predictions` is 1D and `dtype` is int (class indices): + - `Labels.labels` will store the integer class index. + - `Labels.scores` will be `None`. + - If `predictions` is 1D and `dtype` is float: Raises ValueError (expected class indices). + - If `predictions` is 2D (shape [N, C]) and `dtype` is float (logits or probabilities per class): + - If values are outside [0,1] (suggesting logits), `torch.softmax` is applied along the class dimension. + - `Labels.scores` will store the [C] float tensor of probabilities. + - `Labels.labels` will store the class index derived from `argmax` of these scores/original logits. + - If `predictions` is 2D and `dtype` is int: Raises ValueError (expected float scores/logits). + + - **Task.MULTILABEL.value**: + - If `predictions` is 2D (shape [N, C]) and `dtype` is float (logits or probabilities per class): + - If values are outside [0,1] (suggesting logits), `torch.sigmoid` is applied element-wise. + - `Labels.scores` will store the [C] float tensor of probabilities. + - `Labels.labels` will be `None`. + - If `predictions` is 2D (shape [N, C]) and `dtype` is int (multi-hot encoded): + - `Labels.labels` will store the [C] integer tensor. + - `Labels.scores` will be `None`. + - Boolean inputs are not supported for multilabel. 1D predictions or other dtypes for 2D currently raise a ValueError. + + Parameters + ---------- + predictions : torch.Tensor + Raw predictions to process. Typically shape [N] or [N, C]. + task : str + The specific classification task ("binary", "multiclass", "multilabel"). + + Returns + ------- + Annotations + Processed predictions where each element is a `Labels` object. 
+ """ + processed = Annotations() + num_samples = predictions.shape[0] + + for i in range(num_samples): + current_labels: Optional[Tensor] = None + current_scores: Optional[Tensor] = None + + if task == Task.BINARY.value: + if predictions.dim() == 1: + if predictions.dtype.is_floating_point: + current_scores = predictions[i].unsqueeze(0) + current_labels = None # Scores are provided, so labels can be None + else: # Integer type + current_labels = predictions[i].unsqueeze(0) + current_scores = None + elif predictions.dim() == 2: + #TODO: We need to handle samplewise predictions at some point. + raise ValueError(f"{task} classification predictions must be 1D tensor. Got {predictions.dim()}D") + else: + raise ValueError(f"{task} classification predictions must be 1D or 2D tensor. Got {predictions.dim()}D") + + elif task == Task.MULTICLASS.value: + if predictions.dim() == 1: + if predictions.dtype.is_floating_point: + raise ValueError(f"For {task} with 1D predictions, dtype must be integer, got {predictions.dtype}") + else: # Integer type + current_labels = predictions[i].unsqueeze(0) + current_scores = None + elif predictions.dim() == 2: # Shape [N, C] + prediction_sample = predictions[i] # Shape [C] + if prediction_sample.dtype.is_floating_point: + current_labels = prediction_sample.argmax(dim=0).unsqueeze(0) + if torch.any(prediction_sample < 0) or torch.any(prediction_sample > 1): + current_scores = torch.softmax(prediction_sample, dim=0) + else: + # Assuming probabilities if values are within [0,1] + current_scores = prediction_sample + else: # Integer type + raise ValueError(f"For {task} with 2D predictions, dtype must be float (scores/logits), got {prediction_sample.dtype}") + else: + raise ValueError(f"{task} classification predictions must be 1D or 2D tensor. 
Got {predictions.dim()}D")
+
+            elif task == Task.MULTILABEL.value:
+                if predictions.dim() == 2:  # Expect [N, C]
+                    prediction_sample = predictions[i]  # Shape [C]
+
+                    if prediction_sample.dtype.is_floating_point:
+                        if torch.any(prediction_sample < 0) or torch.any(prediction_sample > 1):
+                            probabilities = torch.sigmoid(prediction_sample)
+                        else:
+                            probabilities = prediction_sample
+                        current_scores = probabilities
+                        current_labels = None
+
+                    elif prediction_sample.dtype in (torch.uint8, torch.int8, torch.int16, torch.int32, torch.int64):
+                        current_labels = prediction_sample.int()
+                        current_scores = None
+                    else:
+                        raise ValueError(
+                            f"For {task}, 2D prediction samples must be float or integer. Got {prediction_sample.dtype} for sample {i}"
+                        )
+                else:
+                    raise ValueError(
+                        f"{task} classification predictions must be a 2D tensor of shape [N, C]. "
+                        f"Got {predictions.dim()}D with shape {predictions.shape}"
+                    )
+
+            else:
+                raise ValueError(f"Unsupported task: {task}")
+
+            ann = Labels(datapoint_number=i, labels=current_labels, scores=current_scores)
+            processed.add(ann)
+
+        return processed
\ No newline at end of file
diff --git a/doleus/storage/detection_store.py b/doleus/storage/detection_store.py
new file mode 100644
index 0000000..d036ada
--- /dev/null
+++ b/doleus/storage/detection_store.py
@@ -0,0 +1,85 @@
+from typing import Any, Dict, List, Union
+
+import torch
+
+from doleus.annotations import Annotations, BoundingBoxes
+from doleus.storage.base_store import BasePredictionStore
+
+
+class DetectionPredictionStore(BasePredictionStore):
+    """Storage for detection model predictions."""
+
+    def add_predictions(
+        self,
+        predictions: List[Dict[str, Any]],
+        model_id: str,
+        **kwargs,
+    ) -> None:
+        """
+        Store predictions for a detection model.
+
+        Parameters
+        ----------
+        predictions : List[Dict[str, Any]]
+            Model predictions to store. This should be a list of dictionaries,
+            each with 'boxes', 'labels', and 'scores' keys.
+        model_id : str
+            Identifier of the specified model.
+ """ + if not isinstance(predictions, list): + raise TypeError("For detection, predictions must be a list.") + if not all(isinstance(p, dict) for p in predictions): + raise TypeError("Each item in detection predictions list must be a dictionary.") + + processed_predictions = self._process_predictions(predictions) + self.predictions[model_id] = processed_predictions + + def _process_predictions( + self, + predictions: List[Dict[str, Any]], + **kwargs, + ) -> Annotations: + """Process raw detection predictions into the standard annotation format. + + Parameters + ---------- + predictions : List[Dict[str, Any]] + Raw predictions to process. Must be a list of dictionaries. + + Returns + ------- + Annotations + Processed predictions in standard annotation format. + """ + processed = Annotations() + + for i, pred_dict in enumerate(predictions): + # Validate keys + required_keys = {"boxes", "labels", "scores"} + if not required_keys.issubset(pred_dict.keys()): + raise ValueError( + f"Detection prediction dict for sample {i} missing keys. 
" + f"Required: {required_keys}, Got: {list(pred_dict.keys())}" + ) + + boxes_xyxy = torch.as_tensor(pred_dict["boxes"], dtype=torch.float32) + labels = torch.as_tensor(pred_dict["labels"], dtype=torch.long) + scores = torch.as_tensor(pred_dict["scores"], dtype=torch.float32) + + # Validate shapes + num_detections = boxes_xyxy.shape[0] + if not (boxes_xyxy.ndim == 2 and boxes_xyxy.shape[1] == 4): + raise ValueError(f"boxes for sample {i} must have shape (M,4), Got: {boxes_xyxy.shape}") + if not (labels.ndim == 1 and labels.shape[0] == num_detections): + raise ValueError(f"labels for sample {i} must have shape (M,), Got: {labels.shape}") + if not (scores.ndim == 1 and scores.shape[0] == num_detections): + raise ValueError(f"scores for sample {i} must have shape (M,), Got: {scores.shape}") + + ann = BoundingBoxes( + datapoint_number=i, + boxes_xyxy=boxes_xyxy, + labels=labels, + scores=scores, + ) + processed.add(ann) + return processed \ No newline at end of file diff --git a/doleus/storage/prediction_store.py b/doleus/storage/prediction_store.py deleted file mode 100644 index 81092b5..0000000 --- a/doleus/storage/prediction_store.py +++ /dev/null @@ -1,238 +0,0 @@ -from typing import Any, Dict, List, Optional, Union - -import torch - -from doleus.annotations import Annotations, BoundingBoxes, Labels -from doleus.utils import Task, TaskType - - -class PredictionStore: - """Storage for model predictions for a specific dataset instance. - - Each Doleus Dataset has its own PredictionStore instance to manage - predictions from different models for that specific dataset. - """ - - def __init__(self, task_type: str): - """Initialize the prediction store. - - Parameters - ---------- - task_type : str - Type of task (e.g., "classification", "detection"). 
- """ - self.task_type = task_type - self.predictions: Dict[str, Annotations] = {} - - def add_predictions( - self, - predictions: Union[torch.Tensor, List[Dict[str, Any]]], - model_id: str, - task: Optional[str] = None, - ) -> None: - """ - Store predictions for a model. - - Parameters - ---------- - predictions : Union[torch.Tensor, List[Dict[str, Any]]] - Model predictions to store. For classification, this should be a - tensor of shape [N, C] where N is the number of samples and C is the - number of classes. For detection, this should be a list of dictionaries - with 'boxes', 'labels', and 'scores' keys. - model_id : str - Identifier of the specified model. - task : Optional[str], optional - The specific task (e.g., "multilabel", "multiclass"), by default None. - """ - processed_predictions = self._process_predictions(predictions, task) - self.predictions[model_id] = processed_predictions - - def _process_predictions( - self, - predictions: Union[torch.Tensor, List[Dict[str, Any]], Annotations], - task: Optional[str] = None, - ) -> Annotations: - """Process raw predictions into the standard annotation format. - - Parameters - ---------- - predictions : Union[torch.Tensor, List[Dict[str, Any]], Annotations] - Raw predictions to process. Can be: - - A torch.Tensor for classification tasks - - A list of dictionaries for detection tasks - - An already processed Annotations object - task : Optional[str], optional - The specific task (e.g., "multilabel", "multiclass"), by default None. - - - Returns - ------- - Annotations - Processed predictions in standard annotation format. - """ - if isinstance(predictions, Annotations) and ( - isinstance(predictions[0], Labels) - or isinstance(predictions[0], BoundingBoxes) - ): - return predictions - - processed = Annotations() - - if self.task_type == TaskType.CLASSIFICATION.value: - if not isinstance(predictions, torch.Tensor): - raise TypeError( - "For classification, predictions must be a torch.Tensor." 
- ) - - num_samples = predictions.shape[0] - - if predictions.dim() == 1: - # Assume these are predicted labels (class IDs) for single-label tasks - for i in range(num_samples): - label_val = predictions[i].unsqueeze(0) # Ensure [1] shape - ann = Labels(datapoint_number=i, labels=label_val, scores=None) - processed.add(ann) - - elif predictions.dim() == 2: # Shape [N, C] - for i in range(num_samples): - prediction_row = predictions[i] # This is the [C] tensor for the i-th sample - current_labels: torch.Tensor - current_scores: Optional[torch.Tensor] - - if task == Task.MULTILABEL.value: - if prediction_row.dtype in (torch.long, torch.int, torch.bool): - # Input is integer multi-hot - current_labels = prediction_row - current_scores = None - else: # Float input, assumed to be logits or probabilities - # Apply sigmoid if not already probabilities in [0,1] - if not (prediction_row.min() >= 0 and prediction_row.max() <= 1): - processed_scores_for_row = torch.sigmoid(prediction_row) - else: # Already probabilities - processed_scores_for_row = prediction_row - - current_labels = (processed_scores_for_row >= 0.5).long() # Default threshold 0.5 - current_scores = processed_scores_for_row - - ann = Labels( - datapoint_number=i, - labels=current_labels, - scores=current_scores, - ) - else: # Binary, Multiclass, or task is None (default to old behavior) - current_labels = prediction_row.argmax(dim=0).unsqueeze(0) # [1] tensor - - if prediction_row.dtype == torch.float: - current_scores = torch.softmax(prediction_row, dim=0) # [C] tensor - else: # Integer input - current_scores = None - - ann = Labels( - datapoint_number=i, - labels=current_labels, - scores=current_scores, - ) - processed.add(ann) - else: - raise ValueError( - "Classification predictions must be a 1D or 2D tensor." - ) - - elif self.task_type == TaskType.DETECTION.value: - if not isinstance(predictions, list): - raise TypeError( - "For detection, predictions must be a list of length N." 
- ) - if not all(isinstance(p, dict) for p in predictions): - raise TypeError( - "Each item in detection predictions list must be a dictionary." - ) - - - # Each element should look like {"boxes": (M,4), "labels": (M,), "scores": (M,)} - for i, pred_dict in enumerate(predictions): - # Validate keys - required_keys = {"boxes", "labels", "scores"} - if not required_keys.issubset(pred_dict.keys()): - raise ValueError(f"Detection prediction dict for sample {i} missing keys. Required: {required_keys}") - - boxes_xyxy = torch.as_tensor(pred_dict["boxes"], dtype=torch.float32) - labels = torch.as_tensor(pred_dict["labels"], dtype=torch.long) - scores = torch.as_tensor(pred_dict["scores"], dtype=torch.float32) - - # Validate shapes - num_detections = boxes_xyxy.shape[0] - if not (boxes_xyxy.ndim == 2 and boxes_xyxy.shape[1] == 4): - raise ValueError(f"boxes for sample {i} must have shape (M,4)") - if not (labels.ndim == 1 and labels.shape[0] == num_detections): - raise ValueError(f"labels for sample {i} must have shape (M,)") - if not (scores.ndim == 1 and scores.shape[0] == num_detections): - raise ValueError(f"scores for sample {i} must have shape (M,)") - - - ann = BoundingBoxes( - datapoint_number=i, - boxes_xyxy=boxes_xyxy, - labels=labels, - scores=scores, - ) - processed.add(ann) - else: - raise ValueError(f"Unsupported task type: {self.task_type}") - - - return processed - - def get(self, model_id: str, datapoint_number: int): - """Get annotation by datapoint number. - - Parameters - ---------- - datapoint_number : int - The ID of the sample in the dataset. - - Returns - ------- - Annotation - The annotation for the datapoint. - """ - if model_id not in self.predictions: - raise KeyError(f"No predictions found for model: {model_id}") - return self.predictions[model_id][datapoint_number] - - def get_subset(self, model_id: str, indices: List[int]) -> List[Any]: - """Get a subset of predictions for a specific model based on indices. 
- - Parameters - ---------- - model_id : str - Identifier of the model to get predictions for. - indices : List[int] - List of indices to get predictions for. - - Returns - ------- - List[Any] - List of predictions for the specified indices. - """ - if model_id not in self.predictions: - raise KeyError(f"No predictions found for model: {model_id}") - return [self.predictions[model_id][i] for i in indices] - - def get_predictions(self, model_id: str) -> List[Any]: - """Get all predictions for a specific model. - - Parameters - ---------- - model_id : str - Identifier of the model to get predictions for. - - Returns - ------- - List[Any] - List of all predictions for the specified model. - """ - if model_id not in self.predictions: - raise KeyError(f"No predictions found for model: {model_id}") - return self.predictions[model_id].annotations From 3a66cc5b15ad7cd564e4f344acb9fb3110a67fcf Mon Sep 17 00:00:00 2001 From: iamheinrich <76793837+iamheinrich@users.noreply.github.com> Date: Sat, 24 May 2025 08:54:53 +0200 Subject: [PATCH 3/6] refactor: refactored groundtruth store. 
now split up in detection and classification store --- doleus/datasets/base.py | 20 +-- doleus/datasets/classification.py | 19 ++- doleus/datasets/detection.py | 12 +- doleus/storage/__init__.py | 13 +- doleus/storage/base_store.py | 56 ++++++++- .../classification_ground_truth_store.py | 115 ++++++++++++++++++ ....py => classification_prediction_store.py} | 0 .../storage/detection_ground_truth_store.py | 89 ++++++++++++++ ...store.py => detection_prediction_store.py} | 0 doleus/storage/ground_truth_store.py | 89 -------------- 10 files changed, 296 insertions(+), 117 deletions(-) create mode 100644 doleus/storage/classification_ground_truth_store.py rename doleus/storage/{classification_store.py => classification_prediction_store.py} (100%) create mode 100644 doleus/storage/detection_ground_truth_store.py rename doleus/storage/{detection_store.py => detection_prediction_store.py} (100%) delete mode 100644 doleus/storage/ground_truth_store.py diff --git a/doleus/datasets/base.py b/doleus/datasets/base.py index 4dd8fa4..2e91e91 100644 --- a/doleus/datasets/base.py +++ b/doleus/datasets/base.py @@ -8,11 +8,9 @@ from doleus.annotations import BoundingBoxes, Labels from doleus.storage import ( - ClassificationPredictionStore, - DetectionPredictionStore, - GroundTruthStore, MetadataStore, ) +from doleus.storage.base_store import BasePredictionStore, BaseGroundTruthStore from doleus.utils import ( ATTRIBUTE_FUNCTIONS, OPERATOR_DICT, @@ -76,18 +74,10 @@ def __init__( self.metadata = metadata if metadata is not None else {} self.metadata["_timestamp"] = get_current_timestamp() - self.groundtruth_store = GroundTruthStore(task_type=task_type, dataset=dataset) - - if self.task_type == TaskType.CLASSIFICATION.value: - if not self.task: - raise ValueError( - "For classification task_type, a specific 'task' (e.g., binary, multiclass, multilabel) must be provided." 
- ) - self.prediction_store = ClassificationPredictionStore() - elif self.task_type == TaskType.DETECTION.value: - self.prediction_store = DetectionPredictionStore() - else: - raise ValueError(f"Unsupported task_type: {self.task_type} for PredictionStore assignment") + # Ground truth and prediction stores are initialized to None in the base class. + # Specific instantiations will be handled by subclasses (DoleusClassification, DoleusDetection). + self.groundtruth_store: Optional[BaseGroundTruthStore] = None + self.prediction_store: Optional[BasePredictionStore] = None self.metadata_store = MetadataStore( num_datapoints=len(dataset), metadata=per_datapoint_metadata diff --git a/doleus/datasets/classification.py b/doleus/datasets/classification.py index b4116df..1834aa4 100644 --- a/doleus/datasets/classification.py +++ b/doleus/datasets/classification.py @@ -5,6 +5,9 @@ from doleus.datasets.base import Doleus from doleus.utils import TaskType +from doleus.storage.classification_ground_truth_store import ClassificationGroundTruthStore +from doleus.storage.classification_prediction_store import ClassificationPredictionStore +from doleus.annotations import Annotations class DoleusClassification(Doleus): @@ -49,6 +52,12 @@ def __init__( metadata=metadata, per_datapoint_metadata=per_datapoint_metadata, ) + self.groundtruth_store = ClassificationGroundTruthStore( + dataset=self.dataset, + task=self.task, + num_classes=self.num_classes + ) + self.prediction_store = ClassificationPredictionStore() def _create_new_instance(self, dataset, indices, name): # TODO: Do we need to create a new dataset instance? 
@@ -64,8 +73,12 @@ def _create_new_instance(self, dataset, indices, name): per_datapoint_metadata=metadata_subset, ) - for model_id in self.prediction_store.predictions: - sliced_preds = self.prediction_store.get_subset(model_id, indices) - new_instance.prediction_store.add_predictions(sliced_preds, model_id) + # Correctly transfer sliced predictions + if self.prediction_store and self.prediction_store.predictions: + for model_id in self.prediction_store.predictions: + # get_subset already returns an Annotations object with re-indexed datapoint_numbers + sliced_preds_annotations = self.prediction_store.get_subset(model_id, indices) + # Directly assign the Annotations object to the new instance's store + new_instance.prediction_store.predictions[model_id] = sliced_preds_annotations return new_instance diff --git a/doleus/datasets/detection.py b/doleus/datasets/detection.py index 90a5497..867bc89 100644 --- a/doleus/datasets/detection.py +++ b/doleus/datasets/detection.py @@ -4,6 +4,9 @@ from doleus.datasets.base import Doleus from doleus.utils import TaskType +from doleus.storage.detection_ground_truth_store import DetectionGroundTruthStore +from doleus.storage.detection_prediction_store import DetectionPredictionStore +from doleus.annotations import Annotations class DoleusDetection(Doleus): @@ -40,6 +43,8 @@ def __init__( metadata=metadata, per_datapoint_metadata=per_datapoint_metadata, ) + self.groundtruth_store = DetectionGroundTruthStore(dataset=self.dataset) + self.prediction_store = DetectionPredictionStore() def _create_new_instance(self, dataset, indices, slice_name): subset = Subset(dataset, indices) @@ -52,8 +57,9 @@ def _create_new_instance(self, dataset, indices, slice_name): per_datapoint_metadata=new_metadata, ) - for model_id in self.prediction_store.predictions: - sliced_preds = self.prediction_store.get_subset(model_id, indices) - new_instance.prediction_store.add_predictions(sliced_preds, model_id) + if self.prediction_store and 
self.prediction_store.predictions: + for model_id in self.prediction_store.predictions: + sliced_preds_annotations = self.prediction_store.get_subset(model_id, indices) + new_instance.prediction_store.predictions[model_id] = sliced_preds_annotations return new_instance diff --git a/doleus/storage/__init__.py b/doleus/storage/__init__.py index e958b4f..b9a56c4 100644 --- a/doleus/storage/__init__.py +++ b/doleus/storage/__init__.py @@ -1,13 +1,16 @@ -from doleus.storage.base_store import BasePredictionStore -from doleus.storage.classification_store import ClassificationPredictionStore -from doleus.storage.detection_store import DetectionPredictionStore -from doleus.storage.groundtruth_store import GroundTruthStore +from doleus.storage.base_store import BasePredictionStore, BaseGroundTruthStore +from doleus.storage.classification_ground_truth_store import ClassificationGroundTruthStore +from doleus.storage.classification_prediction_store import ClassificationPredictionStore +from doleus.storage.detection_ground_truth_store import DetectionGroundTruthStore +from doleus.storage.detection_prediction_store import DetectionPredictionStore from doleus.storage.metadata_store import MetadataStore __all__ = [ + "BaseGroundTruthStore", "BasePredictionStore", + "ClassificationGroundTruthStore", "ClassificationPredictionStore", + "DetectionGroundTruthStore", "DetectionPredictionStore", - "GroundTruthStore", "MetadataStore", ] diff --git a/doleus/storage/base_store.py b/doleus/storage/base_store.py index eae633a..f2899a6 100644 --- a/doleus/storage/base_store.py +++ b/doleus/storage/base_store.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import Any, Dict, List, Union +from typing import Any, Dict, List, Union, Optional import torch @@ -119,4 +119,56 @@ def get_predictions(self, model_id: str) -> Annotations: """ if model_id not in self.predictions: raise KeyError(f"No predictions found for model: {model_id}") - return self.predictions[model_id] \ No newline 
at end of file + return self.predictions[model_id] + + +class BaseGroundTruthStore(ABC): + """Base storage for ground truth data for a specific dataset instance.""" + + def __init__(self, dataset: Any): + """ + Initialize the ground truth store. + + Parameters + ---------- + dataset : Any + The raw PyTorch dataset object. + """ + self.dataset = dataset + self.groundtruths: Optional[Annotations] = None + self.groundtruths = self._process_groundtruths() + + @abstractmethod + def _process_groundtruths(self) -> Annotations: + """ + Process raw ground truth data from the dataset into the standard annotation format. + Actual implementation will depend on the task type (classification, detection). + + Returns + ------- + Annotations + Processed ground truths in standard annotation format. + """ + pass + + def get(self, datapoint_number: int) -> Optional[Annotation]: + """ + Get a single ground truth annotation object by datapoint number. + + Parameters + ---------- + datapoint_number : int + The ID of the sample in the dataset. + + Returns + ------- + Optional[Annotation] + The specific Annotation object (e.g., Labels, BoundingBoxes) for the datapoint, + or None if not found. 
+ """ + if self.groundtruths is None: + return None + try: + return self.groundtruths[datapoint_number] + except KeyError: + return None \ No newline at end of file diff --git a/doleus/storage/classification_ground_truth_store.py b/doleus/storage/classification_ground_truth_store.py new file mode 100644 index 0000000..d8e204d --- /dev/null +++ b/doleus/storage/classification_ground_truth_store.py @@ -0,0 +1,115 @@ +import torch +from typing import Any + +from doleus.storage.base_store import BaseGroundTruthStore +from doleus.annotations import Annotations +from doleus.annotations.classification import Labels +from doleus.utils.data import Task + + +class ClassificationGroundTruthStore(BaseGroundTruthStore): + """Ground truth store for classification tasks.""" + + def __init__(self, dataset: Any, task: str, num_classes: int): + """ + Initialize the classification ground truth store. + + Parameters + ---------- + dataset : Any + The raw PyTorch dataset object. + task : str + The specific classification task (e.g., Task.BINARY.value, Task.MULTICLASS.value, Task.MULTILABEL.value). + num_classes : int + The number of classes for the task. + """ + self.task = task + self.num_classes = num_classes + super().__init__(dataset) + + def _process_groundtruths(self) -> Annotations: + """ + Process raw ground truth data from the dataset for classification tasks. + + Returns + ------- + Annotations + Processed ground truths in standard annotation format. + + Raises + ------ + ValueError + If the task is unsupported or if ground truth data is in an invalid format. + """ + processed_annotations = Annotations() + + for idx, data in enumerate(self.dataset): + # Assuming standard (image, label) structure for dataset items + if not (isinstance(data, (list, tuple)) and len(data) > 1): + raise ValueError( + f"Dataset item at index {idx} is not in the expected format (e.g., (image, target)). 
" + f"Got: {type(data)}" + ) + + label = data[1] + processed_label_tensor: torch.Tensor + + if self.task == Task.BINARY.value: + if not isinstance(label, torch.Tensor): + label = torch.tensor(label) + + if not (label.ndim == 0 or (label.ndim == 1 and label.numel() == 1)): + raise ValueError( + f"Binary ground truth for item {idx} must be a scalar or 1-element tensor. Got shape: {label.shape}" + ) + if not (label.item() == 0 or label.item() == 1): + raise ValueError( + f"Binary ground truth for item {idx} must be 0 or 1. Got: {label.item()}" + ) + processed_label_tensor = torch.tensor([label.item()], dtype=torch.long) + + elif self.task == Task.MULTICLASS.value: + if not isinstance(label, torch.Tensor): + label = torch.tensor(label) + + if not (label.ndim == 0 or (label.ndim == 1 and label.numel() == 1)): + raise ValueError( + f"Multiclass ground truth for item {idx} must be a scalar or 1-element tensor. Got shape: {label.shape}" + ) + label_value = label.item() + if not (0 <= label_value < self.num_classes): + raise ValueError( + f"Multiclass ground truth for item {idx} must be between 0 and {self.num_classes - 1}. Got: {label_value}" + ) + processed_label_tensor = torch.tensor([label_value], dtype=torch.long) + + elif self.task == Task.MULTILABEL.value: + if not isinstance(label, torch.Tensor): + try: + label = torch.tensor(label) + except Exception as e: + raise ValueError(f"Could not convert label for item {idx} to tensor: {label}. Error: {e}") + + if label.dim() != 1: + raise ValueError(f"Multilabel ground truth for item {idx} must be a 1D tensor. Got {label.dim()} dimensions.") + if label.shape[0] != self.num_classes: + raise ValueError( + f"Multilabel ground truth tensor shape for item {idx} must be ({self.num_classes},). Got {label.shape}." + ) + if not (label.dtype == torch.int or label.dtype == torch.long): + raise ValueError( + f"Multilabel ground truth tensor for item {idx} must be of integer type (torch.int or torch.long). Got {label.dtype}." 
+ ) + if not torch.all((label == 0) | (label == 1)): + raise ValueError( + f"Multilabel ground truth tensor for item {idx} must be multi-hot encoded (contain only 0s and 1s). Got: {label}" + ) + processed_label_tensor = label.long() + + else: + raise ValueError(f"Unsupported task for ClassificationGroundTruthStore: {self.task}") + + ann = Labels(datapoint_number=idx, labels=processed_label_tensor, scores=None) # scores=None for ground truth + processed_annotations.add(ann) + + return processed_annotations \ No newline at end of file diff --git a/doleus/storage/classification_store.py b/doleus/storage/classification_prediction_store.py similarity index 100% rename from doleus/storage/classification_store.py rename to doleus/storage/classification_prediction_store.py diff --git a/doleus/storage/detection_ground_truth_store.py b/doleus/storage/detection_ground_truth_store.py new file mode 100644 index 0000000..81f0fd9 --- /dev/null +++ b/doleus/storage/detection_ground_truth_store.py @@ -0,0 +1,89 @@ +import torch +from typing import Any + +from doleus.storage.base_store import BaseGroundTruthStore +from doleus.annotations import Annotations +from doleus.annotations.detection import BoundingBoxes + + +class DetectionGroundTruthStore(BaseGroundTruthStore): + """Ground truth store for detection tasks.""" + + def __init__(self, dataset: Any): + """ + Initialize the detection ground truth store. + + Parameters + ---------- + dataset : Any + The raw PyTorch dataset object. + """ + super().__init__(dataset) # This will call _process_groundtruths + + def _process_groundtruths(self) -> Annotations: + """ + Process raw ground truth data from the dataset for detection tasks. + + Returns + ------- + Annotations + Processed ground truths in standard annotation format. + + Raises + ------ + ValueError + If ground truth data is in an invalid format. 
+ """ + processed_annotations = Annotations() + + for idx, data in enumerate(self.dataset): + # Assuming standard (image, bounding_boxes, labels) structure for dataset items + if not (isinstance(data, (list, tuple)) and len(data) == 3): + raise ValueError( + f"Dataset item at index {idx} is not in the expected format (image, bounding_boxes, labels). " + f"Got {len(data)} elements of type: {type(data)}" + ) + + _, raw_boxes, raw_labels = data + + # Convert to tensors + if not isinstance(raw_boxes, torch.Tensor): + try: + bounding_boxes = torch.tensor(raw_boxes, dtype=torch.float32) + except Exception as e: + raise ValueError(f"Could not convert bounding_boxes for item {idx} to tensor: {raw_boxes}. Error: {e}") + else: + bounding_boxes = raw_boxes.float() # Ensure correct dtype + + if not isinstance(raw_labels, torch.Tensor): + try: + labels = torch.tensor(raw_labels, dtype=torch.long) + except Exception as e: + raise ValueError(f"Could not convert labels for item {idx} to tensor: {raw_labels}. Error: {e}") + else: + labels = raw_labels.long() # Ensure correct dtype + + # Validate shapes + # Assuming M is the number of detected objects for this datapoint + # Bounding boxes should be (M, 4) + if bounding_boxes.ndim != 2 or bounding_boxes.shape[1] != 4: + raise ValueError( + f"Bounding boxes for item {idx} must have shape (M, 4). Got shape: {bounding_boxes.shape}" + ) + + # Labels should be (M,) + num_detections = bounding_boxes.shape[0] + if not (labels.ndim == 1 and labels.shape[0] == num_detections): + raise ValueError( + f"Labels for item {idx} must have shape (M,). 
Got shape: {labels.shape}, expected M={num_detections}" + ) + + ann = BoundingBoxes( + datapoint_number=idx, + boxes_xyxy=bounding_boxes, + labels=labels, + scores=None + ) + processed_annotations.add(ann) + + return processed_annotations \ No newline at end of file diff --git a/doleus/storage/detection_store.py b/doleus/storage/detection_prediction_store.py similarity index 100% rename from doleus/storage/detection_store.py rename to doleus/storage/detection_prediction_store.py diff --git a/doleus/storage/ground_truth_store.py b/doleus/storage/ground_truth_store.py deleted file mode 100644 index 4426e63..0000000 --- a/doleus/storage/ground_truth_store.py +++ /dev/null @@ -1,89 +0,0 @@ -from typing import Any, Optional - -import torch - -from doleus.annotations import Annotations, BoundingBoxes, Labels -from doleus.utils import TaskType - - -class GroundTruthStore: - """Storage for ground truth annotations for a specific dataset instance. - - Each Doleus Dataset has its own GroundTruthStore instance to manage - ground truth annotations for that specific dataset. - """ - - def __init__(self, task_type: str, dataset: Any): - """Initialize the ground truth store. - - Parameters - ---------- - task_type : str - Type of task (e.g., "classification", "detection"). - dataset : Any - The underlying dataset to process ground truths from. - """ - self.task_type = task_type - self.dataset = dataset - self.groundtruths: Optional[Annotations] = None - self._process_groundtruths() - - def _process_groundtruths(self): - """Process and store ground truth annotations from the dataset.""" - groundtruths = Annotations() - - if self.task_type == TaskType.CLASSIFICATION.value: - for idx in range(len(self.dataset)): - data = self.dataset[idx] - if len(data) < 2: - raise ValueError( - f"Expected (image, label(s)) from dataset at index {idx}, got {len(data)} elements." 
- ) - _, labels = data - - # Convert label(s) to tensor of shape [N] if needed - if not isinstance(labels, torch.Tensor): - labels = torch.tensor(labels) - if labels.dim() == 0: - labels = labels.unsqueeze(0) - - ann = Labels(datapoint_number=idx, labels=labels) - groundtruths.add(ann) - - elif self.task_type == TaskType.DETECTION.value: - for idx in range(len(self.dataset)): - data = self.dataset[idx] - if len(data) != 3: - raise ValueError( - f"Expected (image, bounding_boxes, labels) for detection at index {idx}, got {len(data)} elements." - ) - _, bounding_boxes, labels = data - - if not isinstance(bounding_boxes, torch.Tensor): - bounding_boxes = torch.tensor(bounding_boxes, dtype=torch.float32) - if not isinstance(labels, torch.Tensor): - labels = torch.tensor(labels, dtype=torch.long) - - ann = BoundingBoxes( - datapoint_number=idx, boxes_xyxy=bounding_boxes, labels=labels - ) - groundtruths.add(ann) - - self.groundtruths = groundtruths - - def get(self, datapoint_number: int): - """Get annotation by datapoint number. - - Parameters - ---------- - datapoint_number : int - The ID of the sample in the dataset. - - Returns - ------- - Annotation - The annotation for the datapoint. 
- """ - if self.groundtruths is None: - raise ValueError("No ground truth annotations found") - return self.groundtruths[datapoint_number] From 1f282a3c7f9ea816f7f107dd0f045d3b20f7e17e Mon Sep 17 00:00:00 2001 From: Niklas Schmolenski Date: Sat, 24 May 2025 14:19:31 +0200 Subject: [PATCH 4/6] fix: minor fix --- doleus/metrics/calculator.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/doleus/metrics/calculator.py b/doleus/metrics/calculator.py index 85b7a74..b53fba3 100644 --- a/doleus/metrics/calculator.py +++ b/doleus/metrics/calculator.py @@ -79,10 +79,7 @@ def _calculate_classification( try: gt_tensor = torch.stack([ann.labels.squeeze() for ann in groundtruths]) - pred_list = [ - ann.scores if ann.scores is not None else ann.labels.squeeze() - for ann in predictions - ] + pred_list = [ann.labels.squeeze() for ann in predictions] if not pred_list: raise ValueError("No predictions provided to compute the metric.") pred_tensor = torch.stack(pred_list) From 1198750bf4113c26bb9039772cdfbbdfad5453fb Mon Sep 17 00:00:00 2001 From: iamheinrich <76793837+iamheinrich@users.noreply.github.com> Date: Sat, 24 May 2025 20:03:50 +0200 Subject: [PATCH 5/6] fix: there was a discprency in the new more modular store structure when adding predictions. This is now solvedand re-indexing works fine. 
--- doleus/datasets/classification.py | 19 ++++------- doleus/storage/base_store.py | 15 ++++----- .../classification_prediction_store.py | 32 ++++++++++++++++++ doleus/storage/detection_prediction_store.py | 33 +++++++++++++++++++ 4 files changed, 79 insertions(+), 20 deletions(-) diff --git a/doleus/datasets/classification.py b/doleus/datasets/classification.py index 1834aa4..bb663cd 100644 --- a/doleus/datasets/classification.py +++ b/doleus/datasets/classification.py @@ -7,7 +7,6 @@ from doleus.utils import TaskType from doleus.storage.classification_ground_truth_store import ClassificationGroundTruthStore from doleus.storage.classification_prediction_store import ClassificationPredictionStore -from doleus.annotations import Annotations class DoleusClassification(Doleus): @@ -52,15 +51,14 @@ def __init__( metadata=metadata, per_datapoint_metadata=per_datapoint_metadata, ) + + # Instantiate the classification-specific stores self.groundtruth_store = ClassificationGroundTruthStore( - dataset=self.dataset, - task=self.task, - num_classes=self.num_classes + dataset=self.dataset, task=self.task, num_classes=self.num_classes ) self.prediction_store = ClassificationPredictionStore() def _create_new_instance(self, dataset, indices, name): - # TODO: Do we need to create a new dataset instance? 
subset = Subset(dataset, indices) metadata_subset = self.metadata_store.get_subset(indices) new_instance = DoleusClassification( @@ -73,12 +71,9 @@ def _create_new_instance(self, dataset, indices, name): per_datapoint_metadata=metadata_subset, ) - # Correctly transfer sliced predictions - if self.prediction_store and self.prediction_store.predictions: - for model_id in self.prediction_store.predictions: - # get_subset already returns an Annotations object with re-indexed datapoint_numbers - sliced_preds_annotations = self.prediction_store.get_subset(model_id, indices) - # Directly assign the Annotations object to the new instance's store - new_instance.prediction_store.predictions[model_id] = sliced_preds_annotations + # Copy sliced predictions directly to the new instance + for model_id in self.prediction_store.predictions: + sliced_preds = self.prediction_store.get_subset(model_id, indices) + new_instance.prediction_store.predictions[model_id] = sliced_preds return new_instance diff --git a/doleus/storage/base_store.py b/doleus/storage/base_store.py index f2899a6..7ffddaf 100644 --- a/doleus/storage/base_store.py +++ b/doleus/storage/base_store.py @@ -81,9 +81,13 @@ def get(self, model_id: str, datapoint_number: int) -> Annotation: raise KeyError(f"No predictions found for model: {model_id}") return self.predictions[model_id][datapoint_number] + @abstractmethod def get_subset(self, model_id: str, indices: List[int]) -> Annotations: """Get a subset of predictions for a specific model based on indices. + Each subclass must implement this method to handle re-indexing for their + specific annotation types (Labels for classification, BoundingBoxes for detection). + Parameters ---------- model_id : str @@ -94,15 +98,10 @@ def get_subset(self, model_id: str, indices: List[int]) -> Annotations: Returns ------- Annotations - An Annotations object containing predictions for the specified indices. 
+ An Annotations object containing predictions for the specified indices, + with datapoint_number values re-indexed starting from 0. """ - if model_id not in self.predictions: - raise KeyError(f"No predictions found for model: {model_id}") - - subset_annotations = Annotations() - for i in indices: - subset_annotations.add(self.predictions[model_id][i]) - return subset_annotations + pass def get_predictions(self, model_id: str) -> Annotations: """Get all predictions for a specific model. diff --git a/doleus/storage/classification_prediction_store.py b/doleus/storage/classification_prediction_store.py index 003ee13..43eebdb 100644 --- a/doleus/storage/classification_prediction_store.py +++ b/doleus/storage/classification_prediction_store.py @@ -35,6 +35,38 @@ def add_predictions( processed_predictions = self._process_predictions(predictions, task=task) self.predictions[model_id] = processed_predictions + def get_subset(self, model_id: str, indices: List[int]) -> Annotations: + """Get a subset of predictions for a specific model based on indices. + + Parameters + ---------- + model_id : str + Identifier of the model to get predictions for. + indices : List[int] + List of indices to get predictions for. + + Returns + ------- + Annotations + An Annotations object containing predictions for the specified indices, + with datapoint_number values re-indexed starting from 0. 
+ """ + if model_id not in self.predictions: + raise KeyError(f"No predictions found for model: {model_id}") + + subset_annotations = Annotations() + for new_idx, original_idx in enumerate(indices): + original_annotation = self.predictions[model_id][original_idx] + + # Create a new Labels annotation with re-indexed datapoint_number + new_annotation = Labels( + datapoint_number=new_idx, + labels=original_annotation.labels, + scores=original_annotation.scores + ) + subset_annotations.add(new_annotation) + return subset_annotations + def _process_predictions( self, predictions: torch.Tensor, diff --git a/doleus/storage/detection_prediction_store.py b/doleus/storage/detection_prediction_store.py index d036ada..3ffbb5b 100644 --- a/doleus/storage/detection_prediction_store.py +++ b/doleus/storage/detection_prediction_store.py @@ -34,6 +34,39 @@ def add_predictions( processed_predictions = self._process_predictions(predictions) self.predictions[model_id] = processed_predictions + def get_subset(self, model_id: str, indices: List[int]) -> Annotations: + """Get a subset of predictions for a specific model based on indices. + + Parameters + ---------- + model_id : str + Identifier of the model to get predictions for. + indices : List[int] + List of indices to get predictions for. + + Returns + ------- + Annotations + An Annotations object containing predictions for the specified indices, + with datapoint_number values re-indexed starting from 0. 
+ """ + if model_id not in self.predictions: + raise KeyError(f"No predictions found for model: {model_id}") + + subset_annotations = Annotations() + for new_idx, original_idx in enumerate(indices): + original_annotation = self.predictions[model_id][original_idx] + + # Create a new BoundingBoxes annotation with re-indexed datapoint_number + new_annotation = BoundingBoxes( + datapoint_number=new_idx, + boxes_xyxy=original_annotation.boxes_xyxy, + labels=original_annotation.labels, + scores=original_annotation.scores + ) + subset_annotations.add(new_annotation) + return subset_annotations + def _process_predictions( self, predictions: List[Dict[str, Any]], From 799289bee8e2be8cc9563e33abedec59a12115b5 Mon Sep 17 00:00:00 2001 From: iamheinrich <76793837+iamheinrich@users.noreply.github.com> Date: Sat, 24 May 2025 20:11:36 +0200 Subject: [PATCH 6/6] refactor: new folder structure --- doleus/datasets/base.py | 3 +- doleus/datasets/classification.py | 3 +- doleus/datasets/detection.py | 3 +- doleus/storage/__init__.py | 15 +++-- doleus/storage/ground_truth_store/__init__.py | 9 +++ doleus/storage/ground_truth_store/base.py | 56 +++++++++++++++++++ .../classification.py} | 2 +- .../detection.py} | 2 +- doleus/storage/metadata_store/__init__.py | 5 ++ .../store.py} | 2 +- doleus/storage/prediction_store/__init__.py | 9 +++ .../base.py} | 56 +------------------ .../classification.py} | 2 +- .../detection.py} | 2 +- 14 files changed, 100 insertions(+), 69 deletions(-) create mode 100644 doleus/storage/ground_truth_store/__init__.py create mode 100644 doleus/storage/ground_truth_store/base.py rename doleus/storage/{classification_ground_truth_store.py => ground_truth_store/classification.py} (98%) rename doleus/storage/{detection_ground_truth_store.py => ground_truth_store/detection.py} (97%) create mode 100644 doleus/storage/metadata_store/__init__.py rename doleus/storage/{metadata_store.py => metadata_store/store.py} (97%) create mode 100644 
doleus/storage/prediction_store/__init__.py rename doleus/storage/{base_store.py => prediction_store/base.py} (71%) rename doleus/storage/{classification_prediction_store.py => prediction_store/classification.py} (99%) rename doleus/storage/{detection_prediction_store.py => prediction_store/detection.py} (98%) diff --git a/doleus/datasets/base.py b/doleus/datasets/base.py index 2e91e91..9cd5a8c 100644 --- a/doleus/datasets/base.py +++ b/doleus/datasets/base.py @@ -9,8 +9,9 @@ from doleus.annotations import BoundingBoxes, Labels from doleus.storage import ( MetadataStore, + BasePredictionStore, + BaseGroundTruthStore, ) -from doleus.storage.base_store import BasePredictionStore, BaseGroundTruthStore from doleus.utils import ( ATTRIBUTE_FUNCTIONS, OPERATOR_DICT, diff --git a/doleus/datasets/classification.py b/doleus/datasets/classification.py index bb663cd..2912a93 100644 --- a/doleus/datasets/classification.py +++ b/doleus/datasets/classification.py @@ -5,8 +5,7 @@ from doleus.datasets.base import Doleus from doleus.utils import TaskType -from doleus.storage.classification_ground_truth_store import ClassificationGroundTruthStore -from doleus.storage.classification_prediction_store import ClassificationPredictionStore +from doleus.storage import ClassificationGroundTruthStore, ClassificationPredictionStore class DoleusClassification(Doleus): diff --git a/doleus/datasets/detection.py b/doleus/datasets/detection.py index 867bc89..2941a85 100644 --- a/doleus/datasets/detection.py +++ b/doleus/datasets/detection.py @@ -4,8 +4,7 @@ from doleus.datasets.base import Doleus from doleus.utils import TaskType -from doleus.storage.detection_ground_truth_store import DetectionGroundTruthStore -from doleus.storage.detection_prediction_store import DetectionPredictionStore +from doleus.storage import DetectionGroundTruthStore, DetectionPredictionStore from doleus.annotations import Annotations diff --git a/doleus/storage/__init__.py b/doleus/storage/__init__.py index 
b9a56c4..9f6726b 100644 --- a/doleus/storage/__init__.py +++ b/doleus/storage/__init__.py @@ -1,8 +1,13 @@ -from doleus.storage.base_store import BasePredictionStore, BaseGroundTruthStore -from doleus.storage.classification_ground_truth_store import ClassificationGroundTruthStore -from doleus.storage.classification_prediction_store import ClassificationPredictionStore -from doleus.storage.detection_ground_truth_store import DetectionGroundTruthStore -from doleus.storage.detection_prediction_store import DetectionPredictionStore +from doleus.storage.prediction_store import ( + BasePredictionStore, + ClassificationPredictionStore, + DetectionPredictionStore, +) +from doleus.storage.ground_truth_store import ( + BaseGroundTruthStore, + ClassificationGroundTruthStore, + DetectionGroundTruthStore, +) from doleus.storage.metadata_store import MetadataStore __all__ = [ diff --git a/doleus/storage/ground_truth_store/__init__.py b/doleus/storage/ground_truth_store/__init__.py new file mode 100644 index 0000000..e390fbd --- /dev/null +++ b/doleus/storage/ground_truth_store/__init__.py @@ -0,0 +1,9 @@ +from doleus.storage.ground_truth_store.base import BaseGroundTruthStore +from doleus.storage.ground_truth_store.classification import ClassificationGroundTruthStore +from doleus.storage.ground_truth_store.detection import DetectionGroundTruthStore + +__all__ = [ + "BaseGroundTruthStore", + "ClassificationGroundTruthStore", + "DetectionGroundTruthStore", +] \ No newline at end of file diff --git a/doleus/storage/ground_truth_store/base.py b/doleus/storage/ground_truth_store/base.py new file mode 100644 index 0000000..aca2289 --- /dev/null +++ b/doleus/storage/ground_truth_store/base.py @@ -0,0 +1,56 @@ +from abc import ABC, abstractmethod +from typing import Any, Optional + +from doleus.annotations import Annotation, Annotations + + +class BaseGroundTruthStore(ABC): + """Base storage for ground truth data for a specific dataset instance.""" + + def __init__(self, dataset: Any): 
+ """ + Initialize the ground truth store. + + Parameters + ---------- + dataset : Any + The raw PyTorch dataset object. + """ + self.dataset = dataset + self.groundtruths: Optional[Annotations] = None + self.groundtruths = self._process_groundtruths() + + @abstractmethod + def _process_groundtruths(self) -> Annotations: + """ + Process raw ground truth data from the dataset into the standard annotation format. + Actual implementation will depend on the task type (classification, detection). + + Returns + ------- + Annotations + Processed ground truths in standard annotation format. + """ + pass + + def get(self, datapoint_number: int) -> Optional[Annotation]: + """ + Get a single ground truth annotation object by datapoint number. + + Parameters + ---------- + datapoint_number : int + The ID of the sample in the dataset. + + Returns + ------- + Optional[Annotation] + The specific Annotation object (e.g., Labels, BoundingBoxes) for the datapoint, + or None if not found. + """ + if self.groundtruths is None: + return None + try: + return self.groundtruths[datapoint_number] + except KeyError: + return None \ No newline at end of file diff --git a/doleus/storage/classification_ground_truth_store.py b/doleus/storage/ground_truth_store/classification.py similarity index 98% rename from doleus/storage/classification_ground_truth_store.py rename to doleus/storage/ground_truth_store/classification.py index d8e204d..08458b3 100644 --- a/doleus/storage/classification_ground_truth_store.py +++ b/doleus/storage/ground_truth_store/classification.py @@ -1,7 +1,7 @@ import torch from typing import Any -from doleus.storage.base_store import BaseGroundTruthStore +from doleus.storage.ground_truth_store.base import BaseGroundTruthStore from doleus.annotations import Annotations from doleus.annotations.classification import Labels from doleus.utils.data import Task diff --git a/doleus/storage/detection_ground_truth_store.py b/doleus/storage/ground_truth_store/detection.py similarity 
index 97% rename from doleus/storage/detection_ground_truth_store.py rename to doleus/storage/ground_truth_store/detection.py index 81f0fd9..af5e5d7 100644 --- a/doleus/storage/detection_ground_truth_store.py +++ b/doleus/storage/ground_truth_store/detection.py @@ -1,7 +1,7 @@ import torch from typing import Any -from doleus.storage.base_store import BaseGroundTruthStore +from doleus.storage.ground_truth_store.base import BaseGroundTruthStore from doleus.annotations import Annotations from doleus.annotations.detection import BoundingBoxes diff --git a/doleus/storage/metadata_store/__init__.py b/doleus/storage/metadata_store/__init__.py new file mode 100644 index 0000000..c23a4dc --- /dev/null +++ b/doleus/storage/metadata_store/__init__.py @@ -0,0 +1,5 @@ +from doleus.storage.metadata_store.store import MetadataStore + +__all__ = [ + "MetadataStore", +] \ No newline at end of file diff --git a/doleus/storage/metadata_store.py b/doleus/storage/metadata_store/store.py similarity index 97% rename from doleus/storage/metadata_store.py rename to doleus/storage/metadata_store/store.py index b6a576e..5540458 100644 --- a/doleus/storage/metadata_store.py +++ b/doleus/storage/metadata_store/store.py @@ -74,4 +74,4 @@ def get_subset(self, indices: List[int]) -> List[Dict[str, Any]]: List[Dict[str, Any]] List of metadata dictionaries for the specified indices. 
""" - return [self.metadata[i] for i in indices] + return [self.metadata[i] for i in indices] \ No newline at end of file diff --git a/doleus/storage/prediction_store/__init__.py b/doleus/storage/prediction_store/__init__.py new file mode 100644 index 0000000..fe69628 --- /dev/null +++ b/doleus/storage/prediction_store/__init__.py @@ -0,0 +1,9 @@ +from doleus.storage.prediction_store.base import BasePredictionStore +from doleus.storage.prediction_store.classification import ClassificationPredictionStore +from doleus.storage.prediction_store.detection import DetectionPredictionStore + +__all__ = [ + "BasePredictionStore", + "ClassificationPredictionStore", + "DetectionPredictionStore", +] \ No newline at end of file diff --git a/doleus/storage/base_store.py b/doleus/storage/prediction_store/base.py similarity index 71% rename from doleus/storage/base_store.py rename to doleus/storage/prediction_store/base.py index 7ffddaf..27eb252 100644 --- a/doleus/storage/base_store.py +++ b/doleus/storage/prediction_store/base.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import Any, Dict, List, Union, Optional +from typing import Any, Dict, List, Union import torch @@ -118,56 +118,4 @@ def get_predictions(self, model_id: str) -> Annotations: """ if model_id not in self.predictions: raise KeyError(f"No predictions found for model: {model_id}") - return self.predictions[model_id] - - -class BaseGroundTruthStore(ABC): - """Base storage for ground truth data for a specific dataset instance.""" - - def __init__(self, dataset: Any): - """ - Initialize the ground truth store. - - Parameters - ---------- - dataset : Any - The raw PyTorch dataset object. - """ - self.dataset = dataset - self.groundtruths: Optional[Annotations] = None - self.groundtruths = self._process_groundtruths() - - @abstractmethod - def _process_groundtruths(self) -> Annotations: - """ - Process raw ground truth data from the dataset into the standard annotation format. 
- Actual implementation will depend on the task type (classification, detection). - - Returns - ------- - Annotations - Processed ground truths in standard annotation format. - """ - pass - - def get(self, datapoint_number: int) -> Optional[Annotation]: - """ - Get a single ground truth annotation object by datapoint number. - - Parameters - ---------- - datapoint_number : int - The ID of the sample in the dataset. - - Returns - ------- - Optional[Annotation] - The specific Annotation object (e.g., Labels, BoundingBoxes) for the datapoint, - or None if not found. - """ - if self.groundtruths is None: - return None - try: - return self.groundtruths[datapoint_number] - except KeyError: - return None \ No newline at end of file + return self.predictions[model_id] \ No newline at end of file diff --git a/doleus/storage/classification_prediction_store.py b/doleus/storage/prediction_store/classification.py similarity index 99% rename from doleus/storage/classification_prediction_store.py rename to doleus/storage/prediction_store/classification.py index 43eebdb..6dfb815 100644 --- a/doleus/storage/classification_prediction_store.py +++ b/doleus/storage/prediction_store/classification.py @@ -4,7 +4,7 @@ from torch import Tensor from doleus.annotations import Annotations, Labels -from doleus.storage.base_store import BasePredictionStore +from doleus.storage.prediction_store.base import BasePredictionStore from doleus.utils import Task diff --git a/doleus/storage/detection_prediction_store.py b/doleus/storage/prediction_store/detection.py similarity index 98% rename from doleus/storage/detection_prediction_store.py rename to doleus/storage/prediction_store/detection.py index 3ffbb5b..9c6c9dd 100644 --- a/doleus/storage/detection_prediction_store.py +++ b/doleus/storage/prediction_store/detection.py @@ -3,7 +3,7 @@ import torch from doleus.annotations import Annotations, BoundingBoxes -from doleus.storage.base_store import BasePredictionStore +from 
doleus.storage.prediction_store.base import BasePredictionStore class DetectionPredictionStore(BasePredictionStore):