diff --git a/setup.py b/setup.py index 8d0f83b..0a0d9a8 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ def readme(): setup(name='yapic_io', - version='0.1.1', + version='0.1.2', description='io data handling module for various image sources as interface for pixel classification tools', long_description=readme(), author='Manuel Schoelling, Christoph Moehl', diff --git a/yapic_io/ilastik_connector.py b/yapic_io/ilastik_connector.py index 0f4356c..0e20f66 100644 --- a/yapic_io/ilastik_connector.py +++ b/yapic_io/ilastik_connector.py @@ -1,258 +1,285 @@ -from itertools import zip_longest -import os -import logging -from functools import lru_cache -import numpy as np -import pyilastik -from yapic_io.tiff_connector import TiffConnector - -logger = logging.getLogger(os.path.basename(__file__)) - - -class IlastikConnector(TiffConnector): - ''' - Implementation of Connector for tiff images up to 4 dimensions and - corresponding Ilastik_ project file. The Ilastik_ Project file - is supposed to contain manually drawn labels for all tiff files specified - with img_filepath - - .. _Ilastik: http://www.ilastik.org/ - - Parameters - ---------- - img_filepath : str or list of str - Path to source pixel images (use wildcards for filtering) - or a list of filenames. - label_filepath : str - Path to one Ilastik Project File (with extension ilp). Ilastik_ - versions from 1.3 on are supported. - savepath : str, optional - Directory to save pixel classifiaction results as probability - images. - - Notes - ----- - Label images and pixel images have to be equal in zxy dimensions, - but can differ in nr of channels. - - Labels can be read from multichannel images. This is needed for - networks with multiple output layers. Each channel is assigned one - output layer. Different labels from different channels can overlap - (can share identical xyz positions). - - Files from Ilastik v1.2 and v1.3 are supported (storage version 0.1). - - Examples - -------- - >>> from yapic_io.ilastik_connector import IlastikConnector - >>> img_dir = 'yapic_io/test_data/ilastik/pixels_ilastik-multiim-1.2/*.tif' - >>> ilastik_path = 'yapic_io/test_data/ilastik/ilastik-multiim-1.2.ilp' - >>> c = IlastikConnector(img_dir, ilastik_path) - ... # doctest:+ELLIPSIS - ... - >>> print(c) - IlastikConnector object - image filepath: yapic_io/test_data/ilastik/pixels_ilastik-multiim-1.2 - label filepath: yapic_io/test_data/ilastik/ilastik-multiim-1.2.ilp - nr of images: 3 - labelvalue_mapping: [{1: 1, 2: 2}] - - See Also - -------- - yapic_io.tiff_connector.TiffConnector - ''' - def _handle_lbl_filenames(self, label_filepath): - label_path = label_filepath - self.ilp = pyilastik.read_project(label_filepath, skip_image=True) - lbl_filenames = self.ilp.image_path_list() - - return label_path, lbl_filenames - - def __repr__(self): - infostring = \ - 'IlastikConnector object\n' \ - 'image filepath: {}\n' \ - 'label filepath: {}\n'\ - 'nr of images: {}\n'\ - 'labelvalue_mapping: {}'.format(self.img_path, - self.label_path, - self.image_count(), - self.labelvalue_mapping) - return infostring - - def filter_labeled(self): - ''' - Removes images without labels. - - Returns - ------- - IlastikConnector - Connector object containing only images with labels. - ''' - pairs = [self.filenames[i]for i in range( - self.image_count()) if self.label_count_for_image(i)] - - tiff_sel = [self.img_path / pair.img for pair in pairs] - - return IlastikConnector(tiff_sel, self.label_path, - savepath=self.savepath) - - def split(self, fraction, random_seed=42): - ''' - Split the images pseudo-randomly into two Connector subsets. - - The first of size `(1-fraction)*N_images`, the other of size - `fraction*N_images` - - Parameters - ---------- - fraction : float - random_seed : float, optional - - Returns - ------- - connector_1, connector_2 - ''' - - img_fnames1, img_fnames2, mask = self._split_img_fnames( - fraction, random_seed=random_seed) - - conn1 = IlastikConnector(img_fnames1, self.label_path, - savepath=self.savepath) - conn2 = IlastikConnector(img_fnames2, self.label_path, - savepath=self.savepath) - - # ensures that both resulting connectors have the same - # labelvalue mapping (issue #1) - conn1.labelvalue_mapping = self.labelvalue_mapping - conn2.labelvalue_mapping = self.labelvalue_mapping - - return conn1, conn2 - - @lru_cache(maxsize=20) - def label_tile(self, image_nr, pos_zxy, size_zxy, label_value): - ''' - Get 3d zxy boolean matrix where positions of the requested label - are indicated with True. Only mapped labelvalues can be requested. - - dimension order: (z, x, y) - - Parameters - ---------- - image_nr : int - Index of image. - pos_zxy : (zslice, x, y) - Upper left position of subsection. - label_value : int - Id of the label. - - Returns - ------- - numpy.ndarray - 3D subsection of labelmatrix as boolean mask in dimension order - (z, x, y) - ''' - - slices = np.array([[pos_zxy[0], pos_zxy[0] + size_zxy[0]], # z - [pos_zxy[2], pos_zxy[2] + size_zxy[2]], # y - [pos_zxy[1], pos_zxy[1] + size_zxy[1]], # x - [0, 1]]) # c - - if self.ilp.n_dims(image_nr) == 0: # no labels in image - return np.zeros(size_zxy) > 0 - - elif self.ilp.n_dims(image_nr) == 4: # z-stacks - lbl = self.ilp.tile(image_nr, slices) - - elif self.ilp.n_dims(image_nr) == 3: # 2d images - lbl = self.ilp.tile(image_nr, slices[1:, :]) - lbl = np.expand_dims(lbl, axis=0) # add z axis - - # zyxc to czxy - lbl = np.transpose(lbl, (3, 0, 2, 1)).astype(int) - C, original_label_value = self._mapped_label_value_to_original( - label_value) - lbl = (lbl == original_label_value) - - return lbl[0, :, :, :] - - def check_label_matrix_dimensions(self): - ''' - Notes - ----- - Overloads method from tiff connector. - Method does nothing since it is expected that labelmatrix dimensions - are correct for Ilastik Projects. - ''' - return True - - @lru_cache(maxsize=1) - def original_label_values_for_all_images(self): - ''' - Get all unique label values per image. - - Returns - ------- - list - List of sets. Each set corresponds to 1 label channel. - each set contains the label values of that channel. - E.g. `[{91, 109, 150}, {90, 100}]` for two label channels - ''' - labels_per_channel = [] - - for image_nr in range(self.image_count()): - label_filename = str(self.filenames[image_nr].lbl) - - if label_filename is None: - msg = 'No label matrix file found for image file #{}.' - logger.warning(msg.format(image_nr)) - return None - - _, (img, lbl, _) = self.ilp[label_filename] - lbl = np.transpose(lbl, (3, 0, 2, 1)).astype(int) - - C = lbl.shape[0] - labels = [np.unique(lbl[c, ...]) for c in range(C)] - labels = [set(labels) - {0} for labels in labels] - - labels_per_channel = [l1.union(l2) - for l1, l2 in zip_longest(labels_per_channel, - labels, - fillvalue=set())] - - return labels_per_channel - - @lru_cache(maxsize=1500) - def label_count_for_image(self, image_nr): - ''' - Get number of labels per labelvalue for an image. - - Parameters - ---------- - image_nr : int - index of image - - Returns - ------- - dict - ''' - label_filename = str(self.filenames[image_nr].lbl) - - if label_filename is None: - msg = 'No label matrix file found for image file #{}.' - logger.warning(msg.format(image_nr)) - return None - - _, (img, lbl, _) = self.ilp[label_filename] - lbl = np.transpose(lbl, (3, 0, 2, 1)).astype(int) - - C = lbl.shape[0] - labels = [np.unique(lbl[c, ...]) for c in range(C)] - - original_label_count = [{l: np.count_nonzero(lbl[c, ...] == l) - for l in labels[c] if l > 0} - for c in range(C)] - label_count = {self.labelvalue_mapping[c][l]: count - for c, orig in enumerate(original_label_count) - for l, count in orig.items()} - return label_count +from itertools import zip_longest +import os +import logging +from functools import lru_cache +import numpy as np +import pyilastik +from yapic_io.tiff_connector import TiffConnector +from pathlib import Path +import collections + +FilePair = collections.namedtuple('FilePair', ['img', 'lbl']) +logger = logging.getLogger(os.path.basename(__file__)) + + +class IlastikConnector(TiffConnector): + ''' + Implementation of Connector for tiff images up to 4 dimensions and + corresponding Ilastik_ project file. The Ilastik_ Project file + is supposed to contain manually drawn labels for all tiff files specified + with img_filepath + + .. _Ilastik: http://www.ilastik.org/ + + Parameters + ---------- + img_filepath : str or list of str + Path to source pixel images (use wildcards for filtering) + or a list of filenames. + label_filepath : str + Path to one Ilastik Project File (with extension ilp). Ilastik_ + versions from 1.3 on are supported. + savepath : str, optional + Directory to save pixel classifiaction results as probability + images. + + Notes + ----- + Label images and pixel images have to be equal in zxy dimensions, + but can differ in nr of channels. + + Labels can be read from multichannel images. This is needed for + networks with multiple output layers. Each channel is assigned one + output layer. Different labels from different channels can overlap + (can share identical xyz positions). + + Files from Ilastik v1.2 and v1.3 are supported (storage version 0.1). + + Examples + -------- + >>> from yapic_io.ilastik_connector import IlastikConnector + >>> img_dir = 'yapic_io/test_data/ilastik/pixels_ilastik-multiim-1.2/*.tif' + >>> ilastik_path = 'yapic_io/test_data/ilastik/ilastik-multiim-1.2.ilp' + >>> c = IlastikConnector(img_dir, ilastik_path) + ... # doctest:+ELLIPSIS + ... + >>> print(c) + IlastikConnector object + image filepath: yapic_io/test_data/ilastik/pixels_ilastik-multiim-1.2 + label filepath: yapic_io/test_data/ilastik/ilastik-multiim-1.2.ilp + nr of images: 3 + labelvalue_mapping: [{1: 1, 2: 2}] + + See Also + -------- + yapic_io.tiff_connector.TiffConnector + ''' + + def _assemble_filenames(self, pairs): + self.filenames = [FilePair(Path(img), Path(lbl)) + for img, lbl in pairs if lbl] + print('filenames in ilastikconnector') + print(self.filenames) + + def _handle_lbl_filenames(self, label_filepath): + label_path = label_filepath + self.ilp = pyilastik.read_project(label_filepath, skip_image=True) + lbl_filenames = self.ilp.image_path_list() + + return label_path, lbl_filenames + + def __repr__(self): + infostring = \ + 'IlastikConnector object\n' \ + 'image filepath: {}\n' \ + 'label filepath: {}\n'\ + 'nr of images: {}\n'\ + 'labelvalue_mapping: {}'.format(self.img_path, + self.label_path, + self.image_count(), + self.labelvalue_mapping) + return infostring + + def _new_label(self,label_value): + labels_per_channel = [] + + new_list = [] + new_list = [x for x in label_value[1] if x[1] is not None] + + + for x in label_value: + if label_value[1] is not None: + new_list.append(x) + else: + pass + label_value = new_list + return label_value + + + def filter_labeled(self): + ''' + Removes images without labels. + + Returns + ------- + IlastikConnector + Connector object containing only images with labels. + ''' + pairs = [self.filenames[i]for i in range( + self.image_count()) if self.label_count_for_image(i)] + + tiff_sel = [self.img_path / pair.img for pair in pairs] + + return IlastikConnector(tiff_sel, self.label_path, + savepath=self.savepath) + + def split(self, fraction, random_seed=42): + ''' + Split the images pseudo-randomly into two Connector subsets. + + The first of size `(1-fraction)*N_images`, the other of size + `fraction*N_images` + + Parameters + ---------- + fraction : float + random_seed : float, optional + + Returns + ------- + connector_1, connector_2 + ''' + + img_fnames1, img_fnames2, mask = self._split_img_fnames( + fraction, random_seed=random_seed) + + conn1 = IlastikConnector(img_fnames1, self.label_path, + savepath=self.savepath) + conn2 = IlastikConnector(img_fnames2, self.label_path, + savepath=self.savepath) + + # ensures that both resulting connectors have the same + # labelvalue mapping (issue #1) + conn1.labelvalue_mapping = self.labelvalue_mapping + conn2.labelvalue_mapping = self.labelvalue_mapping + + return conn1, conn2 + + @lru_cache(maxsize=20) + def label_tile(self, image_nr, pos_zxy, size_zxy, label_value): + ''' + Get 3d zxy boolean matrix where positions of the requested label + are indicated with True. Only mapped labelvalues can be requested. + + dimension order: (z, x, y) + + Parameters + ---------- + image_nr : int + Index of image. + pos_zxy : (zslice, x, y) + Upper left position of subsection. + label_value : int + Id of the label. + + Returns + ------- + numpy.ndarray + 3D subsection of labelmatrix as boolean mask in dimension order + (z, x, y) + ''' + + slices = np.array([[pos_zxy[0], pos_zxy[0] + size_zxy[0]], # z + [pos_zxy[2], pos_zxy[2] + size_zxy[2]], # y + [pos_zxy[1], pos_zxy[1] + size_zxy[1]], # x + [0, 1]]) # c + + if self.ilp.n_dims(image_nr) == 0: # no labels in image + return np.zeros(size_zxy) > 0 + + elif self.ilp.n_dims(image_nr) == 4: # z-stacks + lbl = self.ilp.tile(image_nr, slices) + + elif self.ilp.n_dims(image_nr) == 3: # 2d images + lbl = self.ilp.tile(image_nr, slices[1:, :]) + lbl = np.expand_dims(lbl, axis=0) # add z axis + + # zyxc to czxy + lbl = np.transpose(lbl, (3, 0, 2, 1)).astype(int) + C, original_label_value = self._mapped_label_value_to_original( + label_value) + lbl = (lbl == original_label_value) + + return lbl[0, :, :, :] + + def check_label_matrix_dimensions(self): + ''' + Notes + ----- + Overloads method from tiff connector. + Method does nothing since it is expected that labelmatrix dimensions + are correct for Ilastik Projects. + ''' + return True + + @lru_cache(maxsize=1) + def original_label_values_for_all_images(self): + ''' + Get all unique label values per image. + + Returns + ------- + list + List of sets. Each set corresponds to 1 label channel. + each set contains the label values of that channel. + E.g. `[{91, 109, 150}, {90, 100}]` for two label channels + ''' + labels_per_channel = [] + + for image_nr in range(self.image_count()): + label_filename = str(self.filenames[image_nr].lbl) + + if label_filename is None: + msg = 'No label matrix file found for image file #{}.' + logger.warning(msg.format(image_nr)) + return None + print('label filename') + print(label_filename) + _, (img, lbl, _) = self.ilp[label_filename] + lbl = np.transpose(lbl, (3, 0, 2, 1)).astype(int) + + C = lbl.shape[0] + labels = [np.unique(lbl[c, ...]) for c in range(C)] + labels = [set(labels) - {0} for labels in labels] + + labels_per_channel = [l1.union(l2) + for l1, l2 in zip_longest(labels_per_channel, + labels, + fillvalue=set())] + + return labels_per_channel + + @lru_cache(maxsize=1500) + def label_count_for_image(self, image_nr): + ''' + Get number of labels per labelvalue for an image. + + Parameters + ---------- + image_nr : int + index of image + + Returns + ------- + dict + ''' + label_filename = str(self.filenames[image_nr].lbl) + + if label_filename is None: + msg = 'No label matrix file found for image file #{}.' + logger.warning(msg.format(image_nr)) + return None + + _, (img, lbl, _) = self.ilp[label_filename] + lbl = np.transpose(lbl, (3, 0, 2, 1)).astype(int) + + C = lbl.shape[0] + labels = [np.unique(lbl[c, ...]) for c in range(C)] + + original_label_count = [{l: np.count_nonzero(lbl[c, ...] == l) + for l in labels[c] if l > 0} + for c in range(C)] + label_count = {self.labelvalue_mapping[c][l]: count + for c, orig in enumerate(original_label_count) + for l, count in orig.items()} + return label_count diff --git a/yapic_io/test_data/ilastik/pixels_ilastik_mutliim-1.2_additional_img/20width_23height_3slices_2channels.tif b/yapic_io/test_data/ilastik/pixels_ilastik_mutliim-1.2_additional_img/20width_23height_3slices_2channels.tif new file mode 100644 index 0000000..615d908 Binary files /dev/null and b/yapic_io/test_data/ilastik/pixels_ilastik_mutliim-1.2_additional_img/20width_23height_3slices_2channels.tif differ diff --git a/yapic_io/test_data/ilastik/pixels_ilastik_mutliim-1.2_additional_img/34width_28height_2slices_2channels.tif b/yapic_io/test_data/ilastik/pixels_ilastik_mutliim-1.2_additional_img/34width_28height_2slices_2channels.tif new file mode 100644 index 0000000..de11999 Binary files /dev/null and b/yapic_io/test_data/ilastik/pixels_ilastik_mutliim-1.2_additional_img/34width_28height_2slices_2channels.tif differ diff --git a/yapic_io/test_data/ilastik/pixels_ilastik_mutliim-1.2_additional_img/6width_4height_3slices_2channels.tif b/yapic_io/test_data/ilastik/pixels_ilastik_mutliim-1.2_additional_img/6width_4height_3slices_2channels.tif new file mode 100644 index 0000000..f6c34ee Binary files /dev/null and b/yapic_io/test_data/ilastik/pixels_ilastik_mutliim-1.2_additional_img/6width_4height_3slices_2channels.tif differ diff --git a/yapic_io/test_data/ilastik/pixels_ilastik_mutliim-1.2_additional_img/additional_image_not_present_in_ilastik_project.tif b/yapic_io/test_data/ilastik/pixels_ilastik_mutliim-1.2_additional_img/additional_image_not_present_in_ilastik_project.tif new file mode 100644 index 0000000..de11999 Binary files /dev/null and b/yapic_io/test_data/ilastik/pixels_ilastik_mutliim-1.2_additional_img/additional_image_not_present_in_ilastik_project.tif differ diff --git a/yapic_io/tests/test_ilastik_connector.py b/yapic_io/tests/test_ilastik_connector.py index 2d45ea5..008c379 100644 --- a/yapic_io/tests/test_ilastik_connector.py +++ b/yapic_io/tests/test_ilastik_connector.py @@ -1,313 +1,323 @@ -import os -import logging -from unittest import TestCase -from yapic_io.ilastik_connector import IlastikConnector -from numpy.testing import assert_array_equal -import numpy as np -from pprint import pprint -from pathlib import Path - -logger = logging.getLogger(os.path.basename(__file__)) - -base_path = os.path.dirname(__file__) - - -class TestIlastikConnector(TestCase): - def setup_storage_version_12(self): - img_path = os.path.join(base_path, '../test_data/ilastik') - lbl_path = os.path.join( - base_path, '../test_data/ilastik/ilastik-1.2.ilp') - - return IlastikConnector(img_path, lbl_path) - - def test_tiles(self): - c = self.setup_storage_version_12() - - lbl_value = 2 - pos_czxy = (0, 0, 0, 0) - size_czxy = (1, 1, 1, 1) - - img_tile = c.get_tile(0, pos_czxy, size_czxy) - lbl_tile = c.label_tile(0, pos_czxy[1:], size_czxy[1:], lbl_value) - - assert_array_equal(img_tile.shape[1:], lbl_tile.shape) - - def test_label_tiles(self): - c = self.setup_storage_version_12() - - lbl_value = 1 - pos_czxy = (0, 4, 0, 0) - size_czxy = (1, 1, 2, 4) - val = np.array([[[False, False, False, False], - [False, False, False, True]]]) - lbl_tile = c.label_tile(0, pos_czxy[1:], size_czxy[1:], lbl_value) - assert_array_equal(lbl_tile, val) - - lbl_value = 2 - pos_czxy = (0, 1, 0, 0) - size_czxy = (1, 1, 2, 4) - val = np.array([[[False, False, False, False], - [False, True, False, False]]]) - lbl_tile = c.label_tile(0, pos_czxy[1:], size_czxy[1:], lbl_value) - assert_array_equal(lbl_tile, val) - - lbl_value = 3 - pos_czxy = (0, 7, 0, 0) - size_czxy = (1, 1, 2, 4) - val = np.array([[[False, False, False, False], - [False, True, False, False]]]) - lbl_tile = c.label_tile(0, pos_czxy[1:], size_czxy[1:], lbl_value) - assert_array_equal(lbl_tile, val) - - def test_label_count(self): - c = self.setup_storage_version_12() - - actual_counts = c.label_count_for_image(0) - expected_counts = {1: 1, 2: 1, 3: 1} - - self.assertEqual(actual_counts, expected_counts) - - def test_constructor(self): - img_path = os.path.join( - base_path, '../test_data/ilastik/pixels_ilastik-multiim-1.2') - lbl_path = os.path.join( - base_path, '../test_data/ilastik/ilastik-multiim-1.2.ilp') - c = IlastikConnector(img_path, lbl_path) - - lbl_identifiers = \ - [Path(('pixels_ilastik-multiim-1.2/' - '20width_23height_3slices_2channels.tif')), - Path(('pixels_ilastik-multiim-1.2/' - '34width_28height_2slices_2channels.tif')), - Path(('pixels_ilastik-multiim-1.2/' - '6width_4height_3slices_2channels.tif'))] - - assert_array_equal(lbl_identifiers, [lbl for im, lbl in c.filenames]) - - def test_constructor_with_subset(self): - - # by passing a list of tiff filenames to IlastikConnector - # (rather than a wildcard) an image subset of the ilastik project - # can be selected - - tiff_dir = os.path.join( - base_path, '../test_data/ilastik/pixels_ilastik-multiim-1.2') - selected_tiffs = [os.path.join( - tiff_dir, - '20width_23height_3slices_2channels.tif'), - os.path.join( - tiff_dir, - '6width_4height_3slices_2channels.tif')] - - lbl_path = os.path.join( - base_path, '../test_data/ilastik/ilastik-multiim-1.2.ilp') - c = IlastikConnector(selected_tiffs, lbl_path) - - lbl_identifiers = \ - [Path(('pixels_ilastik-multiim-1.2/' - '20width_23height_3slices_2channels.tif')), - Path(('pixels_ilastik-multiim-1.2/' - '6width_4height_3slices_2channels.tif'))] - - pprint(c.filenames) - assert_array_equal(lbl_identifiers, [lbl for im, lbl in c.filenames]) - - def test_label_tile(self): - - img_path = os.path.join( - base_path, '../test_data/ilastik/pixels_ilastik-multiim-1.2') - lbl_path = os.path.join( - base_path, '../test_data/ilastik/ilastik-multiim-1.2.ilp') - c = IlastikConnector(img_path, lbl_path) - - mat_val = np.array([[0., 0., 0., 0., 0., 0., 0.], - [0., 0., 2., 2., 2., 0., 0.], - [0., 2., 2., 2., 2., 2., 0.], - [0., 2., 2., 2., 2., 2., 0.], - [0., 2., 2., 2., 2., 2., 0.], - [0., 2., 2., 2., 2., 2., 0.], - [0., 2., 2., 2., 2., 2., 0.], - [0., 2., 2., 2., 2., 2., 0.], - [0., 2., 2., 2., 2., 2., 0.], - [0., 2., 2., 2., 2., 0., 0.], - [0., 0., 0., 2., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0.]]) - - lbl = c.label_tile(0, (0, 0, 0), (1, 19, 17), 2) - assert_array_equal(lbl[0, 6:18, 9:16], mat_val != 0) - - mat_val = np.array([[0., 0., 0., 1., 0., 0., 0.], - [0., 1., 1., 1., 1., 0., 0.], - [0., 1., 1., 1., 1., 1., 0.], - [0., 1., 1., 1., 1., 1., 0.], - [0., 1., 1., 1., 1., 1., 0.], - [0., 1., 1., 1., 1., 1., 0.], - [0., 1., 1., 1., 1., 1., 0.], - [0., 1., 1., 1., 1., 1., 0.], - [0., 1., 1., 1., 1., 1., 0.], - [0., 1., 1., 1., 1., 1., 0.], - [0., 1., 1., 1., 1., 0., 0.], - [0., 0., 0., 1., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0.]]) - - lbl = c.label_tile(0, (0, 0, 0), (1, 14, 9), 1) - assert_array_equal(lbl[0, :13, 1:8], mat_val != 0) - - def test_labels_for_ilastik_versions_12_133_are_equal(self): - - img_path = os.path.join( - base_path, '../test_data/ilastik/pixels_ilastik-multiim-1.2') - lbl_path = os.path.join( - base_path, '../test_data/ilastik/ilastik-multiim-1.3.3.ilp') - c13 = IlastikConnector(img_path, lbl_path) - - lbl_path = os.path.join( - base_path, '../test_data/ilastik/ilastik-multiim-1.2.ilp') - c12 = IlastikConnector(img_path, lbl_path) - - lbl12 = c12.label_tile(0, (0, 0, 0), (1, 19, 17), 2) - lbl13 = c13.label_tile(0, (0, 0, 0), (1, 19, 17), 2) - assert_array_equal(lbl12, lbl13) - - assert c12.label_count_for_image(0) == c13.label_count_for_image(0) - - def test_label_tile_purkinjedata(self): - - p = os.path.join(base_path, '../test_data/ilastik/purkinjetest') - img_path = os.path.join(p, 'images') - lbl_path = os.path.join(p, 'ilastik-1.2.2post1mac.ilp') - - c = IlastikConnector(img_path, lbl_path) - print(c.filenames) - print(c.image_count) - - image_id = 0 # 769_cerebellum_5M41_subset_1.tif - pos_zxy = (0, 309, 212) - size_zxy = (1, 4, 5) - - val = np.array([[[True, False, False, False, False], - [True, True, False, False, False], - [True, True, True, False, False], - [True, True, True, False, False]]]) - lbl = c.label_tile(image_id, pos_zxy, size_zxy, 2) - assert_array_equal(lbl, val) - - val = np.array([[[False, False, False, False, True], - [False, False, False, False, True], - [False, False, False, True, True], - [False, False, False, True, True]]]) - lbl = c.label_tile(image_id, pos_zxy, size_zxy, 4) - assert_array_equal(lbl, val) - - def test_labeltile_dimensions_purkinjedata(self): - - p = os.path.join(base_path, '../test_data/ilastik/purkinjetest') - img_path = os.path.join(p, 'images') - lbl_path = os.path.join(p, 'ilastik-1.2.2post1mac.ilp') - - c = IlastikConnector(img_path, lbl_path) - - image_id = 3 # 769_cerebellum_5M41_subset_1.tif - pos_zxy = (0, 0, 0) - size_zxy = (1, 1047, 684) # whole image - - lbl = c.label_tile(image_id, pos_zxy, size_zxy, 4) - self.assertEqual(lbl.shape, size_zxy) - - def test_labeltile_for_image_without_labels(self): - p = os.path.join(base_path, '../test_data/ilastik/purkinjetest') - img_path = os.path.join(p, 'images') - lbl_path = os.path.join(p, 'ilastik-1.2.2post1mac.ilp') - - c = IlastikConnector(img_path, lbl_path) - print(c.filenames) - print(c.image_count) - - image_id = 2 # 769_cerebellum_5M41_subset_1.tif - pos_zxy = (0, 309, 212) - size_zxy = (1, 4, 5) - - val = np.array([[[False, False, False, False, False], - [False, False, False, False, False], - [False, False, False, False, False], - [False, False, False, False, False]]]) - - lbl = c.label_tile(image_id, pos_zxy, size_zxy, 3) - assert_array_equal(lbl, val) - - def test_multi_channel_multi_z(self): - - p = os.path.join(base_path, '../test_data/ilastik/dimensionstest') - img_path = os.path.join(p, 'images') - lbl_path = os.path.join(p, 'x15_y10_z2_c4_classes2.ilp') - c = IlastikConnector(img_path, lbl_path) - pos_zxy = (0, 0, 0) - size_zxy = (2, 15, 10) - - lbl = c.label_tile(0, pos_zxy, size_zxy, 1) - lbl_pos = [[0, 2, 1], [0, 2, 1], [0, 8, 6], [0, 9, 6], [1, 4, 3]] - [self.assertTrue(lbl[pos[0], pos[1], pos[2]]) for pos in lbl_pos] - - lbl = c.label_tile(0, pos_zxy, size_zxy, 2) - lbl_pos = [[0, 2, 2], [0, 3, 2], [0, 8, 7], [0, 9, 7]] - [self.assertTrue(lbl[pos[0], pos[1], pos[2]]) for pos in lbl_pos] - - self.assertFalse(lbl[0, 0, 0]) - - - p = os.path.join(base_path, '../test_data/ilastik/dimensionstest') - img_path = os.path.join(p, 'images') - lbl_path = os.path.join(p, 'x15_y10_z2_c4_classes2_ilastik1.3.3.ilp') - c = IlastikConnector(img_path, lbl_path) - pos_zxy = (0, 0, 0) - size_zxy = (2, 15, 10) - - lbl = c.label_tile(0, pos_zxy, size_zxy, 1) - lbl_pos = [[0, 2, 1], [0, 2, 1], [0, 8, 6], [0, 9, 6], [1, 4, 3]] - [self.assertTrue(lbl[pos[0], pos[1], pos[2]]) for pos in lbl_pos] - - lbl = c.label_tile(0, pos_zxy, size_zxy, 2) - lbl_pos = [[0, 2, 2], [0, 3, 2], [0, 8, 7], [0, 9, 7]] - [self.assertTrue(lbl[pos[0], pos[1], pos[2]]) for pos in lbl_pos] - - self.assertFalse(lbl[0, 0, 0]) - - def test_filter_labeled(self): - - img_path = os.path.join( - base_path, '../test_data/ilastik/pixels_ilastik-multiim-1.2') - - lbl_path = os.path.join( - base_path, '../test_data/ilastik/ilastik-multiim-1.2.ilp') - - c = IlastikConnector(img_path, lbl_path) - c_filtered = c.filter_labeled() - - labelnames = [Path(('pixels_ilastik-multiim-1.2/' - '20width_23height_3slices_2channels.tif')), - Path(('pixels_ilastik-multiim-1.2/' - '34width_28height_2slices_2channels.tif')), - Path(('pixels_ilastik-multiim-1.2/' - '6width_4height_3slices_2channels.tif'))] - - labelnames_flt = [Path(('pixels_ilastik-multiim-1.2/' - '20width_23height_3slices_2channels.tif')), - Path(('pixels_ilastik-multiim-1.2/' - '34width_28height_2slices_2channels.tif'))] - - assert_array_equal(labelnames, [lbl for im, lbl in c.filenames]) - assert_array_equal(labelnames_flt, [ - lbl for im, lbl in c_filtered.filenames]) - - def test_split(self): - img_path = os.path.join( - base_path, '../test_data/ilastik/pixels_ilastik-multiim-1.2') - lbl_path = os.path.join( - base_path, '../test_data/ilastik/ilastik-multiim-1.2.ilp') - - c = IlastikConnector(img_path, lbl_path) - - c1, c2 = c.split(0.3) - - assert_array_equal(c1.image_count() + c2.image_count(), - c.image_count()) +import os +import logging +from unittest import TestCase +from yapic_io.ilastik_connector import IlastikConnector +from numpy.testing import assert_array_equal +import numpy as np +from pprint import pprint +from pathlib import Path + +logger = logging.getLogger(os.path.basename(__file__)) + +base_path = os.path.dirname(__file__) + + +class TestIlastikConnector(TestCase): + def setup_storage_version_12(self): + img_path = os.path.join(base_path, '../test_data/ilastik') + lbl_path = os.path.join( + base_path, '../test_data/ilastik/ilastik-1.2.ilp') + + return IlastikConnector(img_path, lbl_path) + + def test_tiles(self): + c = self.setup_storage_version_12() + + lbl_value = 2 + pos_czxy = (0, 0, 0, 0) + size_czxy = (1, 1, 1, 1) + + img_tile = c.get_tile(0, pos_czxy, size_czxy) + lbl_tile = c.label_tile(0, pos_czxy[1:], size_czxy[1:], lbl_value) + + assert_array_equal(img_tile.shape[1:], lbl_tile.shape) + + def test_label_tiles(self): + c = self.setup_storage_version_12() + + lbl_value = 1 + pos_czxy = (0, 4, 0, 0) + size_czxy = (1, 1, 2, 4) + val = np.array([[[False, False, False, False], + [False, False, False, True]]]) + lbl_tile = c.label_tile(0, pos_czxy[1:], size_czxy[1:], lbl_value) + assert_array_equal(lbl_tile, val) + + lbl_value = 2 + pos_czxy = (0, 1, 0, 0) + size_czxy = (1, 1, 2, 4) + val = np.array([[[False, False, False, False], + [False, True, False, False]]]) + lbl_tile = c.label_tile(0, pos_czxy[1:], size_czxy[1:], lbl_value) + assert_array_equal(lbl_tile, val) + + lbl_value = 3 + pos_czxy = (0, 7, 0, 0) + size_czxy = (1, 1, 2, 4) + val = np.array([[[False, False, False, False], + [False, True, False, False]]]) + lbl_tile = c.label_tile(0, pos_czxy[1:], size_czxy[1:], lbl_value) + assert_array_equal(lbl_tile, val) + + def test_label_count(self): + c = self.setup_storage_version_12() + + actual_counts = c.label_count_for_image(0) + expected_counts = {1: 1, 2: 1, 3: 1} + + self.assertEqual(actual_counts, expected_counts) + + def test_constructor(self): + img_path = os.path.join( + base_path, '../test_data/ilastik/pixels_ilastik-multiim-1.2') + lbl_path = os.path.join( + base_path, '../test_data/ilastik/ilastik-multiim-1.2.ilp') + c = IlastikConnector(img_path, lbl_path) + + lbl_identifiers = \ + [Path(('pixels_ilastik-multiim-1.2/' + '20width_23height_3slices_2channels.tif')), + Path(('pixels_ilastik-multiim-1.2/' + '34width_28height_2slices_2channels.tif')), + Path(('pixels_ilastik-multiim-1.2/' + '6width_4height_3slices_2channels.tif'))] + + assert_array_equal(lbl_identifiers, [lbl for im, lbl in c.filenames]) + + def test_incomplete_label_data(self): + img_path = os.path.join( + base_path, + '../test_data/ilastik/pixels_ilastik_mutliim-1.2_additional_img') + lbl_path = os.path.join( + base_path, '../test_data/ilastik/ilastik-multiim-1.2.ilp') + c = IlastikConnector(img_path, lbl_path) + + + + def test_constructor_with_subset(self): + + # by passing a list of tiff filenames to IlastikConnector + # (rather than a wildcard) an image subset of the ilastik project + # can be selected + + tiff_dir = os.path.join( + base_path, '../test_data/ilastik/pixels_ilastik-multiim-1.2') + selected_tiffs = [os.path.join( + tiff_dir, + '20width_23height_3slices_2channels.tif'), + os.path.join( + tiff_dir, + '6width_4height_3slices_2channels.tif')] + + lbl_path = os.path.join( + base_path, '../test_data/ilastik/ilastik-multiim-1.2.ilp') + c = IlastikConnector(selected_tiffs, lbl_path) + + lbl_identifiers = \ + [Path(('pixels_ilastik-multiim-1.2/' + '20width_23height_3slices_2channels.tif')), + Path(('pixels_ilastik-multiim-1.2/' + '6width_4height_3slices_2channels.tif'))] + + pprint(c.filenames) + assert_array_equal(lbl_identifiers, [lbl for im, lbl in c.filenames]) + + def test_label_tile(self): + + img_path = os.path.join( + base_path, '../test_data/ilastik/pixels_ilastik-multiim-1.2') + lbl_path = os.path.join( + base_path, '../test_data/ilastik/ilastik-multiim-1.2.ilp') + c = IlastikConnector(img_path, lbl_path) + + mat_val = np.array([[0., 0., 0., 0., 0., 0., 0.], + [0., 0., 2., 2., 2., 0., 0.], + [0., 2., 2., 2., 2., 2., 0.], + [0., 2., 2., 2., 2., 2., 0.], + [0., 2., 2., 2., 2., 2., 0.], + [0., 2., 2., 2., 2., 2., 0.], + [0., 2., 2., 2., 2., 2., 0.], + [0., 2., 2., 2., 2., 2., 0.], + [0., 2., 2., 2., 2., 2., 0.], + [0., 2., 2., 2., 2., 0., 0.], + [0., 0., 0., 2., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0.]]) + + lbl = c.label_tile(0, (0, 0, 0), (1, 19, 17), 2) + assert_array_equal(lbl[0, 6:18, 9:16], mat_val != 0) + + mat_val = np.array([[0., 0., 0., 1., 0., 0., 0.], + [0., 1., 1., 1., 1., 0., 0.], + [0., 1., 1., 1., 1., 1., 0.], + [0., 1., 1., 1., 1., 1., 0.], + [0., 1., 1., 1., 1., 1., 0.], + [0., 1., 1., 1., 1., 1., 0.], + [0., 1., 1., 1., 1., 1., 0.], + [0., 1., 1., 1., 1., 1., 0.], + [0., 1., 1., 1., 1., 1., 0.], + [0., 1., 1., 1., 1., 1., 0.], + [0., 1., 1., 1., 1., 0., 0.], + [0., 0., 0., 1., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0.]]) + + lbl = c.label_tile(0, (0, 0, 0), (1, 14, 9), 1) + assert_array_equal(lbl[0, :13, 1:8], mat_val != 0) + + def test_labels_for_ilastik_versions_12_133_are_equal(self): + + img_path = os.path.join( + base_path, '../test_data/ilastik/pixels_ilastik-multiim-1.2') + lbl_path = os.path.join( + base_path, '../test_data/ilastik/ilastik-multiim-1.3.3.ilp') + c13 = IlastikConnector(img_path, lbl_path) + + lbl_path = os.path.join( + base_path, '../test_data/ilastik/ilastik-multiim-1.2.ilp') + c12 = IlastikConnector(img_path, lbl_path) + + lbl12 = c12.label_tile(0, (0, 0, 0), (1, 19, 17), 2) + lbl13 = c13.label_tile(0, (0, 0, 0), (1, 19, 17), 2) + assert_array_equal(lbl12, lbl13) + + assert c12.label_count_for_image(0) == c13.label_count_for_image(0) + + def test_label_tile_purkinjedata(self): + + p = os.path.join(base_path, '../test_data/ilastik/purkinjetest') + img_path = os.path.join(p, 'images') + lbl_path = os.path.join(p, 'ilastik-1.2.2post1mac.ilp') + + c = IlastikConnector(img_path, lbl_path) + print(c.filenames) + print(c.image_count) + + image_id = 0 # 769_cerebellum_5M41_subset_1.tif + pos_zxy = (0, 309, 212) + size_zxy = (1, 4, 5) + + val = np.array([[[True, False, False, False, False], + [True, True, False, False, False], + [True, True, True, False, False], + [True, True, True, False, False]]]) + lbl = c.label_tile(image_id, pos_zxy, size_zxy, 2) + assert_array_equal(lbl, val) + + val = np.array([[[False, False, False, False, True], + [False, False, False, False, True], + [False, False, False, True, True], + [False, False, False, True, True]]]) + lbl = c.label_tile(image_id, pos_zxy, size_zxy, 4) + assert_array_equal(lbl, val) + + def test_labeltile_dimensions_purkinjedata(self): + + p = os.path.join(base_path, '../test_data/ilastik/purkinjetest') + img_path = os.path.join(p, 'images') + lbl_path = os.path.join(p, 'ilastik-1.2.2post1mac.ilp') + + c = IlastikConnector(img_path, lbl_path) + + image_id = 3 # 769_cerebellum_5M41_subset_1.tif + pos_zxy = (0, 0, 0) + size_zxy = (1, 1047, 684) # whole image + + lbl = c.label_tile(image_id, pos_zxy, size_zxy, 4) + self.assertEqual(lbl.shape, size_zxy) + + def test_labeltile_for_image_without_labels(self): + p = os.path.join(base_path, '../test_data/ilastik/purkinjetest') + img_path = os.path.join(p, 'images') + lbl_path = os.path.join(p, 'ilastik-1.2.2post1mac.ilp') + + c = IlastikConnector(img_path, lbl_path) + print(c.filenames) + print(c.image_count) + + image_id = 2 # 769_cerebellum_5M41_subset_1.tif + pos_zxy = (0, 309, 212) + size_zxy = (1, 4, 5) + + val = np.array([[[False, False, False, False, False], + [False, False, False, False, False], + [False, False, False, False, False], + [False, False, False, False, False]]]) + + lbl = c.label_tile(image_id, pos_zxy, size_zxy, 3) + assert_array_equal(lbl, val) + + def test_multi_channel_multi_z(self): + + p = os.path.join(base_path, '../test_data/ilastik/dimensionstest') + img_path = os.path.join(p, 'images') + lbl_path = os.path.join(p, 'x15_y10_z2_c4_classes2.ilp') + c = IlastikConnector(img_path, lbl_path) + pos_zxy = (0, 0, 0) + size_zxy = (2, 15, 10) + + lbl = c.label_tile(0, pos_zxy, size_zxy, 1) + lbl_pos = [[0, 2, 1], [0, 2, 1], [0, 8, 6], [0, 9, 6], [1, 4, 3]] + [self.assertTrue(lbl[pos[0], pos[1], pos[2]]) for pos in lbl_pos] + + lbl = c.label_tile(0, pos_zxy, size_zxy, 2) + lbl_pos = [[0, 2, 2], [0, 3, 2], [0, 8, 7], [0, 9, 7]] + [self.assertTrue(lbl[pos[0], pos[1], pos[2]]) for pos in lbl_pos] + + self.assertFalse(lbl[0, 0, 0]) + + + p = os.path.join(base_path, '../test_data/ilastik/dimensionstest') + img_path = os.path.join(p, 'images') + lbl_path = os.path.join(p, 'x15_y10_z2_c4_classes2_ilastik1.3.3.ilp') + c = IlastikConnector(img_path, lbl_path) + pos_zxy = (0, 0, 0) + size_zxy = (2, 15, 10) + + lbl = c.label_tile(0, pos_zxy, size_zxy, 1) + lbl_pos = [[0, 2, 1], [0, 2, 1], [0, 8, 6], [0, 9, 6], [1, 4, 3]] + [self.assertTrue(lbl[pos[0], pos[1], pos[2]]) for pos in lbl_pos] + + lbl = c.label_tile(0, pos_zxy, size_zxy, 2) + lbl_pos = [[0, 2, 2], [0, 3, 2], [0, 8, 7], [0, 9, 7]] + [self.assertTrue(lbl[pos[0], pos[1], pos[2]]) for pos in lbl_pos] + + self.assertFalse(lbl[0, 0, 0]) + + def test_filter_labeled(self): + + img_path = os.path.join( + base_path, '../test_data/ilastik/pixels_ilastik-multiim-1.2') + + lbl_path = os.path.join( + base_path, '../test_data/ilastik/ilastik-multiim-1.2.ilp') + + c = IlastikConnector(img_path, lbl_path) + c_filtered = c.filter_labeled() + + labelnames = [Path(('pixels_ilastik-multiim-1.2/' + '20width_23height_3slices_2channels.tif')), + Path(('pixels_ilastik-multiim-1.2/' + '34width_28height_2slices_2channels.tif')), + Path(('pixels_ilastik-multiim-1.2/' + '6width_4height_3slices_2channels.tif'))] + + labelnames_flt = [Path(('pixels_ilastik-multiim-1.2/' + '20width_23height_3slices_2channels.tif')), + Path(('pixels_ilastik-multiim-1.2/' + '34width_28height_2slices_2channels.tif'))] + + assert_array_equal(labelnames, [lbl for im, lbl in c.filenames]) + assert_array_equal(labelnames_flt, [ + lbl for im, lbl in c_filtered.filenames]) + + def test_split(self): + img_path = os.path.join( + base_path, '../test_data/ilastik/pixels_ilastik-multiim-1.2') + lbl_path = os.path.join( + base_path, '../test_data/ilastik/ilastik-multiim-1.2.ilp') + + c = IlastikConnector(img_path, lbl_path) + + c1, c2 = c.split(0.3) + + assert_array_equal(c1.image_count() + c2.image_count(), + c.image_count()) diff --git a/yapic_io/tiff_connector.py b/yapic_io/tiff_connector.py index 97d8976..5833c2a 100644 --- a/yapic_io/tiff_connector.py +++ b/yapic_io/tiff_connector.py @@ -1,530 +1,531 @@ -import logging -import os -import collections -from functools import lru_cache -import yapic_io.utils as ut -import numpy as np -import itertools -import warnings -from itertools import zip_longest -from pathlib import Path -from bigtiff import Tiff, PlaceHolder -from yapic_io.connector import Connector - -logger = logging.getLogger(os.path.basename(__file__)) - -FilePair = collections.namedtuple('FilePair', ['img', 'lbl']) - - -def _handle_img_filenames(img_filepath): - ''' - - checks if list of image filepaths, a single wildcard filepath - or a single filepath without a wildcard is given. - - checks if given filenames exit - - splits into folder and list of filenames - ''' - - if type(img_filepath) in (str, Path): - img_filepath = Path(img_filepath).expanduser() - img_filemask = '*.tif' if img_filepath.is_dir() else img_filepath.name - - folder = img_filepath if img_filepath.is_dir() else img_filepath.parent - filenames = [fname.name for fname in sorted(folder.glob(img_filemask))] - - elif type(img_filepath) in (list, tuple): - - img_filenames = img_filepath - img_filenames = [Path(p).expanduser().resolve() - if p is not None else None - for p in img_filepath] - - assert len(img_filenames) > 0, 'list of image filenames is empty' - - for e in img_filenames: - if e is not None: - assert e.exists(), 'file {} not found'.format(e) - - folders = {fname.parent - for fname in img_filenames if fname is not None} - assert len(folders) == 1, 'image filenames are not in the same folder' - folder = next(iter(folders)) - folder = folder.expanduser().resolve() - filenames = [fname.name - if fname is not None else None - for fname in img_filenames] - - else: - raise NotImplementedError( - 'could not import images from {}'.format(img_filepath)) - - logger.info('{} image files detected.'.format(len(filenames))) - return folder, filenames - - -class TiffConnector(Connector): - ''' - Implementation of Connector for tiff images up to 4 dimensions and - corresponding label masks up to 4 dimensions in tiff format. - - Parameters - ---------- - img_filepath : str or list of str - Path to source pixel images (use wildcards for filtering) - or a list of filenames. - label_filepath : str or list of str - Path to label images (use wildcards for filtering) - or a list of filenames. - savepath : str, optional - Directory to save pixel classifiaction results as probability - images. - - Notes - ----- - Label images and pixel images have to be equal in zxy dimensions, - but can differ in nr of channels. - - Labels can be read from multichannel images. This is needed for - networks with multiple output layers. Each channel is assigned one - output layer. Different labels from different channels can overlap - (can share identical xyz positions). - - Examples - -------- - Create a TiffConnector object with pixel and label data. - - >>> from yapic_io.tiff_connector import TiffConnector - >>> pixel_image_dir = 'yapic_io/test_data/tiffconnector_1/im/*.tif' - >>> label_image_dir = 'yapic_io/test_data/tiffconnector_1/labels/*.tif' - >>> t = TiffConnector(pixel_image_dir, label_image_dir) - >>> print(t) - TiffConnector object - image filepath: yapic_io/test_data/tiffconnector_1/im - label filepath: yapic_io/test_data/tiffconnector_1/labels - nr of images: 3 - labelvalue_mapping: [{91: 1, 109: 2, 150: 3}] - - See Also - -------- - yapic_io.ilastik_connector.IlastikConnector - ''' - - def __init__(self, img_filepath, label_filepath, savepath=None): - - self.img_path, img_filenames = _handle_img_filenames(img_filepath) - self.label_path, lbl_filenames = self._handle_lbl_filenames( - label_filepath) - - assert img_filenames is not None, 'no filenames for pixel images found' - assert len(img_filenames) != 0, 'no filenames for pixel images found' - - if lbl_filenames is None or len(lbl_filenames) == 0: - pairs = [(img, None) for img in img_filenames] - else: - pairs = ut.find_best_matching_pairs(img_filenames, lbl_filenames) - - self.filenames = [FilePair(Path(img), Path(lbl) if lbl else None) - for img, lbl in pairs] - - logger.info('Pixel and label files are assigned as follows:') - logger.info('\n'.join('{p.img} <-> {p.lbl}'.format(p=pair) - for pair in self.filenames)) - - self.savepath = Path(savepath) if savepath is not None else None - - original_labels = self.original_label_values_for_all_images() - self.labelvalue_mapping = self.calc_label_values_mapping( - original_labels) - - self.check_label_matrix_dimensions() - - def _handle_lbl_filenames(self, label_filepath): - return _handle_img_filenames(label_filepath) - - def __repr__(self): - - infostring = \ - 'TiffConnector object\n' \ - 'image filepath: {}\n' \ - 'label filepath: {}\n'\ - 'nr of images: {}\n'\ - 'labelvalue_mapping: {}'.format(self.img_path, - self.label_path, - self.image_count(), - self.labelvalue_mapping) - return infostring - - def filter_labeled(self): - ''' - Removes images without labels. - - Returns - ------- - TiffConnector - Connector object containing only images with labels. - ''' - img_fnames = [self.img_path / img for img, lbl in self.filenames - if lbl is not None] - - lbl_fnames = [self.label_path / lbl - for img, lbl in self.filenames - if lbl is not None] - - return TiffConnector(img_fnames, lbl_fnames, savepath=self.savepath) - - def _split_img_fnames(self, fraction, random_seed=42): - # i took this out from the split method to be used in split method - # of child methods (e.g. IlasikConnector) - N = len(self.filenames) - - state = np.random.get_state() - np.random.seed(random_seed) - mask = np.random.choice([True, False], size=N, p=[ - 1 - fraction, fraction]) - np.random.set_state(state) - - img_fnames1 = [self.img_path / img - for img, lbl in itertools.compress(self.filenames, - mask)] - - img_fnames2 = [self.img_path / img - for img, lbl in itertools.compress(self.filenames, - ~mask)] - - if len(img_fnames1) == 0: - msg = ('TiffConnector.split({}): ' + - 'First connector is empty!').format(fraction) - warnings.warn(msg) - - if len(img_fnames2) == 0: - msg = ('TiffConnector.split({}): ' + - 'Second connector is empty!').format(fraction) - warnings.warn(msg) - - return img_fnames1, img_fnames2, mask - - def split(self, fraction, random_seed=42): - ''' - Split the images pseudo-randomly into two Connector subsets. - - The first of size `(1-fraction)*N_images`, the other of size - `fraction*N_images` - - Parameters - ---------- - fraction : float - random_seed : float, optional - - Returns - ------- - connector_1, connector_2 - ''' - - img_fnames1, img_fnames2, mask = self._split_img_fnames( - fraction, - random_seed=random_seed) - - lbl_fnames1 = [self.label_path / lbl if lbl is not None else None - for img, lbl in itertools.compress(self.filenames, - mask)] - lbl_fnames2 = [self.label_path / lbl if lbl is not None else None - for img, lbl in itertools.compress(self.filenames, - ~mask)] - - conn1 = TiffConnector(img_fnames1, lbl_fnames1, savepath=self.savepath) - conn2 = TiffConnector(img_fnames2, lbl_fnames2, savepath=self.savepath) - - # ensures that both resulting tiff_connectors have the same - # labelvalue mapping (issue #1) - conn1.labelvalue_mapping = self.labelvalue_mapping - conn2.labelvalue_mapping = self.labelvalue_mapping - - # np.random.seed(None) - return conn1, conn2 - - def image_count(self): - return len(self.filenames) - - @lru_cache(maxsize=10) - def _open_probability_map_file(self, - image_nr, - label_value, - multichannel=False): - # memmap is slow, so we must cache it to be fast! - fname = self.filenames[image_nr].img - T = 1 # time frame in output probmap - if multichannel: - fname = Path('{}.tif'.format(fname.stem)) - n_classes = multichannel - C = n_classes - else: - fname = Path('{}_class_{}.tif'.format(fname.stem, label_value)) - C = 1 # channel in output probmap - - path = self.savepath / fname - - if not path.exists(): - _, Z, X, Y = self.image_dimensions(image_nr) - images = [PlaceHolder((Y, X, C), 'float32')] * Z - Tiff.write(images, io=str(path), imagej_shape=(T, C, Z)) - - return Tiff.memmap_tcz(path) - - def put_tile(self, - pixels, - pos_zxy, - image_nr, - label_value, - multichannel=False): - - assert self.savepath is not None - np.testing.assert_equal(len(pos_zxy), 3) - np.testing.assert_equal(len(pixels.shape), 3) - pixels = np.array(pixels, dtype=np.float32) - - slices = self._open_probability_map_file(image_nr, - label_value, - multichannel=multichannel) - - T = C = 0 - if multichannel: - C = label_value - 1 - Z, X, Y = pos_zxy - ZZ, XX, YY = np.array(pos_zxy) + pixels.shape - for z in range(Z, ZZ): - slices[T, C, z][Y:YY, X:XX] = pixels[z - Z, ...].T - - @lru_cache(maxsize=10) - def _open_image_file(self, image_nr): - # memmap is slow, so we must cache it to be fast! - path = self.img_path / self.filenames[image_nr].img - return Tiff.memmap_tcz(path) - - def image_dimensions(self, image_nr): - - img = self._open_image_file(image_nr) - Y, X = img[0, 0, 0].shape - return np.hstack([img.shape[1:], (X, Y)]) - - def label_matrix_dimensions(self, image_nr): - ''' - Get dimensions of the label image. - - - Parameters - ---------- - image_nr : int - index of image - - Returns - ------- - (nr_channels, nr_zslices, nr_x, nr_y) - Labelmatrix shape. - ''' - lbl = self._open_label_file(image_nr) - if lbl is None: - return - - Y, X = lbl[0, 0, 0].shape - return np.hstack([lbl.shape[1:], (X, Y)]) - - def check_label_matrix_dimensions(self): - ''' - Check if label matrix dimensions fit to image dimensions, i.e. - everything identical except nr of channels (label mat always 1). - - Raises - ------ - AssertionError - If label matrix dimensions don't fit to image dimensions. - ''' - N_channels = None - - for i, (img_fname, lbl_fname) in enumerate(self.filenames): - img_dim = self.image_dimensions(i) - lbl_dim = self.label_matrix_dimensions(i) - - msg = 'Dimensions for image #{}: img.shape={}, lbl.shape={}' - logger.debug(msg.format(i, img_dim, lbl_dim)) - - if lbl_dim is None: - continue - - _, *img_dim = img_dim - ch, *lbl_dim = lbl_dim - - if N_channels is None: - N_channels = ch - - msg = 'Label channels inconsistent for {}'.format(lbl_fname) - np.testing.assert_equal(N_channels, ch, msg) - msg = 'Invalid image dims for {} and {}'.format(img_fname, - lbl_fname) - np.testing.assert_array_equal(lbl_dim, img_dim, msg) - - def _mapped_label_value_to_original(self, label_value): - ''' - self.labelvalue_mapping in reverse - ''' - for c, mapping in enumerate(self.labelvalue_mapping): - reverse_mapping = {v: k for k, v in mapping.items()} - original = reverse_mapping.get(label_value) - if original is not None: - return c, original - - msg = 'Should not be reached! (mapped_label_value={}, mapping={})' - raise Exception(msg.format(label_value, self.labelvalue_mapping)) - - def get_tile(self, image_nr, pos, size): - ut.assert_valid_image_subset(self.image_dimensions(image_nr), - pos, - size) - T = 0 - C, Z, X, Y = pos - CC, ZZ, XX, YY = np.array(pos) + size - - slices = self._open_image_file(image_nr) - tile = [[s[Y:YY, X:XX] for s in c[Z:ZZ]] for c in slices[T, C:CC, :]] - tile = np.stack(tile) - tile = np.moveaxis(tile, (0, 1, 2, 3), (0, 1, 3, 2)) - - return tile.astype('float') - - def label_tile(self, image_nr, pos_zxy, size_zxy, label_value): - - T = 0 - Z, X, Y = pos_zxy - ZZ, XX, YY = np.array(pos_zxy) + size_zxy - C, original_label_value = self._mapped_label_value_to_original( - label_value) - - slices = self._open_label_file(image_nr) - if slices is None: - # return tile with False values - return np.zeros(size_zxy) != 0 - tile = [s[Y:YY, X:XX] for s in slices[T, C, Z:ZZ]] - tile = np.stack(tile) - tile = np.moveaxis(tile, (0, 1, 2), (0, 2, 1)) - - tile = (tile == original_label_value) - return tile - - @lru_cache(maxsize=10) - def _open_label_file(self, image_nr): - # memmap is slow, so we must cache it to be fast! - path = self.img_path / self.filenames[image_nr].img - label_filename = self.filenames[image_nr].lbl - - if label_filename is None: - logger.warning( - 'no label matrix file found for image file %s', str(image_nr)) - return None - - path = self.label_path / label_filename - logger.debug('Trying to load labelmat %s', path) - - return Tiff.memmap_tcz(path) - - @staticmethod - def calc_label_values_mapping(original_labels): - ''' - Assign unique labelvalues to original labelvalues. - - For multichannel label images it might happen, that identical - labels occur in different channels. - to avoid conflicts, original labelvalues are mapped to unique values - in ascending order 1, 2, 3, 4... - This is defined in self.labelvalue_mapping: - - [{orig_label1: 1, orig_label2: 2}, {orig_label1: 3, orig_label2: 4},..] - - Each element of the list correponds to one label channel. - Keys are the original labels, values are the assigned labels that - will be seen by the Dataset object. - - Parameters - ---------- - original_labels : array_like - List of original label values. - - Returns - ------- - dict - Labelvalue mapping with original labels as key and new label as - value. - ''' - new_labels = itertools.count(1) - - label_mappings = [ - {l: next(new_labels) for l in sorted(labels_per_channel)} - for labels_per_channel in original_labels - ] - - logger.debug('Label values are mapped to ascending values:') - logger.debug(label_mappings) - return label_mappings - - @lru_cache(maxsize=1) - def original_label_values_for_all_images(self): - ''' - Get all unique label values per image. - - Returns - ------- - list - List of sets. Each set corresponds to 1 label channel. - each set contains the label values of that channel. - E.g. `[{91, 109, 150}, {90, 100}]` for two label channels - ''' - labels_per_channel = [] - - for image_nr in range(self.image_count()): - slices = self._open_label_file(image_nr) - if slices is None: - continue - - T = 0 - C = slices.shape[1] - labels = [np.unique(np.concatenate([np.unique(s) - for s in slices[T, c, :]])) - for c in range(C)] - labels = [set(labels) - {0} for labels in labels] - - labels_per_channel = [l1.union(l2) - for l1, l2 in zip_longest(labels_per_channel, - labels, - fillvalue=set())] - - return labels_per_channel - - @lru_cache(maxsize=1500) - def label_count_for_image(self, image_nr): - ''' - Get number of labels per labelvalue for an image. - - Parameters - ---------- - image_nr : int - index of image - - Returns - ------- - dict - ''' - slices = self._open_label_file(image_nr) - if slices is None: - return None - - T = 0 - C = slices.shape[1] - labels = [np.unique(np.concatenate([np.unique(s) - for s in slices[T, c, :]])) - for c in range(C)] - - original_label_count = [{l: sum(np.count_nonzero(s == l) - for s in slices[T, c, :]) - for l in labels[c] if l > 0} - for c in range(C)] - label_count = {self.labelvalue_mapping[c][l]: count - for c, orig in enumerate(original_label_count) - for l, count in orig.items()} - return label_count +import logging +import os +import collections +from functools import lru_cache +import yapic_io.utils as ut +import numpy as np +import itertools +import warnings +from itertools import zip_longest +from pathlib import Path +from bigtiff import Tiff, PlaceHolder +from yapic_io.connector import Connector + +logger = logging.getLogger(os.path.basename(__file__)) + +FilePair = collections.namedtuple('FilePair', ['img', 'lbl']) + + +def _handle_img_filenames(img_filepath): + ''' + - checks if list of image filepaths, a single wildcard filepath + or a single filepath without a wildcard is given. + - checks if given filenames exit + - splits into folder and list of filenames + ''' + + if type(img_filepath) in (str, Path): + img_filepath = Path(img_filepath).expanduser() + img_filemask = '*.tif' if img_filepath.is_dir() else img_filepath.name + + folder = img_filepath if img_filepath.is_dir() else img_filepath.parent + filenames = [fname.name for fname in sorted(folder.glob(img_filemask))] + + elif type(img_filepath) in (list, tuple): + + img_filenames = img_filepath + img_filenames = [Path(p).expanduser().resolve() + if p is not None else None + for p in img_filepath] + + assert len(img_filenames) > 0, 'list of image filenames is empty' + + for e in img_filenames: + if e is not None: + assert e.exists(), 'file {} not found'.format(e) + + folders = {fname.parent + for fname in img_filenames if fname is not None} + assert len(folders) == 1, 'image filenames are not in the same folder' + folder = next(iter(folders)) + folder = folder.expanduser().resolve() + filenames = [fname.name + if fname is not None else None + for fname in img_filenames] + + else: + raise NotImplementedError( + 'could not import images from {}'.format(img_filepath)) + + logger.info('{} image files detected.'.format(len(filenames))) + return folder, filenames + + +class TiffConnector(Connector): + ''' + Implementation of Connector for tiff images up to 4 dimensions and + corresponding label masks up to 4 dimensions in tiff format. + + Parameters + ---------- + img_filepath : str or list of str + Path to source pixel images (use wildcards for filtering) + or a list of filenames. + label_filepath : str or list of str + Path to label images (use wildcards for filtering) + or a list of filenames. + savepath : str, optional + Directory to save pixel classifiaction results as probability + images. + + Notes + ----- + Label images and pixel images have to be equal in zxy dimensions, + but can differ in nr of channels. + + Labels can be read from multichannel images. This is needed for + networks with multiple output layers. Each channel is assigned one + output layer. Different labels from different channels can overlap + (can share identical xyz positions). + + Examples + -------- + Create a TiffConnector object with pixel and label data. + + >>> from yapic_io.tiff_connector import TiffConnector + >>> pixel_image_dir = 'yapic_io/test_data/tiffconnector_1/im/*.tif' + >>> label_image_dir = 'yapic_io/test_data/tiffconnector_1/labels/*.tif' + >>> t = TiffConnector(pixel_image_dir, label_image_dir) + >>> print(t) + TiffConnector object + image filepath: yapic_io/test_data/tiffconnector_1/im + label filepath: yapic_io/test_data/tiffconnector_1/labels + nr of images: 3 + labelvalue_mapping: [{91: 1, 109: 2, 150: 3}] + + See Also + -------- + yapic_io.ilastik_connector.IlastikConnector + ''' + + def __init__(self, img_filepath, label_filepath, savepath=None): + + self.img_path, img_filenames = _handle_img_filenames(img_filepath) + self.label_path, lbl_filenames = self._handle_lbl_filenames( + label_filepath) + + assert img_filenames is not None, 'no filenames for pixel images found' + assert len(img_filenames) != 0, 'no filenames for pixel images found' + + if lbl_filenames is None or len(lbl_filenames) == 0: + pairs = [(img, None) for img in img_filenames] + else: + pairs = ut.find_best_matching_pairs(img_filenames, lbl_filenames) + + self._assemble_filenames(pairs) + + logger.info('Pixel and label files are assigned as follows:') + logger.info('\n'.join('{p.img} <-> {p.lbl}'.format(p=pair) + for pair in self.filenames)) + + self.savepath = Path(savepath) if savepath is not None else None + + original_labels = self.original_label_values_for_all_images() + self.labelvalue_mapping = self.calc_label_values_mapping( + original_labels) + + self.check_label_matrix_dimensions() + + def _assemble_filenames(self, pairs): + self.filenames = [FilePair(Path(img), Path(lbl) if lbl else None) + for img, lbl in pairs] + + + def _handle_lbl_filenames(self, label_filepath): + return _handle_img_filenames(label_filepath) + + def __repr__(self): + + infostring = \ + 'TiffConnector object\n' \ + 'image filepath: {}\n' \ + 'label filepath: {}\n'\ + 'nr of images: {}\n'\ + 'labelvalue_mapping: {}'.format(self.img_path, + self.label_path, + self.image_count(), + self.labelvalue_mapping) + return infostring + + def filter_labeled(self): + ''' + Removes images without labels. + + Returns + ------- + TiffConnector + Connector object containing only images with labels. + ''' + img_fnames = [self.img_path / img for img, lbl in self.filenames + if lbl is not None] + + lbl_fnames = [self.label_path / lbl + for img, lbl in self.filenames + if lbl is not None] + + return TiffConnector(img_fnames, lbl_fnames, savepath=self.savepath) + + def _split_img_fnames(self, fraction, random_seed=42): + # i took this out from the split method to be used in split method + # of child methods (e.g. IlasikConnector) + N = len(self.filenames) + + state = np.random.get_state() + np.random.seed(random_seed) + mask = np.random.choice([True, False], size=N, p=[ + 1 - fraction, fraction]) + np.random.set_state(state) + + img_fnames1 = [self.img_path / img + for img, lbl in itertools.compress(self.filenames, + mask)] + + img_fnames2 = [self.img_path / img + for img, lbl in itertools.compress(self.filenames, + ~mask)] + + if len(img_fnames1) == 0: + msg = ('TiffConnector.split({}): ' + + 'First connector is empty!').format(fraction) + warnings.warn(msg) + + if len(img_fnames2) == 0: + msg = ('TiffConnector.split({}): ' + + 'Second connector is empty!').format(fraction) + warnings.warn(msg) + + return img_fnames1, img_fnames2, mask + + def split(self, fraction, random_seed=42): + ''' + Split the images pseudo-randomly into two Connector subsets. + + The first of size `(1-fraction)*N_images`, the other of size + `fraction*N_images` + + Parameters + ---------- + fraction : float + random_seed : float, optional + + Returns + ------- + connector_1, connector_2 + ''' + + img_fnames1, img_fnames2, mask = self._split_img_fnames( + fraction, + random_seed=random_seed) + + lbl_fnames1 = [self.label_path / lbl if lbl is not None else None + for img, lbl in itertools.compress(self.filenames, + mask)] + lbl_fnames2 = [self.label_path / lbl if lbl is not None else None + for img, lbl in itertools.compress(self.filenames, + ~mask)] + + conn1 = TiffConnector(img_fnames1, lbl_fnames1, savepath=self.savepath) + conn2 = TiffConnector(img_fnames2, lbl_fnames2, savepath=self.savepath) + + # ensures that both resulting tiff_connectors have the same + # labelvalue mapping (issue #1) + conn1.labelvalue_mapping = self.labelvalue_mapping + conn2.labelvalue_mapping = self.labelvalue_mapping + + # np.random.seed(None) + return conn1, conn2 + + def image_count(self): + return len(self.filenames) + + @lru_cache(maxsize=10) + def _open_probability_map_file(self, + image_nr, + label_value, + multichannel=False): + # memmap is slow, so we must cache it to be fast! + fname = self.filenames[image_nr].img + T = 1 # time frame in output probmap + if multichannel: + fname = Path('{}.tif'.format(fname.stem)) + n_classes = multichannel + C = n_classes + else: + fname = Path('{}_class_{}.tif'.format(fname.stem, label_value)) + C = 1 # channel in output probmap + + path = self.savepath / fname + + if not path.exists(): + _, Z, X, Y = self.image_dimensions(image_nr) + images = [PlaceHolder((Y, X, C), 'float32')] * Z + Tiff.write(images, io=str(path), imagej_shape=(T, C, Z)) + + return Tiff.memmap_tcz(path) + + def put_tile(self, + pixels, + pos_zxy, + image_nr, + label_value, + multichannel=False): + + assert self.savepath is not None + np.testing.assert_equal(len(pos_zxy), 3) + np.testing.assert_equal(len(pixels.shape), 3) + pixels = np.array(pixels, dtype=np.float32) + + slices = self._open_probability_map_file(image_nr, + label_value, + multichannel=multichannel) + + T = C = 0 + if multichannel: + C = label_value - 1 + Z, X, Y = pos_zxy + ZZ, XX, YY = np.array(pos_zxy) + pixels.shape + for z in range(Z, ZZ): + slices[T, C, z][Y:YY, X:XX] = pixels[z - Z, ...].T + + @lru_cache(maxsize=10) + def _open_image_file(self, image_nr): + # memmap is slow, so we must cache it to be fast! + path = self.img_path / self.filenames[image_nr].img + return Tiff.memmap_tcz(path) + + def image_dimensions(self, image_nr): + + img = self._open_image_file(image_nr) + Y, X = img[0, 0, 0].shape + return np.hstack([img.shape[1:], (X, Y)]) + + def label_matrix_dimensions(self, image_nr): + ''' + Get dimensions of the label image. + + + Parameters + ---------- + image_nr : int + index of image + + Returns + ------- + (nr_channels, nr_zslices, nr_x, nr_y) + Labelmatrix shape. + ''' + lbl = self._open_label_file(image_nr) + if lbl is None: + return + + Y, X = lbl[0, 0, 0].shape + return np.hstack([lbl.shape[1:], (X, Y)]) + + def check_label_matrix_dimensions(self): + ''' + Check if label matrix dimensions fit to image dimensions, i.e. + everything identical except nr of channels (label mat always 1). + + Raises + ------ + AssertionError + If label matrix dimensions don't fit to image dimensions. + ''' + N_channels = None + + for i, (img_fname, lbl_fname) in enumerate(self.filenames): + img_dim = self.image_dimensions(i) + lbl_dim = self.label_matrix_dimensions(i) + + msg = 'Dimensions for image #{}: img.shape={}, lbl.shape={}' + logger.debug(msg.format(i, img_dim, lbl_dim)) + + if lbl_dim is None: + continue + + _, *img_dim = img_dim + ch, *lbl_dim = lbl_dim + + if N_channels is None: + N_channels = ch + + msg = 'Label channels inconsistent for {}'.format(lbl_fname) + np.testing.assert_equal(N_channels, ch, msg) + msg = 'Invalid image dims for {} and {}'.format(img_fname, + lbl_fname) + np.testing.assert_array_equal(lbl_dim, img_dim, msg) + + def _mapped_label_value_to_original(self, label_value): + ''' + self.labelvalue_mapping in reverse + ''' + for c, mapping in enumerate(self.labelvalue_mapping): + reverse_mapping = {v: k for k, v in mapping.items()} + original = reverse_mapping.get(label_value) + if original is not None: + return c, original + + msg = 'Should not be reached! (mapped_label_value={}, mapping={})' + raise Exception(msg.format(label_value, self.labelvalue_mapping)) + + def get_tile(self, image_nr, pos, size): + T = 0 + C, Z, X, Y = pos + CC, ZZ, XX, YY = np.array(pos) + size + + slices = self._open_image_file(image_nr) + tile = [[s[Y:YY, X:XX] for s in c[Z:ZZ]] for c in slices[T, C:CC, :]] + tile = np.stack(tile) + tile = np.moveaxis(tile, (0, 1, 2, 3), (0, 1, 3, 2)) + + return tile.astype('float') + + def label_tile(self, image_nr, pos_zxy, size_zxy, label_value): + + T = 0 + Z, X, Y = pos_zxy + ZZ, XX, YY = np.array(pos_zxy) + size_zxy + C, original_label_value = self._mapped_label_value_to_original( + label_value) + + slices = self._open_label_file(image_nr) + if slices is None: + # return tile with False values + return np.zeros(size_zxy) != 0 + tile = [s[Y:YY, X:XX] for s in slices[T, C, Z:ZZ]] + tile = np.stack(tile) + tile = np.moveaxis(tile, (0, 1, 2), (0, 2, 1)) + + tile = (tile == original_label_value) + return tile + + @lru_cache(maxsize=10) + def _open_label_file(self, image_nr): + # memmap is slow, so we must cache it to be fast! + path = self.img_path / self.filenames[image_nr].img + label_filename = self.filenames[image_nr].lbl + + if label_filename is None: + logger.warning( + 'no label matrix file found for image file %s', str(image_nr)) + return None + + path = self.label_path / label_filename + logger.debug('Trying to load labelmat %s', path) + + return Tiff.memmap_tcz(path) + + @staticmethod + def calc_label_values_mapping(original_labels): + ''' + Assign unique labelvalues to original labelvalues. + + For multichannel label images it might happen, that identical + labels occur in different channels. + to avoid conflicts, original labelvalues are mapped to unique values + in ascending order 1, 2, 3, 4... + This is defined in self.labelvalue_mapping: + + [{orig_label1: 1, orig_label2: 2}, {orig_label1: 3, orig_label2: 4},..] + + Each element of the list correponds to one label channel. + Keys are the original labels, values are the assigned labels that + will be seen by the Dataset object. + + Parameters + ---------- + original_labels : array_like + List of original label values. + + Returns + ------- + dict + Labelvalue mapping with original labels as key and new label as + value. + ''' + new_labels = itertools.count(1) + + label_mappings = [ + {l: next(new_labels) for l in sorted(labels_per_channel)} + for labels_per_channel in original_labels + ] + + logger.debug('Label values are mapped to ascending values:') + logger.debug(label_mappings) + return label_mappings + + @lru_cache(maxsize=1) + def original_label_values_for_all_images(self): + ''' + Get all unique label values per image. + + Returns + ------- + list + List of sets. Each set corresponds to 1 label channel. + each set contains the label values of that channel. + E.g. `[{91, 109, 150}, {90, 100}]` for two label channels + ''' + labels_per_channel = [] + + for image_nr in range(self.image_count()): + slices = self._open_label_file(image_nr) + if slices is None: + continue + + T = 0 + C = slices.shape[1] + labels = [np.unique(np.concatenate([np.unique(s) + for s in slices[T, c, :]])) + for c in range(C)] + labels = [set(labels) - {0} for labels in labels] + + labels_per_channel = [l1.union(l2) + for l1, l2 in zip_longest(labels_per_channel, + labels, + fillvalue=set())] + + return labels_per_channel + + @lru_cache(maxsize=1500) + def label_count_for_image(self, image_nr): + ''' + Get number of labels per labelvalue for an image. + + Parameters + ---------- + image_nr : int + index of image + + Returns + ------- + dict + ''' + slices = self._open_label_file(image_nr) + if slices is None: + return None + + T = 0 + C = slices.shape[1] + labels = [np.unique(np.concatenate([np.unique(s) + for s in slices[T, c, :]])) + for c in range(C)] + + original_label_count = [{l: sum(np.count_nonzero(s == l) + for s in slices[T, c, :]) + for l in labels[c] if l > 0} + for c in range(C)] + label_count = {self.labelvalue_mapping[c][l]: count + for c, orig in enumerate(original_label_count) + for l, count in orig.items()} + return label_count