Source code for WORC.tools.fingerprinting

#!/usr/bin/env python

# Copyright 2016-2022 Biomedical Imaging Group Rotterdam, Departments of
# Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import configparser
import SimpleITK as sitk
from WORC.addexceptions import WORCKeyError, WORCValueError
from WORC.processing.label_processing import findlabeldata

# Modalities for which Fingerprinter.execute disables image normalization and
# uses a fixed bin width.
quantitative_modalities = [
    'CT',
    'PET',
    'Thermography',
    'ADC',
    'MG',
]

# Modalities for which Fingerprinter.execute enables image normalization and
# uses a fixed bin count.
qualitative_modalities = [
    'MRI',
    'MR',
    'DWI',
    'US',
]

# Every modality name accepted by the fingerprinter, quantitative ones first.
all_modalities = [*quantitative_modalities, *qualitative_modalities]


class Fingerprinter(object):
    """Fingerprinting object for WORC configuration.

    Reads a WORC configuration file and adjusts a number of its settings
    based on the dataset. The attributes below are expected to be set by the
    caller before ``execute`` is called.

    Attributes
    ----------
    images: list of image files, read with SimpleITK
    segmentations: list of segmentation files, read with SimpleITK (optional)
    features: list of feature files, used to look up labels when no images
        are provided
    labels: label source, passed as ``patientinfo`` to ``findlabeldata``
    configuration: configuration file(s), passed to ``ConfigParser.read``
    type: kind of fingerprint, either 'classification' or 'images'
    """

    def __init__(self):
        """Initialize object with all inputs unset."""
        self.images = None
        self.segmentations = None
        self.features = None
        self.labels = None
        self.configuration = None
        self.type = None

    def execute(self):
        """Determine fingerprint of dataset.

        Reads the configuration from ``self.configuration`` and adapts it
        according to ``self.type``:

        - 'classification': for a single binary label, set the resampling
          usage depending on the class balance.
        - 'images': set normalization and discretization depending on the
          modality, and the feature extraction mode (2D, 2.5D or 3D)
          depending on the image spacing and the segmentations.

        Note that for 'images', ``self.images`` and ``self.segmentations``
        are truncated in place to at most ``max_num_image`` entries.

        Returns
        -------
        config: configparser.ConfigParser
            The adapted configuration.

        Raises
        ------
        WORCValueError
            If ``self.type`` is not valid, or the inputs required for the
            requested fingerprint were not provided.
        WORCKeyError
            If the configured image type is not a known modality.

        """
        # Read the config file
        config = configparser.ConfigParser()
        config.read(self.configuration)

        if self.type == 'classification':
            self._fingerprint_classification(config)
        elif self.type == 'images':
            self._fingerprint_images(config)
        else:
            raise WORCValueError(f'Type {self.type} is not valid for fingeprinting. Should be one of ["classification", "images"].')

        return config

    @staticmethod
    def _parse_label_names(label_names):
        """Split a comma-separated config value into a list of label names."""
        return [name.strip() for name in label_names.split(',') if name.strip()]

    @staticmethod
    def _class_counts(labels):
        """Return the number of samples per unique label value, sorted by value."""
        _, counts = np.unique(labels, return_counts=True)
        return counts.tolist()

    def _fingerprint_classification(self, config):
        """Set the resampling usage in config based on the class balance."""
        # The config value is a single string: count the label names it
        # contains, not its characters.
        label_names = self._parse_label_names(config['Labels']['label_names'])
        if len(label_names) != 1:
            # Multiple labels: class balance is not checked
            return

        if self.images:
            filenames = self.images
        elif self.features:
            filenames = self.features
        else:
            raise WORCValueError('Either need images or features for classification fingerprinting, neither provided.')

        label_data, _ = findlabeldata(patientinfo=self.labels,
                                      label_type=label_names[0],
                                      filenames=filenames)

        labels = label_data['label'][0].tolist()
        counts = self._class_counts(labels)
        if len(counts) != 2:
            # Not a binary classification problem, nothing to check
            return

        # Binary classification, check class balance. Counting per class
        # (instead of summing the labels) does not rely on a 0/1 encoding;
        # the class with the highest label value is reported as positive.
        total = float(len(labels))
        negatives, positives = (float(count) for count in counts)
        print(positives, negatives, positives/total, negatives/total)
        if negatives/total > 0.60 or positives/total > 0.60:
            # Class imbalance, keep resampling
            print('Class imbalance, keep default Resampling usage of 0.20.')
            config['Resampling']['Use'] = '0.20'
        else:
            # No class imbalance, turn resampling off
            print('No class imbalance, setting Resampling usage to 0.0.')
            config['Resampling']['Use'] = '0.00'

    def _fingerprint_images(self, config):
        """Set preprocessing and feature extraction options in config."""
        # Determine modality
        modality = config['ImageFeatures']['image_type']

        if modality in qualitative_modalities:
            # Apply image normalization and use fixed bin count
            print('Qualitative modality: Apply image normalization and use fixed bin count.')
            config['Preprocessing']['Normalize'] = 'True'
            config['PyRadiomics']['binCount'] = config['ImageFeatures']['GLCM_levels']
            config['PyRadiomics']['binWidth'] = 'None'

        elif modality in quantitative_modalities:
            # Apply no image normalization and use fixed bin width
            print('Quantitative modality: Apply no image normalization and use fixed bin width.')
            config['Preprocessing']['Normalize'] = 'False'
            config['PyRadiomics']['binCount'] = 'None'
            config['PyRadiomics']['binWidth'] = '25'  # Default of PyRadiomics

        else:
            raise WORCKeyError(f'{modality} is not a known modality, should be one of {all_modalities}.')

        if not self.images:
            raise WORCValueError('Need images for image fingerprinting, none provided.')

        # Only use a limited number of images (and the matching
        # segmentations) to keep fingerprinting fast
        max_num_images = int(config['Fingerprinting']['max_num_image'])
        if len(self.images) > max_num_images:
            self.images = self.images[0:max_num_images]
            if self.segmentations is not None:
                self.segmentations = self.segmentations[0:max_num_images]

        # Determine if we are dealing with 2D, 2.5D, or 3D images and segmentations
        # NOTE: images are indexed up to the third spacing element
        spacings = [sitk.ReadImage(imagefile).GetSpacing()
                    for imagefile in self.images]
        mean_spacings = [np.mean([spacing[axis] for spacing in spacings])
                         for axis in range(3)]

        # Assume x and y spacing are the same, so we just compare x with z
        if mean_spacings[2] > 2.0*mean_spacings[0]:
            # 2.5D images, use standard feature extraction
            print(f'Mean spacing {mean_spacings}, thus 2.5D images, use standard feature extraction.')
            config['ImageFeatures']['extraction_mode'] = '2.5D'
        else:
            # 3D images, thus compute 3D shape features
            print(f'Mean spacing {mean_spacings}, thus 3D images, use 3D feature extraction.')
            config['ImageFeatures']['extraction_mode'] = '3D'

            # We keep all the default 2.5D features, as most of them are only defined in 2D
            config['ImageFeatures']['texture_Gabor'] = 'True'
            config['ImageFeatures']['texture_LBP'] = 'True'
            config['ImageFeatures']['texture_GLCM'] = 'True'
            config['ImageFeatures']['texture_GLCMMS'] = 'True'
            config['ImageFeatures']['vessel'] = 'True'
            config['ImageFeatures']['log'] = 'True'
            config['ImageFeatures']['phase'] = 'True'

        # Check if segmentations are 2D or 3D
        if self.segmentations is not None:
            num_masked_slices_all = list()
            for segmentationfile in self.segmentations:
                segmentation = sitk.GetArrayFromImage(sitk.ReadImage(segmentationfile))
                # Builtin bool: the np.bool alias was removed from NumPy
                segmentation = segmentation.astype(bool)
                # Count the slices along the first axis containing any mask
                num_masked_slices = len(np.flatnonzero(np.any(segmentation, axis=(1, 2))))
                num_masked_slices_all.append(num_masked_slices)

            # Require at least one segmentation: all() on an empty list is True
            if num_masked_slices_all and all(elem == 1 for elem in num_masked_slices_all):
                print('All masks only contain one slice, so turn of 3D features.')
                # NOTE: PREDICT will mostly switch itself between these features by looking at the masks
                config['ImageFeatures']['extraction_mode'] = '2D'

                config['ImageFeatures']['orientation'] = 'False'
                config['ImageFeatures']['texture_GLCMMS'] = 'False'

                # For PyRadiomics, only this parameter needs to be changed
                config['PyRadiomics']['force2D'] = 'True'