#!/usr/bin/env python
# Copyright 2016-2022 Biomedical Imaging Group Rotterdam, Departments of
# Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import configparser
import SimpleITK as sitk
from WORC.addexceptions import WORCKeyError, WORCValueError
from WORC.processing.label_processing import findlabeldata
# Modalities handled by the fingerprinter as quantitative: no image
# normalization is applied and a fixed bin width is used.
quantitative_modalities = [
    'CT',
    'PET',
    'Thermography',
    'ADC',
    'MG',
]

# Modalities handled by the fingerprinter as qualitative: image
# normalization is applied and a fixed bin count is used.
qualitative_modalities = [
    'MRI',
    'MR',
    'DWI',
    'US',
]

# Every modality name accepted in the configuration, quantitative ones first.
all_modalities = [*quantitative_modalities, *qualitative_modalities]
class Fingerprinter(object):
    """Fingerprinting object for WORC configuration.

    The caller sets the attributes below and then calls ``execute``, which
    reads the configuration file and returns an adjusted configuration.

    Attributes
    ----------
    images: list or None
        Image files, read with SimpleITK when ``type`` is ``'images'`` and
        used for label matching when ``type`` is ``'classification'``.
    segmentations: list or None
        Segmentation files matching ``images``; optional.
    features: list or None
        Feature files, used for label matching when no images are given.
    labels: object
        Passed as ``patientinfo`` to ``findlabeldata``.
    configuration: str
        Path of the configuration file to read.
    type: str
        Either ``'classification'`` or ``'images'``.
    """

    def __init__(self):
        """Initialize object with all inputs unset."""
        self.images = None
        self.segmentations = None
        self.features = None
        self.labels = None
        self.configuration = None
        self.type = None

    def execute(self):
        """Determine fingerprint of dataset and adapt the configuration.

        Returns
        -------
        config: configparser.ConfigParser
            The configuration read from ``self.configuration`` with the
            fingerprint-dependent fields overwritten.

        Raises
        ------
        WORCValueError
            If ``self.type`` is not ``'classification'`` or ``'images'``,
            or if the inputs required for that type are missing.
        WORCKeyError
            If the configured image modality is unknown.
        """
        # Read the config file
        config = configparser.ConfigParser()
        config.read(self.configuration)

        if self.type == 'classification':
            self._fingerprint_classification(config)
        elif self.type == 'images':
            self._fingerprint_images(config)
        else:
            raise WORCValueError(f'Type {self.type} is not valid for fingeprinting. Should be one of ["classification", "images"].')

        return config

    @staticmethod
    def _parse_label_names(raw):
        """Split a comma-separated label name string into a list of names."""
        return [name.strip() for name in raw.split(',') if name.strip()]

    @staticmethod
    def _resampling_use_for_labels(labels):
        """Return the Resampling usage for binary labels, else None.

        Returns ``'0.20'`` when one class holds more than 60% of the
        samples, ``'0.00'`` when the classes are balanced, and ``None``
        when the labels do not contain exactly two distinct values.
        """
        label_array = np.asarray(labels)
        classes = np.unique(label_array)
        if len(classes) != 2:
            return None

        # Binary classification, check class balance. Count the members of
        # the highest-valued class explicitly rather than summing the
        # labels, so the result does not depend on a 0/1 coding.
        total = float(label_array.size)
        positives = float(np.sum(label_array == classes[-1]))
        negatives = total - positives
        print(positives, negatives, positives/total, negatives/total)

        if negatives/total > 0.60 or positives/total > 0.60:
            # Class imbalance, keep resampling
            print('Class imbalance, keep default Resampling usage of 0.20.')
            return '0.20'

        # No class imbalance, turn resampling off
        print('No class imbalance, setting Resampling usage to 0.0.')
        return '0.00'

    @staticmethod
    def _count_masked_slices(segmentation):
        """Count slices along the first axis with at least one nonzero voxel."""
        # np.bool was removed in NumPy 1.24; the builtin bool is equivalent.
        mask = np.asarray(segmentation).astype(bool)
        return len(np.flatnonzero(np.any(mask, axis=(1, 2))))

    def _fingerprint_classification(self, config):
        """Set the Resampling usage in config based on class balance."""
        # The config value is a single string; count label names, not
        # characters, to decide between single- and multi-label.
        label_type = self._parse_label_names(config['Labels']['label_names'])
        if len(label_type) != 1:
            # Multiclass / multilabel: nothing to adjust
            return

        if self.images:
            filenames = self.images
        elif self.features:
            filenames = self.features
        else:
            raise WORCValueError('Either need images or features for classification fingerprinting, neither provided.')

        # NOTE(review): label_type is passed as a list of names; confirm
        # findlabeldata accepts a list here.
        label_data, _ = findlabeldata(patientinfo=self.labels,
                                      label_type=label_type,
                                      filenames=filenames)
        labels = label_data['label'][0].tolist()

        resampling_use = self._resampling_use_for_labels(labels)
        if resampling_use is not None:
            config['Resampling']['Use'] = resampling_use

    def _fingerprint_images(self, config):
        """Set normalization, binning and extraction mode from the images."""
        if not self.images:
            raise WORCValueError('Need images for image fingerprinting, none provided.')

        # Determine modality
        modality = config['ImageFeatures']['image_type']
        if modality in qualitative_modalities:
            # Apply image normalization and use fixed bin count
            print('Qualitative modality: Apply image normalization and use fixed bin count.')
            config['Preprocessing']['Normalize'] = 'True'
            config['PyRadiomics']['binCount'] = config['ImageFeatures']['GLCM_levels']
            config['PyRadiomics']['binWidth'] = 'None'
        elif modality in quantitative_modalities:
            # Apply no image normalization and use fixed bin width
            print('Quantitative modality: Apply no image normalization and use fixed bin width.')
            config['Preprocessing']['Normalize'] = 'False'
            config['PyRadiomics']['binCount'] = 'None'
            config['PyRadiomics']['binWidth'] = '25'  # Default of PyRadiomics
        else:
            raise WORCKeyError(f'{modality} is not a known modality, should be one of {all_modalities}.')

        # Only inspect the first max_num_image images, and the matching
        # segmentations, to bound the amount of data read.
        max_num_images = int(config['Fingerprinting']['max_num_image'])
        if len(self.images) > max_num_images:
            self.images = self.images[0:max_num_images]
            if self.segmentations is not None:
                self.segmentations = self.segmentations[0:max_num_images]

        # Determine if we are dealing with 2D, 2.5D, or 3D images and segmentations
        spacings_x = list()
        spacings_y = list()
        spacings_z = list()
        for imagefile in self.images:
            spacing = sitk.ReadImage(imagefile).GetSpacing()
            spacings_x.append(spacing[0])
            spacings_y.append(spacing[1])
            spacings_z.append(spacing[2])

        mean_spacings = [np.mean(spacings_x), np.mean(spacings_y), np.mean(spacings_z)]

        # Assume x and y spacing are the same, so we just compare x with z
        if mean_spacings[2] > 2.0*mean_spacings[0]:
            # 2.5D images, use standard feature extraction
            print(f'Mean spacing {mean_spacings}, thus 2.5D images, use standard feature extraction.')
            config['ImageFeatures']['extraction_mode'] = '2.5D'
        else:
            # 3D images, thus compute 3D shape features
            print(f'Mean spacing {mean_spacings}, thus 3D images, use 3D feature extraction.')
            config['ImageFeatures']['extraction_mode'] = '3D'

        # We keep all the default 2.5D features, as most of them are only defined in 2D
        config['ImageFeatures']['texture_Gabor'] = 'True'
        config['ImageFeatures']['texture_LBP'] = 'True'
        config['ImageFeatures']['texture_GLCM'] = 'True'
        config['ImageFeatures']['texture_GLCMMS'] = 'True'
        config['ImageFeatures']['vessel'] = 'True'
        config['ImageFeatures']['log'] = 'True'
        config['ImageFeatures']['phase'] = 'True'

        # Check if segmentations are 2D or 3D
        if self.segmentations is not None:
            num_masked_slices_all = [
                self._count_masked_slices(
                    sitk.GetArrayFromImage(sitk.ReadImage(segmentationfile)))
                for segmentationfile in self.segmentations
            ]

            # Require at least one mask: all() over an empty list is
            # vacuously true and would otherwise force 2D mode.
            if num_masked_slices_all and all(elem == 1 for elem in num_masked_slices_all):
                print('All masks only contain one slice, so turn of 3D features.')
                # NOTE: PREDICT will mostly switch itself between these features by looking at the masks
                config['ImageFeatures']['extraction_mode'] = '2D'
                config['ImageFeatures']['orientation'] = 'False'
                config['ImageFeatures']['texture_GLCMMS'] = 'False'

                # For PyRadiomics, only this parameter needs to be changed
                config['PyRadiomics']['force2D'] = 'True'