#!/usr/bin/env python
# Copyright 2016-2023 Biomedical Imaging Group Rotterdam, Departments of
# Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from abc import ABC, abstractmethod
from WORC.processing.label_processing import load_labels
import WORC.addexceptions as ae
import os
# Global variables
# Hard lower bound on the number of subjects: MinSubjectsValidator raises a
# WORCValueError when simpleworc._num_subjects is below this value.
min_subjects = 10
# Soft guideline used by SamplesWarning: for classification, a warning is
# printed when there are fewer than this many subjects per label to predict.
recommended_subjects = 50
class AbstractValidator(ABC):
    """Base class for validators.

    Subclasses implement :meth:`_validate`; callers invoke
    :meth:`do_validation`, which runs the check, prints a one-line report and
    returns the outcome.
    """

    def do_validation(self, *args, **kwargs):
        """Run the validation and report its outcome.

        All positional and keyword arguments are forwarded unchanged to
        :meth:`_validate`.

        Returns:
            The value returned by :meth:`_validate`, or ``True`` when that
            value is ``None`` (i.e. the check completed without objecting).

        Raises:
            Whatever :meth:`_validate` raises. Exceptions are deliberately
            not caught here, so a failing check propagates to the caller.
        """
        result = self._validate(*args, **kwargs)
        if result is None:
            result = True

        msg = self._generate_detector_message(result)
        if msg:
            print(msg)
        return result

    def _generate_detector_message(self, validated_value):
        """Build the report line printed by :meth:`do_validation`.

        The class name is used as the label. A trailing ``Validator`` is
        removed when present (``SimpleValidator`` -> ``Simple``); class names
        without that suffix are reported in full.
        """
        suffix = 'Validator'
        name = self.__class__.__name__
        # Strip the real suffix instead of a fixed number of characters, and
        # never reduce the label to an empty string.
        if name.endswith(suffix) and len(name) > len(suffix):
            name = name[:-len(suffix)]
        return f"{name} validated: {validated_value}."

    @abstractmethod
    def _validate(self, *args, **kwargs):
        """Perform the actual check; must be provided by subclasses.

        Implementations may return a value (``None`` is reported as ``True``
        by :meth:`do_validation`) or raise to signal a failed validation.
        """
        pass
class SimpleValidator(AbstractValidator):
    """Basic consistency checks on the configuration object passed in."""

    def _validate(self, simpleworc, *args, **kwargs):
        """Check labels, method and image/segmentation consistency.

        Verifies, in this order, that:

        * label names are set (``_label_names`` or ``label_names``);
        * a method is selected (``_method``);
        * every entry of ``_images_train`` has the same keys as the matching
          entry of ``_segmentations_train``;
        * the same holds for ``images_train`` / ``segmentations_train`` when
          the object has an ``images_train`` attribute;
        * ``_worc.images_train``, when non-empty, has as many entries as
          ``_image_types``.

        Raises:
            ae.WORCValueError: on the first check that fails.
        """
        # The presence of a label file is not checked here (that check is
        # disabled); InvalidLabelsValidator raises when no label file is set.
        if not simpleworc._label_names and not simpleworc.label_names:
            raise ae.WORCValueError(f'No label(s) to predict selected. Use SimpleWorc().predict_labels(**) to select labels.')

        if not simpleworc._method:
            raise ae.WORCValueError(f'No method selected. Call function binary_classification(**) or regression(**) or survival(**) on SimpleWorc().')

        private_images = simpleworc._images_train
        if private_images:
            self._require_matching_subjects(private_images, simpleworc._segmentations_train)

        if hasattr(simpleworc, 'images_train') and simpleworc.images_train:
            self._require_matching_subjects(simpleworc.images_train, simpleworc.segmentations_train)

        worc_images = simpleworc._worc.images_train
        if worc_images and len(worc_images) != len(simpleworc._image_types):
            raise ae.WORCValueError(f'Number of image types you said you would provide (image_types: {len(simpleworc._image_types)}) is not the same as the actual number of image types provided (images_train: {len(simpleworc._worc.images_train)}).')

    @staticmethod
    def _require_matching_subjects(images, segmentations):
        """Raise if any paired entries of the two sequences have different keys.

        Entries are paired positionally; the position is reported as the
        modality number in the error message.
        """
        for num, (modality_images, modality_segs) in enumerate(zip(images, segmentations)):
            if modality_images.keys() != modality_segs.keys():
                raise ae.WORCValueError(f'Subjects in images_train and segmentations_train are not the same for modality {num}.')
class MinSubjectsValidator(AbstractValidator):
    """Reject configurations with fewer subjects than ``min_subjects``."""

    def _validate(self, simpleworc, *args, **kwargs):
        """Raise ``ae.WORCValueError`` when ``simpleworc._num_subjects`` is below ``min_subjects``."""
        enough_subjects = not simpleworc._num_subjects < min_subjects
        if enough_subjects:
            return None

        raise ae.WORCValueError(f'Less than {min_subjects} subjects (you have {simpleworc._num_subjects}) will probably make WORC crash due to a split in the test/validation set having only one subject. Use at least {min_subjects} subjects or more.')
# class EvaluateValidator(AbstractValidator):
# def _validate(self, simpleworc, *args, **kwargs):
# if simpleworc._add_evaluation:
# if not simpleworc._images_train:
# if hasattr(simpleworc, 'images_train'):
# if not simpleworc.images_train:
# raise ae.WORCValueError(f'You have added the evaluation pipeline, but have not provided images, which is currently required. We will work on this option in a future release.')
# else:
# raise ae.WORCValueError(f'You have added the evaluation pipeline, but have not provided images, which is currently required. We will work on this option in a future release.')
class SamplesWarning(AbstractValidator):
    """Good-practice check on the number of subjects.

    Not a validator in the strict sense: it never raises, it only prints a
    warning when the subject count is below the recommendation.
    """

    def _validate(self, simpleworc, *args, **kwargs):
        """Print a warning for classification runs with too few subjects.

        The recommendation is ``recommended_subjects`` subjects for every
        entry in ``simpleworc._label_names``. Other methods are not checked.
        """
        if simpleworc._method != 'classification':
            # TODO @martijn not sure how to tackle this, what would be a
            # reasonable amount of subjects for regression?
            return None

        if not simpleworc._num_subjects < len(simpleworc._label_names) * recommended_subjects:
            return None

        print(f'Warning: at least {len(simpleworc._label_names) * recommended_subjects} subjects is recommended when predicting {len(simpleworc._label_names)} labels. Current subject count is: {simpleworc._num_subjects}')
class InvalidLabelsValidator(AbstractValidator):
    """Check that the label file in use exists, loads, and has unambiguous names."""

    def _validate(self, simpleworc, *args, **kwargs):
        """Pick the label file to check and validate it.

        A training label file (``_labels_file_train``, then
        ``labels_file_train``) takes precedence. Only when neither is set and
        a trained model is present (``trained_model`` or ``_trained_model``)
        is the test label file (``_labels_file_test``, then
        ``labels_file_test``) used instead.

        Extra positional/keyword arguments are accepted and ignored so the
        signature matches the other validators.

        Raises:
            ae.WORCValueError: when no applicable label file is set, or when
                the selected file fails :meth:`_validate_labels_file`.
        """
        labels_file_train = None
        labels_file_test = None

        if simpleworc._labels_file_train:
            labels_file_train = simpleworc._labels_file_train
        elif simpleworc.labels_file_train:
            labels_file_train = simpleworc.labels_file_train
        elif simpleworc.trained_model is not None or simpleworc._trained_model is not None:
            # Inference, only testing objects
            if simpleworc._labels_file_test:
                labels_file_test = simpleworc._labels_file_test
            elif simpleworc.labels_file_test:
                labels_file_test = simpleworc.labels_file_test
            else:
                raise ae.WORCValueError(f'No test labels, use SimpleWorc().labels_from_this_file(**) to add labels.')
        else:
            raise ae.WORCValueError(f'No training labels, use SimpleWorc().labels_from_this_file(**) to add labels.')

        if labels_file_train is not None:
            self._validate_labels_file(labels_file_train)

        if labels_file_test is not None:
            self._validate_labels_file(labels_file_test)

    def _validate_labels_file(self, labels_file):
        """Validate a single label file.

        The file must exist, be loadable with ``load_labels``, and neither
        its label names nor its patient IDs may contain an entry that is a
        substring of another entry.

        Raises:
            ae.WORCValueError: when the file is missing, when its first
                column is not the patient ID column, or when substring
                clashes are found (label and subject clashes are reported
                together).
            ae.WORCAssertionError: any other assertion failure raised by
                ``load_labels`` is passed on unchanged.
        """
        if not os.path.exists(labels_file):
            raise ae.WORCValueError(f'Given label file {labels_file} does not exist.')

        try:
            label_data = load_labels(labels_file)
        except ae.WORCAssertionError as wae:
            if 'First column should be patient ID' in str(wae):
                # TODO: print wrong column name and file so that it is clear what needs to be replaced in which file
                raise ae.WORCValueError(f'First column in the file given to SimpleWORC().labels_from_this_file(**) needs to be named Patient.') from wae
            # Any other loading failure must not be swallowed: without the
            # loaded data the checks below cannot run.
            raise

        reports = []

        # check labels for substrings of each other
        labels_matches = self._get_all_substrings_for_array(label_data['label_name'])
        if labels_matches:
            reports.append(self._format_substring_report('label', labels_matches))

        # check subject names for substrings of each other
        subjects_matches = self._get_all_substrings_for_array(label_data['patient_IDs'])
        if subjects_matches:
            reports.append(self._format_substring_report('subject', subjects_matches))

        if reports:
            # Every report ends with a newline, so plain concatenation keeps
            # the label and subject sections separated.
            raise ae.WORCValueError(''.join(reports))

    @staticmethod
    def _format_substring_report(kind, all_matches):
        """Format the error text for one kind ('label' or 'subject') of clash."""
        lines = [f"Found {kind}(s) that are a substring of other {kind}(s). This is currently not allowed in WORC. Rename the following {kind}(s):\n"]
        for name, matches in all_matches.items():
            for match in matches:
                lines.append(f"{name} is a substring of {match}\n")
        return ''.join(lines)

    def _get_all_substrings_for_array(self, arr):
        """Map each element of ``arr`` to the other elements that contain it.

        Elements equal to the one being examined are not counted as matches.
        Elements without any match are left out, so an empty dict means no
        clashes were found.
        """
        all_matches = {}
        for strcmp in arr:
            matches = [s for s in arr if s != strcmp and strcmp in s]
            if matches:
                all_matches[strcmp] = matches

        return all_matches
class ValidatorsFactory:
    """Factory for the set of validators defined in this module."""

    @staticmethod
    def factor_validators():
        """Return a list with one new instance of each validator.

        The order is: SimpleValidator, MinSubjectsValidator, SamplesWarning,
        InvalidLabelsValidator.
        """
        return [
            SimpleValidator(),
            MinSubjectsValidator(),
            SamplesWarning(),
            InvalidLabelsValidator()
        ]


# ``__all__`` must list names as strings; listing the class object itself
# makes ``from <module> import *`` fail with a TypeError.
__all__ = ['ValidatorsFactory']