# Source code for WORC.tests.WORCTutorialSimple_unittest_regression

# Welcome to the tutorial of WORC: a Workflow for Optimal Radiomics
# Classification! It will provide you with basis knowledge and practical
# skills on how to run the WORC. For advanced topics and WORCflows, please see
# the other notebooks provided with this tutorial. For installation details,
# see the ReadMe.md provided with this tutorial.

# This tutorial interacts with WORC through SimpleWORC and is especially
# suitable for first time usage.

# import necessary packages
from WORC import BasicWORC
import os

# These packages are only used in analysing the results
import pandas as pd
import json
import fastr
import glob
import shutil

# If you don't want to use your own data, we use the following example set,
# see also the next code block in this example.
from WORC.exampledata.datadownloader import download_HeadAndNeck

# Define the folder this script is in, so we can easily find the example data
script_path = os.path.dirname(os.path.abspath(__file__))

# Determine whether you would like to use WORC for classification or regression.
# Valid values (checked inside main()): 'binary_classification', 'regression',
# 'multiclass_classification'.
modus = 'regression'


def main():
    """Execute the WORC tutorial regression experiment.

    Locates the example Head-and-Neck dataset, reuses the features computed by
    the companion classification experiment, configures a BasicWORC regression
    experiment predicting 'Age', executes it, and prints the resulting
    performance statistics.

    Raises:
        ValueError: if the host system is not one of the recognized
            environments (CI runner or known developer machines), so the
            example data path cannot be determined.
    """
    print(f"Running in folder: {script_path}.")
    # ---------------------------------------------------------------------------
    # Input
    # ---------------------------------------------------------------------------
    # The minimal inputs to WORC are:
    #   - Images
    #   - Segmentations
    #   - Labels
    #
    # In SimpleWORC, we assume you have a folder "datadir", in which there is a
    # folder for each patient, where in each folder there is a image.nii.gz and
    # a mask.nii.gz:
    #           Datadir
    #               Patient_001
    #                   image.nii.gz
    #                   mask.nii.gz
    #               Patient_002
    #                   image.nii.gz
    #                   mask.nii.gz
    #               ...
    #
    # You can skip this part if you use your own data.
    # In the example, we use open source data from the online XNAT platform at
    # https://xnat.bmia.nl/data/archive/projects/stwstrategyhn1. This dataset
    # consists of CT scans of patients with Head and Neck tumors. We would
    # download a subset of 20 patients; change nsubjects if you like.
    nsubjects = 20  # use "all" to download all patients

    # NOTE: data was already downloaded by the classification experiment, so we
    # only resolve the data path here instead of downloading again.
    if 'runner' in script_path:
        data_path = '/home/runner/work/WORC/WORC/WORCTutorial/Data'
    elif '/home/martijn' in script_path:
        data_path = '/home/martijn/git/WORCTutorial/Data'
    elif 'Martijn Starmans' in script_path:
        data_path = r'C:\Users\Martijn Starmans\Documents\GitHub\WORCTutorial\Data'
    else:
        raise ValueError("System not recognized")

    # download_HeadAndNeck(datafolder=data_path, nsubjects=nsubjects)

    # Identify our data structure: change the fields below accordingly
    # if you use your own data.
    imagedatadir = os.path.join(data_path, 'stwstrategyhn1')
    image_file_name = 'image.nii.gz'
    segmentation_file_name = 'mask.nii.gz'

    # File in which the labels (i.e. outcome you want to predict) is stated.
    # Again, change this accordingly if you use your own data.
    label_file = os.path.join(data_path, 'Examplefiles', 'pinfo_HN.csv')

    # Name of the label you want to predict
    if modus == 'binary_classification':
        # Classification: predict a binary (0 or 1) label
        label_name = ['imaginary_label_1']
    elif modus == 'regression':
        # Regression: predict a continuous label
        label_name = ['Age']
    elif modus == 'multiclass_classification':
        # Multiclass classification: predict several mutually exclusive binary
        # labels together
        label_name = ['imaginary_label_1', 'complement_label_1']

    # Determine whether we want to do a coarse quick experiment, or a full
    # lengthy one. Again, change this accordingly if you use your own data.
    coarse = True

    # Give your experiment a name
    experiment_name = 'Example_STWStrategyHN_Regression'

    # Instead of the default tempdir, let's put the temporary output in a
    # subfolder in the same folder as this script
    tmpdir = os.path.join(script_path, 'WORC_' + experiment_name)
    print(f"Temporary folder: {tmpdir}.")

    # Remove temp and output folders so the experiment starts from scratch
    outputfolder = fastr.config.mounts['output']
    experiment_folder = os.path.join(outputfolder, 'WORC_' + experiment_name)
    if os.path.exists(tmpdir):
        shutil.rmtree(tmpdir)
    if os.path.exists(experiment_folder):
        shutil.rmtree(experiment_folder)

    # ---------------------------------------------------------------------------
    # The actual experiment
    # ---------------------------------------------------------------------------
    # Create a BasicWORC object to be able to set features ourselves
    experiment = BasicWORC(experiment_name)

    # Set the input data according to the variables we defined earlier
    experiment.images_from_this_directory(
        imagedatadir, image_file_name=image_file_name)
    experiment.segmentations_from_this_directory(
        imagedatadir, segmentation_file_name=segmentation_file_name)

    # Reuse the features computed by the classification experiment to be
    # quicker: locate its output folder and map patient IDs to feature files.
    classification_experiment_folder = \
        os.path.join(outputfolder, 'WORC_Example_STWStrategyHN')
    feature_files = glob.glob(os.path.join(
        classification_experiment_folder, 'Features', 'features_*.hdf5'))
    # The patient ID follows the 'CT_0_' marker in the feature file name.
    features_train = {
        os.path.splitext(os.path.basename(f))[0].split('CT_0_')[1]: f
        for f in feature_files}
    experiment.features_train.append(features_train)

    # Labels
    experiment.labels_from_this_file(label_file)
    experiment.predict_labels(label_name)

    # Set the types of images WORC has to process. Used in fingerprinting.
    # Valid quantitative types are ['CT', 'PET', 'Thermography', 'ADC']
    # Valid qualitative types are ['MRI', 'DWI', 'US']
    experiment.set_image_types(['CT'])

    # Use the standard workflow matching the chosen modus.
    # NOTE: this previously compared against 'classification', a value modus
    # never takes (see the label selection above), making the binary branch
    # unreachable; fixed to 'binary_classification' for consistency.
    if modus == 'binary_classification':
        experiment.binary_classification(coarse=coarse)
    elif modus == 'regression':
        experiment.regression(coarse=coarse)

    # Set the temporary directory
    experiment.set_tmpdir(tmpdir)

    # Extra's: add semantic (non-image) features and extensive evaluation
    semantics_file = os.path.join(data_path, 'Examplefiles', 'semantics_HN.csv')
    experiment.semantics_from_this_file(semantics_file)
    experiment.add_evaluation()
    # experiment.add_config_overrides({'General':
    #     {'FeatureCalculators': '[predict/CalcFeatures:1.0, pyradiomics/Pyradiomics:1.0]'}
    #     })
    experiment.set_multicore_execution()

    # Run the experiment!
    experiment.execute()

    # NOTE: Precomputed features can be used instead of images and masks
    # by instead using ``experiment.features_from_this_directory(featuresdatadir)``
    # in a similar fashion.

    # ---------------------------------------------------------------------------
    # Analysis of results
    # ---------------------------------------------------------------------------
    # There are two main outputs: the features for each patient/object, and the
    # overall performance. These are stored as .hdf5 and .json files,
    # respectively. By default, they are saved in the so-called "fastr output
    # mount", in a subfolder named after your experiment name.

    # Read the overall performance
    performance_file = os.path.join(experiment_folder, 'performance_all_0.json')
    if not os.path.exists(performance_file):
        print('No performance file found: your network has failed.')
    else:
        with open(performance_file, 'r') as fp:
            performance = json.load(fp)

        # Print the output performance
        print("\n Performance:")
        stats = performance['Statistics']
        for k, v in stats.items():
            print(f"\t {k} {v}.")

    # NOTE: the performance is probably horrible, which is expected as we ran
    # the experiment on coarse settings. These settings are recommended to only
    # use for testing: see also below.

    # ---------------------------------------------------------------------------
    # Tips and Tricks
    # ---------------------------------------------------------------------------
    # For tips and tricks on running a full experiment instead of this simple
    # example, adding more evaluation options, debugging a crashed network
    # etcetera, please go to
    # https://worc.readthedocs.io/en/latest/static/user_manual.html
    #
    # Some things we would advise to always do:
    #   - Run actual experiments on the full settings (coarse=False):
    #       coarse = False
    #       experiment.binary_classification(coarse=coarse)
    #     Note: this will result in more computation time. We therefore
    #     recommend to run this script on either a cluster or high performance
    #     PC. If so, you may change the execution to use multiple cores to
    #     speed up computation just before experiment.execute():
    #       experiment.set_multicore_execution()
    #   - Add extensive evaluation before experiment.execute():
    #       experiment.add_evaluation()


if __name__ == '__main__':
    main()