Source code for WORC.plotting.plot_boxplot_features

#!/usr/bin/env python

# Copyright 2016-2020 Biomedical Imaging Group Rotterdam, Departments of
# Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import WORC.IOparser.config_io_classifier as config_io
from WORC.IOparser.file_io import load_features
import os
import numpy as np
import zipfile
import matplotlib
import matplotlib.pyplot as plt

def plot_boxplot_features(features, label_data, config, output_zip,
                          label_type=None, verbose=False):
    """Load features and labels and write feature boxplots to a zip file.

    Parameters
    ----------
    features:
        Feature source(s); passed unchanged to ``load_features``.
    label_data:
        Label source; passed unchanged to ``load_features``.
    config:
        Configuration source; passed to ``config_io.load_config``. The
        loaded configuration must provide ``['Labels']['label_names']``,
        which is used when ``label_type`` is None.
    output_zip: path
        Zip file to which the boxplots are written. Its parent folder is
        created when it does not exist yet.
    label_type: optional
        Label name(s) to load. Defaults to the label names from the config.
    verbose: bool, optional
        If True, print progress messages.
    """
    # Load variables from the config file
    config = config_io.load_config(config)

    # Create output folder if required. A bare filename has an empty
    # dirname, for which os.makedirs would raise, so skip that case.
    output_dir = os.path.dirname(output_zip)
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    if label_type is None:
        label_type = config['Labels']['label_names']

    # Read and stack the features
    if verbose:
        print("Reading features and label data.")
    label_data, image_features = load_features(features, label_data,
                                               label_type)

    # Generate the actual boxplots
    generate_feature_boxplots(image_features, label_data, output_zip,
                              verbose=verbose)
def generate_feature_boxplots(image_features, label_data, output_zip,
                              dpi=500, verbose=False):
    """Generate boxplots of the feature values among different objects.

    For every feature, three boxplots (with the individual values overlaid
    as scatter points) are drawn: all objects, objects of class 1 and
    objects of class 0. The plots are grouped in figures of 5x5 subplots,
    each saved as ``boxplot_<n>.png`` and stored in the output zip file.

    Parameters
    ----------
    image_features: list, mandatory
        One entry per object. For each entry, element ``[0]`` holds the
        feature values; element ``[1]`` of the first entry holds the
        feature labels, in the same order as the values.
    label_data: mandatory
        Mapping with the keys ``'label'`` and ``'patient_IDs'``, whose
        values support ``.tolist()``. Only the first label in ``'label'``
        is used; an object is assigned to class 0 when the first element
        of its label equals 0 and to class 1 otherwise.
    output_zip: path, mandatory
        Zip file to which the boxplots are written. The PNG images are
        temporarily written next to it and removed after being zipped.
    dpi: int, optional
        Resolution with which the figures are saved.
    verbose: bool, optional
        If True, print progress messages.
    """
    labels = image_features[0][1]
    featvect = dict()
    flab = dict()
    for l in labels:
        featvect[l] = {"all": [], "1": [], "0": []}
        flab[l] = {"all": [], "1": [], "0": []}

    # Stack per feature type and class
    if verbose:
        print("Stacking features.")
    class_labels = label_data['label'].tolist()[0]
    patient_IDs = label_data['patient_IDs'].tolist()
    for imfeat, class_label, pid in zip(image_features, class_labels,
                                        patient_IDs):
        values = imfeat[0]
        class_key = '0' if class_label[0] == 0 else '1'
        for flnum, fl in enumerate(labels):
            featvect[fl]['all'].append(values[flnum])
            flab[fl]['all'].append(pid)
            featvect[fl][class_key].append(values[flnum])
            # Patient IDs are collected alongside the values; they are not
            # used by the plotting below.
            flab[fl][class_key].append(pid)

    # Temporary images are written next to the output zip file
    outputfolder_temp = os.path.dirname(output_zip)

    # Substrings removed from the feature labels to shorten subplot titles
    redundant_parts = ('featureconverter_', 'train_', 'test_',
                       'CalcFeatures', 'predict', 'pyradiomics')
    fz = 5  # Font size; works best after saving

    # Split in 5x5 figures.
    nfig = int(np.ceil(len(labels) / 25.0))
    labels = sorted(labels)

    # Generate the output zip file; the context manager guarantees the
    # archive is finalised and closed, also when plotting fails.
    with zipfile.ZipFile(output_zip, 'w', zipfile.ZIP_DEFLATED,
                         allowZip64=True) as zipf:
        # Create the boxplots
        if verbose:
            print("Generating boxplots.")

        for fi in range(nfig):
            f = plt.figure(figsize=(13, 10))
            try:
                f.subplots_adjust(hspace=0.3, wspace=0.2)
                first = fi * 25
                last = min((fi + 1) * 25, len(labels))
                for fignum, lab in enumerate(labels[first:last], start=1):
                    ax = plt.subplot(5, 5, fignum)
                    values_all = featvect[lab]['all']
                    values_1 = featvect[lab]['1']
                    values_0 = featvect[lab]['0']

                    # Scatter the raw values at the x-positions of the
                    # corresponding boxplots (1: all, 2: class 1,
                    # 3: class 0).
                    ax.scatter(np.ones(len(values_all)), values_all,
                               color='blue')
                    ax.scatter(np.ones(len(values_1)) * 2.0, values_1,
                               color='red')
                    ax.scatter(np.ones(len(values_0)) * 3.0, values_0,
                               color='green')
                    ax.boxplot([values_all, values_1, values_0])

                    # Alter the label, remove redundant parts
                    title = lab
                    for part in redundant_parts:
                        title = title.replace(part, '')
                    ax.set_title(title, fontsize=fz)

                    # Set the tick label size through tick_params: the
                    # Tick.label attribute is not available in newer
                    # matplotlib releases.
                    ax.tick_params(axis='both', which='major',
                                   labelsize=fz)

                fname = ('boxplot_{}.png').format(str(fi))
                outputname = os.path.join(outputfolder_temp, fname)
                f.savefig(outputname, bbox_inches='tight', pad_inches=0,
                          dpi=dpi)
            finally:
                # Always release the figure, also when plotting fails
                plt.close(f)

            if verbose:
                print(("Boxplot saved as {} !").format(outputname))

            # Copy the image to the zipfile and remove image
            try:
                zipf.write(outputname, os.path.basename(outputname))
            finally:
                os.remove(outputname)