# Source code for WORC.plotting.plot_errors

#!/usr/bin/env python

# Copyright 2016-2020 Biomedical Imaging Group Rotterdam, Departments of
# Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt

import os
import numpy as np
from WORC.IOparser.file_io import load_features
import pandas as pd
import WORC.addexceptions as ae
import tikzplotlib


def plot_errors(featurefiles, patientinfo, label_type, featurenames,
                posteriors_csv=None, agesex=True, output_png=None,
                output_tex=None):
    """Scatterplot of all objects with marking of errors.

    Every patient listed in ``posteriors_csv`` is drawn as one point in the
    plane spanned by the two requested features. Points whose true label is
    1.0 are light blue, all others are blue; misclassified points get a red
    ring around them.

    Parameters
    ----------
    featurefiles, patientinfo, label_type
        Passed unchanged to ``load_features`` to obtain the label data and
        the image features.
    featurenames : sequence of two str
        Names of the features used for the x-axis and y-axis respectively.
        Both must occur in the feature labels of the first feature set.
    posteriors_csv : str
        CSV file with the columns ``PatientID``, ``TrueLabel`` and
        ``Probability``. The default of None is kept for backward
        compatibility of the signature, but a file is required.
    agesex : bool
        If True, label the axes 'Sex' (x, with ticks Female/Male at 0/1) and
        'Age' (y); otherwise use the two feature names as axis labels.
    output_png : str, optional
        If given, the figure is saved to this path with ``plt.savefig``.
    output_tex : str, optional
        If given, the figure is saved to this path with ``tikzplotlib.save``.

    Raises
    ------
    WORCValueError
        If ``featurenames`` does not have exactly two entries, or if no
        ``posteriors_csv`` is supplied.
    WORCKeyError
        If a feature name is unknown, or if a patient from the CSV has no
        entry in the loaded label data.
    """
    # Check some input
    if len(featurenames) != 2:
        raise ae.WORCValueError(f'Featurenames should be list with two strings, got {featurenames}.')

    # Without posteriors there are no patients, labels or predictions to
    # plot; fail early with a clear message instead of a NameError later on.
    if posteriors_csv is None:
        raise ae.WORCValueError('A posteriors_csv file is required to plot the errors, got None.')

    # Read the features and classification data
    print("Reading features and label data.")
    label_data, image_features = \
        load_features(featurefiles, patientinfo, label_type)

    # Read in the scores and labels
    data = pd.read_csv(posteriors_csv)
    PIDs = data['PatientID'].values
    labels = data['TrueLabel'].values.tolist()
    probabilities = data['Probability'].values

    # A prediction is correct when the probability falls on the side of the
    # 0.5 threshold that matches the true label; anything else is an error.
    correct = [
        bool((prob >= 0.5 and label == 1.0) or (prob < 0.5 and label == 0.0))
        for label, prob in zip(labels, probabilities)
    ]

    # Select indices of features we need
    feature_labels = image_features[0][1]
    feature_1_index = _feature_index(feature_labels, featurenames[0])
    feature_2_index = _feature_index(feature_labels, featurenames[1])

    # Build the patient -> row lookup once instead of converting and scanning
    # the ID array for every patient. setdefault keeps the first occurrence,
    # which is what list.index would return.
    patient_rows = {}
    for row, patient_id in enumerate(label_data['patient_IDs'].tolist()):
        patient_rows.setdefault(patient_id, row)

    # Match probabilities to features
    feature_1 = []
    feature_2 = []
    for pid in PIDs:
        if pid not in patient_rows:
            raise ae.WORCKeyError(f'Features for {pid} not provided.')
        row = patient_rows[pid]
        feature_1.append(image_features[row][0][feature_1_index])
        feature_2.append(image_features[row][0][feature_2_index])

    # Resort based on PID
    order = np.argsort(PIDs)
    feature_1 = [feature_1[index] for index in order]
    feature_2 = [feature_2[index] for index in order]
    correct = [correct[index] for index in order]
    labels = [labels[index] for index in order]

    # Actual plotting
    f = plt.figure(figsize=(20, 15))
    try:
        ax = plt.subplot(1, 1, 1)
        coordinates = set()
        for index, label in enumerate(labels):
            # Check if coordinate has already been plotted. If so, shift the
            # point along x by alternating offsets of growing size
            # (+0.01, -0.02, +0.03, ...) until a free spot is found.
            coordinate = (feature_1[index], feature_2[index])
            addoffset = 1
            sign = 1
            while coordinate in coordinates:
                offset = sign * 0.01 * addoffset
                coordinate = (feature_1[index] + offset, feature_2[index])
                sign = -sign
                addoffset += 1

            coordinates.add(coordinate)

            # Red border if classification is incorrect: a larger red marker
            # is drawn first and a smaller label-coloured one on top of it.
            if not correct[index]:
                ax.scatter(coordinate[0], coordinate[1], s=80, marker='o',
                           color='red')
                s = 30
            else:
                s = 50

            # Plot point in feature space
            color = '#7dcfe2' if label == 1.0 else 'blue'
            ax.scatter(coordinate[0], coordinate[1], s=s, marker='o',
                       color=color)

        # Add some labelling etc to the plot
        if agesex:
            plt.xlabel('Sex', size=12)
            plt.ylabel('Age', size=12)
            plt.xticks([0, 1], ['Female', 'Male'], size=8)
        else:
            plt.xlabel(featurenames[0], size=12)
            plt.ylabel(featurenames[1], size=12)

        # Save output
        if output_png is not None:
            plt.savefig(output_png, bbox_inches='tight', pad_inches=0)
            print(f"Plot saved as {output_png}!")

        if output_tex is not None:
            tikzplotlib.save(output_tex)
            print(f"Plot saved as {output_tex}!")
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(f)


def _feature_index(feature_labels, featurename):
    """Return the position of featurename in feature_labels or raise WORCKeyError."""
    try:
        return feature_labels.index(featurename)
    except ValueError:
        raise ae.WORCKeyError(f'Feature {featurename} not in feature names.') from None