# Source code for WORC.plotting.scatterplot

#!/usr/bin/env python

# Copyright 2016-2019 Biomedical Imaging Group Rotterdam, Departments of
# Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
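# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.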
# See the License for the specific language governing permissions and
# limitations under the License.

    import matplotlib.pyplot as plt
except ImportError:
    print("[WORC Warning] Cannot use scatterplot function, as _tkinter is not installed")

import pandas as pd
import argparse
import WORC.processing.label_processing as lp
import os
import glob
from natsort import natsorted

def main():
    """Command-line entry point: parse the arguments and make the scatter plot.

    Command-line options (all required):
        -feat   One or more HDF feature files, or a single directory that is
                searched for ``features_*.hdf5`` files (naturally sorted).
        -class  File with the classification of the patients.
        -lab    Labels of the two features to plot; the first one goes on
                the x-axis, the second on the y-axis.
        -out    Output png file.

    Invalid input (fewer than two feature labels, or a directory without any
    matching feature file) is reported through ``parser.error``, which prints
    the usage message and exits.
    """
    parser = argparse.ArgumentParser(description='Radiomics results')
    parser.add_argument('-feat', '--feat', metavar='feat',
                        nargs='+', dest='feat', type=str, required=True,
                        help='List of patient feature files (HDF)')
    parser.add_argument('-class', '--class', metavar='class',
                        nargs='+', dest='classs', type=str, required=True,
                        help='Classification of patients (text)')
    parser.add_argument('-lab', '--lab', metavar='lab',
                        nargs='+', dest='lab', type=str, required=True,
                        help='Label of two features to plot')
    parser.add_argument('-out', '--out', metavar='out',
                        nargs='+', dest='out', type=str, required=True,
                        help='Output png file')
    args = parser.parse_args()

    # nargs='+' yields lists; these two options each denote a single path,
    # so the pieces are joined back into one string.
    if isinstance(args.classs, list):
        args.classs = ''.join(args.classs)

    if isinstance(args.out, list):
        args.out = ''.join(args.out)

    # Two labels are indexed below: fail with a usage error, not IndexError.
    if len(args.lab) < 2:
        parser.error('-lab requires the labels of two features to plot')

    # Always hand a *list* of files to make_scatterplot. A single argument
    # may be a directory, which is expanded to the feature files inside it;
    # a single file stays a one-element list (collapsing it to a string
    # would make the plotting code iterate over its characters).
    feature_files = args.feat
    if isinstance(feature_files, str):
        feature_files = [feature_files]

    if len(feature_files) == 1 and os.path.isdir(feature_files[0]):
        directory = feature_files[0]
        pattern = os.path.join(directory, 'features_*.hdf5')
        feature_files = natsorted(glob.glob(pattern))
        if not feature_files:
            parser.error('no features_*.hdf5 files found in ' + directory)

    make_scatterplot(feature_files, args.classs,
                     args.lab[0], args.lab[1], args.out)
def make_scatterplot(features, label_file, feature_label_1, feature_label_2,
                     output):
    """Scatter two features against each other, split by label, and save it.

    Args:
        features: Paths of the feature files. Each one is read with
            ``pandas.read_hdf`` and must expose ``feature_labels`` and
            ``feature_values``.
        label_file: Label file, passed on unchanged to
            ``gp.findmutationdata``.
        feature_label_1: Name of the feature plotted on the x-axis.
        feature_label_2: Name of the feature plotted on the y-axis.
        output: Path the figure is saved to.

    The label used to split the patients is hard-coded to ``'MDM2'``.
    Patients whose label equals 0 are drawn as navy circles, all others as
    red crosses.
    """
    # Read the feature files, keeping only the two requested features
    featname = (feature_label_1, feature_label_2)
    image_features_temp = []
    for feature_file in features:
        feat_temp = pd.read_hdf(feature_file)
        image_features_temp.append(
            {k: v for k, v in zip(feat_temp.feature_labels,
                                  feat_temp.feature_values)
             if k in featname})

    # Get the mutation labels and the matching features
    # NOTE(review): ``gp`` is not imported in the visible part of this file
    # (only ``WORC.processing.label_processing as lp`` is), so this call
    # raises NameError unless ``gp`` is defined elsewhere -- confirm which
    # module/function is intended before relying on this function.
    mutation_type = [['MDM2']]
    mutation_data, image_features = gp.findmutationdata(label_file,
                                                        mutation_type,
                                                        features,
                                                        image_features_temp)

    image_features = image_features.tolist()

    # Split the two relevant features by label value
    feat1_c0, feat2_c0 = [], []
    feat1_c1, feat2_c1 = [], []
    mutation_label = mutation_data['label'].tolist()[0]

    for imfeat, label in zip(image_features, mutation_label):
        if label[0] == 0:
            feat1_c0.append(imfeat[feature_label_1])
            feat2_c0.append(imfeat[feature_label_2])
        else:
            feat1_c1.append(imfeat[feature_label_1])
            feat2_c1.append(imfeat[feature_label_2])

    # Make a scatter plot. Both series get a label; without labelled artists
    # the legend would be empty and matplotlib would emit a warning.
    label_name = mutation_type[0][0]
    f = plt.figure()
    subplot = f.add_subplot(111)
    subplot.plot(feat1_c0, feat2_c0, linestyle='', ms=12, marker='o',
                 color='navy', label='{} = 0'.format(label_name))
    subplot.plot(feat1_c1, feat2_c1, linestyle='', ms=12, marker='x',
                 color='red', label='{} != 0'.format(label_name))
    # NOTE: arbitrary limits!
    # plt.xlim([0, 10])
    # plt.ylim([0, 10])
    subplot.set_xlabel(feature_label_1)
    subplot.set_ylabel(feature_label_2)
    subplot.set_title('Feature scatter plot')
    subplot.legend()

    # Actually write the figure before reporting success, then release it so
    # repeated calls do not accumulate open figures.
    f.savefig(output)
    plt.close(f)
    print(("Scatterplot saved as {} !").format(output))
# Run the command-line interface only when executed as a script,
# not when the module is imported.
if __name__ == '__main__':
    main()