# Source code for WORC.plotting.scatterplot

#!/usr/bin/env python

# Copyright 2016-2019 Biomedical Imaging Group Rotterdam, Departments of
# Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

try:
    import matplotlib.pyplot as plt
except ImportError:
    print("[WORC Warning] Cannot use scatterplot function, as _tkinter is not installed")

import pandas as pd
import argparse
import WORC.processing.label_processing as lp
import os
import glob
from natsort import natsorted


def main():
    """Command-line entry point: parse the arguments and make the scatter plot.

    ``-feat`` takes either one or more feature files, or a single directory.
    For a directory, every ``features_*.hdf5`` file inside it is used, in
    natural sort order. ``-lab`` needs at least two feature labels; the first
    two are plotted against each other and any further ones are ignored.

    Exits through ``parser.error`` (status 2) when fewer than two feature
    labels are given or when the feature directory holds no feature files.
    """
    parser = argparse.ArgumentParser(description='Radiomics results')
    parser.add_argument('-feat', '--feat', metavar='feat',
                        nargs='+', dest='feat', type=str, required=True,
                        help='List of patient feature files (HDF)')
    parser.add_argument('-class', '--class', metavar='class',
                        nargs='+', dest='classs', type=str, required=True,
                        help='Classification of patients (text)')
    parser.add_argument('-lab', '--lab', metavar='lab',
                        nargs='+', dest='lab', type=str, required=True,
                        help='Label of two features to plot')
    parser.add_argument('-out', '--out', metavar='out',
                        nargs='+', dest='out', type=str, required=True,
                        help='Output png file')
    args = parser.parse_args()

    # Two labels are indexed below; fail with a usage message instead of an
    # IndexError when only one was supplied.
    if len(args.lab) < 2:
        parser.error('-lab requires two feature labels')

    # Every option uses nargs='+', so argparse hands over lists. The label
    # file and output arguments are collapsed into a single string.
    if isinstance(args.classs, list):
        args.classs = ''.join(args.classs)

    if isinstance(args.out, list):
        args.out = ''.join(args.out)

    # Keep the features as a list in all cases, so that a single file and
    # several files are handled the same way downstream.
    features = list(args.feat)
    if len(features) == 1 and os.path.isdir(features[0]):
        # A single directory: collect the feature files it contains.
        feature_dir = features[0]
        features = glob.glob(os.path.join(feature_dir, 'features_*.hdf5'))
        if not features:
            parser.error('no features_*.hdf5 files found in {}'.format(feature_dir))
        features = natsorted(features)

    # Plot regardless of whether a directory or explicit files were given.
    make_scatterplot(features, args.classs, args.lab[0], args.lab[1],
                     args.out)


def make_scatterplot(features, label_file, feature_label_1, feature_label_2,
                     output, label_type='MDM2'):
    """Save a scatter plot of two features, with one marker style per class.

    Objects whose label equals 0 are drawn as navy circles, all others as
    red crosses.

    Args:
        features (list of str or str): path(s) of the feature files. Each
            file is read with ``pandas.read_hdf`` and must expose
            ``feature_labels`` and ``feature_values``.
        label_file (str): label source, handed to the label loader as is.
        feature_label_1 (str): label of the feature on the x-axis.
        feature_label_2 (str): label of the feature on the y-axis.
        output (str): path the figure is saved to.
        label_type (str): name of the label requested from the label
            loader. Defaults to ``'MDM2'``, the previously hard-coded value.
    """
    # A bare path would otherwise be iterated character by character.
    if isinstance(features, str):
        features = [features]

    # Read the feature files, keeping only the two features to plot
    featname = (feature_label_1, feature_label_2)
    image_features_temp = list()
    for feature_file in features:
        feat_temp = pd.read_hdf(feature_file)
        selected = {k: v for k, v in zip(feat_temp.feature_labels,
                                         feat_temp.feature_values)
                    if k in featname}
        image_features_temp.append(selected)

    # Get the labels matched to the features. The original code called an
    # undefined ``gp`` module; the label processing module in scope is ``lp``.
    # NOTE(review): findlabeldata is assumed to be the WORC counterpart of the
    # former findmutationdata, returning (label_data, image_features) -
    # confirm against the installed WORC version.
    label_data, image_features = lp.findlabeldata(label_file,
                                                  [[label_type]],
                                                  features,
                                                  image_features_temp)

    image_features = image_features.tolist()
    # Only the first requested label is used
    labels = label_data['label'].tolist()[0]

    # Split the two relevant features per class
    feat1_c0 = list()
    feat2_c0 = list()
    feat1_c1 = list()
    feat2_c1 = list()
    for imfeat, label in zip(image_features, labels):
        if label[0] == 0:
            feat1_c0.append(imfeat[feature_label_1])
            feat2_c0.append(imfeat[feature_label_2])
        else:
            feat1_c1.append(imfeat[feature_label_1])
            feat2_c1.append(imfeat[feature_label_2])

    # Make a scatter plot; axis limits are left to matplotlib's autoscaling
    f = plt.figure()
    try:
        subplot = f.add_subplot(111)
        # The artists carry labels so that the legend has entries to show
        subplot.plot(feat1_c0, feat2_c0, linestyle='', ms=12, marker='o',
                     color='navy', label='{} = 0'.format(label_type))
        subplot.plot(feat1_c1, feat2_c1, linestyle='', ms=12, marker='x',
                     color='red', label='{} != 0'.format(label_type))
        subplot.set_xlabel(feature_label_1)
        subplot.set_ylabel(feature_label_2)
        subplot.set_title('Feature scatter plot')
        subplot.legend()

        f.savefig(output)
    finally:
        # Release the figure; pyplot keeps figures alive until closed
        plt.close(f)

    print("Scatterplot saved as {} !".format(output))


# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()