# Source code for WORC.plotting.plot_barchart

#!/usr/bin/env python

# Copyright 2016-2019 Biomedical Imaging Group Rotterdam, Departments of
# Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt

import tikzplotlib
import numpy as np
import pandas as pd
from collections import Counter
import argparse


def plot_barchart(prediction, estimators=10, label_type=None, output_tex=None,
                  output_png=None):
    '''
    Make a barchart of the top X hyperparameters settings of the ranked
    estimators in all cross validation iterations.

    Parameters
    ----------
    prediction: filepath, mandatory
        Path pointing to the .hdf5 file which is the output of the
        trainclassifier function.

    estimators: integer, default 10
        Number of hyperparameter settings/estimators used in each cross
        validation. The settings are ranked, so when supplying e.g. 10,
        the best 10 settings in each cross validation setting will be used.

    label_type: string, default None
        The name of the label predicted by the estimator. If None,
        the first label from the prediction file will be used. If the
        given name is not present in the prediction file, the first label
        is used as well.

    output_tex: filepath, optional
        If given, the barchart will be written to this tex file.

    output_png: filepath, optional
        If given, the barchart will be written to this png file.

    Returns
    ----------
    fig: matplotlib figure
        The figure in which the barchart is plotted.

    '''
    # Load input prediction
    prediction = pd.read_hdf(prediction)

    # Determine for which label we extract the estimator
    keys = prediction.keys()
    if label_type is None:
        label_type = keys[0]

    try:
        prediction = prediction[label_type]
    except KeyError:
        # Multiclass reroute: fall back to the first label in the file
        prediction = prediction[keys[0]]

    # Extract the parameter settings: for every parameter name, collect the
    # value it takes in each of the first `estimators` settings of every
    # cross validation iteration.
    parameters = dict()
    for est in prediction.classifiers:
        all_settings = est.cv_results_['params']
        for n_setting in range(estimators):
            for name, value in all_settings[n_setting].items():
                parameters.setdefault(name, list()).append(value)

    # Count for every parameter how many times a setting occurs
    counts = count_parameters(parameters)

    # Normalize by the total number of settings that were collected
    normalization_factor = len(prediction.classifiers) * estimators

    # Make the barplot
    fig = plot_bars(counts, normalization_factor)

    # Save the output
    if output_tex is not None:
        print(f'Saving barchart to {output_tex}.')
        # tikzplotlib is called without an explicit figure here, exactly as
        # before; presumably it picks up the figure just made by plot_bars
        tikzplotlib.save(output_tex)

    if output_png is not None:
        print(f'Saving barchart to {output_png}.')
        fig.savefig(output_png, bbox_inches='tight', pad_inches=0, dpi=500)

    # The docstring has always promised the figure; actually return it
    return fig


def plot_bars(params, normalization_factor=None, figwidth=40, fontsize=30,
              spacing=2):
    '''
    Plot a horizontal barchart of how often boolean parameters are 'True'.

    Parameters
    ----------
    params: dict, mandatory
        Maps each parameter name to a dictionary of {setting: count}, e.g.
        the output of count_parameters. Only parameters that have a 'True'
        or 'False' (string) setting are plotted; the bar length is the
        count of 'True', or zero if only 'False' occurs.

    normalization_factor: number, default None
        Value by which all counts are divided. If None, the largest
        plotted count is used. A factor of zero (e.g. nothing to plot, or
        all counts zero) is replaced by one to avoid a division by zero.

    figwidth: number, default 40
        Used as both the width and the height of the figure.

    fontsize: number, default 30
        Font size of the tick labels and the x-axis label.

    spacing: integer, default 2
        Number of y-axis slots per parameter; the first holds the bar,
        the remaining ones are left empty.

    Returns
    ----------
    fig: matplotlib figure
        The figure in which the barchart is plotted.

    '''
    # Fixing random state for reproducibility
    # NOTE(review): nothing in this function draws random numbers; kept
    # only so the global numpy random state is affected as before.
    np.random.seed(19680801)

    # Count how often feature groups are used
    ntimes_groups = list()
    groups = list()
    for key, setting_counts in params.items():
        # Only boolean parameters are plotted
        if 'True' in setting_counts:
            ntimes_groups.append(setting_counts['True'])
        elif 'False' in setting_counts:
            # Only False
            ntimes_groups.append(0)
        else:
            continue
        groups.append(key)

    # Normalize the values in order to not make figure to large
    if normalization_factor is None:
        # default=0 covers the case that there is nothing to plot
        normalization_factor = max(ntimes_groups, default=0)
    normalization_factor = float(normalization_factor)  # Needed for percentages
    if normalization_factor == 0:
        # Happens when nothing is plotted or all counts are zero; dividing
        # by one leaves those zeros untouched instead of crashing.
        normalization_factor = 1.0
    ntimes_groups = [x / normalization_factor for x in ntimes_groups]

    # Create the figure for the barchart
    plt.rcdefaults()
    fig, ax = plt.subplots()
    fig.set_figwidth(figwidth)
    fig.set_figheight(figwidth)
    ax.set_xlim(0, 1)

    # Determine positions of all the labels
    n_slots = len(groups) * spacing
    y_pos = np.arange(n_slots)
    ntimes_groups_plot = list()
    groups_plot = list()
    num = 0
    for i in range(n_slots):
        if i % spacing == 0:
            ntimes_groups_plot.append(ntimes_groups[num])
            groups_plot.append(groups[num])
            num += 1
        else:
            # empty entry to fill up spacing
            ntimes_groups_plot.append(0.0)
            groups_plot.append('')

    # Normal features
    ax.barh(y_pos, ntimes_groups_plot, align='center',
            color='steelblue', ecolor='black')
    ax.set_yticks(y_pos)
    ax.set_yticklabels(groups_plot)

    ax.tick_params(axis='both', labelsize=fontsize)
    ax.invert_yaxis()  # labels read top-to-bottom
    ax.set_xlabel('Percentage', fontsize=fontsize)

    return fig


def count_parameters(parameters):
    '''
    Count for every parameter how many times each setting occurs.

    Parameters
    ----------
    parameters: dict, mandatory
        Maps each parameter name to an iterable of the settings it took.

    Returns
    ----------
    output: dict
        Maps each parameter name to a dictionary of {setting: count}.
        Parameters whose settings cannot be counted (a TypeError is raised
        while counting, e.g. for unhashable settings such as lists) are
        left out.

    '''
    output = dict()
    for setting, values in parameters.items():
        try:
            output[setting] = dict(Counter(values))
        except TypeError:
            # Not possible to count parameters, skip
            continue

    return output


def paracheck(parameters):
    '''
    Compute and print how often feature groups are used in the settings.

    NOTE: Deprecated

    Parameters
    ----------
    parameters: dict, mandatory
        Maps each parameter name to the list of settings it took. The keys
        'semantic_features', 'patient_features', 'orientation_features',
        'histogram_features', 'shape_features' and 'texture_features' are
        mandatory; 'coliage_features', 'phase_features', 'vessel_features',
        'log_features' and 'degree' are used when present.

    Returns
    ----------
    output: dict
        For each feature group the fraction of settings equal to 'True',
        for 'texture_features' the fraction per texture setting, and under
        'polynomial_degree' the mean of 'degree' if given. All fractions
        are relative to the number of 'semantic_features' settings.

    Raises
    ----------
    KeyError
        If a mandatory key is missing.
    ZeroDivisionError
        If 'semantic_features' is empty.

    '''
    output = dict()

    # All ratios are taken relative to the number of semantic settings
    total = float(len(parameters['semantic_features']))

    def _ratio(values, setting):
        # Fraction of entries in values equal to the given setting
        return sum([i == setting for i in values]) / total

    # (parameter key, printed label, mandatory?)
    feature_groups = [
        ('semantic_features', 'Semantic', True),
        ('patient_features', 'patient', True),
        ('orientation_features', 'orientation', True),
        ('histogram_features', 'histogram', True),
        ('shape_features', 'shape', True),
        ('coliage_features', 'coliage', False),
        ('phase_features', 'phase', False),
        ('vessel_features', 'vessel', False),
        ('log_features', 'log', False),
    ]
    for key, label, mandatory in feature_groups:
        if not mandatory and key not in parameters:
            continue
        ratio = _ratio(parameters[key], 'True')
        print(label + ": " + str(ratio))
        output[key] = ratio

    # Texture features are not boolean: count every possible setting.
    # (name used in the label and output key, setting value that is counted)
    texture_settings = [
        ('all', 'True'),
        ('no', 'False'),
        ('Gabor', 'Gabor'),
        ('LBP', 'LBP'),
        ('GLCM', 'GLCM'),
        ('GLRLM', 'GLRLM'),
        ('GLSZM', 'GLSZM'),
        ('NGTDM', 'NGTDM'),
    ]
    texture = parameters['texture_features']
    for name, setting in texture_settings:
        ratio = _ratio(texture, setting)
        print("texture_" + name + ": " + str(ratio))
        output['texture_' + name + '_features'] = ratio

    if 'degree' in parameters:
        mean_degree = np.mean(parameters['degree'])
        print("Polynomial Degree: " + str(mean_degree))
        output['polynomial_degree'] = mean_degree

    return output


def main():
    '''
    Command line interface: parse the arguments and call plot_barchart.

    Every option accepts one or more tokens. Multiple tokens are
    concatenated (without separator) for the prediction, label_type and
    output paths; for estimators only the first token is used and converted
    to an integer. If estimators is not given, the default of plot_barchart
    is used.
    '''
    parser = argparse.ArgumentParser(description='Plot a Barchart.')
    parser.add_argument('-prediction', '--prediction', metavar='prediction',
                        nargs='+', dest='prediction', type=str, required=True,
                        help='Prediction file (HDF)')
    parser.add_argument('-estimators', '--estimators', metavar='estimator',
                        nargs='+', dest='estimators', type=str, required=False,
                        help='Number of estimators to evaluate in each cross validation.')
    parser.add_argument('-label_type', '--label_type', metavar='label_type',
                        nargs='+', dest='label_type', type=str, required=False,
                        help='Key of the label which was predicted.')
    parser.add_argument('-output_tex', '--output_tex', metavar='output_tex',
                        nargs='+', dest='output_tex', type=str, required=True,
                        help='Output file path (.tex)')
    parser.add_argument('-output_png', '--output_png', metavar='output_png',
                        nargs='+', dest='output_png', type=str, required=True,
                        help='Output file path (.png)')
    args = parser.parse_args()

    def _joined(value):
        # nargs='+' yields a list of tokens; glue them into a single string
        return ''.join(value) if isinstance(value, list) else value

    # Convert the inputs to the correct format
    kwargs = dict(prediction=_joined(args.prediction),
                  label_type=_joined(args.label_type),
                  output_tex=_joined(args.output_tex),
                  output_png=_joined(args.output_png))

    # Only forward estimators when given, so that the default of
    # plot_barchart applies otherwise instead of passing None.
    if isinstance(args.estimators, list):
        kwargs['estimators'] = int(args.estimators[0])

    plot_barchart(**kwargs)


# Run the command line interface only when this file is executed as a script
if __name__ == '__main__':
    main()