# Source code for WORC.plotting.plot_pvalues_features

#!/usr/bin/env python

# Copyright 2016-2023 Biomedical Imaging Group Rotterdam, Departments of
# Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt

import numpy as np
import tikzplotlib


def _first_decade_below(value, max_exponent=100):
    """Return the smallest integer ``i >= 0`` such that ``10**-i < value``.

    Falls back to ``max_exponent`` when no exponent in
    ``range(max_exponent)`` qualifies (value is zero, negative, NaN or
    extremely small), so the caller always receives a usable integer.
    """
    for exponent in range(max_exponent):
        if 10**(-exponent) < value:
            return exponent
    return max_exponent


def manhattan_importance(values, labels, feature_labels,
                         output_png=None, output_tex=None,
                         mapping=None, threshold_annotated=0.05):
    """Draw a Manhattan-style scatter plot of per-feature values.

    The values are plotted on an inverted logarithmic y-axis (labelled as
    Mann-Whitney U p-values), one point per feature. Features are coloured
    per group and consecutive groups are separated by a dotted vertical line.

    Parameters
    ----------
    values : sequence of numbers
        One value per feature. Converted to a floating point numpy array.
    labels : sequence
        Group label of each feature, in the same order as ``values``.
        Features of one group are expected to be contiguous; groups are
        laid out in order of first appearance.
    feature_labels : sequence of str
        Text used to annotate a feature whose value is below
        ``threshold_annotated``.
    output_png : str, optional
        If given, the figure is saved to this path with ``plt.savefig``.
    output_tex : str, optional
        If given, the figure is exported to this path with ``tikzplotlib``.
    mapping : mapping, optional
        Maps a group label to its x-tick text. When None, the groups are
        numbered 1..n on the x-axis.
    threshold_annotated : float, optional
        Points with a value strictly below this threshold are annotated.
        A dashed magenta line is drawn at the threshold, or at the lower
        axis bound when the threshold lies below it.

    Returns
    -------
    matplotlib.figure.Figure
        The created figure.
    """
    f = plt.figure(figsize=(20, 10))

    values = np.asarray(values)
    if not np.issubdtype(values.dtype, np.floating):
        # np.finfo below only accepts floating point dtypes
        values = values.astype(float)
    n_points = len(values)

    # Keep the groups in order of first appearance. A set has no defined
    # iteration order, while both the x-shift per group and the tick labels
    # rely on the order in which the groups occur along the x-axis.
    ordered_labels = list(dict.fromkeys(labels))
    group_index = {lab: idx for idx, lab in enumerate(ordered_labels)}
    if len(labels) < n_points:
        raise ValueError('labels must contain at least one entry per value.')

    # Shift every point by the index of its group, which leaves one free
    # x-position between consecutive groups for the separating vline.
    shifts = np.array([group_index[lab] for lab in labels[:n_points]],
                      dtype=int)
    positions = np.arange(n_points) + shifts

    palette = ['#7dcfe2', '#4b78b5', 'darkgrey', 'dimgray']
    for lnum in range(len(ordered_labels)):
        in_group = shifts == lnum
        plt.scatter(positions[in_group], values[in_group],
                    c=palette[lnum % len(palette)])

    # Walk over the points to find where each run of equal labels ends
    label_previous = labels[0]
    pos_previous = positions[0]
    color_end = []   # x-tick position of each run
    run_labels = []  # label of each run, aligned with color_end
    vlines = []      # x-positions of the separators between runs
    for lab, p in zip(labels, positions):
        if lab != label_previous:
            # New run starts here
            color_end.append((pos_previous + p) / 2.0)
            run_labels.append(label_previous)
            label_previous = lab
            pos_previous = p

            # The free position just before the new run holds the vline
            vlines.append(p - 1)

    # Add the last run
    color_end.append((pos_previous + p) / 2.0)
    run_labels.append(label_previous)

    # Decide y-limits as powers of ten, ignoring NaN entries
    valid = values[~np.isnan(values)]
    positive = valid[valid > 0]
    eps = np.finfo(values.dtype).eps
    yposmin = max(positive.min(), eps) if positive.size else eps
    ymax = valid.max() if valid.size else yposmin
    ymin = valid.min() if valid.size else 0.0  # might be zero

    ymaxlim = _first_decade_below(ymax if ymax > 0 else yposmin) - 1
    yminlim = _first_decade_below(ymin if ymin > 0 else yposmin)
    y_upper = 10**-ymaxlim
    y_lower = 10**-yminlim

    # Set several figure lay-out options
    plt.gca().invert_yaxis()
    if ymin > 0:
        plt.yscale('log')
        plt.ylim((y_upper, y_lower))
    else:
        # A plain log axis cannot show zero, so use a symlog axis that is
        # linear below the smallest decade.
        plt.yscale('symlog', linthresh=y_lower)
        plt.ylim((y_upper, 0.0))
    x_max = positions.max()
    plt.xlim((0, x_max))

    exponents = range(ymaxlim, yminlim + 1)
    plt.yticks([10**-i for i in exponents],
               [f'10-{i}' for i in exponents])
    if mapping is None:
        # Number the groups as ticks
        plt.xticks(color_end, np.arange(len(color_end)) + 1, size=16)
    else:
        # One tick text per run, so ticks and positions always match
        plt.xticks(color_end, [mapping[lab] for lab in run_labels], size=8)

    plt.vlines(vlines, y_upper, y_lower, linestyles='dotted', linewidth=0.3)

    # Threshold line, clamped to the lower axis bound so it stays visible
    if threshold_annotated > y_lower:
        y_value_annotated = threshold_annotated
    else:
        y_value_annotated = y_lower
    plt.hlines(y_value_annotated, 0, x_max, linestyles='dashed',
               linewidth=1, color='magenta')
    plt.annotate(f'p={round(threshold_annotated, 5)}',
                 (1, y_value_annotated),
                 xytext=(1, y_value_annotated*0.95), size=8, color='magenta')

    # Reference line at p=0.05, unless the threshold line already is that line
    if threshold_annotated != 0.05 and 0.05 > y_lower:
        plt.hlines(0.05, 0, x_max, linestyles='dashed', linewidth=1,
                   color='magenta')
        plt.annotate('p=0.05',
                     (1, 0.05),
                     xytext=(1, 0.05*0.95), size=8, color='magenta')

    plt.xlabel("Feature groups", size=12)
    plt.ylabel("P-value Mann-Whitney U", size=12)

    # Annotate the points with a value below the threshold; these end up
    # above the threshold line because the y-axis is inverted.
    offset = np.clip(n_points / 200, 0.1, 100)
    to_annotate = [(p, v, text)
                   for p, v, text in zip(positions, values, feature_labels)
                   if v < threshold_annotated]

    # Alternate the vertical text offset to reduce overlap between neighbours
    y_offset = -0.1
    for x, y, text in to_annotate:
        plt.annotate(text,
                     (x, y),
                     xytext=(x + offset, y * (1 - y_offset)), size=6)
        y_offset = -y_offset

    axes = plt.gca()
    axes.spines['right'].set_color('none')
    axes.spines['top'].set_color('none')

    if output_png is not None:
        plt.savefig(output_png, bbox_inches='tight', pad_inches=0)
        print(f"Plot saved as {output_png}!")

    if output_tex is not None:
        tikzplotlib.save(output_tex)
        print(f"Plot saved as {output_tex}!")

    return f