# Source code for mgkit.plots.boxplot

"""
.. versionadded:: 0.1.14

Code related to boxplots
"""
from __future__ import division
from builtins import zip
import logging
import numpy
from ..utils.common import deprecated
from .colors import float_to_hex_color

LOG = logging.getLogger(__name__)

from matplotlib.patches import Polygon

try:
    import seaborn as sns
except ImportError:
    sns = None

# Fallback font properties for the tick labels drawn by the boxplot
# functions; any key missing from a caller supplied *fonts* dictionary is
# taken from here.
DEFAULT_BOXPLOT_FONTCONF = {'rotation': 'vertical', 'fontsize': 8}

# Fallback colours, keyed by the boxplot element they are applied to; any key
# missing from a caller supplied *colours* dictionary is taken from here.
DEFAULT_BOXPLOT_COLOURS = {
    # outline (and fill, when requested) of each box
    'boxes': '#636363',
    # median line, light so it stands out on a filled box
    'medians': '#f0f0f0',
    'whiskers': '#636363',
    'caps': 'black',
    # markers of the outliers
    'fliers': '#636363',
    # NOTE(review): not read by the functions visible in this module
    'vals': '#636363',
}


def boxplot_dataframe_multindex(dataframe, axes, plot_order=None,
                                label_map=None, fonts=None, fill_box=True,
                                colours=None, data_colours=None,
                                box_vert=True):
    """
    .. versionadded:: 0.1.13

    Draws a series of boxplots from a DataFrame whose index has two levels.
    Each row of the DataFrame holds the values of one boxplot; the first
    index level is used for the tick labels (one label per group of boxes)
    and the second level is the category used to pick the colour of a box.

    :param dataframe: dataframe to plot, rows are looked up with
        `dataframe.loc`
    :param axes: an axes instance
    :param iterable plot_order: row order used to plot the boxes, each
        element being a `(label, category)` key; defaults to
        `dataframe.index`
    :param label_map: sequence of labels passed unchanged to the tick label
        setter, one per group of boxes; by default the unique values of the
        first index level, in order of appearance, are used
    :param dict fonts: dictionary with the *rotation* and *fontsize* of the
        tick labels, missing keys are taken from
        :data:`DEFAULT_BOXPLOT_FONTCONF`
    :param bool fill_box: if True each box is filled with the same colour of
        its outline
    :param dict colours: dictionary with the colour of each boxplot element,
        missing keys are taken from :data:`DEFAULT_BOXPLOT_COLOURS`; the
        *boxes* colour is only used if no *data_colours* is available
    :param dict data_colours: dictionary of colours for each category (second
        index level); if None and seaborn is installed, a "hls" palette
        with one colour per category is generated
    :param bool box_vert: if False the boxplots are drawn horizontally

    :return: the plot data; same as matplotlib boxplot function
    """
    # Only the keys known to the defaults are kept, user values win
    colours = dict(
        (feature, colours[feature])
        if (colours is not None) and (feature in colours)
        else (feature, default)
        for feature, default in DEFAULT_BOXPLOT_COLOURS.items()
    )
    fonts = dict(
        (feature, fonts[feature])
        if (fonts is not None) and (feature in fonts)
        else (feature, default)
        for feature, default in DEFAULT_BOXPLOT_FONTCONF.items()
    )

    categories = set(dataframe.index.get_level_values(1))

    if (data_colours is None) and (sns is not None):
        data_colours = dict(
            zip(categories, sns.color_palette("hls", len(categories)))
        )

    if plot_order is None:
        plot_order = dataframe.index
    # plot_order is walked more than once, a one-shot iterable (generator)
    # would be exhausted after drawing the boxes
    plot_order = list(plot_order)

    if label_map is None:
        # unique labels of the first level, in order of appearance
        label_map = []
        for label in dataframe.index.get_level_values(0):
            if label not in label_map:
                label_map.append(label)

    plot_data = axes.boxplot(
        [dataframe.loc[row_id].dropna() for row_id in plot_order],
        vert=box_vert
    )

    for idx, row_id in enumerate(plot_order):
        category = row_id[1]
        box_colour = data_colours[category] if data_colours \
            else colours['boxes']

        box = plot_data['boxes'][idx]
        box.set_color(box_colour)

        if fill_box:
            # the box is only an outline, a patch with the same coordinates
            # is added to fill it
            box_coord = list(zip(box.get_xdata(), box.get_ydata()))
            axes.add_patch(Polygon(box_coord, facecolor=box_colour))

        plot_data['medians'][idx].set_color(colours['medians'])

    # Whiskers and caps have a different length from plot_order (two per box)
    for whisker, cap in zip(plot_data['whiskers'], plot_data['caps']):
        whisker.set_color(colours['whiskers'])
        cap.set_color(colours['caps'])

    for flier in plot_data['fliers']:
        flier.set_markerfacecolor(colours['fliers'])

    if box_vert:
        ltick_setfunc = axes.set_xticklabels
        vtick_getfunc = axes.get_yticklabels
        ptick_setfunc = axes.set_xticks
    else:
        ltick_setfunc = axes.set_yticklabels
        vtick_getfunc = axes.get_xticklabels
        ptick_setfunc = axes.set_yticks

    # One tick per group, placed at the centre of its boxes. The boxes are
    # drawn at positions 1..N, so the (exclusive) stop must be N + 1 or the
    # last box loses its tick when there is a single category.
    group_size = len(categories)
    first_tick = numpy.arange(1, group_size + 1).mean()
    ptick_setfunc(
        numpy.arange(first_tick, len(dataframe.index) + 1, group_size)
    )
    ltick_setfunc(
        label_map,
        rotation=fonts['rotation'],
        fontsize=fonts['fontsize']
    )
    # the value axis uses the same font size as the labels
    for label in vtick_getfunc():
        label.set_fontsize(fonts['fontsize'])

    return plot_data
def add_values_to_boxplot(dataframe, ax, plot_data, plot_order,
                          data_colours=None, alpha=0.5, s=80, marker='o',
                          linewidth=0.01, box_vert=False):
    """
    .. versionadded:: 0.1.13

    .. versionchanged:: 0.1.14
        added *box_vert* parameter

    .. versionchanged:: 0.1.16
        changed default value for *linewidth*

    Adds the values of a dataframe used in :func:`boxplot_dataframe` to the
    plot. *linewidth* must be higher than 0 if a marker like *|* is used.

    A list of markers is available at
    `this page <http://matplotlib.org/api/markers_api.html>`_

    .. warning::

        Contrary to :func:`boxplot_dataframe`, the boxplot default is
        horizontal (*box_vert*). The default will change in a later version.

    Arguments:
        dataframe: dataframe with the values to plot
        ax: an axis instance
        plot_data: return value from :func:`boxplot_dataframe`
        plot_order (iterable): row order used to plot the boxes
        data_colours (dict): colors used for the values, keyed by the
            elements of *plot_order*; if None or empty, the *boxes* colour
            of :data:`DEFAULT_BOXPLOT_COLOURS` is used for all values
        alpha (float): alpha value for the colour
        s (int): size of the marker drawn
        marker (str): one of the accepted matplotlib markers
        linewidth (float): width of the line used to draw the marker
        box_vert (bool): specify if the original boxplot is vertical or not
    """
    if data_colours:
        # in case each color is a tuple of rgb floats, it converts them into
        # strings to avoid matplotlib to confuse the single color as
        # different shades of grey. It only happens when the number of data
        # points in a row is 3
        first_colour = next(iter(data_colours.values()))
        if not isinstance(first_colour, str):
            data_colours = dict(
                (key, float_to_hex_color(*value))
                for key, value in data_colours.items()
            )

    for index, row_id in enumerate(plot_order):
        values = dataframe.loc[row_id].dropna()
        median_line = plot_data['medians'][index]

        # The values are aligned on the centre of the median line of their
        # box, along the axis that holds the boxes
        if box_vert:
            x = [median_line.get_xdata().mean()] * len(values)
            y = values
        else:
            x = values
            y = [median_line.get_ydata().mean()] * len(values)

        if data_colours:
            color = data_colours[row_id]
        else:
            color = DEFAULT_BOXPLOT_COLOURS['boxes']

        ax.scatter(
            x,
            y,
            c=color,
            alpha=alpha,
            s=s,
            marker=marker,
            linewidth=linewidth,
            # this option put the dots below the lines of the boxplot
            zorder=1
        )
def boxplot_dataframe(dataframe, plot_order, ax, label_map=None, fonts=None,
                      fill_box=True, colours=None, data_colours=None,
                      box_vert=True, widths=0.5):
    """
    .. versionadded:: 0.1.7
        To move from an all-in-one drawing to a more modular one.

    .. versionchanged:: 0.1.13
        added box_vert parameter

    .. versionchanged:: 0.1.16
        added *widths* parameter

    The function draws a series of boxplots from a DataFrame object, whose
    order is directed by the iterable plot_order. The columns of each
    DataFrame row contains the values for each boxplot. An ax object is
    needed.

    :param dataframe: dataframe to plot
    :param iterable plot_order: row order used to plot the boxes
    :param ax: an axis instance
    :param dict label_map: a map that converts the items in plot_order to a
        label used on the plot axis; if None the items of plot_order are
        used as labels
    :param dict fonts: dictionary with the *rotation* and *fontsize* of the
        tick labels, missing keys are taken from
        :data:`DEFAULT_BOXPLOT_FONTCONF`
    :param bool fill_box: if True each box is filled with the same colour of
        its outline
    :param dict colours: dictionary with the colour of each boxplot element,
        missing keys are taken from :data:`DEFAULT_BOXPLOT_COLOURS`; the
        *boxes* colour is only used if *data_colours* is None or empty
    :param dict data_colours: dictionary of colours for each boxplot, keyed
        by the items of plot_order; a set of colours can be obtained using
        :func:`map_taxon_to_colours`
    :param bool box_vert: if False the boxplots are drawn horizontally
    :param float widths: width (scalar or array) of the boxplots width(s)

    :return: the plot data; same as matplotlib boxplot function
    """
    # Only the keys known to the defaults are kept, user values win
    colours = dict(
        (feature, colours[feature])
        if (colours is not None) and (feature in colours)
        else (feature, default)
        for feature, default in DEFAULT_BOXPLOT_COLOURS.items()
    )
    fonts = dict(
        (feature, fonts[feature])
        if (fonts is not None) and (feature in fonts)
        else (feature, default)
        for feature, default in DEFAULT_BOXPLOT_FONTCONF.items()
    )

    # plot_order is walked three times (boxes, colours, labels), a one-shot
    # iterable (generator) would be exhausted after drawing the boxes
    plot_order = list(plot_order)

    plot_data = ax.boxplot(
        [dataframe.loc[row_id].dropna() for row_id in plot_order],
        vert=box_vert,
        widths=widths
    )

    for idx, row_id in enumerate(plot_order):
        box_colour = data_colours[row_id] if data_colours \
            else colours['boxes']

        box = plot_data['boxes'][idx]
        box.set_color(box_colour)

        if fill_box:
            # the box is only an outline, a patch with the same coordinates
            # is added to fill it
            box_coord = list(zip(box.get_xdata(), box.get_ydata()))
            ax.add_patch(Polygon(box_coord, facecolor=box_colour))

        plot_data['medians'][idx].set_color(colours['medians'])

    # Whiskers and caps have a different length from plot_order (two per box)
    for whisker, cap in zip(plot_data['whiskers'], plot_data['caps']):
        whisker.set_color(colours['whiskers'])
        cap.set_color(colours['caps'])

    # The fliers have their own key: using the whiskers' colour made any
    # 'fliers' value passed with *colours* ignored
    for flier in plot_data['fliers']:
        flier.set_color(colours['fliers'])

    if box_vert:
        ltick_setfunc = ax.set_xticklabels
        vtick_getfunc = ax.get_yticklabels
    else:
        ltick_setfunc = ax.set_yticklabels
        vtick_getfunc = ax.get_xticklabels

    ltick_setfunc(
        [
            label if label_map is None else label_map[label]
            for label in plot_order
        ],
        rotation=fonts['rotation'],
        fontsize=fonts['fontsize']
    )
    # the value axis uses the same font size as the labels
    for label in vtick_getfunc():
        label.set_fontsize(fonts['fontsize'])

    return plot_data
def add_significance_to_boxplot(sign_indices, ax, pos, box_vert=True,
                                fontsize=16):
    """
    .. versionadded:: 0.1.16

    Draws a bracket, marked with a star, for each pair of boxplots that is
    significantly different. Each bracket after the first one is shifted
    back by 2.5 times the bracket depth, so they do not overlap.

    Arguments:
        sign_indices (iterable): iterable of tuples with two elements, the
            zero-based numerical indices of the two boxplots to connect
        ax: an axis instance
        pos (tuple): two coordinates on the value axis, in order the one of
            the line connecting the two boxplots (also used to place the
            star) and the one where the two short sides of the bracket end
        box_vert (bool): if the boxplot is vertical
        fontsize (float): size for the * (star)
    """
    top, bottom = pos
    # distance between two consecutive brackets
    step = (top - bottom) * 2.5

    for rank, (first, second) in enumerate(sign_indices):
        shift = rank * step
        # bracket outline on the value axis: up, across, and down again
        heights = numpy.array([bottom, top, top, bottom]) - shift
        # boxplots are drawn at positions starting from 1
        slots = [first + 1, first + 1, second + 1, second + 1]
        star = top - shift

        if box_vert:
            ax.plot(slots, heights, linestyle='-', c='k', alpha=.75)
            ax.text(numpy.mean(slots), star, '*', fontsize=fontsize)
        else:
            ax.plot(heights, slots, linestyle='-', c='k', alpha=.75)
            ax.text(star, numpy.mean(slots), '*', fontsize=fontsize)
# Names exported by `from mgkit.plots.boxplot import *`
__all__ = [
    'add_values_to_boxplot',
    'add_significance_to_boxplot',
    'boxplot_dataframe_multindex',
    'boxplot_dataframe',
]