diff --git a/doc/api/api_changes/2016-03-13-PMH.rst b/doc/api/api_changes/2016-03-13-PMH.rst new file mode 100644 index 00000000000..209df726cd0 --- /dev/null +++ b/doc/api/api_changes/2016-03-13-PMH.rst @@ -0,0 +1,10 @@ +Changed default ``autorange`` behavior in boxplots +`````````````````````````````````````````````````` + +Prior to v1.5.2, the whiskers of boxplots would extend to the minimum +and maximum values if the quartiles were all equal (i.e., Q1 = median += Q3). This behavior has been disabled by default to restore consistency +with other plotting packages. + +To restore the old behavior, simply set ``autorange=True`` when +calling ``plt.boxplot``. diff --git a/lib/matplotlib/axes/_axes.py b/lib/matplotlib/axes/_axes.py index f923c8ca0fe..21fa636ba52 100644 --- a/lib/matplotlib/axes/_axes.py +++ b/lib/matplotlib/axes/_axes.py @@ -3050,9 +3050,10 @@ def boxplot(self, x, notch=None, sym=None, vert=None, whis=None, positions=None, widths=None, patch_artist=None, bootstrap=None, usermedians=None, conf_intervals=None, meanline=None, showmeans=None, showcaps=None, - showbox=None, showfliers=None, boxprops=None, labels=None, - flierprops=None, medianprops=None, meanprops=None, - capprops=None, whiskerprops=None, manage_xticks=True): + showbox=None, showfliers=None, boxprops=None, + labels=None, flierprops=None, medianprops=None, + meanprops=None, capprops=None, whiskerprops=None, + manage_xticks=True, autorange=False): """ Make a box and whisker plot. 
@@ -3062,12 +3063,13 @@ def boxplot(self, x, notch=None, sym=None, vert=None, whis=None, positions=None, widths=None, patch_artist=False, bootstrap=None, usermedians=None, conf_intervals=None, meanline=False, showmeans=False, showcaps=True, - showbox=True, showfliers=True, boxprops=None, labels=None, - flierprops=None, medianprops=None, meanprops=None, - capprops=None, whiskerprops=None, manage_xticks=True): + showbox=True, showfliers=True, boxprops=None, + labels=None, flierprops=None, medianprops=None, + meanprops=None, capprops=None, whiskerprops=None, + manage_xticks=True, autorange=False): - Make a box and whisker plot for each column of *x* or each - vector in sequence *x*. The box extends from the lower to + Make a box and whisker plot for each column of ``x`` or each + vector in sequence ``x``. The box extends from the lower to upper quartile values of the data, with a line at the median. The whiskers extend from the box to show the range of the data. Flier points are those past the end of the whiskers. @@ -3075,140 +3077,145 @@ def boxplot(self, x, notch=None, sym=None, vert=None, whis=None, Parameters ---------- x : Array or a sequence of vectors. - The input data. - - notch : bool, default = False - If False, produces a rectangular box plot. - If True, will produce a notched box plot - - sym : str or None, default = None - The default symbol for flier points. - Enter an empty string ('') if you don't want to show fliers. - If `None`, then the fliers default to 'b+' If you want more - control use the flierprops kwarg. - - vert : bool, default = True - If True (default), makes the boxes vertical. - If False, makes horizontal boxes. - - whis : float, sequence (default = 1.5) or string - As a float, determines the reach of the whiskers past the first - and third quartiles (e.g., Q3 + whis*IQR, IQR = interquartile - range, Q3-Q1). Beyond the whiskers, data are considered outliers - and are plotted as individual points. 
Set this to an unreasonably - high value to force the whiskers to show the min and max values. - Alternatively, set this to an ascending sequence of percentile - (e.g., [5, 95]) to set the whiskers at specific percentiles of - the data. Finally, *whis* can be the string 'range' to force the - whiskers to the min and max of the data. In the edge case that - the 25th and 75th percentiles are equivalent, *whis* will be - automatically set to 'range'. - - bootstrap : None (default) or integer - Specifies whether to bootstrap the confidence intervals - around the median for notched boxplots. If bootstrap==None, - no bootstrapping is performed, and notches are calculated - using a Gaussian-based asymptotic approximation (see McGill, R., - Tukey, J.W., and Larsen, W.A., 1978, and Kendall and Stuart, - 1967). Otherwise, bootstrap specifies the number of times to - bootstrap the median to determine it's 95% confidence intervals. - Values between 1000 and 10000 are recommended. - - usermedians : array-like or None (default) - An array or sequence whose first dimension (or length) is - compatible with *x*. This overrides the medians computed by - matplotlib for each element of *usermedians* that is not None. - When an element of *usermedians* == None, the median will be - computed by matplotlib as normal. - - conf_intervals : array-like or None (default) - Array or sequence whose first dimension (or length) is compatible - with *x* and whose second dimension is 2. When the current element - of *conf_intervals* is not None, the notch locations computed by - matplotlib are overridden (assuming notch is True). When an - element of *conf_intervals* is None, boxplot compute notches the - method specified by the other kwargs (e.g., *bootstrap*). - - positions : array-like, default = [1, 2, ..., n] - Sets the positions of the boxes. The ticks and limits - are automatically set to match the positions. 
- - widths : array-like, default = 0.5 - Either a scalar or a vector and sets the width of each box. The - default is 0.5, or ``0.15*(distance between extreme positions)`` - if that is smaller. - - labels : sequence or None (default) - Labels for each dataset. Length must be compatible with - dimensions of *x* - - patch_artist : bool, default = False - If False produces boxes with the Line2D artist - If True produces boxes with the Patch artist - - showmeans : bool, default = False - If True, will toggle on the rendering of the means - - showcaps : bool, default = True - If True, will toggle on the rendering of the caps - - showbox : bool, default = True - If True, will toggle on the rendering of the box - - showfliers : bool, default = True - If True, will toggle on the rendering of the fliers - - boxprops : dict or None (default) - If provided, will set the plotting style of the boxes - - whiskerprops : dict or None (default) - If provided, will set the plotting style of the whiskers + The input data. + + notch : bool, optional (False) + If `True`, will produce a notched box plot. Otherwise, a + rectangular boxplot is produced. + + sym : str, optional + The default symbol for flier points. Enter an empty string + ('') if you don't want to show fliers. If `None`, then the + fliers default to 'b+' If you want more control use the + flierprops kwarg. + + vert : bool, optional (True) + If `True` (default), makes the boxes vertical. If `False`, + everything is drawn horizontally. + + whis : float, sequence, or string (default = 1.5) + As a float, determines the reach of the whiskers past the + first and third quartiles (e.g., Q3 + whis*IQR, + IQR = interquartile range, Q3-Q1). Beyond the whiskers, data + are considered outliers and are plotted as individual + points. Set this to an unreasonably high value to force the + whiskers to show the min and max values. 
Alternatively, set + this to an ascending sequence of percentiles (e.g., [5, 95]) + to set the whiskers at specific percentiles of the data. + Finally, ``whis`` can be the string ``'range'`` to force the + whiskers to the min and max of the data. + + bootstrap : int, optional + Specifies whether to bootstrap the confidence intervals + around the median for notched boxplots. If `bootstrap` is None, + no bootstrapping is performed, and notches are calculated + using a Gaussian-based asymptotic approximation (see McGill, + R., Tukey, J.W., and Larsen, W.A., 1978, and Kendall and + Stuart, 1967). Otherwise, bootstrap specifies the number of + times to bootstrap the median to determine its 95% + confidence intervals. Values between 1000 and 10000 are + recommended. + + usermedians : array-like, optional + An array or sequence whose first dimension (or length) is + compatible with ``x``. This overrides the medians computed + by matplotlib for each element of ``usermedians`` that is not + `None`. When an element of ``usermedians`` is None, the median + will be computed by matplotlib as normal. + + conf_intervals : array-like, optional + Array or sequence whose first dimension (or length) is + compatible with ``x`` and whose second dimension is 2. When + an element of ``conf_intervals`` is not None, the + notch locations computed by matplotlib are overridden + (provided ``notch`` is `True`). When an element of + ``conf_intervals`` is `None`, the notches are computed by the + method specified by the other kwargs (e.g., ``bootstrap``). + + positions : array-like, optional + Sets the positions of the boxes. The ticks and limits are + automatically set to match the positions. Defaults to + `range(1, N+1)` where N is the number of boxes to be drawn. + + widths : scalar or array-like + Sets the width of each box either with a scalar or a + sequence. The default is 0.5, or ``0.15*(distance between + extreme positions)``, if that is smaller. 
+ + patch_artist : bool, optional (False) + If `False` produces boxes with the Line2D artist. Otherwise, + boxes are drawn with Patch artists. + + labels : sequence, optional + Labels for each dataset. Length must be compatible with + dimensions of ``x``. + + manage_xticks : bool, optional (True) + If the function should adjust the xlim and xtick locations. - capprops : dict or None (default) - If provided, will set the plotting style of the caps + autorange : bool, optional (False) + When `True` and the data are distributed such that the 25th and + 75th percentiles are equal, ``whis`` is set to ``'range'`` such + that the whisker ends are at the minimum and maximum of the + data. + + meanline : bool, optional (False) + If `True` (and ``showmeans`` is `True`), will try to render + the mean as a line spanning the full width of the box + according to ``meanprops`` (see below). Not recommended if + ``notch`` is also True. Otherwise, means will be shown + as points. + + Additional Options + --------------------- + The following boolean options toggle the drawing of individual + components of the boxplots: + - showcaps: the caps on the ends of whiskers + (default is True) + - showbox: the central box (default is True) + - showfliers: the outliers beyond the caps (default is True) + - showmeans: the arithmetic means (default is False) + + The remaining options can accept dictionaries that specify the + style of the individual artists: + - capprops + - boxprops + - whiskerprops + - flierprops + - medianprops + - meanprops - flierprops : dict or None (default) - If provided, will set the plotting style of the fliers + Returns + ------- + result : dict + A dictionary mapping each component of the boxplot to a list + of the :class:`matplotlib.lines.Line2D` instances + created. 
That dictionary has the following keys (assuming + vertical boxplots): - medianprops : dict or None (default) - If provided, will set the plotting style of the medians + - ``boxes``: the main body of the boxplot showing the + quartiles and the median's confidence intervals if + enabled. - meanprops : dict or None (default) - If provided, will set the plotting style of the means + - ``medians``: horizontal lines at the median of each box. - meanline : bool, default = False - If True (and *showmeans* is True), will try to render the mean - as a line spanning the full width of the box according to - *meanprops*. Not recommended if *shownotches* is also True. - Otherwise, means will be shown as points. + - ``whiskers``: the vertical lines extending to the most + extreme, non-outlier data points. - manage_xticks : bool, default = True - If the function should adjust the xlim and xtick locations. + - ``caps``: the horizontal lines at the ends of the + whiskers. - Returns - ------- + - ``fliers``: points representing data that extend beyond + the whiskers (fliers). - result : dict - A dictionary mapping each component of the boxplot - to a list of the :class:`matplotlib.lines.Line2D` - instances created. That dictionary has the following keys - (assuming vertical boxplots): - - - boxes: the main body of the boxplot showing the quartiles - and the median's confidence intervals if enabled. - - medians: horizonal lines at the median of each box. - - whiskers: the vertical lines extending to the most extreme, - n-outlier data points. - - caps: the horizontal lines at the ends of the whiskers. - - fliers: points representing data that extend beyond the - whiskers (outliers). - - means: points or lines representing the means. + - ``means``: points or lines representing the means. Examples -------- - .. 
plot:: mpl_examples/statistics/boxplot_demo.py + """ + # If defined in matplotlibrc, apply the value from rc file # Overridden if argument is passed if whis is None: @@ -3216,7 +3223,7 @@ def boxplot(self, x, notch=None, sym=None, vert=None, whis=None, if bootstrap is None: bootstrap = rcParams['boxplot.bootstrap'] bxpstats = cbook.boxplot_stats(x, whis=whis, bootstrap=bootstrap, - labels=labels) + labels=labels, autorange=autorange) if notch is None: notch = rcParams['boxplot.notch'] if vert is None: @@ -3462,10 +3469,10 @@ def bxp(self, bxpstats, positions=None, widths=None, vert=True, quartiles and the median's confidence intervals if enabled. - - ``medians``: horizonal lines at the median of each box. + - ``medians``: horizontal lines at the median of each box. - ``whiskers``: the vertical lines extending to the most - extreme, n-outlier data points. + extreme, non-outlier data points. - ``caps``: the horizontal lines at the ends of the whiskers. diff --git a/lib/matplotlib/cbook.py b/lib/matplotlib/cbook.py index f122e975c80..67309a6c610 100644 --- a/lib/matplotlib/cbook.py +++ b/lib/matplotlib/cbook.py @@ -1760,39 +1760,46 @@ def delete_masked_points(*args): return margs -def boxplot_stats(X, whis=1.5, bootstrap=None, labels=None): - ''' - Returns list of dictionaries of staticists to be use to draw a series of - box and whisker plots. See the `Returns` section below to the required - keys of the dictionary. Users can skip this function and pass a user- - defined set of dictionaries to the new `axes.bxp` method instead of - relying on MPL to do the calcs. +def boxplot_stats(X, whis=1.5, bootstrap=None, labels=None, + autorange=False): + """ + Returns list of dictionaries of statistics used to draw a series + of box and whisker plots. The `Returns` section enumerates the + required keys of the dictionary. Users can skip this function and + pass a user-defined set of dictionaries to the new `axes.bxp` method + instead of relying on MPL to do the calculations. 
Parameters ---------- X : array-like - Data that will be represented in the boxplots. Should have 2 or fewer - dimensions. + Data that will be represented in the boxplots. Should have 2 or + fewer dimensions. whis : float, string, or sequence (default = 1.5) - As a float, determines the reach of the whiskers past the first and - third quartiles (e.g., Q3 + whis*IQR, QR = interquartile range, Q3-Q1). - Beyond the whiskers, data are considered outliers and are plotted as - individual points. Set this to an unreasonably high value to force the - whiskers to show the min and max data. Alternatively, set this to an - ascending sequence of percentile (e.g., [5, 95]) to set the whiskers - at specific percentiles of the data. Finally, can `whis` be the - string 'range' to force the whiskers to the min and max of the data. - In the edge case that the 25th and 75th percentiles are equivalent, - `whis` will be automatically set to 'range' - - bootstrap : int or None (default) - Number of times the confidence intervals around the median should - be bootstrapped (percentile method). - - labels : sequence - Labels for each dataset. Length must be compatible with dimensions - of `X` + As a float, determines the reach of the whiskers past the first + and third quartiles (e.g., Q3 + whis*IQR, IQR = interquartile + range, Q3-Q1). Beyond the whiskers, data are considered outliers + and are plotted as individual points. This can also be set to an + ascending sequence of percentiles (e.g., [5, 95]) to set the + whiskers at specific percentiles of the data. Finally, `whis` + can be the string ``'range'`` to force the whiskers to the + minimum and maximum of the data. In the edge case that the 25th + and 75th percentiles are equivalent, `whis` can be automatically + set to ``'range'`` via the `autorange` option. + + bootstrap : int, optional + Number of times the confidence intervals around the median + should be bootstrapped (percentile method). 
+ + labels : array-like, optional + Labels for each dataset. Length must be compatible with + dimensions of `X`. + + autorange : bool, optional (False) + When `True` and the data are distributed such that the 25th and + 75th percentiles are equal, ``whis`` is set to ``'range'`` such + that the whisker ends are at the minimum and maximum of the + data. Returns ------- @@ -1817,8 +1824,8 @@ def boxplot_stats(X, whis=1.5, bootstrap=None, labels=None): Notes ----- - Non-bootstrapping approach to confidence interval uses Gaussian-based - asymptotic approximation: + Non-bootstrapping approach to confidence interval uses Gaussian- + based asymptotic approximation: .. math:: @@ -1828,7 +1835,7 @@ def boxplot_stats(X, whis=1.5, bootstrap=None, labels=None): McGill, R., Tukey, J.W., and Larsen, W.A. (1978) "Variations of Boxplots", The American Statistician, 32:12-16. - ''' + """ def _bootstrap_median(data, N=5000): # determine 95% confidence intervals of the median @@ -1908,7 +1915,7 @@ def _compute_conf_interval(data, med, iqr, bootstrap): # interquartile range stats['iqr'] = q3 - q1 - if stats['iqr'] == 0: + if stats['iqr'] == 0 and autorange: whis = 'range' # conf. 
interval around median diff --git a/lib/matplotlib/tests/baseline_images/test_axes/boxplot_autorange_false_whiskers.png b/lib/matplotlib/tests/baseline_images/test_axes/boxplot_autorange_false_whiskers.png new file mode 100644 index 00000000000..788c38e8eff Binary files /dev/null and b/lib/matplotlib/tests/baseline_images/test_axes/boxplot_autorange_false_whiskers.png differ diff --git a/lib/matplotlib/tests/baseline_images/test_axes/boxplot_autorange_whiskers.png b/lib/matplotlib/tests/baseline_images/test_axes/boxplot_autorange_true_whiskers.png similarity index 100% rename from lib/matplotlib/tests/baseline_images/test_axes/boxplot_autorange_whiskers.png rename to lib/matplotlib/tests/baseline_images/test_axes/boxplot_autorange_true_whiskers.png diff --git a/lib/matplotlib/tests/baseline_images/test_axes/boxplot_autorange_whiskers.pdf b/lib/matplotlib/tests/baseline_images/test_axes/boxplot_autorange_whiskers.pdf deleted file mode 100644 index 933a067b2d0..00000000000 Binary files a/lib/matplotlib/tests/baseline_images/test_axes/boxplot_autorange_whiskers.pdf and /dev/null differ diff --git a/lib/matplotlib/tests/baseline_images/test_axes/boxplot_autorange_whiskers.svg b/lib/matplotlib/tests/baseline_images/test_axes/boxplot_autorange_whiskers.svg deleted file mode 100644 index 6e3a395ea6e..00000000000 --- a/lib/matplotlib/tests/baseline_images/test_axes/boxplot_autorange_whiskers.svg +++ /dev/null @@ -1,374 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/lib/matplotlib/tests/test_axes.py b/lib/matplotlib/tests/test_axes.py index 
df0e48e0cef..60f2b1d2f1e 100644 --- a/lib/matplotlib/tests/test_axes.py +++ b/lib/matplotlib/tests/test_axes.py @@ -1785,14 +1785,22 @@ def test_boxplot_sym(): ax.set_ylim((-30, 30)) -@image_comparison(baseline_images=['boxplot_autorange_whiskers']) +@image_comparison( + baseline_images=['boxplot_autorange_false_whiskers', + 'boxplot_autorange_true_whiskers'], + extensions=['png'], +) def test_boxplot_autorange_whiskers(): x = np.ones(140) x = np.hstack([0, x, 2]) - fig, ax = plt.subplots() - - ax.boxplot([x, x], bootstrap=10000, notch=1) - ax.set_ylim((-5, 5)) + + fig1, ax1 = plt.subplots() + ax1.boxplot([x, x], bootstrap=10000, notch=1) + ax1.set_ylim((-5, 5)) + + fig2, ax2 = plt.subplots() + ax2.boxplot([x, x], bootstrap=10000, notch=1, autorange=True) + ax2.set_ylim((-5, 5)) def _rc_test_bxp_helper(ax, rc_dict): x = np.linspace(-7, 7, 140) diff --git a/lib/matplotlib/tests/test_cbook.py b/lib/matplotlib/tests/test_cbook.py index b7d2e62f1aa..f80af590623 100644 --- a/lib/matplotlib/tests/test_cbook.py +++ b/lib/matplotlib/tests/test_cbook.py @@ -265,6 +265,20 @@ def test_bad_dims(self): data = np.random.normal(size=(34, 34, 34)) results = cbook.boxplot_stats(data) + def test_boxplot_stats_autorange_false(self): + x = np.zeros(shape=140) + x = np.hstack([-25, x, 25]) + bstats_false = cbook.boxplot_stats(x, autorange=False) + bstats_true = cbook.boxplot_stats(x, autorange=True) + + assert_equal(bstats_false[0]['whislo'], 0) + assert_equal(bstats_false[0]['whishi'], 0) + assert_array_almost_equal(bstats_false[0]['fliers'], [-25, 25]) + + assert_equal(bstats_true[0]['whislo'], -25) + assert_equal(bstats_true[0]['whishi'], 25) + assert_array_almost_equal(bstats_true[0]['fliers'], []) + class Test_callback_registry(object): def setup(self):