diff --git a/examples/statistics/violinplot.py b/examples/statistics/violinplot.py index 19d80b494fba..5d26cc63966e 100644 --- a/examples/statistics/violinplot.py +++ b/examples/statistics/violinplot.py @@ -28,7 +28,7 @@ pos = [1, 2, 4, 5, 7, 8] data = [np.random.normal(0, std, size=100) for std in pos] -fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(6, 6)) +fig, axes = plt.subplots(nrows=2, ncols=5, figsize=(10, 6)) axes[0, 0].violinplot(data, pos, points=20, widths=0.3, showmeans=True, showextrema=True, showmedians=True) @@ -43,19 +43,42 @@ showextrema=True, showmedians=True, bw_method=0.5) axes[0, 2].set_title('Custom violinplot 3', fontsize=fs) +axes[0, 3].violinplot(data, pos, points=60, widths=0.7, showmeans=True, + showextrema=True, showmedians=True, bw_method=0.5, + quantiles=[[0.1], [], [], [0.175, 0.954], [0.75], + [0.25]]) +axes[0, 3].set_title('Custom violinplot 4', fontsize=fs) + +axes[0, 4].violinplot(data[-1:], pos[-1:], points=60, widths=0.7, + showmeans=True, showextrema=True, showmedians=True, + quantiles=[0.05, 0.1, 0.8, 0.9], bw_method=0.5) +axes[0, 4].set_title('Custom violinplot 5', fontsize=fs) + axes[1, 0].violinplot(data, pos, points=80, vert=False, widths=0.7, showmeans=True, showextrema=True, showmedians=True) -axes[1, 0].set_title('Custom violinplot 4', fontsize=fs) +axes[1, 0].set_title('Custom violinplot 6', fontsize=fs) axes[1, 1].violinplot(data, pos, points=100, vert=False, widths=0.9, showmeans=True, showextrema=True, showmedians=True, bw_method='silverman') -axes[1, 1].set_title('Custom violinplot 5', fontsize=fs) +axes[1, 1].set_title('Custom violinplot 7', fontsize=fs) axes[1, 2].violinplot(data, pos, points=200, vert=False, widths=1.1, showmeans=True, showextrema=True, showmedians=True, bw_method=0.5) -axes[1, 2].set_title('Custom violinplot 6', fontsize=fs) +axes[1, 2].set_title('Custom violinplot 8', fontsize=fs) + +axes[1, 3].violinplot(data, pos, points=200, vert=False, widths=1.1, + showmeans=True, showextrema=True, showmedians=True, + quantiles=[[0.1], [], [], [0.175, 0.954], [0.75], + [0.25]], + bw_method=0.5) +axes[1, 3].set_title('Custom violinplot 9', fontsize=fs) + +axes[1, 4].violinplot(data[-1:], pos[-1:], points=200, vert=False, widths=1.1, + showmeans=True, showextrema=True, showmedians=True, + quantiles=[0.05, 0.1, 0.8, 0.9], bw_method=0.5) +axes[1, 4].set_title('Custom violinplot 10', fontsize=fs) for ax in axes.flat: ax.set_yticklabels([]) diff --git a/lib/matplotlib/axes/_axes.py b/lib/matplotlib/axes/_axes.py index 06d945b7cc8f..862c7b7b4eec 100644 --- a/lib/matplotlib/axes/_axes.py +++ b/lib/matplotlib/axes/_axes.py @@ -7880,14 +7880,14 @@ def matshow(self, Z, **kwargs): @_preprocess_data(replace_names=["dataset"]) def violinplot(self, dataset, positions=None, vert=True, widths=0.5, showmeans=False, showextrema=True, showmedians=False, - points=100, bw_method=None): + quantiles=None, points=100, bw_method=None): """ Make a violin plot. Make a violin plot for each column of *dataset* or each vector in sequence *dataset*. Each filled area extends to represent the entire data range, with optional lines at the mean, the median, - the minimum, and the maximum. + the minimum, the maximum, and user-specified quantiles. Parameters ---------- @@ -7916,6 +7916,11 @@ def violinplot(self, dataset, positions=None, vert=True, widths=0.5, showmedians : bool, default = False If `True`, will toggle rendering of the medians. + quantiles : array-like, default = None + If not None, set a list of floats in interval [0, 1] for each violin, + which stands for the quantiles that will be rendered for that + violin. + points : scalar, default = 100 Defines the number of points to evaluate each of the gaussian kernel density estimations at. @@ -7953,6 +7958,10 @@ def violinplot(self, dataset, positions=None, vert=True, widths=0.5, - ``cmedians``: A `~.collections.LineCollection` instance that marks the median values of each of the violin's distribution. + - ``cquantiles``: A `~.collections.LineCollection` instance created + to identify the quantile values of each of the violin's + distribution. + """ def _kde_method(X, coords): @@ -7962,7 +7971,8 @@ def _kde_method(X, coords): kde = mlab.GaussianKDE(X, bw_method) return kde.evaluate(coords) - vpstats = cbook.violin_stats(dataset, _kde_method, points=points) + vpstats = cbook.violin_stats(dataset, _kde_method, points=points, + quantiles=quantiles) return self.violin(vpstats, positions=positions, vert=vert, widths=widths, showmeans=showmeans, showextrema=showextrema, showmedians=showmedians) @@ -7973,7 +7983,7 @@ def violin(self, vpstats, positions=None, vert=True, widths=0.5, Draw a violin plot for each column of `vpstats`. Each filled area extends to represent the entire data range, with optional lines at the - mean, the median, the minimum, and the maximum. + mean, the median, the minimum, the maximum, and the quantiles values. Parameters ---------- @@ -7997,6 +8007,11 @@ def violin(self, vpstats, positions=None, vert=True, widths=0.5, - ``max``: The maximum value for this violin's dataset. + Optional keys are: + + - ``quantiles``: A list of scalars containing the quantile values + for this violin's dataset. + positions : array-like, default = [1, 2, ..., n] Sets the positions of the violins. The ticks and limits are automatically set to match the positions. @@ -8043,6 +8058,11 @@ def violin(self, vpstats, positions=None, vert=True, widths=0.5, - ``cmedians``: A `~.collections.LineCollection` instance that marks the median values of each of the violin's distribution. + + - ``cquantiles``: A `~.collections.LineCollection` instance created + to identify the quantiles values of each of the violin's + distribution. + """ # Statistical quantities to be plotted on the violins @@ -8050,6 +8070,7 @@ def violin(self, vpstats, positions=None, vert=True, widths=0.5, mins = [] maxes = [] medians = [] + quantiles = np.asarray([]) # Collections to be returned artists = {} @@ -8106,6 +8127,10 @@ def violin(self, vpstats, positions=None, vert=True, widths=0.5, mins.append(stats['min']) maxes.append(stats['max']) medians.append(stats['median']) + q = stats.get('quantiles') + if q is not None: + # If exist key quantiles, assume it's a list of floats + quantiles = np.concatenate((quantiles, q)) artists['bodies'] = bodies # Render means @@ -8129,6 +8154,22 @@ def violin(self, vpstats, positions=None, vert=True, widths=0.5, pmaxes, colors=edgecolor) + # Render quantile values + if quantiles.size > 0: + # Recalculate ranges for statistics lines for quantiles. + # ppmins are the left end of quantiles lines + ppmins = np.asarray([]) + # pmaxes are the right end of quantiles lines + ppmaxs = np.asarray([]) + for stats, cmin, cmax in zip(vpstats, pmins, pmaxes): + q = stats.get('quantiles') + if q is not None: + ppmins = np.concatenate((ppmins, [cmin] * np.size(q))) + ppmaxs = np.concatenate((ppmaxs, [cmax] * np.size(q))) + # Start rendering + artists['cquantiles'] = perp_lines(quantiles, ppmins, ppmaxs, + colors=edgecolor) + return artists # Methods that are entirely implemented in other modules. diff --git a/lib/matplotlib/cbook/__init__.py b/lib/matplotlib/cbook/__init__.py index bbeb82b18f1a..3f539b569830 100644 --- a/lib/matplotlib/cbook/__init__.py +++ b/lib/matplotlib/cbook/__init__.py @@ -1431,7 +1431,7 @@ def _reshape_2D(X, name): raise ValueError("{} must have 2 or fewer dimensions".format(name)) -def violin_stats(X, method, points=100): +def violin_stats(X, method, points=100, quantiles=None): """ Returns a list of dictionaries of data which can be used to draw a series of violin plots. See the `Returns` section below to view the required keys @@ -1455,6 +1455,12 @@ def violin_stats(X, method, points=100): Defines the number of points to evaluate each of the gaussian kernel density estimates at. + quantiles : array-like, default = None + Defines (if not None) a list of floats in interval [0, 1] for each + column of data, which represents the quantiles that will be rendered + for that column of data. Must have 2 or fewer dimensions. 1D array will + be treated as a singleton list containing them. + Returns ------- @@ -1469,6 +1475,7 @@ def violin_stats(X, method, points=100): - median: The median value for this column of data. - min: The minimum value for this column of data. - max: The maximum value for this column of data. + - quantiles: The quantile values for this column of data. """ # List of dictionaries describing each of the violins. @@ -1477,13 +1484,27 @@ def violin_stats(X, method, points=100): # Want X to be a list of data sequences X = _reshape_2D(X, "X") - for x in X: + # Want quantiles to be as the same shape as data sequences + if quantiles is not None and len(quantiles) != 0: + quantiles = _reshape_2D(quantiles, "quantiles") + # Else, mock quantiles if is none or empty + else: + quantiles = [[]] * np.shape(X)[0] + + # quantiles should has the same size as dataset + if np.shape(X)[:1] != np.shape(quantiles)[:1]: + raise ValueError("List of violinplot statistics and quantiles values" + " must have the same length") + + # Zip x and quantiles + for (x, q) in zip(X, quantiles): # Dictionary of results for this distribution stats = {} # Calculate basic stats for the distribution min_val = np.min(x) max_val = np.max(x) + quantile_val = np.percentile(x, 100 * q) # Evaluate the kernel density estimate coords = np.linspace(min_val, max_val, points) @@ -1495,6 +1516,7 @@ def violin_stats(X, method, points=100): stats['median'] = np.median(x) stats['min'] = min_val stats['max'] = max_val + stats['quantiles'] = np.atleast_1d(quantile_val) # Append to output vpstats.append(stats) diff --git a/lib/matplotlib/pyplot.py b/lib/matplotlib/pyplot.py index 19476ca75f11..da8f59019867 100644 --- a/lib/matplotlib/pyplot.py +++ b/lib/matplotlib/pyplot.py @@ -2998,12 +2998,13 @@ def triplot(*args, **kwargs): def violinplot( dataset, positions=None, vert=True, widths=0.5, showmeans=False, showextrema=True, showmedians=False, - points=100, bw_method=None, *, data=None): + quantiles=None, points=100, bw_method=None, *, data=None): return gca().violinplot( dataset, positions=positions, vert=vert, widths=widths, showmeans=showmeans, showextrema=showextrema, - showmedians=showmedians, points=points, bw_method=bw_method, - **({"data": data} if data is not None else {})) + showmedians=showmedians, quantiles=quantiles, points=points, + bw_method=bw_method, **({"data": data} if data is not None + else {})) # Autogenerated by boilerplate.py. Do not edit as changes will be lost. diff --git a/lib/matplotlib/tests/baseline_images/test_axes/violinplot_horiz_showall.png b/lib/matplotlib/tests/baseline_images/test_axes/violinplot_horiz_showall.png index 95c657bd81d5..b3e807c153d9 100644 Binary files a/lib/matplotlib/tests/baseline_images/test_axes/violinplot_horiz_showall.png and b/lib/matplotlib/tests/baseline_images/test_axes/violinplot_horiz_showall.png differ diff --git a/lib/matplotlib/tests/baseline_images/test_axes/violinplot_vert_showall.png b/lib/matplotlib/tests/baseline_images/test_axes/violinplot_vert_showall.png index cdda9b7e1c65..49383f98bc3e 100644 Binary files a/lib/matplotlib/tests/baseline_images/test_axes/violinplot_vert_showall.png and b/lib/matplotlib/tests/baseline_images/test_axes/violinplot_vert_showall.png differ diff --git a/lib/matplotlib/tests/test_axes.py b/lib/matplotlib/tests/test_axes.py index fea961a201ee..210d470636c2 100644 --- a/lib/matplotlib/tests/test_axes.py +++ b/lib/matplotlib/tests/test_axes.py @@ -2661,7 +2661,8 @@ def test_vert_violinplot_showall(): np.random.seed(316624790) data = [np.random.normal(size=100) for i in range(4)] ax.violinplot(data, positions=range(4), showmeans=1, showextrema=1, - showmedians=1) + showmedians=1, + quantiles=[[0.1, 0.9], [0.2, 0.8], [0.3, 0.7], [0.4, 0.6]]) @image_comparison(baseline_images=['violinplot_vert_custompoints_10'], @@ -2738,7 +2739,8 @@ def test_horiz_violinplot_showall(): np.random.seed(82762530) data = [np.random.normal(size=100) for i in range(4)] ax.violinplot(data, positions=range(4), vert=False, showmeans=1, - showextrema=1, showmedians=1) + showextrema=1, showmedians=1, + quantiles=[[0.1, 0.9], [0.2, 0.8], [0.3, 0.7], [0.4, 0.6]]) @image_comparison(baseline_images=['violinplot_horiz_custompoints_10'], @@ -2781,6 +2783,48 @@ def test_violinplot_bad_widths(): ax.violinplot(data, positions=range(4), widths=[1, 2, 3]) +def test_violinplot_bad_quantiles(): + ax = plt.axes() + # First 9 digits of frac(sqrt(73)) + np.random.seed(544003745) + data = [np.random.normal(size=100)] + + # Different size quantile list and plots + with pytest.raises(ValueError): + ax.violinplot(data, quantiles=[[0.1, 0.2], [0.5, 0.7]]) + + +def test_violinplot_outofrange_quantiles(): + ax = plt.axes() + # First 9 digits of frac(sqrt(79)) + np.random.seed(888194417) + data = [np.random.normal(size=100)] + + # Quantile value above 100 + with pytest.raises(ValueError): + ax.violinplot(data, quantiles=[[0.1, 0.2, 0.3, 1.05]]) + + # Quantile value below 0 + with pytest.raises(ValueError): + ax.violinplot(data, quantiles=[[-0.05, 0.2, 0.3, 0.75]]) + + +@check_figures_equal(extensions=["png"]) +def test_violinplot_single_list_quantiles(fig_test, fig_ref): + # Ensures quantile list for 1D can be passed in as single list + # First 9 digits of frac(sqrt(83)) + np.random.seed(110433579) + data = [np.random.normal(size=100)] + + # Test image + ax = fig_test.subplots() + ax.violinplot(data, quantiles=[0.1, 0.3, 0.9]) + + # Reference image + ax = fig_ref.subplots() + ax.violinplot(data, quantiles=[[0.1, 0.3, 0.9]]) + + def test_manage_xticks(): _, ax = plt.subplots() ax.set_xlim(0, 4)
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: