From fea06c94baa25b64c33fc3acbc214c32cecc4ac0 Mon Sep 17 00:00:00 2001 From: Tim Hoffmann <2836374+timhoffm@users.noreply.github.com> Date: Sat, 13 Jul 2024 10:53:22 +0200 Subject: [PATCH 1/8] ENH: Add grouped_bar() method --- doc/_embedded_plots/grouped_bar.py | 15 ++ doc/api/axes_api.rst | 1 + doc/api/pyplot_summary.rst | 1 + doc/users/next_whats_new/grouped_bar.rst | 25 ++ lib/matplotlib/axes/_axes.py | 326 +++++++++++++++++++++++ lib/matplotlib/axes/_axes.pyi | 19 ++ lib/matplotlib/pyplot.py | 28 ++ tools/boilerplate.py | 1 + 8 files changed, 416 insertions(+) create mode 100644 doc/_embedded_plots/grouped_bar.py create mode 100644 doc/users/next_whats_new/grouped_bar.rst diff --git a/doc/_embedded_plots/grouped_bar.py b/doc/_embedded_plots/grouped_bar.py new file mode 100644 index 000000000000..f02e269328d2 --- /dev/null +++ b/doc/_embedded_plots/grouped_bar.py @@ -0,0 +1,15 @@ +import matplotlib.pyplot as plt + +categories = ['A', 'B'] +data0 = [1.0, 3.0] +data1 = [1.4, 3.4] +data2 = [1.8, 3.8] + +fig, ax = plt.subplots(figsize=(4, 2.2)) +ax.grouped_bar( + [data0, data1, data2], + tick_labels=categories, + labels=['dataset 0', 'dataset 1', 'dataset 2'], + colors=['#1f77b4', '#58a1cf', '#abd0e6'], +) +ax.legend() diff --git a/doc/api/axes_api.rst b/doc/api/axes_api.rst index 4bbcbe081194..b742ce9b7a55 100644 --- a/doc/api/axes_api.rst +++ b/doc/api/axes_api.rst @@ -67,6 +67,7 @@ Basic Axes.bar Axes.barh Axes.bar_label + Axes.grouped_bar Axes.stem Axes.eventplot diff --git a/doc/api/pyplot_summary.rst b/doc/api/pyplot_summary.rst index cdd57bfe6276..c4a860fd2590 100644 --- a/doc/api/pyplot_summary.rst +++ b/doc/api/pyplot_summary.rst @@ -60,6 +60,7 @@ Basic bar barh bar_label + grouped_bar stem eventplot pie diff --git a/doc/users/next_whats_new/grouped_bar.rst b/doc/users/next_whats_new/grouped_bar.rst new file mode 100644 index 000000000000..64f2905fbf0c --- /dev/null +++ b/doc/users/next_whats_new/grouped_bar.rst @@ -0,0 +1,25 @@ +Grouped bar charts 
+------------------ + +The new method `~.Axes.grouped_bar()` simplifies the creation of grouped bar charts +significantly. It supports different input data types (lists of datasets, dicts of +datasets, data in 2D arrays, pandas DataFrames), and allows for easy customization +of placement via controllable distances between bars and between bar groups. + +Example: + +.. plot:: + :include-source: true + + import matplotlib.pyplot as plt + + categories = ['A', 'B'] + datasets = { + 'dataset 0': [1.0, 3.0], + 'dataset 1': [1.4, 3.4], + 'dataset 2': [1.8, 3.8], + } + + fig, ax = plt.subplots(figsize=(4, 2.2)) + ax.grouped_bar(datasets, tick_labels=categories) + ax.legend() diff --git a/lib/matplotlib/axes/_axes.py b/lib/matplotlib/axes/_axes.py index e480f8f29598..e10c1808176a 100644 --- a/lib/matplotlib/axes/_axes.py +++ b/lib/matplotlib/axes/_axes.py @@ -64,6 +64,23 @@ def _make_axes_method(func): return func +class _GroupedBarReturn: + """ + A provisional result object for `.Axes.grouped_bar`. + + This is a placeholder for a future better return type. We try to build in + backward compatibility / migration possibilities. + + The only public interfaces are the ``bar_containers`` attribute and the + ``remove()`` method. + """ + def __init__(self, bar_containers): + self.bar_containers = bar_containers + + def remove(self): + [b.remove() for b in self.bars] + + @_docstring.interpd class Axes(_AxesBase): """ @@ -2414,6 +2431,7 @@ def bar(self, x, height, width=0.8, bottom=None, *, align="center", See Also -------- barh : Plot a horizontal bar plot. + grouped_bar : Plot multiple datasets as grouped bar plot. Notes ----- @@ -3014,6 +3032,314 @@ def broken_barh(self, xranges, yrange, **kwargs): return col + @_docstring.interpd + def grouped_bar(self, heights, *, positions=None, group_spacing=1.5, bar_spacing=0, + tick_labels=None, labels=None, orientation="vertical", colors=None, + **kwargs): + """ + Make a grouped bar plot. + + .. 
note:: + This function is new in v3.11, and the API is still provisional. + We may still fine-tune some aspects based on user-feedback. + + This is a convenience function to plot bars for multiple datasets. + In particular, it simplifies positioning of the bars compared to individual + `~.Axes.bar` plots. + + Bar plots present categorical data as a sequence of bars, one bar per category. + We call one set of such values a *dataset* and it's bars all share the same + color. Grouped bar plots show multiple such datasets, where the values per + category are grouped together. The category names are drawn as tick labels + below the bar groups. Each dataset has a distinct bar color, and can optionally + get a label that is used for the legend. + + Here is an example call structure and the corresponding plot: + + .. code-block:: python + + grouped_bar([dataset_1, dataset_2, dataset_3], + tick_labels=['A', 'B'], + labels=['dataset 1', 'dataset 2', 'dataset 3']) + + .. plot:: _embedded_plots/grouped_bar.py + + Parameters + ---------- + heights : list of array-like or dict of array-like or 2D array \ +or pandas.DataFrame + The heights for all x and groups. One of: + + - list of array-like: A list of datasets, each dataset must have + the same number of elements. + + .. code-block:: none + + # category_A, category_B + dataset_0 = [ds0_A, ds0_B] + dataset_1 = [ds1_A, ds1_B] + dataset_2 = [ds2_A, ds2_B] + + Example call:: + + grouped_bar([dataset_0, dataset_1, dataset_2]) + + - dict of array-like: A mapping from names to datasets. Each dataset + (dict value) must have the same number of elements. + + This is similar to passing a list of array-like, with the addition that + each dataset gets a name. + + Example call: + + .. code-block:: python + + grouped_bar({'ds0': dataset_0, 'ds1': dataset_1, 'ds2': dataset_2]}) + + The names are used as *labels*, i.e. the following two calls are + equivalent: + + .. 
code-block:: python + + data_dict = {'ds0': dataset_0, 'ds1': dataset_1, 'ds2': dataset_2]} + grouped_bar(data_dict) + grouped_bar(data_dict.values(), labels=data_dict.keys()) + + When using a dict-like input, you must not pass *labels* explicitly. + + - a 2D array: The rows are the categories, the columns are the different + datasets. + + .. code-block:: none + + dataset_0 dataset_1 dataset_2 + category_A ds0_a ds1_a ds2_a + category_B ds0_b ds1_b ds2_b + + Example call: + + .. code-block:: python + + group_labels = ["group_A", "group_B"] + dataset_labels = ["dataset_0", "dataset_1", "dataset_2"] + array = np.random.random((2, 3)) + + Note that this is consistent with pandas. These two calls produce + the same bar plot structure: + + .. code-block:: python + + grouped_bar(array, tick_labels=categories, labels=dataset_labels) + df = pd.DataFrame(array, index=categories, columns=dataset_labels) + df.plot.bar() + + - a `pandas.DataFrame`. + + .. code-block:: python + + df = pd.DataFrame( + np.random.random((2, 3)) + index=["group_A", "group_B"], + columns=["dataset_0", "dataset_1", "dataset_2"] + ) + grouped_bar(df) + + Note that ``grouped_bar(df)`` produces a structurally equivalent plot like + ``df.plot.bar()``. + + positions : array-like, optional + The center positions of the bar groups. The values have to be equidistant. + If not given, a sequence of integer positions 0, 1, 2, ... is used. + + tick_labels : list of str, optional + The category labels, which are placed on ticks at the center *positions* + of the bar groups. + + If not set, the axis ticks (positions and labels) are left unchanged. + + labels : list of str, optional + The labels of the datasets, i.e. the bars within one group. + These will show up in the legend. + + group_spacing : float, default: 1.5 + The space between two bar groups in units of bar width. + + The default value of 1.5 thus means that there's a gap of + 1.5 bar widths between bar groups. 
+ + bar_spacing : float, default: 0 + The space between bars in units of bar width. + + orientation : {"vertical", "horizontal"}, default: "vertical" + The direction of the bars. + + colors : list of :mpltype:`color`, optional + A sequence of colors to be cycled through and used to color bars + of the different datasets. The sequence need not be exactly the + same length as the number of datasets, in which case the colors + will repeat from the beginning. + + If not specified, the colors from the Axes property cycle will be used. + + **kwargs : `.Rectangle` properties + + %(Rectangle:kwdoc)s + + Returns + ------- + _GroupedBarReturn + + A provisional result object. This will be refined in the future. + For now, the guaranteed API on the returned object is limited to + + - the attribute ``bar_containers``, which is a list of + `.BarContainer`, i.e. the results of the individual `~.Axes.bar` + calls for each dataset. + + - a ``remove()`` method, that remove all bars from the Axes. + See also `.Artist.remove()`. + + See Also + -------- + bar : A lower-level API for bar plots, with more degrees of freedom like + individual bar sizes and colors. + + Notes + ----- + For a better understanding, we compare the `~.Axes.grouped_bar` API with + those of `~.Axes.bar` and `~.Axes.boxplot`. + + **Comparison to bar()** + + `~.Axes.grouped_bar` intentionally deviates from the `~.Axes.bar` API in some + aspects. ``bar(x, y)`` is a lower-level API and places bars with height *y* + at explicit positions *x*. It also allows to specify individual bar widths + and colors. This kind of detailed control and flexibility is difficult to + manage and often not needed when plotting multiple datasets as grouped bar + plot. Therefore, ``grouped_bar`` focusses on the abstraction of bar plots + as visualization of categorical data. + + The following examples may help to transfer from ``bar`` to + ``grouped_bar``. + + Positions are de-emphasized due to categories, and default to integer values. 
+ If you have used ``range(N)`` as positions, you can leave that value out:: + + bar(range(N), heights) + grouped_bar([heights]) + + If needed, positions can be passed as keyword arguments:: + + bar(x, heights) + grouped_bar([heights], positions=x) + + To place category labels in `~.Axes.bar` you could use the argument + *tick_label* or use a list of category names as *x*. + `~.Axes.grouped_bar` expects them in the argument *tick_labels*:: + + bar(range(N), heights, tick_label=["A", "B"]) + bar(["A", "B"], heights) + grouped_bar([heights], tick_labels=["A", "B"]) + + Dataset labels, which are shown in the legend, are passed via the *labels* + parameter (the plural counterpart of *label* in `~.Axes.bar`):: + + bar(..., label="dataset") + grouped_bar(..., labels=["dataset"]) + + **Comparison to boxplot()** + + Both, `~.Axes.grouped_bar` and `~.Axes.boxplot` visualize categorical data + from multiple datasets. The basic API on *tick_labels* and *positions* + is the same, so that you can easily switch between plotting all + individual values as `~.Axes.grouped_bar` or the statistical distribution + per category as `~.Axes.boxplot`:: + + grouped_bar(values, positions=..., tick_labels=...) + boxplot(values, positions=..., tick_labels=...) 
+ + """ + if cbook._is_pandas_dataframe(heights): + if labels is None: + labels = heights.columns.tolist() + if tick_labels is None: + tick_labels = heights.index.tolist() + heights = heights.to_numpy().T + elif hasattr(heights, 'keys'): # dict + if labels is not None: + raise ValueError( + "'labels' cannot be used if 'heights' are a mapping") + labels = heights.keys() + heights = list(heights.values()) + elif hasattr(heights, 'shape'): # numpy array + heights = heights.T + + num_datasets = len(heights) + dataset_0 = next(iter(heights)) + num_groups = len(dataset_0) + + if positions is None: + group_centers = np.arange(num_groups) + group_distance = 1 + else: + group_centers = np.asanyarray(positions) + if len(group_centers) > 1: + d = np.diff(group_centers) + if not np.allclose(d, d.mean()): + raise ValueError("'positions' must be equidistant") + group_distance = d[0] + else: + group_distance = 1 + + for i, dataset in enumerate(heights): + if len(dataset) != num_groups: + raise ValueError( + f"'x' indicates {num_groups} groups, but dataset {i} " + f"has {len(dataset)} groups" + ) + + _api.check_in_list(["vertical", "horizontal"], orientation=orientation) + + if colors is None: + colors = itertools.cycle([None]) + else: + # Note: This is equivalent to the behavior in stackplot + # TODO: do we want to be more restrictive and check lengths? 
+ colors = itertools.cycle(colors) + + bar_width = (group_distance / + (num_datasets + (num_datasets - 1) * bar_spacing + group_spacing)) + bar_spacing_abs = bar_spacing * bar_width + margin_abs = 0.5 * group_spacing * bar_width + + if labels is None: + labels = [None] * num_datasets + else: + assert len(labels) == num_datasets + + # place the bars, but only use numerical positions, categorical tick labels + # are handled separately below + bar_containers = [] + for i, (hs, label, color) in enumerate( + zip(heights, labels, colors)): + lefts = (group_centers - 0.5 * group_distance + margin_abs + + i * (bar_width + bar_spacing_abs)) + if orientation == "vertical": + bc = self.bar(lefts, hs, width=bar_width, align="edge", + label=label, color=color, **kwargs) + else: + bc = self.barh(lefts, hs, height=bar_width, align="edge", + label=label, color=color, **kwargs) + bar_containers.append(bc) + + if tick_labels is not None: + if orientation == "vertical": + self.xaxis.set_ticks(group_centers, labels=tick_labels) + else: + self.yaxis.set_ticks(group_centers, labels=tick_labels) + + return _GroupedBarReturn(bar_containers) + @_preprocess_data() def stem(self, *args, linefmt=None, markerfmt=None, basefmt=None, bottom=0, label=None, orientation='vertical'): diff --git a/lib/matplotlib/axes/_axes.pyi b/lib/matplotlib/axes/_axes.pyi index c3eb28d2f095..f606a65753f4 100644 --- a/lib/matplotlib/axes/_axes.pyi +++ b/lib/matplotlib/axes/_axes.pyi @@ -37,6 +37,12 @@ from typing import Any, Literal, overload import numpy as np from numpy.typing import ArrayLike from matplotlib.typing import ColorType, MarkerType, LineStyleType +import pandas as pd + + +class _GroupedBarReturn: + def __init__(self, bar_containers: list[BarContainer]) -> None: ... + def remove(self) -> None: ... class Axes(_AxesBase): def get_title(self, loc: Literal["left", "center", "right"] = ...) -> str: ... @@ -265,6 +271,19 @@ class Axes(_AxesBase): data=..., **kwargs ) -> PolyCollection: ... 
+ def grouped_bar( + self, + heights : Sequence[ArrayLike] | dict[str, ArrayLike] | np.ndarray | pd.DataFrame, + *, + positions : ArrayLike | None = ..., + tick_labels : Sequence[str] | None = ..., + labels : Sequence[str] | None = ..., + group_spacing : float | None = ..., + bar_spacing : float | None = ..., + orientation: Literal["vertical", "horizontal"] = ..., + colors: Iterable[ColorType] | None = ..., + **kwargs + ) -> list[BarContainer]: ... def stem( self, *args: ArrayLike | str, diff --git a/lib/matplotlib/pyplot.py b/lib/matplotlib/pyplot.py index 78fc962d9c5c..cf5c9b4b739f 100644 --- a/lib/matplotlib/pyplot.py +++ b/lib/matplotlib/pyplot.py @@ -93,6 +93,7 @@ import PIL.Image from numpy.typing import ArrayLike + import pandas as pd import matplotlib.axes import matplotlib.artist @@ -3404,6 +3405,33 @@ def grid( gca().grid(visible=visible, which=which, axis=axis, **kwargs) +# Autogenerated by boilerplate.py. Do not edit as changes will be lost. +@_copy_docstring_and_deprecators(Axes.grouped_bar) +def grouped_bar( + heights: Sequence[ArrayLike] | dict[str, ArrayLike] | np.ndarray | pd.DataFrame, + *, + positions: ArrayLike | None = None, + group_spacing: float | None = 1.5, + bar_spacing: float | None = 0, + tick_labels: Sequence[str] | None = None, + labels: Sequence[str] | None = None, + orientation: Literal["vertical", "horizontal"] = "vertical", + colors: Iterable[ColorType] | None = None, + **kwargs, +) -> list[BarContainer]: + return gca().grouped_bar( + heights, + positions=positions, + group_spacing=group_spacing, + bar_spacing=bar_spacing, + tick_labels=tick_labels, + labels=labels, + orientation=orientation, + colors=colors, + **kwargs, + ) + + # Autogenerated by boilerplate.py. Do not edit as changes will be lost. 
@_copy_docstring_and_deprecators(Axes.hexbin) def hexbin( diff --git a/tools/boilerplate.py b/tools/boilerplate.py index f018dfc887c8..11ec15ac1c44 100644 --- a/tools/boilerplate.py +++ b/tools/boilerplate.py @@ -238,6 +238,7 @@ def boilerplate_gen(): 'fill_between', 'fill_betweenx', 'grid', + 'grouped_bar', 'hexbin', 'hist', 'stairs', From 556895dd9f22fd6d660cadbbf74b309df08c30f1 Mon Sep 17 00:00:00 2001 From: Tim Hoffmann <2836374+timhoffm@users.noreply.github.com> Date: Thu, 23 Jan 2025 14:56:56 +0100 Subject: [PATCH 2/8] Add tests for grouped_bar() --- .../baseline_images/test_axes/grouped_bar.png | Bin 0 -> 3914 bytes lib/matplotlib/tests/test_axes.py | 85 ++++++++++++++++++ 2 files changed, 85 insertions(+) create mode 100644 lib/matplotlib/tests/baseline_images/test_axes/grouped_bar.png diff --git a/lib/matplotlib/tests/baseline_images/test_axes/grouped_bar.png b/lib/matplotlib/tests/baseline_images/test_axes/grouped_bar.png new file mode 100644 index 0000000000000000000000000000000000000000..19d676a6b6629d12cfcdddec42b7fc329e35078a GIT binary patch literal 3914 zcmd^Cdr*^C7QX>Te1a`n6qL|9Te=J76?q9EELzY-LB(hw<&k0pf+Qdb(C`Qq&}FC~ zt@0)mqeKD;E;K{}BviK`%QFZG@(?JdZ_x96_yM%w`ZyG|TGb`}7w6aZjjTems}Om&kFrD|m1U zM~=EoB3};w$}|>#B_^CiL|EEbTUuLQ#K+p$96oFwdNJgptsMeyYvW{j(bn45&f3Nn zVPP6ZCP&2_v9gN%m%b(GN~jgA_S^tuV_Vd5zZd}Yahex)AH@s@KrjEqu|J=SFP0An zh3zH7Tjv!8J8qBzF}U`hKJ=h`OhNy}^hDY>djc>y7CAHJ%`*PNCs()gxAW(}I$QPG z)fxD?JpuRjJUKxA7-x~s-p#E0nX!M)#c*B+Nef0wlZRA|fqBLUT)&4qlW3vSWc~<| z2f$xN5-Q}5mlx0iX&x6Qh|=!;!iG zlSbrW7Zw)m933%9Nd<8@tyi`k9UXl=J*Q>+L9-f1rSk0~2m7cV2GB|smd9bOJGyCW zYu}InkZI%KaN+3DqcqoSvkz19#)GU@^>*&u>IN{l@6?Ir3+W_~60Efwn0^a%z}7en-OU>?ntBXY?Mt4UfO_D?b;CF=9Xz;G4|~U+ zdgo?*-xQyYQh|k;!h5|Bf=Vu!D2JfJlx<#ZY!s^oENb^eH9P`|6FnbH2$gLE4<`Q) zM%Q`c)2>7$?o4ug^aaRh^XS-?-!gjNH?-g7czM5L)EtiHe!L+zi&bA=U-|mY4&mAa z%VqBNk^-exOw%HNoK&nX%hhiv0?~Iu5^;`izMgNc>_D@NW0ptjs&B>QB~G#4aw5hF z#q}tYN;bjpd2Q#)4&ji=UCSaL-m}7}?UFbm3JRdCnnE|L(HZ8(CRmXGNng z#C-bYv9jU%7!706)i3O9hpGD;mVYU 
zsXci?$$kxS3Qp4MQh$XnQ~D(5kW$VY6=xWjNed%5;mB;07h7)P<%_dVH7=DVuY@0T zjQXK2a_GEO)8bhus0SjEXhk($DqHAAsTU#?LUl6o?W@NWX5hhaF9v^>%; zkxcK^%e`gnmdl-=pC5b}9Wnw1rTn{rPoFWV@l2j&00Eh4ia;PJGd*R4HF&Q8+M1F* z-4k*tG*nhZ5>!Aga_R2-U{7l7p(q&)M*l2XD)NN0YeT!mW~GU})Q^yJnO1`P-b`Nz z6{0{jRhXZDpEsJ^aHPar7QUE2J3Gr_4K{uqGw}!LK()ZOW*w|-Yik>9h$ErV>>#1Q zggR4pggkS42)EdnFjvnkFE1w$2&^m zgal*iT3T9ILZOgOl~$mH_6US8YW+<}XIGc_+Ad8F^4Na0<=3uXuVfM8;~(moxL_hA z9JVGALKe(;5{QiuS5+`elldN=p4e`Nr|JO2|3B?2cJejzp;vJuB;wC`!@rbF ne`&Wt`}0zM{ViY<$Fy4K(~;@DGx{YA06izZ@IJ Date: Wed, 22 Jan 2025 18:47:48 +0100 Subject: [PATCH 3/8] Simplify "Grouped bar chart with labels" using grouped_bar() --- .../examples/lines_bars_and_markers/barchart.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/galleries/examples/lines_bars_and_markers/barchart.py b/galleries/examples/lines_bars_and_markers/barchart.py index f2157a89c0cd..dbb0f5bbbadd 100644 --- a/galleries/examples/lines_bars_and_markers/barchart.py +++ b/galleries/examples/lines_bars_and_markers/barchart.py @@ -10,7 +10,6 @@ # data from https://allisonhorst.github.io/palmerpenguins/ import matplotlib.pyplot as plt -import numpy as np species = ("Adelie", "Chinstrap", "Gentoo") penguin_means = { @@ -19,22 +18,15 @@ 'Flipper Length': (189.95, 195.82, 217.19), } -x = np.arange(len(species)) # the label locations -width = 0.25 # the width of the bars -multiplier = 0 - fig, ax = plt.subplots(layout='constrained') -for attribute, measurement in penguin_means.items(): - offset = width * multiplier - rects = ax.bar(x + offset, measurement, width, label=attribute) - ax.bar_label(rects, padding=3) - multiplier += 1 +res = ax.grouped_bar(penguin_means, tick_labels=species, group_spacing=1) +for container in res.bar_containers: + ax.bar_label(container, padding=3) -# Add some text for labels, title and custom x-axis tick labels, etc. +# Add some text for labels, title, etc. 
ax.set_ylabel('Length (mm)') ax.set_title('Penguin attributes by species') -ax.set_xticks(x + width, species) ax.legend(loc='upper left', ncols=3) ax.set_ylim(0, 250) From 7fa82d7ed9bdbf2934e0b37bd70b3f8e960310b4 Mon Sep 17 00:00:00 2001 From: Tim Hoffmann <2836374+timhoffm@users.noreply.github.com> Date: Fri, 24 Jan 2025 00:20:18 +0100 Subject: [PATCH 4/8] Apply suggestions from code review Co-authored-by: hannah --- doc/users/next_whats_new/grouped_bar.rst | 9 +-- lib/matplotlib/axes/_axes.py | 90 +++++++++++------------- 2 files changed, 48 insertions(+), 51 deletions(-) diff --git a/doc/users/next_whats_new/grouped_bar.rst b/doc/users/next_whats_new/grouped_bar.rst index 64f2905fbf0c..af57c71b8a3a 100644 --- a/doc/users/next_whats_new/grouped_bar.rst +++ b/doc/users/next_whats_new/grouped_bar.rst @@ -10,16 +10,17 @@ Example: .. plot:: :include-source: true + :alt: Diagram of a grouped bar chart of 3 datasets with 2 categories. import matplotlib.pyplot as plt categories = ['A', 'B'] datasets = { - 'dataset 0': [1.0, 3.0], - 'dataset 1': [1.4, 3.4], - 'dataset 2': [1.8, 3.8], + 'dataset 0': [1, 11], + 'dataset 1': [3, 13], + 'dataset 2': [5, 15], } - fig, ax = plt.subplots(figsize=(4, 2.2)) + fig, ax = plt.subplots() ax.grouped_bar(datasets, tick_labels=categories) ax.legend() diff --git a/lib/matplotlib/axes/_axes.py b/lib/matplotlib/axes/_axes.py index e10c1808176a..1ca2630e7166 100644 --- a/lib/matplotlib/axes/_axes.py +++ b/lib/matplotlib/axes/_axes.py @@ -3039,22 +3039,20 @@ def grouped_bar(self, heights, *, positions=None, group_spacing=1.5, bar_spacing """ Make a grouped bar plot. - .. note:: + .. versionadded:: 3.11 + This function is new in v3.11, and the API is still provisional. We may still fine-tune some aspects based on user-feedback. - This is a convenience function to plot bars for multiple datasets. - In particular, it simplifies positioning of the bars compared to individual - `~.Axes.bar` plots. 
- - Bar plots present categorical data as a sequence of bars, one bar per category. - We call one set of such values a *dataset* and it's bars all share the same - color. Grouped bar plots show multiple such datasets, where the values per - category are grouped together. The category names are drawn as tick labels - below the bar groups. Each dataset has a distinct bar color, and can optionally - get a label that is used for the legend. + Grouped bar charts visualize a collection of multiple categorical datasets. + A categorical dataset is a mapping *name* -> *value*. The values of the + dataset are represented by a sequence of bars of the same color. + In a grouped bar chart, the bars of all datasets are grouped together by + category. The category names are drawn as tick labels next to the bar group. + Each dataset has a distinct bar color, and can optionally get a label that + is used for the legend. - Here is an example call structure and the corresponding plot: + Example: .. code-block:: python @@ -3087,25 +3085,20 @@ def grouped_bar(self, heights, *, positions=None, group_spacing=1.5, bar_spacing - dict of array-like: A mapping from names to datasets. Each dataset (dict value) must have the same number of elements. - This is similar to passing a list of array-like, with the addition that - each dataset gets a name. - Example call: .. code-block:: python - grouped_bar({'ds0': dataset_0, 'ds1': dataset_1, 'ds2': dataset_2]}) + data_dict = {'ds0': dataset_0, 'ds1': dataset_1, 'ds2': dataset_2} + grouped_bar(data_dict) - The names are used as *labels*, i.e. the following two calls are - equivalent: + The names are used as *labels*, i.e. this is equivalent to .. code-block:: python - data_dict = {'ds0': dataset_0, 'ds1': dataset_1, 'ds2': dataset_2]} - grouped_bar(data_dict) grouped_bar(data_dict.values(), labels=data_dict.keys()) - When using a dict-like input, you must not pass *labels* explicitly. 
+ When using a dict input, you must not pass *labels* explicitly. - a 2D array: The rows are the categories, the columns are the different datasets. @@ -3120,30 +3113,31 @@ def grouped_bar(self, heights, *, positions=None, group_spacing=1.5, bar_spacing .. code-block:: python - group_labels = ["group_A", "group_B"] + categories = ["A", "B"] dataset_labels = ["dataset_0", "dataset_1", "dataset_2"] array = np.random.random((2, 3)) - - Note that this is consistent with pandas. These two calls produce - the same bar plot structure: - - .. code-block:: python - grouped_bar(array, tick_labels=categories, labels=dataset_labels) - df = pd.DataFrame(array, index=categories, columns=dataset_labels) - df.plot.bar() - a `pandas.DataFrame`. + The index is used for the categories, the columns are used for the + datasets. + .. code-block:: python df = pd.DataFrame( - np.random.random((2, 3)) - index=["group_A", "group_B"], + np.random.random((2, 3)), + index=["A", "B"], columns=["dataset_0", "dataset_1", "dataset_2"] ) grouped_bar(df) + i.e. this is equivalent to + + .. code-block:: + + grouped_bar(df.to_numpy(), tick_labels=df.index, labels=df.columns) + Note that ``grouped_bar(df)`` produces a structurally equivalent plot like ``df.plot.bar()``. @@ -3153,22 +3147,21 @@ def grouped_bar(self, heights, *, positions=None, group_spacing=1.5, bar_spacing tick_labels : list of str, optional The category labels, which are placed on ticks at the center *positions* - of the bar groups. - - If not set, the axis ticks (positions and labels) are left unchanged. + of the bar groups. If not set, the axis ticks (positions and labels) are + left unchanged. labels : list of str, optional The labels of the datasets, i.e. the bars within one group. These will show up in the legend. group_spacing : float, default: 1.5 - The space between two bar groups in units of bar width. + The space between two bar groups as multiples of bar width. 
The default value of 1.5 thus means that there's a gap of 1.5 bar widths between bar groups. bar_spacing : float, default: 0 - The space between bars in units of bar width. + The space between bars as multiples of bar width. orientation : {"vertical", "horizontal"}, default: "vertical" The direction of the bars. @@ -3215,7 +3208,7 @@ def grouped_bar(self, heights, *, positions=None, group_spacing=1.5, bar_spacing aspects. ``bar(x, y)`` is a lower-level API and places bars with height *y* at explicit positions *x*. It also allows to specify individual bar widths and colors. This kind of detailed control and flexibility is difficult to - manage and often not needed when plotting multiple datasets as grouped bar + manage and often not needed when plotting multiple datasets as a grouped bar plot. Therefore, ``grouped_bar`` focusses on the abstraction of bar plots as visualization of categorical data. @@ -3275,8 +3268,18 @@ def grouped_bar(self, heights, *, positions=None, group_spacing=1.5, bar_spacing heights = heights.T num_datasets = len(heights) - dataset_0 = next(iter(heights)) - num_groups = len(dataset_0) + num_groups = len(next(iter(heights))) # inferred from first dataset + + # validate that all datasets have the same length, i.e. num_groups + # - can be skipped if heights is an array + if not hasattr(heights, 'shape'): + for i, dataset in enumerate(heights): + if len(dataset) != num_groups: + raise ValueError( + "'heights' contains datasets with different number of " + f"elements. dataset 0 has {num_groups} elements but " + f"dataset {i} has {len(dataset)} elements." 
+ ) if positions is None: group_centers = np.arange(num_groups) @@ -3291,13 +3294,6 @@ def grouped_bar(self, heights, *, positions=None, group_spacing=1.5, bar_spacing else: group_distance = 1 - for i, dataset in enumerate(heights): - if len(dataset) != num_groups: - raise ValueError( - f"'x' indicates {num_groups} groups, but dataset {i} " - f"has {len(dataset)} groups" - ) - _api.check_in_list(["vertical", "horizontal"], orientation=orientation) if colors is None: From 8cf06c487882f0270cb72810e56b7313603f63e3 Mon Sep 17 00:00:00 2001 From: Tim Hoffmann <2836374+timhoffm@users.noreply.github.com> Date: Sat, 1 Feb 2025 13:08:45 +0100 Subject: [PATCH 5/8] Docstring wording --- lib/matplotlib/axes/_axes.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/lib/matplotlib/axes/_axes.py b/lib/matplotlib/axes/_axes.py index 1ca2630e7166..e6d392d0c191 100644 --- a/lib/matplotlib/axes/_axes.py +++ b/lib/matplotlib/axes/_axes.py @@ -3041,16 +3041,17 @@ def grouped_bar(self, heights, *, positions=None, group_spacing=1.5, bar_spacing .. versionadded:: 3.11 - This function is new in v3.11, and the API is still provisional. - We may still fine-tune some aspects based on user-feedback. - - Grouped bar charts visualize a collection of multiple categorical datasets. - A categorical dataset is a mapping *name* -> *value*. The values of the - dataset are represented by a sequence of bars of the same color. - In a grouped bar chart, the bars of all datasets are grouped together by - category. The category names are drawn as tick labels next to the bar group. - Each dataset has a distinct bar color, and can optionally get a label that - is used for the legend. + The API is still provisional. We may still fine-tune some aspects based on + user-feedback. + + Grouped bar charts visualize a collection of categorical datasets. Each value + in a dataset belongs to a distinct category and these categories are the same + across all datasets. 
The categories typically have string names, but could + also be dates or index keys. The values in each dataset are represented by a + sequence of bars of the same color. The bars of all datasets are grouped + together by their shared categories. The category names are drawn as the tick + labels for each bar group. Each dataset has a distinct bar color, and can + optionally get a label that is used for the legend. Example: From a4ed768b184a28a1725f4ffe2c0c2d963bc0ac52 Mon Sep 17 00:00:00 2001 From: Tim Hoffmann <2836374+timhoffm@users.noreply.github.com> Date: Wed, 5 Feb 2025 20:27:34 +0100 Subject: [PATCH 6/8] Update lib/matplotlib/axes/_axes.py Co-authored-by: hannah --- lib/matplotlib/axes/_axes.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/matplotlib/axes/_axes.py b/lib/matplotlib/axes/_axes.py index e6d392d0c191..d9dd17252e0b 100644 --- a/lib/matplotlib/axes/_axes.py +++ b/lib/matplotlib/axes/_axes.py @@ -3074,10 +3074,10 @@ def grouped_bar(self, heights, *, positions=None, group_spacing=1.5, bar_spacing .. 
code-block:: none - # category_A, category_B - dataset_0 = [ds0_A, ds0_B] - dataset_1 = [ds1_A, ds1_B] - dataset_2 = [ds2_A, ds2_B] + # category_A, category_B + dataset_0 = [value_0_A, value_0_B] + dataset_1 = [value_1_A, value_1_B] + dataset_2 = [value_2_A, value_2_B] Example call:: From d9aa5f64bae90654b05560afde48a2226f454e99 Mon Sep 17 00:00:00 2001 From: Tim Hoffmann <2836374+timhoffm@users.noreply.github.com> Date: Mon, 2 Jun 2025 16:06:33 +0200 Subject: [PATCH 7/8] Add test for grouped_bar() return value --- lib/matplotlib/axes/_axes.py | 2 +- lib/matplotlib/tests/test_axes.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/lib/matplotlib/axes/_axes.py b/lib/matplotlib/axes/_axes.py index d9dd17252e0b..fa5ff198fe26 100644 --- a/lib/matplotlib/axes/_axes.py +++ b/lib/matplotlib/axes/_axes.py @@ -78,7 +78,7 @@ def __init__(self, bar_containers): self.bar_containers = bar_containers def remove(self): - [b.remove() for b in self.bars] + [b.remove() for b in self.bar_containers] @_docstring.interpd diff --git a/lib/matplotlib/tests/test_axes.py b/lib/matplotlib/tests/test_axes.py index e7158845307d..605e7b557713 100644 --- a/lib/matplotlib/tests/test_axes.py +++ b/lib/matplotlib/tests/test_axes.py @@ -23,6 +23,7 @@ from matplotlib import rc_context, patheffects import matplotlib.colors as mcolors import matplotlib.dates as mdates +from matplotlib.container import BarContainer from matplotlib.figure import Figure from matplotlib.axes import Axes from matplotlib.lines import Line2D @@ -2251,6 +2252,20 @@ def test_grouped_bar_dataframe(fig_test, fig_ref, pd): ax.legend() +def test_grouped_bar_return_value(): + fig, ax = plt.subplots() + ret = ax.grouped_bar([[1, 2, 3], [11, 12, 13]], tick_labels=['A', 'B', 'C']) + + assert len(ret.bar_containers) == 2 + for bc in ret.bar_containers: + assert isinstance(bc, BarContainer) + assert bc in ax.containers + + ret.remove() + for bc in ret.bar_containers: + assert bc not in ax.containers + 
+ def test_boxplot_dates_pandas(pd): # smoke test for boxplot and dates in pandas data = np.random.rand(5, 2) From e0afe743127b4462df362148ba63537aa7980132 Mon Sep 17 00:00:00 2001 From: Tim Hoffmann <2836374+timhoffm@users.noreply.github.com> Date: Mon, 2 Jun 2025 16:16:22 +0200 Subject: [PATCH 8/8] Apply suggestions from code review --- lib/matplotlib/axes/_axes.py | 30 ++++++++++++++---------------- lib/matplotlib/axes/_axes.pyi | 13 +++++++------ lib/matplotlib/tests/test_axes.py | 8 ++++---- 3 files changed, 25 insertions(+), 26 deletions(-) diff --git a/lib/matplotlib/axes/_axes.py b/lib/matplotlib/axes/_axes.py index fa5ff198fe26..b4ed7ae22d35 100644 --- a/lib/matplotlib/axes/_axes.py +++ b/lib/matplotlib/axes/_axes.py @@ -3057,9 +3057,9 @@ def grouped_bar(self, heights, *, positions=None, group_spacing=1.5, bar_spacing .. code-block:: python - grouped_bar([dataset_1, dataset_2, dataset_3], + grouped_bar([dataset_0, dataset_1, dataset_2], tick_labels=['A', 'B'], - labels=['dataset 1', 'dataset 2', 'dataset 3']) + labels=['dataset 0', 'dataset 1', 'dataset 2']) .. plot:: _embedded_plots/grouped_bar.py @@ -3156,13 +3156,13 @@ def grouped_bar(self, heights, *, positions=None, group_spacing=1.5, bar_spacing These will show up in the legend. group_spacing : float, default: 1.5 - The space between two bar groups as multiples of bar width. + The space between two bar groups as a multiple of bar width. The default value of 1.5 thus means that there's a gap of 1.5 bar widths between bar groups. bar_spacing : float, default: 0 - The space between bars as multiples of bar width. + The space between bars as a multiple of bar width. orientation : {"vertical", "horizontal"}, default: "vertical" The direction of the bars. @@ -3181,17 +3181,17 @@ def grouped_bar(self, heights, *, positions=None, group_spacing=1.5, bar_spacing Returns ------- - _GroupedBarReturn + _GroupedBarReturn - A provisional result object. This will be refined in the future. 
- For now, the guaranteed API on the returned object is limited to + A provisional result object. This will be refined in the future. + For now, the guaranteed API on the returned object is limited to - - the attribute ``bar_containers``, which is a list of - `.BarContainer`, i.e. the results of the individual `~.Axes.bar` - calls for each dataset. + - the attribute ``bar_containers``, which is a list of + `.BarContainer`, i.e. the results of the individual `~.Axes.bar` + calls for each dataset. - - a ``remove()`` method, that remove all bars from the Axes. - See also `.Artist.remove()`. + - a ``remove()`` method that removes all bars from the Axes. + See also `.Artist.remove()`. See Also -------- @@ -3261,8 +3261,7 @@ def grouped_bar(self, heights, *, positions=None, group_spacing=1.5, bar_spacing heights = heights.to_numpy().T elif hasattr(heights, 'keys'): # dict if labels is not None: - raise ValueError( - "'labels' cannot be used if 'heights' are a mapping") + raise ValueError("'labels' cannot be used if 'heights' is a mapping") labels = heights.keys() heights = list(heights.values()) elif hasattr(heights, 'shape'): # numpy array @@ -3317,8 +3316,7 @@ def grouped_bar(self, heights, *, positions=None, group_spacing=1.5, bar_spacing # place the bars, but only use numerical positions, categorical tick labels # are handled separately below bar_containers = [] - for i, (hs, label, color) in enumerate( - zip(heights, labels, colors)): + for i, (hs, label, color) in enumerate(zip(heights, labels, colors)): lefts = (group_centers - 0.5 * group_distance + margin_abs + i * (bar_width + bar_spacing_abs)) if orientation == "vertical": diff --git a/lib/matplotlib/axes/_axes.pyi b/lib/matplotlib/axes/_axes.pyi index f606a65753f4..0008363b8220 100644 --- a/lib/matplotlib/axes/_axes.pyi +++ b/lib/matplotlib/axes/_axes.pyi @@ -41,6 +41,7 @@ import pandas as pd class _GroupedBarReturn: + bar_containers: list[BarContainer] def __init__(self, bar_containers: list[BarContainer]) 
-> None: ... def remove(self) -> None: ... @@ -273,13 +274,13 @@ class Axes(_AxesBase): ) -> PolyCollection: ... def grouped_bar( self, - heights : Sequence[ArrayLike] | dict[str, ArrayLike] | np.ndarray | pd.DataFrame, + heights: Sequence[ArrayLike] | dict[str, ArrayLike] | np.ndarray | pd.DataFrame, *, - positions : ArrayLike | None = ..., - tick_labels : Sequence[str] | None = ..., - labels : Sequence[str] | None = ..., - group_spacing : float | None = ..., - bar_spacing : float | None = ..., + positions: ArrayLike | None = ..., + tick_labels: Sequence[str] | None = ..., + labels: Sequence[str] | None = ..., + group_spacing: float | None = ..., + bar_spacing: float | None = ..., orientation: Literal["vertical", "horizontal"] = ..., colors: Iterable[ColorType] | None = ..., **kwargs diff --git a/lib/matplotlib/tests/test_axes.py b/lib/matplotlib/tests/test_axes.py index 605e7b557713..ae2e91b811f1 100644 --- a/lib/matplotlib/tests/test_axes.py +++ b/lib/matplotlib/tests/test_axes.py @@ -2182,7 +2182,7 @@ def test_grouped_bar(): ax.set_yticks([]) -@check_figures_equal(extensions=["png"]) +@check_figures_equal() def test_grouped_bar_list_of_datasets(fig_test, fig_ref): categories = ['A', 'B'] data1 = [1, 1.2] @@ -2205,7 +2205,7 @@ def test_grouped_bar_list_of_datasets(fig_test, fig_ref): ax.legend() -@check_figures_equal(extensions=["png"]) +@check_figures_equal() def test_grouped_bar_dict_of_datasets(fig_test, fig_ref): categories = ['A', 'B'] data_dict = dict(data1=[1, 1.2], data2=[2, 2.4], data3=[3, 3.6]) @@ -2219,7 +2219,7 @@ def test_grouped_bar_dict_of_datasets(fig_test, fig_ref): ax.legend() -@check_figures_equal(extensions=["png"]) +@check_figures_equal() def test_grouped_bar_array(fig_test, fig_ref): categories = ['A', 'B'] array = np.array([[1, 2, 3], [1.2, 2.4, 3.6]]) @@ -2235,7 +2235,7 @@ def test_grouped_bar_array(fig_test, fig_ref): ax.legend() -@check_figures_equal(extensions=["png"]) +@check_figures_equal() def test_grouped_bar_dataframe(fig_test, 
fig_ref, pd): categories = ['A', 'B'] labels = ['data1', 'data2', 'data3'] pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy