diff --git a/Doc/library/string.rst b/Doc/library/string.rst index 09165c481b246e..721c5c8d334674 100644 --- a/Doc/library/string.rst +++ b/Doc/library/string.rst @@ -319,14 +319,19 @@ non-empty format specification typically modifies the result. The general form of a *standard format specifier* is: .. productionlist:: format-spec - format_spec: [[`fill`]`align`][`sign`]["z"]["#"]["0"][`width`][`grouping_option`]["." `precision`][`type`] + format_spec: [`options`][`width_and_precision`][`type`] + options: [[`fill`]`align`][`sign`]["z"]["#"]["0"] fill: <any character> align: "<" | ">" | "=" | "^" sign: "+" | "-" | " " + width_and_precision: [`width_with_grouping`][`precision_with_grouping`] + width_with_grouping: [`width`][`grouping_option`] + precision_with_grouping: "." [`precision`]`grouping_option` width: `~python-grammar:digit`+ grouping_option: "_" | "," precision: `~python-grammar:digit`+ - type: "b" | "c" | "d" | "e" | "E" | "f" | "F" | "g" | "G" | "n" | "o" | "s" | "x" | "X" | "%" + type: "b" | "c" | "d" | "e" | "E" | "f" | "F" | "g" + : | "G" | "n" | "o" | "s" | "x" | "X" | "%" If a valid *align* value is specified, it can be preceded by a *fill* character that can be any character and defaults to a space if omitted. @@ -458,6 +463,13 @@ indicates the maximum field size - in other words, how many characters will be used from the field content. The *precision* is not allowed for integer presentation types. +The ``'_'`` or ``','`` option after *precision* means the use of an underscore +or a comma for a thousands separator of the fractional part for floating-point +presentation types. + +.. versionchanged:: 3.14 + Support thousands separators for the fractional part. + Finally, the *type* determines how the data should be presented. 
The available string presentation types are: @@ -704,10 +716,18 @@ Replacing ``%x`` and ``%o`` and converting the value to different bases:: >>> "int: {0:d}; hex: {0:#x}; oct: {0:#o}; bin: {0:#b}".format(42) 'int: 42; hex: 0x2a; oct: 0o52; bin: 0b101010' -Using the comma as a thousands separator:: +Using the comma or the underscore as a thousands separator:: >>> '{:,}'.format(1234567890) '1,234,567,890' + >>> '{:_}'.format(1234567890) + '1_234_567_890' + >>> '{:_}'.format(123456789.123456789) + '123_456_789.12345679' + >>> '{:._}'.format(123456789.123456789) + '123456789.123_456_79' + >>> '{:_._}'.format(123456789.123456789) + '123_456_789.123_456_79' Expressing a percentage:: diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 9ac0e6ed2a6d40..95c2ab0ea8afe9 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -278,6 +278,11 @@ Other language changes making it a :term:`generic type`. (Contributed by Brian Schubert in :gh:`126012`.) +* Support underscore and comma as thousands separators in the fractional part + for floating-point presentation types of the new-style string formatting + (with :func:`format` or :ref:`f-strings`). + (Contributed by Sergey B Kirpichev in :gh:`87790`.) + * ``\B`` in :mod:`regular expression <re>` now matches empty input string. Now it is always the opposite of ``\b``. (Contributed by Serhiy Storchaka in :gh:`124130`.) 
diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h index a60372f58295a9..13c3213132568b 100644 --- a/Include/internal/pycore_unicodeobject.h +++ b/Include/internal/pycore_unicodeobject.h @@ -246,7 +246,8 @@ extern Py_ssize_t _PyUnicode_InsertThousandsGrouping( Py_ssize_t min_width, const char *grouping, PyObject *thousands_sep, - Py_UCS4 *maxchar); + Py_UCS4 *maxchar, + int forward); /* --- Misc functions ----------------------------------------------------- */ diff --git a/Lib/test/test_float.py b/Lib/test/test_float.py index f588e16b70123a..231b1047f72b39 100644 --- a/Lib/test/test_float.py +++ b/Lib/test/test_float.py @@ -754,6 +754,28 @@ def test_format(self): self.assertEqual(format(INF, 'f'), 'inf') self.assertEqual(format(INF, 'F'), 'INF') + # thousands separators + x = 123_456.123_456 + self.assertEqual(format(x, '_f'), '123_456.123456') + self.assertEqual(format(x, ',f'), '123,456.123456') + self.assertEqual(format(x, '._f'), '123456.123_456') + self.assertEqual(format(x, '.,f'), '123456.123,456') + self.assertEqual(format(x, '_._f'), '123_456.123_456') + self.assertEqual(format(x, ',.,f'), '123,456.123,456') + self.assertEqual(format(x, '.10_f'), '123456.123_456_000_0') + self.assertEqual(format(x, '.10,f'), '123456.123,456,000,0') + self.assertEqual(format(x, '>21._f'), ' 123456.123_456') + self.assertEqual(format(x, '<21._f'), '123456.123_456 ') + self.assertEqual(format(x, '+.11_e'), '+1.234_561_234_56e+05') + self.assertEqual(format(x, '+.11,e'), '+1.234,561,234,56e+05') + + self.assertRaises(ValueError, format, x, '._6f') + self.assertRaises(ValueError, format, x, '.,_f') + self.assertRaises(ValueError, format, x, '.6,_f') + self.assertRaises(ValueError, format, x, '.6_,f') + self.assertRaises(ValueError, format, x, '.6_n') + self.assertRaises(ValueError, format, x, '.6,n') + @support.requires_IEEE_754 def test_format_testfile(self): with open(format_testfile, encoding="utf-8") as testfile: diff --git 
a/Lib/test/test_format.py b/Lib/test/test_format.py index 9dde63e40d06db..3916bc3d4cd54c 100644 --- a/Lib/test/test_format.py +++ b/Lib/test/test_format.py @@ -515,11 +515,15 @@ def test_with_a_commas_and_an_underscore_in_format_specifier(self): error_msg = re.escape("Cannot specify both ',' and '_'.") with self.assertRaisesRegex(ValueError, error_msg): '{:,_}'.format(1) + with self.assertRaisesRegex(ValueError, error_msg): + '{:.,_f}'.format(1.1) def test_with_an_underscore_and_a_comma_in_format_specifier(self): error_msg = re.escape("Cannot specify both ',' and '_'.") with self.assertRaisesRegex(ValueError, error_msg): '{:_,}'.format(1) + with self.assertRaisesRegex(ValueError, error_msg): + '{:._,f}'.format(1.1) def test_better_error_message_format(self): # https://bugs.python.org/issue20524 diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-10-11-10-41-05.gh-issue-87790.mlfEGl.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-11-10-41-05.gh-issue-87790.mlfEGl.rst new file mode 100644 index 00000000000000..46a761f22e562f --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-11-10-41-05.gh-issue-87790.mlfEGl.rst @@ -0,0 +1,3 @@ +Support underscore and comma as thousands separators in the fractional part for +floating-point presentation types of the new-style string formatting (with +:func:`format` or :ref:`f-strings`). Patch by Sergey B Kirpichev. 
diff --git a/Objects/stringlib/localeutil.h b/Objects/stringlib/localeutil.h index d77715ec0de9ef..a4ab701de004c8 100644 --- a/Objects/stringlib/localeutil.h +++ b/Objects/stringlib/localeutil.h @@ -47,7 +47,7 @@ InsertThousandsGrouping_fill(_PyUnicodeWriter *writer, Py_ssize_t *buffer_pos, PyObject *digits, Py_ssize_t *digits_pos, Py_ssize_t n_chars, Py_ssize_t n_zeros, PyObject *thousands_sep, Py_ssize_t thousands_sep_len, - Py_UCS4 *maxchar) + Py_UCS4 *maxchar, int forward) { if (!writer) { /* if maxchar > 127, maxchar is already set */ @@ -59,24 +59,39 @@ InsertThousandsGrouping_fill(_PyUnicodeWriter *writer, Py_ssize_t *buffer_pos, } if (thousands_sep) { - *buffer_pos -= thousands_sep_len; - + if (!forward) { + *buffer_pos -= thousands_sep_len; + } /* Copy the thousands_sep chars into the buffer. */ _PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos, thousands_sep, 0, thousands_sep_len); + if (forward) { + *buffer_pos += thousands_sep_len; + } } - *buffer_pos -= n_chars; - *digits_pos -= n_chars; + if (!forward) { + *buffer_pos -= n_chars; + *digits_pos -= n_chars; + } _PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos, digits, *digits_pos, n_chars); + if (forward) { + *buffer_pos += n_chars; + *digits_pos += n_chars; + } if (n_zeros) { - *buffer_pos -= n_zeros; + if (!forward) { + *buffer_pos -= n_zeros; + } int kind = PyUnicode_KIND(writer->buffer); void *data = PyUnicode_DATA(writer->buffer); unicode_fill(kind, data, '0', *buffer_pos, n_zeros); + if (forward) { + *buffer_pos += n_zeros; + } } } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 75967d69ed374d..6f20de4e3b95b0 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -9764,7 +9764,8 @@ _PyUnicode_InsertThousandsGrouping( Py_ssize_t min_width, const char *grouping, PyObject *thousands_sep, - Py_UCS4 *maxchar) + Py_UCS4 *maxchar, + int forward) { min_width = Py_MAX(0, min_width); if (writer) { @@ -9801,14 +9802,14 @@ 
_PyUnicode_InsertThousandsGrouping( should be an empty string */ assert(!(grouping[0] == CHAR_MAX && thousands_sep_len != 0)); - digits_pos = d_pos + n_digits; + digits_pos = d_pos + (forward ? 0 : n_digits); if (writer) { - buffer_pos = writer->pos + n_buffer; + buffer_pos = writer->pos + (forward ? 0 : n_buffer); assert(buffer_pos <= PyUnicode_GET_LENGTH(writer->buffer)); assert(digits_pos <= PyUnicode_GET_LENGTH(digits)); } else { - buffer_pos = n_buffer; + buffer_pos = forward ? 0 : n_buffer; } if (!writer) { @@ -9830,7 +9831,7 @@ _PyUnicode_InsertThousandsGrouping( digits, &digits_pos, n_chars, n_zeros, use_separator ? thousands_sep : NULL, - thousands_sep_len, maxchar); + thousands_sep_len, maxchar, forward); /* Use a separator next time. */ use_separator = 1; @@ -9859,7 +9860,7 @@ _PyUnicode_InsertThousandsGrouping( digits, &digits_pos, n_chars, n_zeros, use_separator ? thousands_sep : NULL, - thousands_sep_len, maxchar); + thousands_sep_len, maxchar, forward); } return count; } diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c index 16f711184990ac..4f8662b0a4c8fb 100644 --- a/Python/formatter_unicode.c +++ b/Python/formatter_unicode.c @@ -135,6 +135,7 @@ typedef struct { Py_ssize_t width; enum LocaleType thousands_separators; Py_ssize_t precision; + enum LocaleType frac_thousands_separator; Py_UCS4 type; } InternalFormatSpec; @@ -171,6 +172,7 @@ parse_internal_render_format_spec(PyObject *obj, format->sign = '\0'; format->width = -1; format->thousands_separators = LT_NO_LOCALE; + format->frac_thousands_separator = LT_NO_LOCALE; format->precision = -1; format->type = default_type; @@ -260,7 +262,35 @@ parse_internal_render_format_spec(PyObject *obj, /* Overflow error. Exception already set. */ return 0; - /* Not having a precision after a dot is an error. 
*/ + if (end-pos && READ_spec(pos) == ',') { + if (consumed == 0) { + format->precision = -1; + } + format->frac_thousands_separator = LT_DEFAULT_LOCALE; + ++pos; + ++consumed; + } + if (end-pos && READ_spec(pos) == '_') { + if (format->frac_thousands_separator != LT_NO_LOCALE) { + invalid_comma_and_underscore(); + return 0; + } + if (consumed == 0) { + format->precision = -1; + } + format->frac_thousands_separator = LT_UNDERSCORE_LOCALE; + ++pos; + ++consumed; + } + if (end-pos && READ_spec(pos) == ',') { + if (format->frac_thousands_separator == LT_UNDERSCORE_LOCALE) { + invalid_comma_and_underscore(); + return 0; + } + } + + /* Not having a precision or underscore/comma after a dot + is an error. */ if (consumed == 0) { PyErr_Format(PyExc_ValueError, "Format specifier missing precision"); @@ -327,6 +357,14 @@ parse_internal_render_format_spec(PyObject *obj, } } + if (format->type == 'n' + && format->frac_thousands_separator != LT_NO_LOCALE) + { + invalid_thousands_separator_type(format->frac_thousands_separator, + format->type); + return 0; + } + assert (format->align <= 127); assert (format->sign <= 127); return 1; @@ -402,6 +440,7 @@ fill_padding(_PyUnicodeWriter *writer, typedef struct { PyObject *decimal_point; PyObject *thousands_sep; + PyObject *frac_thousands_sep; const char *grouping; char *grouping_buffer; } LocaleInfo; @@ -423,6 +462,8 @@ typedef struct { Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part, excluding the decimal itself, if present. */ + Py_ssize_t n_frac; + Py_ssize_t n_grouped_frac_digits; /* These 2 are not the widths of fields, but are needed by STRINGLIB_GROUPING. 
*/ @@ -445,24 +486,32 @@ typedef struct { */ static void parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end, - Py_ssize_t *n_remainder, int *has_decimal) + Py_ssize_t *n_remainder, Py_ssize_t *n_frac, int *has_decimal) { - Py_ssize_t remainder; + Py_ssize_t frac; int kind = PyUnicode_KIND(s); const void *data = PyUnicode_DATA(s); - while (posn_digits = n_end - n_start - n_remainder - (has_decimal?1:0); + spec->n_digits = n_end - n_start - n_frac - n_remainder - (has_decimal?1:0); spec->n_lpadding = 0; spec->n_prefix = n_prefix; spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0; spec->n_remainder = n_remainder; + spec->n_frac = n_frac; spec->n_spadding = 0; spec->n_rpadding = 0; spec->sign = '\0'; @@ -530,7 +580,7 @@ calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix, /* The number of chars used for non-digits and non-padding. */ n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal + - spec->n_remainder; + + spec->n_frac + spec->n_remainder; /* min_width can go negative, that's okay. format->width == -1 means we don't care. */ @@ -550,19 +600,36 @@ calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix, NULL, 0, NULL, 0, spec->n_digits, spec->n_min_width, - locale->grouping, locale->thousands_sep, &grouping_maxchar); + locale->grouping, locale->thousands_sep, &grouping_maxchar, 0); if (spec->n_grouped_digits == -1) { return -1; } *maxchar = Py_MAX(*maxchar, grouping_maxchar); } + if (spec->n_frac == 0) { + spec->n_grouped_frac_digits = 0; + } + else { + Py_UCS4 grouping_maxchar; + spec->n_grouped_frac_digits = _PyUnicode_InsertThousandsGrouping( + NULL, 0, + NULL, 0, spec->n_frac, + spec->n_frac, + locale->grouping, locale->frac_thousands_sep, &grouping_maxchar, 1); + if (spec->n_grouped_frac_digits == -1) { + return -1; + } + *maxchar = Py_MAX(*maxchar, grouping_maxchar); + } + /* Given the desired width and the total of digit and non-digit space we consume, see if we need any padding. 
format->width can be negative (meaning no padding), but this code still works in that case. */ n_padding = format->width - - (n_non_digit_non_padding + spec->n_grouped_digits); + (n_non_digit_non_padding + spec->n_grouped_digits + + spec->n_grouped_frac_digits - spec->n_frac); if (n_padding > 0) { /* Some padding is needed. Determine if it's left, space, or right. */ switch (format->align) { @@ -593,7 +660,7 @@ calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix, return spec->n_lpadding + spec->n_sign + spec->n_prefix + spec->n_spadding + spec->n_grouped_digits + spec->n_decimal + - spec->n_remainder + spec->n_rpadding; + spec->n_grouped_frac_digits + spec->n_remainder + spec->n_rpadding; } /* Fill in the digit parts of a number's string representation, @@ -649,7 +716,7 @@ fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec, writer, spec->n_grouped_digits, digits, d_pos, spec->n_digits, spec->n_min_width, - locale->grouping, locale->thousands_sep, NULL); + locale->grouping, locale->thousands_sep, NULL, 0); if (r == -1) return -1; assert(r == spec->n_grouped_digits); @@ -677,6 +744,19 @@ fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec, d_pos += 1; } + if (spec->n_frac) { + r = _PyUnicode_InsertThousandsGrouping( + writer, spec->n_grouped_frac_digits, + digits, d_pos, spec->n_frac, spec->n_frac, + locale->grouping, locale->frac_thousands_sep, NULL, 1); + if (r == -1) { + return -1; + } + assert(r == spec->n_grouped_frac_digits); + d_pos += spec->n_frac; + writer->pos += spec->n_grouped_frac_digits; + } + if (spec->n_remainder) { _PyUnicode_FastCopyCharacters( writer->buffer, writer->pos, @@ -701,7 +781,8 @@ static const char no_grouping[1] = {CHAR_MAX}; LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. 
*/ static int -get_locale_info(enum LocaleType type, LocaleInfo *locale_info) +get_locale_info(enum LocaleType type, enum LocaleType frac_type, + LocaleInfo *locale_info) { switch (type) { case LT_CURRENT_LOCALE: { @@ -746,6 +827,19 @@ get_locale_info(enum LocaleType type, LocaleInfo *locale_info) locale_info->grouping = no_grouping; break; } + if (frac_type != LT_NO_LOCALE) { + locale_info->frac_thousands_sep = PyUnicode_FromOrdinal( + frac_type == LT_DEFAULT_LOCALE ? ',' : '_'); + if (!locale_info->frac_thousands_sep) { + return -1; + } + if (locale_info->grouping == no_grouping) { + locale_info->grouping = "\3"; + } + } + else { + locale_info->frac_thousands_sep = Py_GetConstant(Py_CONSTANT_EMPTY_STR); + } return 0; } @@ -754,6 +848,7 @@ free_locale_info(LocaleInfo *locale_info) { Py_XDECREF(locale_info->decimal_point); Py_XDECREF(locale_info->thousands_sep); + Py_XDECREF(locale_info->frac_thousands_sep); PyMem_Free(locale_info->grouping_buffer); } @@ -1005,13 +1100,13 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format, /* Determine the grouping, separator, and decimal point, if any. */ if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE : - format->thousands_separators, + format->thousands_separators, 0, &locale) == -1) goto done; /* Calculate how much memory we'll need. 
*/ n_total = calc_number_widths(&spec, n_prefix, sign_char, inumeric_chars, - inumeric_chars + n_digits, n_remainder, 0, + inumeric_chars + n_digits, n_remainder, 0, 0, &locale, format, &maxchar); if (n_total == -1) { goto done; @@ -1046,6 +1141,7 @@ format_float_internal(PyObject *value, char *buf = NULL; /* buffer returned from PyOS_double_to_string */ Py_ssize_t n_digits; Py_ssize_t n_remainder; + Py_ssize_t n_frac; Py_ssize_t n_total; int has_decimal; double val; @@ -1125,7 +1221,8 @@ format_float_internal(PyObject *value, if (format->sign != '+' && format->sign != ' ' && format->width == -1 && format->type != 'n' - && !format->thousands_separators) + && !format->thousands_separators + && !format->frac_thousands_separator) { /* Fast path */ result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits); @@ -1151,18 +1248,20 @@ format_float_internal(PyObject *value, /* Determine if we have any "remainder" (after the digits, might include decimal or exponent or both (or neither)) */ - parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal); + parse_number(unicode_tmp, index, index + n_digits, + &n_remainder, &n_frac, &has_decimal); /* Determine the grouping, separator, and decimal point, if any. */ if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE : format->thousands_separators, + format->frac_thousands_separator, &locale) == -1) goto done; /* Calculate how much memory we'll need. 
*/ n_total = calc_number_widths(&spec, 0, sign_char, index, - index + n_digits, n_remainder, has_decimal, - &locale, format, &maxchar); + index + n_digits, n_remainder, n_frac, + has_decimal, &locale, format, &maxchar); if (n_total == -1) { goto done; } @@ -1202,6 +1301,8 @@ format_complex_internal(PyObject *value, Py_ssize_t n_im_digits; Py_ssize_t n_re_remainder; Py_ssize_t n_im_remainder; + Py_ssize_t n_re_frac; + Py_ssize_t n_im_frac; Py_ssize_t n_re_total; Py_ssize_t n_im_total; int re_has_decimal; @@ -1330,13 +1431,14 @@ format_complex_internal(PyObject *value, /* Determine if we have any "remainder" (after the digits, might include decimal or exponent or both (or neither)) */ parse_number(re_unicode_tmp, i_re, i_re + n_re_digits, - &n_re_remainder, &re_has_decimal); + &n_re_remainder, &n_re_frac, &re_has_decimal); parse_number(im_unicode_tmp, i_im, i_im + n_im_digits, - &n_im_remainder, &im_has_decimal); + &n_im_remainder, &n_im_frac, &im_has_decimal); /* Determine the grouping, separator, and decimal point, if any. */ if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE : format->thousands_separators, + format->frac_thousands_separator, &locale) == -1) goto done; @@ -1349,8 +1451,8 @@ format_complex_internal(PyObject *value, /* Calculate how much memory we'll need. */ n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, i_re, i_re + n_re_digits, n_re_remainder, - re_has_decimal, &locale, &tmp_format, - &maxchar); + n_re_frac, re_has_decimal, &locale, + &tmp_format, &maxchar); if (n_re_total == -1) { goto done; } @@ -1362,8 +1464,8 @@ format_complex_internal(PyObject *value, tmp_format.sign = '+'; n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, i_im, i_im + n_im_digits, n_im_remainder, - im_has_decimal, &locale, &tmp_format, - &maxchar); + n_im_frac, im_has_decimal, &locale, + &tmp_format, &maxchar); if (n_im_total == -1) { goto done; } pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.

Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy