From 5858d8c84f33dff7af357f102e0f99cfc425bd50 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Sat, 21 Sep 2024 08:27:08 +0300 Subject: [PATCH 01/12] gh-87790: support underscore for formatting fractional part of floats ```pycon >>> f"{123_456.123_456:_._f}" # Whole and fractional '123_456.123_456' >>> f"{123_456.123_456:_f}" # Integer component only '123_456.123456' >>> f"{123_456.123_456:._f}" # Fractional component only '123456.123_456' >>> f"{123_456.123_456:.4_f}" # with precision '123456.1_235' ``` --- Doc/library/string.rst | 10 +- Lib/test/test_float.py | 12 ++ ...4-10-11-10-41-05.gh-issue-87790.mlfEGl.rst | 3 + Python/formatter_unicode.c | 125 ++++++++++++++---- 4 files changed, 122 insertions(+), 28 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-10-11-10-41-05.gh-issue-87790.mlfEGl.rst diff --git a/Doc/library/string.rst b/Doc/library/string.rst index 57a1f920523035..943efdc31ae2c0 100644 --- a/Doc/library/string.rst +++ b/Doc/library/string.rst @@ -312,12 +312,13 @@ non-empty format specification typically modifies the result. The general form of a *standard format specifier* is: .. productionlist:: format-spec - format_spec: [[`fill`]`align`][`sign`]["z"]["#"]["0"][`width`][`grouping_option`]["." `precision`][`type`] + format_spec: [[`fill`]`align`][`sign`]["z"]["#"]["0"][`width`][`grouping_option`]["." `precision` [`fraction_grouping`]][`type`] fill: align: "<" | ">" | "=" | "^" sign: "+" | "-" | " " width: `~python-grammar:digit`+ grouping_option: "_" | "," + fraction_grouping: "_" precision: `~python-grammar:digit`+ type: "b" | "c" | "d" | "e" | "E" | "f" | "F" | "g" | "G" | "n" | "o" | "s" | "x" | "X" | "%" @@ -448,6 +449,13 @@ indicates the maximum field size - in other words, how many characters will be used from the field content. The *precision* is not allowed for integer presentation types. 
+The ``'_'`` option after *precision* means the use of an underscore for a +thousands separator of the fractional part for floating-point presentation +types. + +.. versionchanged:: 3.14 + Support underscore as a thousands separator for the fractional part. + Finally, the *type* determines how the data should be presented. The available string presentation types are: diff --git a/Lib/test/test_float.py b/Lib/test/test_float.py index f588e16b70123a..962893498e151f 100644 --- a/Lib/test/test_float.py +++ b/Lib/test/test_float.py @@ -754,6 +754,18 @@ def test_format(self): self.assertEqual(format(INF, 'f'), 'inf') self.assertEqual(format(INF, 'F'), 'INF') + # underscores + x = 123_456.123_456 + self.assertEqual(format(x, '_f'), '123_456.123456') + self.assertEqual(format(x, '._f'), '123456.123_456') + self.assertEqual(format(x, '_._f'), '123_456.123_456') + self.assertEqual(format(x, '.10_f'), '123456.1_234_560_000') + self.assertEqual(format(x, '>21._f'), ' 123456.123_456') + self.assertEqual(format(x, '<21._f'), '123456.123_456 ') + self.assertEqual(format(x, '+.11_e'), '+1.23_456_123_456e+05') + + self.assertRaises(ValueError, format, x , '._6f') + @support.requires_IEEE_754 def test_format_testfile(self): with open(format_testfile, encoding="utf-8") as testfile: diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-10-11-10-41-05.gh-issue-87790.mlfEGl.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-11-10-41-05.gh-issue-87790.mlfEGl.rst new file mode 100644 index 00000000000000..523270fda34e7d --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-11-10-41-05.gh-issue-87790.mlfEGl.rst @@ -0,0 +1,3 @@ +Support underscore as a thousands separator in the fractional part for +floating-point presentation types of the new-style string formatting (with +:func:`format` or :ref:`f-strings`). Patch by Sergey B Kirpichev. 
diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c index 16f711184990ac..0a7404f3100b42 100644 --- a/Python/formatter_unicode.c +++ b/Python/formatter_unicode.c @@ -135,6 +135,7 @@ typedef struct { Py_ssize_t width; enum LocaleType thousands_separators; Py_ssize_t precision; + enum LocaleType frac_thousands_separator; Py_UCS4 type; } InternalFormatSpec; @@ -171,6 +172,7 @@ parse_internal_render_format_spec(PyObject *obj, format->sign = '\0'; format->width = -1; format->thousands_separators = LT_NO_LOCALE; + format->frac_thousands_separator = LT_NO_LOCALE; format->precision = -1; format->type = default_type; @@ -260,7 +262,16 @@ parse_internal_render_format_spec(PyObject *obj, /* Overflow error. Exception already set. */ return 0; - /* Not having a precision after a dot is an error. */ + if (end-pos && READ_spec(pos) == '_') { + if (consumed == 0) { + format->precision = -1; + } + format->frac_thousands_separator = LT_UNDERSCORE_LOCALE; + ++pos; + ++consumed; + } + + /* Not having a precision or underscore after a dot is an error. */ if (consumed == 0) { PyErr_Format(PyExc_ValueError, "Format specifier missing precision"); @@ -402,6 +413,7 @@ fill_padding(_PyUnicodeWriter *writer, typedef struct { PyObject *decimal_point; PyObject *thousands_sep; + PyObject *frac_thousands_sep; const char *grouping; char *grouping_buffer; } LocaleInfo; @@ -423,6 +435,8 @@ typedef struct { Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part, excluding the decimal itself, if present. */ + Py_ssize_t n_frac; + Py_ssize_t n_grouped_frac_digits; /* These 2 are not the widths of fields, but are needed by STRINGLIB_GROUPING. 
*/ @@ -445,24 +459,32 @@ typedef struct { */ static void parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end, - Py_ssize_t *n_remainder, int *has_decimal) + Py_ssize_t *n_remainder, Py_ssize_t *n_frac, int *has_decimal) { - Py_ssize_t remainder; + Py_ssize_t frac; int kind = PyUnicode_KIND(s); const void *data = PyUnicode_DATA(s); - while (posn_digits = n_end - n_start - n_remainder - (has_decimal?1:0); + spec->n_digits = n_end - n_start - n_frac - n_remainder - (has_decimal?1:0); spec->n_lpadding = 0; spec->n_prefix = n_prefix; spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0; spec->n_remainder = n_remainder; + spec->n_frac = n_frac; spec->n_spadding = 0; spec->n_rpadding = 0; spec->sign = '\0'; @@ -530,7 +553,7 @@ calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix, /* The number of chars used for non-digits and non-padding. */ n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal + - spec->n_remainder; + + spec->n_frac + spec->n_remainder; /* min_width can go negative, that's okay. format->width == -1 means we don't care. */ @@ -557,12 +580,29 @@ calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix, *maxchar = Py_MAX(*maxchar, grouping_maxchar); } + if (spec->n_frac == 0) { + spec->n_grouped_frac_digits = 0; + } + else { + Py_UCS4 grouping_maxchar; + spec->n_grouped_frac_digits = _PyUnicode_InsertThousandsGrouping( + NULL, 0, + NULL, 0, spec->n_frac, + spec->n_frac, + locale->grouping, locale->frac_thousands_sep, &grouping_maxchar); + if (spec->n_grouped_frac_digits == -1) { + return -1; + } + *maxchar = Py_MAX(*maxchar, grouping_maxchar); + } + /* Given the desired width and the total of digit and non-digit space we consume, see if we need any padding. format->width can be negative (meaning no padding), but this code still works in that case. 
*/ n_padding = format->width - - (n_non_digit_non_padding + spec->n_grouped_digits); + (n_non_digit_non_padding + spec->n_grouped_digits + + spec->n_grouped_frac_digits - spec->n_frac); if (n_padding > 0) { /* Some padding is needed. Determine if it's left, space, or right. */ switch (format->align) { @@ -593,7 +633,7 @@ calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix, return spec->n_lpadding + spec->n_sign + spec->n_prefix + spec->n_spadding + spec->n_grouped_digits + spec->n_decimal + - spec->n_remainder + spec->n_rpadding; + spec->n_grouped_frac_digits + spec->n_remainder + spec->n_rpadding; } /* Fill in the digit parts of a number's string representation, @@ -677,6 +717,19 @@ fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec, d_pos += 1; } + if (spec->n_frac) { + r = _PyUnicode_InsertThousandsGrouping( + writer, spec->n_grouped_frac_digits, + digits, d_pos, spec->n_frac, spec->n_frac, + locale->grouping, locale->frac_thousands_sep, NULL); + if (r == -1) { + return -1; + } + assert(r == spec->n_grouped_frac_digits); + d_pos += spec->n_frac; + writer->pos += spec->n_grouped_frac_digits; + } + if (spec->n_remainder) { _PyUnicode_FastCopyCharacters( writer->buffer, writer->pos, @@ -701,7 +754,8 @@ static const char no_grouping[1] = {CHAR_MAX}; LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. 
*/ static int -get_locale_info(enum LocaleType type, LocaleInfo *locale_info) +get_locale_info(enum LocaleType type, enum LocaleType frac_type, + LocaleInfo *locale_info) { switch (type) { case LT_CURRENT_LOCALE: { @@ -746,6 +800,15 @@ get_locale_info(enum LocaleType type, LocaleInfo *locale_info) locale_info->grouping = no_grouping; break; } + if (frac_type == LT_UNDERSCORE_LOCALE) { + locale_info->frac_thousands_sep = PyUnicode_FromOrdinal('_'); + if (locale_info->grouping == no_grouping) { + locale_info->grouping = "\3"; + } + } + else { + locale_info->frac_thousands_sep = Py_GetConstant(Py_CONSTANT_EMPTY_STR); + } return 0; } @@ -754,6 +817,7 @@ free_locale_info(LocaleInfo *locale_info) { Py_XDECREF(locale_info->decimal_point); Py_XDECREF(locale_info->thousands_sep); + Py_XDECREF(locale_info->frac_thousands_sep); PyMem_Free(locale_info->grouping_buffer); } @@ -1005,13 +1069,13 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format, /* Determine the grouping, separator, and decimal point, if any. */ if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE : - format->thousands_separators, + format->thousands_separators, 0, &locale) == -1) goto done; /* Calculate how much memory we'll need. 
*/ n_total = calc_number_widths(&spec, n_prefix, sign_char, inumeric_chars, - inumeric_chars + n_digits, n_remainder, 0, + inumeric_chars + n_digits, n_remainder, 0, 0, &locale, format, &maxchar); if (n_total == -1) { goto done; @@ -1046,6 +1110,7 @@ format_float_internal(PyObject *value, char *buf = NULL; /* buffer returned from PyOS_double_to_string */ Py_ssize_t n_digits; Py_ssize_t n_remainder; + Py_ssize_t n_frac; Py_ssize_t n_total; int has_decimal; double val; @@ -1125,7 +1190,8 @@ format_float_internal(PyObject *value, if (format->sign != '+' && format->sign != ' ' && format->width == -1 && format->type != 'n' - && !format->thousands_separators) + && !format->thousands_separators + && !format->frac_thousands_separator) { /* Fast path */ result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits); @@ -1151,18 +1217,20 @@ format_float_internal(PyObject *value, /* Determine if we have any "remainder" (after the digits, might include decimal or exponent or both (or neither)) */ - parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal); + parse_number(unicode_tmp, index, index + n_digits, + &n_remainder, &n_frac, &has_decimal); /* Determine the grouping, separator, and decimal point, if any. */ if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE : format->thousands_separators, + format->frac_thousands_separator, &locale) == -1) goto done; /* Calculate how much memory we'll need. 
*/ n_total = calc_number_widths(&spec, 0, sign_char, index, - index + n_digits, n_remainder, has_decimal, - &locale, format, &maxchar); + index + n_digits, n_remainder, n_frac, + has_decimal, &locale, format, &maxchar); if (n_total == -1) { goto done; } @@ -1202,6 +1270,8 @@ format_complex_internal(PyObject *value, Py_ssize_t n_im_digits; Py_ssize_t n_re_remainder; Py_ssize_t n_im_remainder; + Py_ssize_t n_re_frac; + Py_ssize_t n_im_frac; Py_ssize_t n_re_total; Py_ssize_t n_im_total; int re_has_decimal; @@ -1330,13 +1400,14 @@ format_complex_internal(PyObject *value, /* Determine if we have any "remainder" (after the digits, might include decimal or exponent or both (or neither)) */ parse_number(re_unicode_tmp, i_re, i_re + n_re_digits, - &n_re_remainder, &re_has_decimal); + &n_re_remainder, &n_re_frac, &re_has_decimal); parse_number(im_unicode_tmp, i_im, i_im + n_im_digits, - &n_im_remainder, &im_has_decimal); + &n_im_remainder, &n_im_frac, &im_has_decimal); /* Determine the grouping, separator, and decimal point, if any. */ if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE : format->thousands_separators, + format->frac_thousands_separator, &locale) == -1) goto done; @@ -1349,8 +1420,8 @@ format_complex_internal(PyObject *value, /* Calculate how much memory we'll need. 
*/ n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, i_re, i_re + n_re_digits, n_re_remainder, - re_has_decimal, &locale, &tmp_format, - &maxchar); + n_re_frac, re_has_decimal, &locale, + &tmp_format, &maxchar); if (n_re_total == -1) { goto done; } @@ -1362,8 +1433,8 @@ format_complex_internal(PyObject *value, tmp_format.sign = '+'; n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, i_im, i_im + n_im_digits, n_im_remainder, - im_has_decimal, &locale, &tmp_format, - &maxchar); + n_im_frac, im_has_decimal, &locale, + &tmp_format, &maxchar); if (n_im_total == -1) { goto done; } From d7b378476e1af62f16be8e747b66c862f68ca322 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Fri, 11 Oct 2024 19:16:54 +0300 Subject: [PATCH 02/12] put separators in fractional part, iterating from most significant digit --- Include/internal/pycore_unicodeobject.h | 3 ++- Lib/test/test_float.py | 4 ++-- Objects/stringlib/localeutil.h | 27 +++++++++++++++++++------ Objects/unicodeobject.c | 13 ++++++------ Python/formatter_unicode.c | 8 ++++---- 5 files changed, 36 insertions(+), 19 deletions(-) diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h index a60372f58295a9..13c3213132568b 100644 --- a/Include/internal/pycore_unicodeobject.h +++ b/Include/internal/pycore_unicodeobject.h @@ -246,7 +246,8 @@ extern Py_ssize_t _PyUnicode_InsertThousandsGrouping( Py_ssize_t min_width, const char *grouping, PyObject *thousands_sep, - Py_UCS4 *maxchar); + Py_UCS4 *maxchar, + int forward); /* --- Misc functions ----------------------------------------------------- */ diff --git a/Lib/test/test_float.py b/Lib/test/test_float.py index 962893498e151f..9e0409154d979b 100644 --- a/Lib/test/test_float.py +++ b/Lib/test/test_float.py @@ -759,10 +759,10 @@ def test_format(self): self.assertEqual(format(x, '_f'), '123_456.123456') self.assertEqual(format(x, '._f'), '123456.123_456') self.assertEqual(format(x, '_._f'), '123_456.123_456') - 
self.assertEqual(format(x, '.10_f'), '123456.1_234_560_000') + self.assertEqual(format(x, '.10_f'), '123456.123_456_000_0') self.assertEqual(format(x, '>21._f'), ' 123456.123_456') self.assertEqual(format(x, '<21._f'), '123456.123_456 ') - self.assertEqual(format(x, '+.11_e'), '+1.23_456_123_456e+05') + self.assertEqual(format(x, '+.11_e'), '+1.234_561_234_56e+05') self.assertRaises(ValueError, format, x , '._6f') diff --git a/Objects/stringlib/localeutil.h b/Objects/stringlib/localeutil.h index d77715ec0de9ef..a4ab701de004c8 100644 --- a/Objects/stringlib/localeutil.h +++ b/Objects/stringlib/localeutil.h @@ -47,7 +47,7 @@ InsertThousandsGrouping_fill(_PyUnicodeWriter *writer, Py_ssize_t *buffer_pos, PyObject *digits, Py_ssize_t *digits_pos, Py_ssize_t n_chars, Py_ssize_t n_zeros, PyObject *thousands_sep, Py_ssize_t thousands_sep_len, - Py_UCS4 *maxchar) + Py_UCS4 *maxchar, int forward) { if (!writer) { /* if maxchar > 127, maxchar is already set */ @@ -59,24 +59,39 @@ InsertThousandsGrouping_fill(_PyUnicodeWriter *writer, Py_ssize_t *buffer_pos, } if (thousands_sep) { - *buffer_pos -= thousands_sep_len; - + if (!forward) { + *buffer_pos -= thousands_sep_len; + } /* Copy the thousands_sep chars into the buffer. 
*/ _PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos, thousands_sep, 0, thousands_sep_len); + if (forward) { + *buffer_pos += thousands_sep_len; + } } - *buffer_pos -= n_chars; - *digits_pos -= n_chars; + if (!forward) { + *buffer_pos -= n_chars; + *digits_pos -= n_chars; + } _PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos, digits, *digits_pos, n_chars); + if (forward) { + *buffer_pos += n_chars; + *digits_pos += n_chars; + } if (n_zeros) { - *buffer_pos -= n_zeros; + if (!forward) { + *buffer_pos -= n_zeros; + } int kind = PyUnicode_KIND(writer->buffer); void *data = PyUnicode_DATA(writer->buffer); unicode_fill(kind, data, '0', *buffer_pos, n_zeros); + if (forward) { + *buffer_pos += n_zeros; + } } } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index b94a74c2c688a9..64562fe926be06 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -9488,7 +9488,8 @@ _PyUnicode_InsertThousandsGrouping( Py_ssize_t min_width, const char *grouping, PyObject *thousands_sep, - Py_UCS4 *maxchar) + Py_UCS4 *maxchar, + int forward) { min_width = Py_MAX(0, min_width); if (writer) { @@ -9525,14 +9526,14 @@ _PyUnicode_InsertThousandsGrouping( should be an empty string */ assert(!(grouping[0] == CHAR_MAX && thousands_sep_len != 0)); - digits_pos = d_pos + n_digits; + digits_pos = d_pos + (forward ? 0 : n_digits); if (writer) { - buffer_pos = writer->pos + n_buffer; + buffer_pos = writer->pos + (forward ? 0 : n_buffer); assert(buffer_pos <= PyUnicode_GET_LENGTH(writer->buffer)); assert(digits_pos <= PyUnicode_GET_LENGTH(digits)); } else { - buffer_pos = n_buffer; + buffer_pos = forward ? 0 : n_buffer; } if (!writer) { @@ -9554,7 +9555,7 @@ _PyUnicode_InsertThousandsGrouping( digits, &digits_pos, n_chars, n_zeros, use_separator ? thousands_sep : NULL, - thousands_sep_len, maxchar); + thousands_sep_len, maxchar, forward); /* Use a separator next time. 
*/ use_separator = 1; @@ -9583,7 +9584,7 @@ _PyUnicode_InsertThousandsGrouping( digits, &digits_pos, n_chars, n_zeros, use_separator ? thousands_sep : NULL, - thousands_sep_len, maxchar); + thousands_sep_len, maxchar, forward); } return count; } diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c index 0a7404f3100b42..abece124562676 100644 --- a/Python/formatter_unicode.c +++ b/Python/formatter_unicode.c @@ -573,7 +573,7 @@ calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix, NULL, 0, NULL, 0, spec->n_digits, spec->n_min_width, - locale->grouping, locale->thousands_sep, &grouping_maxchar); + locale->grouping, locale->thousands_sep, &grouping_maxchar, 0); if (spec->n_grouped_digits == -1) { return -1; } @@ -589,7 +589,7 @@ calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix, NULL, 0, NULL, 0, spec->n_frac, spec->n_frac, - locale->grouping, locale->frac_thousands_sep, &grouping_maxchar); + locale->grouping, locale->frac_thousands_sep, &grouping_maxchar, 1); if (spec->n_grouped_frac_digits == -1) { return -1; } @@ -689,7 +689,7 @@ fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec, writer, spec->n_grouped_digits, digits, d_pos, spec->n_digits, spec->n_min_width, - locale->grouping, locale->thousands_sep, NULL); + locale->grouping, locale->thousands_sep, NULL, 0); if (r == -1) return -1; assert(r == spec->n_grouped_digits); @@ -721,7 +721,7 @@ fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec, r = _PyUnicode_InsertThousandsGrouping( writer, spec->n_grouped_frac_digits, digits, d_pos, spec->n_frac, spec->n_frac, - locale->grouping, locale->frac_thousands_sep, NULL); + locale->grouping, locale->frac_thousands_sep, NULL, 1); if (r == -1) { return -1; } From 5b70fffea9f7cb204204ed2691346692d7d895a0 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Sat, 12 Oct 2024 05:43:07 +0300 Subject: [PATCH 03/12] address review: formatting --- Python/formatter_unicode.c | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c index abece124562676..cdfea7206e9b69 100644 --- a/Python/formatter_unicode.c +++ b/Python/formatter_unicode.c @@ -755,7 +755,7 @@ static const char no_grouping[1] = {CHAR_MAX}; LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */ static int get_locale_info(enum LocaleType type, enum LocaleType frac_type, - LocaleInfo *locale_info) + LocaleInfo *locale_info) { switch (type) { case LT_CURRENT_LOCALE: { From ca70d89cbb0602dedd0ff4031e7c59d4d3628791 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Sat, 16 Nov 2024 06:00:43 +0300 Subject: [PATCH 04/12] address review: check return value of PyUnicode_FromOrdinal() --- Python/formatter_unicode.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c index cdfea7206e9b69..56acc80553a640 100644 --- a/Python/formatter_unicode.c +++ b/Python/formatter_unicode.c @@ -802,6 +802,9 @@ get_locale_info(enum LocaleType type, enum LocaleType frac_type, } if (frac_type == LT_UNDERSCORE_LOCALE) { locale_info->frac_thousands_sep = PyUnicode_FromOrdinal('_'); + if (!locale_info->frac_thousands_sep) { + return -1; + } if (locale_info->grouping == no_grouping) { locale_info->grouping = "\3"; } From 28b91378bd4552a1159526cc0a8e7b4b5d8b7bfd Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Sat, 16 Nov 2024 06:35:58 +0300 Subject: [PATCH 05/12] address review: support comma --- Doc/library/string.rst | 11 +++++------ Lib/test/test_float.py | 1 + .../2024-10-11-10-41-05.gh-issue-87790.mlfEGl.rst | 2 +- Python/formatter_unicode.c | 15 +++++++++++++-- 4 files changed, 20 insertions(+), 9 deletions(-) diff --git a/Doc/library/string.rst b/Doc/library/string.rst index 0fc0c4c00a13de..07bc0b9d4b5aa2 100644 --- a/Doc/library/string.rst +++ b/Doc/library/string.rst @@ -312,13 +312,12 @@ non-empty format specification typically modifies the result. 
The general form of a *standard format specifier* is: .. productionlist:: format-spec - format_spec: [[`fill`]`align`][`sign`]["z"]["#"]["0"][`width`][`grouping_option`]["." `precision` [`fraction_grouping`]][`type`] + format_spec: [[`fill`]`align`][`sign`]["z"]["#"]["0"][`width`][`grouping_option`]["." `precision` [`grouping_option`]][`type`] fill: align: "<" | ">" | "=" | "^" sign: "+" | "-" | " " width: `~python-grammar:digit`+ grouping_option: "_" | "," - fraction_grouping: "_" precision: `~python-grammar:digit`+ type: "b" | "c" | "d" | "e" | "E" | "f" | "F" | "g" | "G" | "n" | "o" | "s" | "x" | "X" | "%" @@ -450,12 +449,12 @@ indicates the maximum field size - in other words, how many characters will be used from the field content. The *precision* is not allowed for integer presentation types. -The ``'_'`` option after *precision* means the use of an underscore for a -thousands separator of the fractional part for floating-point presentation -types. +The ``'_'`` or ``','`` option after *precision* means the use of an underscore +or a comma for a thousands separator of the fractional part for floating-point +presentation types. .. versionchanged:: 3.14 - Support underscore as a thousands separator for the fractional part. + Support thousands separators for the fractional part. Finally, the *type* determines how the data should be presented. 
diff --git a/Lib/test/test_float.py b/Lib/test/test_float.py index 9e0409154d979b..8ef2b393a34939 100644 --- a/Lib/test/test_float.py +++ b/Lib/test/test_float.py @@ -763,6 +763,7 @@ def test_format(self): self.assertEqual(format(x, '>21._f'), ' 123456.123_456') self.assertEqual(format(x, '<21._f'), '123456.123_456 ') self.assertEqual(format(x, '+.11_e'), '+1.234_561_234_56e+05') + self.assertEqual(format(x, '+.11,e'), '+1.234,561,234,56e+05') self.assertRaises(ValueError, format, x , '._6f') diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-10-11-10-41-05.gh-issue-87790.mlfEGl.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-11-10-41-05.gh-issue-87790.mlfEGl.rst index 523270fda34e7d..46a761f22e562f 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2024-10-11-10-41-05.gh-issue-87790.mlfEGl.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-11-10-41-05.gh-issue-87790.mlfEGl.rst @@ -1,3 +1,3 @@ -Support underscore as a thousands separator in the fractional part for +Support underscore and comma as thousands separators in the fractional part for floating-point presentation types of the new-style string formatting (with :func:`format` or :ref:`f-strings`). Patch by Sergey B Kirpichev. diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c index 56acc80553a640..68f276f98d3f26 100644 --- a/Python/formatter_unicode.c +++ b/Python/formatter_unicode.c @@ -270,6 +270,16 @@ parse_internal_render_format_spec(PyObject *obj, ++pos; ++consumed; } + if (end-pos && READ_spec(pos) == ',' + && format->frac_thousands_separator == LT_NO_LOCALE) + { + if (consumed == 0) { + format->precision = -1; + } + format->frac_thousands_separator = LT_DEFAULT_LOCALE; + ++pos; + ++consumed; + } /* Not having a precision or underscore after a dot is an error. 
*/ if (consumed == 0) { @@ -800,8 +810,9 @@ get_locale_info(enum LocaleType type, enum LocaleType frac_type, locale_info->grouping = no_grouping; break; } - if (frac_type == LT_UNDERSCORE_LOCALE) { - locale_info->frac_thousands_sep = PyUnicode_FromOrdinal('_'); + if (frac_type != LT_NO_LOCALE) { + locale_info->frac_thousands_sep = PyUnicode_FromOrdinal( + frac_type == LT_DEFAULT_LOCALE ? ',' : '_'); if (!locale_info->frac_thousands_sep) { return -1; } From 0a0b3b0c6efbcc04eeeeae661e39555ac6cb2351 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Sat, 16 Nov 2024 11:11:43 +0300 Subject: [PATCH 06/12] address review: fix error handling --- Lib/test/test_float.py | 4 +++- Python/formatter_unicode.c | 23 ++++++++++++++++------- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/Lib/test/test_float.py b/Lib/test/test_float.py index 8ef2b393a34939..ba486cc3016a78 100644 --- a/Lib/test/test_float.py +++ b/Lib/test/test_float.py @@ -765,7 +765,9 @@ def test_format(self): self.assertEqual(format(x, '+.11_e'), '+1.234_561_234_56e+05') self.assertEqual(format(x, '+.11,e'), '+1.234,561,234,56e+05') - self.assertRaises(ValueError, format, x , '._6f') + self.assertRaises(ValueError, format, x, '._6f') + self.assertRaises(ValueError, format, x, '.,_6f') + self.assertRaises(ValueError, format, x, '._,6f') @support.requires_IEEE_754 def test_format_testfile(self): diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c index 68f276f98d3f26..3490fef5b02c26 100644 --- a/Python/formatter_unicode.c +++ b/Python/formatter_unicode.c @@ -262,26 +262,35 @@ parse_internal_render_format_spec(PyObject *obj, /* Overflow error. Exception already set. 
*/ return 0; - if (end-pos && READ_spec(pos) == '_') { + if (end-pos && READ_spec(pos) == ',') { if (consumed == 0) { format->precision = -1; } - format->frac_thousands_separator = LT_UNDERSCORE_LOCALE; + format->frac_thousands_separator = LT_DEFAULT_LOCALE; ++pos; ++consumed; } - if (end-pos && READ_spec(pos) == ',' - && format->frac_thousands_separator == LT_NO_LOCALE) - { + if (end-pos && READ_spec(pos) == '_') { + if (format->frac_thousands_separator != LT_NO_LOCALE) { + invalid_comma_and_underscore(); + return 0; + } if (consumed == 0) { format->precision = -1; } - format->frac_thousands_separator = LT_DEFAULT_LOCALE; + format->frac_thousands_separator = LT_UNDERSCORE_LOCALE; ++pos; ++consumed; } + if (end-pos && READ_spec(pos) == ',') { + if (format->frac_thousands_separator == LT_UNDERSCORE_LOCALE) { + invalid_comma_and_underscore(); + return 0; + } + } - /* Not having a precision or underscore after a dot is an error. */ + /* Not having a precision or underscore/comma after a dot + is an error. 
*/ if (consumed == 0) { PyErr_Format(PyExc_ValueError, "Format specifier missing precision"); From 59aab3c06239099e397958e4f18eba5ff6c16ad2 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Sat, 16 Nov 2024 11:18:55 +0300 Subject: [PATCH 07/12] add examples --- Doc/library/string.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Doc/library/string.rst b/Doc/library/string.rst index 07bc0b9d4b5aa2..b20cfceeed13e9 100644 --- a/Doc/library/string.rst +++ b/Doc/library/string.rst @@ -707,6 +707,17 @@ Using the comma as a thousands separator:: >>> '{:,}'.format(1234567890) '1,234,567,890' +Using the underscore as a thousands separator:: + + >>> '{:_}'.format(1234567890) + '1_234_567_890' + >>> '{:_}'.format(123456789.123456789) + '123_456_789.12345679' + >>> '{:._}'.format(123456789.123456789) + '123456789.123_456_79' + >>> '{:_._}'.format(123456789.123456789) + '123_456_789.123_456_79' + Expressing a percentage:: >>> points = 19 From 02d44de7b6bf28d57d6164d0f0a433d69857e51b Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Sat, 16 Nov 2024 12:18:31 +0300 Subject: [PATCH 08/12] address review: merge examples for comma/underscore --- Doc/library/string.rst | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Doc/library/string.rst b/Doc/library/string.rst index b20cfceeed13e9..12f7b0227c494f 100644 --- a/Doc/library/string.rst +++ b/Doc/library/string.rst @@ -702,13 +702,10 @@ Replacing ``%x`` and ``%o`` and converting the value to different bases:: >>> "int: {0:d}; hex: {0:#x}; oct: {0:#o}; bin: {0:#b}".format(42) 'int: 42; hex: 0x2a; oct: 0o52; bin: 0b101010' -Using the comma as a thousands separator:: +Using the comma or the underscore as a thousands separator:: >>> '{:,}'.format(1234567890) '1,234,567,890' - -Using the underscore as a thousands separator:: - >>> '{:_}'.format(1234567890) '1_234_567_890' >>> '{:_}'.format(123456789.123456789) From e35961680ad675761923875704950b7654799b1c Mon Sep 17 00:00:00 2001 From: 
Sergey B Kirpichev Date: Mon, 18 Nov 2024 05:58:54 +0300 Subject: [PATCH 09/12] address review: forbid separators for ``n`` --- Lib/test/test_float.py | 4 +++- Python/formatter_unicode.c | 8 ++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_float.py b/Lib/test/test_float.py index ba486cc3016a78..06fe2089ff1429 100644 --- a/Lib/test/test_float.py +++ b/Lib/test/test_float.py @@ -754,7 +754,7 @@ def test_format(self): self.assertEqual(format(INF, 'f'), 'inf') self.assertEqual(format(INF, 'F'), 'INF') - # underscores + # thousands separators x = 123_456.123_456 self.assertEqual(format(x, '_f'), '123_456.123456') self.assertEqual(format(x, '._f'), '123456.123_456') @@ -768,6 +768,8 @@ def test_format(self): self.assertRaises(ValueError, format, x, '._6f') self.assertRaises(ValueError, format, x, '.,_6f') self.assertRaises(ValueError, format, x, '._,6f') + self.assertRaises(ValueError, format, x, '.6_n') + self.assertRaises(ValueError, format, x, '.6,n') @support.requires_IEEE_754 def test_format_testfile(self): diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c index 3490fef5b02c26..4f8662b0a4c8fb 100644 --- a/Python/formatter_unicode.c +++ b/Python/formatter_unicode.c @@ -357,6 +357,14 @@ parse_internal_render_format_spec(PyObject *obj, } } + if (format->type == 'n' + && format->frac_thousands_separator != LT_NO_LOCALE) + { + invalid_thousands_separator_type(format->frac_thousands_separator, + format->type); + return 0; + } + assert (format->align <= 127); assert (format->sign <= 127); return 1; From ea851c3c4d30fc85507ab0b3dc356a40336eedc8 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Mon, 18 Nov 2024 18:10:16 +0300 Subject: [PATCH 10/12] address review: more tests --- Lib/test/test_float.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Lib/test/test_float.py b/Lib/test/test_float.py index 06fe2089ff1429..a2e34faaead9f6 100644 --- a/Lib/test/test_float.py +++ b/Lib/test/test_float.py @@ -757,9 
+757,13 @@ def test_format(self): # thousands separators x = 123_456.123_456 self.assertEqual(format(x, '_f'), '123_456.123456') + self.assertEqual(format(x, ',f'), '123,456.123456') self.assertEqual(format(x, '._f'), '123456.123_456') + self.assertEqual(format(x, '.,f'), '123456.123,456') self.assertEqual(format(x, '_._f'), '123_456.123_456') + self.assertEqual(format(x, ',.,f'), '123,456.123,456') self.assertEqual(format(x, '.10_f'), '123456.123_456_000_0') + self.assertEqual(format(x, '.10,f'), '123456.123,456,000,0') self.assertEqual(format(x, '>21._f'), ' 123456.123_456') self.assertEqual(format(x, '<21._f'), '123456.123_456 ') self.assertEqual(format(x, '+.11_e'), '+1.234_561_234_56e+05') From 4d3f0e32b0d2474524dd32fab9c7ad5a9ed24f17 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Mon, 18 Nov 2024 18:13:08 +0300 Subject: [PATCH 11/12] address review: add to whatsnew --- Doc/whatsnew/3.14.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 958efbe73c1c27..03be88bdd70203 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -202,6 +202,11 @@ Other language changes making it a :term:`generic type`. (Contributed by Brian Schubert in :gh:`126012`.) +* Support underscore and comma as thousands separators in the fractional part + for floating-point presentation types of the new-style string formatting + (with :func:`format` or :ref:`f-strings`). + (Contributed by Sergey B Kirpichev in :gh:`87790`.) 
+ New modules =========== From 893421da8912cfa4ddadfe087da4562297cef486 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Sat, 8 Feb 2025 08:16:43 +0300 Subject: [PATCH 12/12] address review: format-spec & more tests --- Doc/library/string.rst | 9 +++++++-- Lib/test/test_float.py | 5 +++-- Lib/test/test_format.py | 4 ++++ 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/Doc/library/string.rst b/Doc/library/string.rst index e4c5674481c383..721c5c8d334674 100644 --- a/Doc/library/string.rst +++ b/Doc/library/string.rst @@ -319,14 +319,19 @@ non-empty format specification typically modifies the result. The general form of a *standard format specifier* is: .. productionlist:: format-spec - format_spec: [[`fill`]`align`][`sign`]["z"]["#"]["0"][`width`][`grouping_option`]["." `precision` [`grouping_option`]][`type`] + format_spec: [`options`][`width_and_precision`][`type`] + options: [[`fill`]`align`][`sign`]["z"]["#"]["0"] fill: <any character> align: "<" | ">" | "=" | "^" sign: "+" | "-" | " " + width_and_precision: [`width_with_grouping`][`precision_with_grouping`] + width_with_grouping: [`width`][`grouping_option`] + precision_with_grouping: "." [`precision`][`grouping_option`] | "." `grouping_option` width: `~python-grammar:digit`+ grouping_option: "_" | "," precision: `~python-grammar:digit`+ - type: "b" | "c" | "d" | "e" | "E" | "f" | "F" | "g" | "G" | "n" | "o" | "s" | "x" | "X" | "%" + type: "b" | "c" | "d" | "e" | "E" | "f" | "F" | "g" + : | "G" | "n" | "o" | "s" | "x" | "X" | "%" If a valid *align* value is specified, it can be preceded by a *fill* character that can be any character and defaults to a space if omitted. 
diff --git a/Lib/test/test_float.py b/Lib/test/test_float.py index a2e34faaead9f6..231b1047f72b39 100644 --- a/Lib/test/test_float.py +++ b/Lib/test/test_float.py @@ -770,8 +770,9 @@ def test_format(self): self.assertEqual(format(x, '+.11,e'), '+1.234,561,234,56e+05') self.assertRaises(ValueError, format, x, '._6f') - self.assertRaises(ValueError, format, x, '.,_6f') - self.assertRaises(ValueError, format, x, '._,6f') + self.assertRaises(ValueError, format, x, '.,_f') + self.assertRaises(ValueError, format, x, '.6,_f') + self.assertRaises(ValueError, format, x, '.6_,f') self.assertRaises(ValueError, format, x, '.6_n') self.assertRaises(ValueError, format, x, '.6,n') diff --git a/Lib/test/test_format.py b/Lib/test/test_format.py index 9dde63e40d06db..3916bc3d4cd54c 100644 --- a/Lib/test/test_format.py +++ b/Lib/test/test_format.py @@ -515,11 +515,15 @@ def test_with_a_commas_and_an_underscore_in_format_specifier(self): error_msg = re.escape("Cannot specify both ',' and '_'.") with self.assertRaisesRegex(ValueError, error_msg): '{:,_}'.format(1) + with self.assertRaisesRegex(ValueError, error_msg): + '{:.,_f}'.format(1.1) def test_with_an_underscore_and_a_comma_in_format_specifier(self): error_msg = re.escape("Cannot specify both ',' and '_'.") with self.assertRaisesRegex(ValueError, error_msg): '{:_,}'.format(1) + with self.assertRaisesRegex(ValueError, error_msg): + '{:._,f}'.format(1.1) def test_better_error_message_format(self): # https://bugs.python.org/issue20524 pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.

Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy