diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 0274de6733ab0e..45998a13a62166 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -2135,10 +2135,10 @@ PyAPI_FUNC(PyObject *) _PyUnicode_XStrip( see Objects/stringlib/localeutil.h */ #ifndef Py_LIMITED_API PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping( - PyObject *unicode, - Py_ssize_t index, + _PyUnicodeWriter *writer, Py_ssize_t n_buffer, - void *digits, + PyObject *digits, + Py_ssize_t d_pos, Py_ssize_t n_digits, Py_ssize_t min_width, const char *grouping, diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-11-20-22-33-38.bpo-33954.RzSngM.rst b/Misc/NEWS.d/next/Core and Builtins/2018-11-20-22-33-38.bpo-33954.RzSngM.rst new file mode 100644 index 00000000000000..9bfbe1644e16ac --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2018-11-20-22-33-38.bpo-33954.RzSngM.rst @@ -0,0 +1,3 @@ +Fix :meth:`str.format`, :meth:`float.__format__` and +:meth:`complex.__format__` methods for non-ASCII decimal point when using +the "n" formatter. diff --git a/Objects/stringlib/localeutil.h b/Objects/stringlib/localeutil.h index df501ed05c7e97..31fed349caa57b 100644 --- a/Objects/stringlib/localeutil.h +++ b/Objects/stringlib/localeutil.h @@ -1,28 +1,24 @@ -/* stringlib: locale related helpers implementation */ - -#include - -#if !STRINGLIB_IS_UNICODE -# error "localeutil.h is specific to Unicode" -#endif +/* _PyUnicode_InsertThousandsGrouping() helper functions */ typedef struct { const char *grouping; char previous; Py_ssize_t i; /* Where we're currently pointing in grouping. */ -} STRINGLIB(GroupGenerator); +} GroupGenerator; + static void -STRINGLIB(GroupGenerator_init)(STRINGLIB(GroupGenerator) *self, const char *grouping) +GroupGenerator_init(GroupGenerator *self, const char *grouping) { self->grouping = grouping; self->i = 0; self->previous = 0; } + /* Returns the next grouping, or 0 to signify end. 
*/ static Py_ssize_t -STRINGLIB(GroupGenerator_next)(STRINGLIB(GroupGenerator) *self) +GroupGenerator_next(GroupGenerator *self) { /* Note that we don't really do much error checking here. If a grouping string contains just CHAR_MAX, for example, then just @@ -43,138 +39,44 @@ STRINGLIB(GroupGenerator_next)(STRINGLIB(GroupGenerator) *self) } } + /* Fill in some digits, leading zeros, and thousands separator. All are optional, depending on when we're called. */ static void -STRINGLIB(fill)(STRINGLIB_CHAR **digits_end, STRINGLIB_CHAR **buffer_end, - Py_ssize_t n_chars, Py_ssize_t n_zeros, STRINGLIB_CHAR* thousands_sep, - Py_ssize_t thousands_sep_len) +InsertThousandsGrouping_fill(_PyUnicodeWriter *writer, Py_ssize_t *buffer_pos, + PyObject *digits, Py_ssize_t *digits_pos, + Py_ssize_t n_chars, Py_ssize_t n_zeros, + PyObject *thousands_sep, Py_ssize_t thousands_sep_len, + Py_UCS4 *maxchar) { - Py_ssize_t i; + if (!writer) { + /* if maxchar > 127, maxchar is already set */ + if (*maxchar == 127 && thousands_sep) { + Py_UCS4 maxchar2 = PyUnicode_MAX_CHAR_VALUE(thousands_sep); + *maxchar = Py_MAX(*maxchar, maxchar2); + } + return; + } if (thousands_sep) { - *buffer_end -= thousands_sep_len; + *buffer_pos -= thousands_sep_len; /* Copy the thousands_sep chars into the buffer. */ - memcpy(*buffer_end, thousands_sep, - thousands_sep_len * STRINGLIB_SIZEOF_CHAR); - } - - *buffer_end -= n_chars; - *digits_end -= n_chars; - memcpy(*buffer_end, *digits_end, n_chars * sizeof(STRINGLIB_CHAR)); - - *buffer_end -= n_zeros; - for (i = 0; i < n_zeros; i++) - (*buffer_end)[i] = '0'; -} - -/** - * InsertThousandsGrouping: - * @buffer: A pointer to the start of a string. - * @n_buffer: Number of characters in @buffer. - * @digits: A pointer to the digits we're reading from. If count - * is non-NULL, this is unused. - * @n_digits: The number of digits in the string, in which we want - * to put the grouping chars. - * @min_width: The minimum width of the digits in the output string. 
- * Output will be zero-padded on the left to fill. - * @grouping: see definition in localeconv(). - * @thousands_sep: see definition in localeconv(). - * - * There are 2 modes: counting and filling. If @buffer is NULL, - * we are in counting mode, else filling mode. - * If counting, the required buffer size is returned. - * If filling, we know the buffer will be large enough, so we don't - * need to pass in the buffer size. - * Inserts thousand grouping characters (as defined by grouping and - * thousands_sep) into the string between buffer and buffer+n_digits. - * - * Return value: 0 on error, else 1. Note that no error can occur if - * count is non-NULL. - * - * This name won't be used, the includer of this file should define - * it to be the actual function name, based on unicode or string. - * - * As closely as possible, this code mimics the logic in decimal.py's - _insert_thousands_sep(). - **/ -static Py_ssize_t -STRINGLIB(InsertThousandsGrouping)( - STRINGLIB_CHAR *buffer, - Py_ssize_t n_buffer, - STRINGLIB_CHAR *digits, - Py_ssize_t n_digits, - Py_ssize_t min_width, - const char *grouping, - STRINGLIB_CHAR *thousands_sep, - Py_ssize_t thousands_sep_len) -{ - Py_ssize_t count = 0; - Py_ssize_t n_zeros; - int loop_broken = 0; - int use_separator = 0; /* First time through, don't append the - separator. They only go between - groups. */ - STRINGLIB_CHAR *buffer_end = NULL; - STRINGLIB_CHAR *digits_end = NULL; - Py_ssize_t l; - Py_ssize_t n_chars; - Py_ssize_t remaining = n_digits; /* Number of chars remaining to - be looked at */ - /* A generator that returns all of the grouping widths, until it - returns 0. 
*/ - STRINGLIB(GroupGenerator) groupgen; - STRINGLIB(GroupGenerator_init)(&groupgen, grouping); - - if (buffer) { - buffer_end = buffer + n_buffer; - digits_end = digits + n_digits; - } - - while ((l = STRINGLIB(GroupGenerator_next)(&groupgen)) > 0) { - l = Py_MIN(l, Py_MAX(Py_MAX(remaining, min_width), 1)); - n_zeros = Py_MAX(0, l - remaining); - n_chars = Py_MAX(0, Py_MIN(remaining, l)); - - /* Use n_zero zero's and n_chars chars */ - - /* Count only, don't do anything. */ - count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars; - - if (buffer) { - /* Copy into the output buffer. */ - STRINGLIB(fill)(&digits_end, &buffer_end, n_chars, n_zeros, - use_separator ? thousands_sep : NULL, thousands_sep_len); - } - - /* Use a separator next time. */ - use_separator = 1; - - remaining -= n_chars; - min_width -= l; - - if (remaining <= 0 && min_width <= 0) { - loop_broken = 1; - break; - } - min_width -= thousands_sep_len; + _PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos, + thousands_sep, 0, + thousands_sep_len); } - if (!loop_broken) { - /* We left the loop without using a break statement. */ - l = Py_MAX(Py_MAX(remaining, min_width), 1); - n_zeros = Py_MAX(0, l - remaining); - n_chars = Py_MAX(0, Py_MIN(remaining, l)); - - /* Use n_zero zero's and n_chars chars */ - count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars; - if (buffer) { - /* Copy into the output buffer. */ - STRINGLIB(fill)(&digits_end, &buffer_end, n_chars, n_zeros, - use_separator ? 
thousands_sep : NULL, thousands_sep_len); - } + *buffer_pos -= n_chars; + *digits_pos -= n_chars; + _PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos, + digits, *digits_pos, + n_chars); + + if (n_zeros) { + *buffer_pos -= n_zeros; + enum PyUnicode_Kind kind = PyUnicode_KIND(writer->buffer); + void *data = PyUnicode_DATA(writer->buffer); + FILL(kind, data, '0', *buffer_pos, n_zeros); } - return count; } - diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 71eb654095a631..8dd7c3b8258c3c 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -218,6 +218,31 @@ static PyObject *unicode_empty = NULL; return unicode_empty; \ } while (0) +#define FILL(kind, data, value, start, length) \ + do { \ + Py_ssize_t i_ = 0; \ + assert(kind != PyUnicode_WCHAR_KIND); \ + switch ((kind)) { \ + case PyUnicode_1BYTE_KIND: { \ + unsigned char * to_ = (unsigned char *)((data)) + (start); \ + memset(to_, (unsigned char)value, (length)); \ + break; \ + } \ + case PyUnicode_2BYTE_KIND: { \ + Py_UCS2 * to_ = (Py_UCS2 *)((data)) + (start); \ + for (; i_ < (length); ++i_, ++to_) *to_ = (value); \ + break; \ + } \ + case PyUnicode_4BYTE_KIND: { \ + Py_UCS4 * to_ = (Py_UCS4 *)((data)) + (start); \ + for (; i_ < (length); ++i_, ++to_) *to_ = (value); \ + break; \ + } \ + default: Py_UNREACHABLE(); \ + } \ + } while (0) + + /* Forward declaration */ static inline int _PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch); @@ -796,7 +821,6 @@ ensure_unicode(PyObject *obj) #include "stringlib/count.h" #include "stringlib/find.h" #include "stringlib/find_max_char.h" -#include "stringlib/localeutil.h" #include "stringlib/undef.h" #include "stringlib/ucs1lib.h" @@ -807,7 +831,6 @@ ensure_unicode(PyObject *obj) #include "stringlib/find.h" #include "stringlib/replace.h" #include "stringlib/find_max_char.h" -#include "stringlib/localeutil.h" #include "stringlib/undef.h" #include "stringlib/ucs2lib.h" @@ -818,7 +841,6 @@ ensure_unicode(PyObject *obj) 
#include "stringlib/find.h" #include "stringlib/replace.h" #include "stringlib/find_max_char.h" -#include "stringlib/localeutil.h" #include "stringlib/undef.h" #include "stringlib/ucs4lib.h" @@ -829,7 +851,6 @@ ensure_unicode(PyObject *obj) #include "stringlib/find.h" #include "stringlib/replace.h" #include "stringlib/find_max_char.h" -#include "stringlib/localeutil.h" #include "stringlib/undef.h" #include "stringlib/unicodedefs.h" @@ -9292,86 +9313,149 @@ any_find_slice(PyObject* s1, PyObject* s2, return result; } +/* _PyUnicode_InsertThousandsGrouping() helper functions */ +#include "stringlib/localeutil.h" + +/** + * InsertThousandsGrouping: + * @writer: Unicode writer. + * @n_buffer: Number of characters to write into the buffer of @writer. + * @digits: Digits we're reading from. Unused (must be NULL) in counting mode. + * @d_pos: Start of digits string. + * @n_digits: The number of digits in the string, in which we want + * to put the grouping chars. + * @min_width: The minimum width of the digits in the output string. + * Output will be zero-padded on the left to fill. + * @grouping: see definition in localeconv(). + * @thousands_sep: see definition in localeconv(). + * + * There are 2 modes: counting and filling. If @writer is NULL, + * we are in counting mode, else filling mode. + * If counting, the required buffer size is returned. + * If filling, we know the buffer will be large enough, so we don't + * need to pass in the buffer size. + * Inserts thousand grouping characters (as defined by grouping and + * thousands_sep) into @writer. + * + * Return value: -1 on error, number of characters otherwise. 
+ **/ Py_ssize_t _PyUnicode_InsertThousandsGrouping( - PyObject *unicode, Py_ssize_t index, + _PyUnicodeWriter *writer, Py_ssize_t n_buffer, - void *digits, Py_ssize_t n_digits, + PyObject *digits, + Py_ssize_t d_pos, + Py_ssize_t n_digits, Py_ssize_t min_width, - const char *grouping, PyObject *thousands_sep, + const char *grouping, + PyObject *thousands_sep, Py_UCS4 *maxchar) { - unsigned int kind, thousands_sep_kind; - char *data, *thousands_sep_data; - Py_ssize_t thousands_sep_len; - Py_ssize_t len; - - if (unicode != NULL) { - kind = PyUnicode_KIND(unicode); - data = (char *) PyUnicode_DATA(unicode) + index * kind; + if (writer) { + assert(digits != NULL); + assert(maxchar == NULL); } else { - kind = PyUnicode_1BYTE_KIND; - data = NULL; - } - thousands_sep_kind = PyUnicode_KIND(thousands_sep); - thousands_sep_data = PyUnicode_DATA(thousands_sep); - thousands_sep_len = PyUnicode_GET_LENGTH(thousands_sep); - if (unicode != NULL && thousands_sep_kind != kind) { - if (thousands_sep_kind < kind) { - thousands_sep_data = _PyUnicode_AsKind(thousands_sep, kind); - if (!thousands_sep_data) - return -1; - } - else { - data = _PyUnicode_AsKind(unicode, thousands_sep_kind); - if (!data) - return -1; + assert(digits == NULL); + assert(maxchar != NULL); + } + assert(0 <= d_pos); + assert(0 <= n_digits); + assert(0 <= min_width); + assert(grouping != NULL); + + if (digits != NULL) { + if (PyUnicode_READY(digits) == -1) { + return -1; } } + if (PyUnicode_READY(thousands_sep) == -1) { + return -1; + } - switch (kind) { - case PyUnicode_1BYTE_KIND: - if (unicode != NULL && PyUnicode_IS_ASCII(unicode)) - len = asciilib_InsertThousandsGrouping( - (Py_UCS1 *) data, n_buffer, (Py_UCS1 *) digits, n_digits, - min_width, grouping, - (Py_UCS1 *) thousands_sep_data, thousands_sep_len); - else - len = ucs1lib_InsertThousandsGrouping( - (Py_UCS1*)data, n_buffer, (Py_UCS1*)digits, n_digits, - min_width, grouping, - (Py_UCS1 *) thousands_sep_data, thousands_sep_len); - break; - case 
PyUnicode_2BYTE_KIND: - len = ucs2lib_InsertThousandsGrouping( - (Py_UCS2 *) data, n_buffer, (Py_UCS2 *) digits, n_digits, - min_width, grouping, - (Py_UCS2 *) thousands_sep_data, thousands_sep_len); - break; - case PyUnicode_4BYTE_KIND: - len = ucs4lib_InsertThousandsGrouping( - (Py_UCS4 *) data, n_buffer, (Py_UCS4 *) digits, n_digits, - min_width, grouping, - (Py_UCS4 *) thousands_sep_data, thousands_sep_len); - break; - default: - Py_UNREACHABLE(); + Py_ssize_t count = 0; + Py_ssize_t n_zeros; + int loop_broken = 0; + int use_separator = 0; /* First time through, don't append the + separator. They only go between + groups. */ + Py_ssize_t buffer_pos; + Py_ssize_t digits_pos; + Py_ssize_t len; + Py_ssize_t n_chars; + Py_ssize_t remaining = n_digits; /* Number of chars remaining to + be looked at */ + /* A generator that returns all of the grouping widths, until it + returns 0. */ + GroupGenerator groupgen; + GroupGenerator_init(&groupgen, grouping); + const Py_ssize_t thousands_sep_len = PyUnicode_GET_LENGTH(thousands_sep); + + /* if digits are not grouped, thousands separator + should be an empty string */ + assert(!(grouping[0] == CHAR_MAX && thousands_sep_len != 0)); + + digits_pos = d_pos + n_digits; + if (writer) { + buffer_pos = writer->pos + n_buffer; + assert(buffer_pos <= PyUnicode_GET_LENGTH(writer->buffer)); + assert(digits_pos <= PyUnicode_GET_LENGTH(digits)); } - if (unicode != NULL && thousands_sep_kind != kind) { - if (thousands_sep_kind < kind) - PyMem_Free(thousands_sep_data); - else - PyMem_Free(data); + else { + buffer_pos = n_buffer; } - if (unicode == NULL) { + + if (!writer) { *maxchar = 127; - if (len != n_digits) { - *maxchar = Py_MAX(*maxchar, - PyUnicode_MAX_CHAR_VALUE(thousands_sep)); + } + + while ((len = GroupGenerator_next(&groupgen)) > 0) { + len = Py_MIN(len, Py_MAX(Py_MAX(remaining, min_width), 1)); + n_zeros = Py_MAX(0, len - remaining); + n_chars = Py_MAX(0, Py_MIN(remaining, len)); + + /* Use n_zero zero's and n_chars chars */ 
+ + /* Count only, don't do anything. */ + count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars; + + /* Copy into the writer. */ + InsertThousandsGrouping_fill(writer, &buffer_pos, + digits, &digits_pos, + n_chars, n_zeros, + use_separator ? thousands_sep : NULL, + thousands_sep_len, maxchar); + + /* Use a separator next time. */ + use_separator = 1; + + remaining -= n_chars; + min_width -= len; + + if (remaining <= 0 && min_width <= 0) { + loop_broken = 1; + break; } + min_width -= thousands_sep_len; + } + if (!loop_broken) { + /* We left the loop without using a break statement. */ + + len = Py_MAX(Py_MAX(remaining, min_width), 1); + n_zeros = Py_MAX(0, len - remaining); + n_chars = Py_MAX(0, Py_MIN(remaining, len)); + + /* Use n_zero zero's and n_chars chars */ + count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars; + + /* Copy into the writer. */ + InsertThousandsGrouping_fill(writer, &buffer_pos, + digits, &digits_pos, + n_chars, n_zeros, + use_separator ? 
thousands_sep : NULL, + thousands_sep_len, maxchar); } - return len; + return count; } @@ -9990,30 +10074,6 @@ _PyUnicode_JoinArray(PyObject *separator, PyObject *const *items, Py_ssize_t seq return NULL; } -#define FILL(kind, data, value, start, length) \ - do { \ - Py_ssize_t i_ = 0; \ - assert(kind != PyUnicode_WCHAR_KIND); \ - switch ((kind)) { \ - case PyUnicode_1BYTE_KIND: { \ - unsigned char * to_ = (unsigned char *)((data)) + (start); \ - memset(to_, (unsigned char)value, (length)); \ - break; \ - } \ - case PyUnicode_2BYTE_KIND: { \ - Py_UCS2 * to_ = (Py_UCS2 *)((data)) + (start); \ - for (; i_ < (length); ++i_, ++to_) *to_ = (value); \ - break; \ - } \ - case PyUnicode_4BYTE_KIND: { \ - Py_UCS4 * to_ = (Py_UCS4 *)((data)) + (start); \ - for (; i_ < (length); ++i_, ++to_) *to_ = (value); \ - break; \ - } \ - default: Py_UNREACHABLE(); \ - } \ - } while (0) - void _PyUnicode_FastFill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length, Py_UCS4 fill_char) diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c index 2cd3eb8eb75098..ef81d15bc568b4 100644 --- a/Python/formatter_unicode.c +++ b/Python/formatter_unicode.c @@ -461,7 +461,8 @@ parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end, /* not all fields of format are used. for example, precision is unused. should this take discrete params in order to be more clear about what it does? or is passing a single format parameter easier - and more efficient enough to justify a little obfuscation? */ + and more efficient enough to justify a little obfuscation? + Return -1 on error. 
*/ static Py_ssize_t calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix, Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start, @@ -540,9 +541,12 @@ calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix, Py_UCS4 grouping_maxchar; spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping( NULL, 0, - 0, NULL, - spec->n_digits, spec->n_min_width, + NULL, 0, spec->n_digits, + spec->n_min_width, locale->grouping, locale->thousands_sep, &grouping_maxchar); + if (spec->n_grouped_digits == -1) { + return -1; + } *maxchar = Py_MAX(*maxchar, grouping_maxchar); } @@ -634,26 +638,14 @@ fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec, /* Only for type 'c' special case, it has no digits. */ if (spec->n_digits != 0) { /* Fill the digits with InsertThousandsGrouping. */ - char *pdigits; - if (PyUnicode_READY(digits)) - return -1; - pdigits = PyUnicode_DATA(digits); - if (PyUnicode_KIND(digits) < kind) { - pdigits = _PyUnicode_AsKind(digits, kind); - if (pdigits == NULL) - return -1; - } r = _PyUnicode_InsertThousandsGrouping( - writer->buffer, writer->pos, - spec->n_grouped_digits, - pdigits + kind * d_pos, - spec->n_digits, spec->n_min_width, + writer, spec->n_grouped_digits, + digits, d_pos, spec->n_digits, + spec->n_min_width, locale->grouping, locale->thousands_sep, NULL); if (r == -1) return -1; assert(r == spec->n_grouped_digits); - if (PyUnicode_KIND(digits) < kind) - PyMem_Free(pdigits); d_pos += spec->n_digits; } if (toupper) { @@ -993,6 +985,9 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format, n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars, inumeric_chars + n_digits, n_remainder, 0, &locale, format, &maxchar); + if (n_total == -1) { + goto done; + } /* Allocate the memory. 
*/ if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1) @@ -1138,6 +1133,9 @@ format_float_internal(PyObject *value, n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index, index + n_digits, n_remainder, has_decimal, &locale, format, &maxchar); + if (n_total == -1) { + goto done; + } /* Allocate the memory. */ if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1) @@ -1321,6 +1319,9 @@ format_complex_internal(PyObject *value, i_re, i_re + n_re_digits, n_re_remainder, re_has_decimal, &locale, &tmp_format, &maxchar); + if (n_re_total == -1) { + goto done; + } /* Same formatting, but always include a sign, unless the real part is * going to be omitted, in which case we use whatever sign convention was @@ -1331,6 +1332,9 @@ format_complex_internal(PyObject *value, i_im, i_im + n_im_digits, n_im_remainder, im_has_decimal, &locale, &tmp_format, &maxchar); + if (n_im_total == -1) { + goto done; + } if (skip_re) n_re_total = 0; pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy