From ff4ca5cf07f2de5b60f4c4618e04508b7cd918ae Mon Sep 17 00:00:00 2001 From: Matthieu Caneill Date: Sun, 20 Jul 2025 17:43:20 +0200 Subject: [PATCH 1/9] Only allow 0-9 digits in MIME parameter section numbers --- Lib/email/_header_value_parser.py | 9 +++++++-- Lib/test/test_email/test__header_value_parser.py | 10 ++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 91243378dc0441..347d75b480a8b0 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -2398,17 +2398,22 @@ def get_section(value): The caller should already have dealt with leading CFWS. """ + def is_allowed_digit(c): + # We don't use str.isdigit because only 0-9 are accepted, not + # super-script and other types of digits. + return c in {'0','1','2','3','4','5','6','7','8','9'} + section = Section() if not value or value[0] != '*': raise errors.HeaderParseError("Expected section but found {}".format( value)) section.append(ValueTerminal('*', 'section-marker')) value = value[1:] - if not value or not value[0].isdigit(): + if not value or not is_allowed_digit(value[0]): raise errors.HeaderParseError("Expected section number but " "found {}".format(value)) digits = '' - while value and value[0].isdigit(): + while value and is_allowed_digit(value[0]): digits += value[0] value = value[1:] if digits[0] == '0' and digits != '0': diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index 179e236ecdfd7f..716a4381fffbde 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -2982,6 +2982,16 @@ def mime_parameters_as_value(self, 'r*=\'a\'"', [('r', '"')], [errors.InvalidHeaderDefect]*2), + + # bpo-42946: Unicode super-script digits (and others) are not allowed + # as section numbers. 
+ 'non_allowed_digits': ( 'foo*0=bar; foo*²=baz', ' foo="bar"', 'foo*0=bar; foo*²=baz', [('foo', 'bar')], [errors.InvalidHeaderDefect]), + } @parameterize From 656b6fb5e1ce3972409c0a580524875a555b7cdb Mon Sep 17 00:00:00 2001 From: Matthieu Caneill Date: Sun, 20 Jul 2025 17:45:20 +0200 Subject: [PATCH 2/9] Use gh bug number --- Lib/test/test_email/test__header_value_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index 716a4381fffbde..2f259e9d1c0dc0 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -2983,7 +2983,7 @@ def mime_parameters_as_value(self, [('r', '"')], [errors.InvalidHeaderDefect]*2), - # bpo-42946: Unicode super-script digits (and others) are not allowed + # gh-87112: Unicode super-script digits (and others) are not allowed # as section numbers. 'non_allowed_digits': ( 'foo*0=bar; foo*²=baz', From 75636a5d3d3bd6b40d60057b1c5b1b3b9710dbe2 Mon Sep 17 00:00:00 2001 From: Matthieu Caneill Date: Sun, 20 Jul 2025 18:00:05 +0200 Subject: [PATCH 3/9] Add news entry --- .../next/Library/2025-07-20-17-57-39.gh-issue-87112.sKU2V8.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2025-07-20-17-57-39.gh-issue-87112.sKU2V8.rst diff --git a/Misc/NEWS.d/next/Library/2025-07-20-17-57-39.gh-issue-87112.sKU2V8.rst b/Misc/NEWS.d/next/Library/2025-07-20-17-57-39.gh-issue-87112.sKU2V8.rst new file mode 100644 index 00000000000000..25fe38a2e36812 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-20-17-57-39.gh-issue-87112.sKU2V8.rst @@ -0,0 +1,2 @@ +Do not fail when non-0-9 digit (e.g. super-script digit) is used as section +number in MIME parameter. 
From b15c4045927baf9ba763b29df9acbcbc0d80a7aa Mon Sep 17 00:00:00 2001 From: Matthieu Caneill Date: Sun, 20 Jul 2025 19:17:49 +0200 Subject: [PATCH 4/9] Comments from PR review, remove inner function --- Lib/email/_header_value_parser.py | 11 ++++------- Lib/test/test_email/test__header_value_parser.py | 1 - 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 347d75b480a8b0..cff3dd4aa9aec7 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -2398,22 +2398,19 @@ def get_section(value): The caller should already have dealt with leading CFWS. """ - def is_allowed_digit(c): - # We don't use str.isdigit because only 0-9 are accepted, not - # super-script and other types of digits. - return c in {'0','1','2','3','4','5','6','7','8','9'} - section = Section() if not value or value[0] != '*': raise errors.HeaderParseError("Expected section but found {}".format( value)) section.append(ValueTerminal('*', 'section-marker')) value = value[1:] - if not value or not is_allowed_digit(value[0]): + # We don't use str.isdigit because only 0-9 are accepted, not super-script + # and other types of digits. 
+ if not value or not '0' <= value[0] <= '9': raise errors.HeaderParseError("Expected section number but " "found {}".format(value)) digits = '' - while value and is_allowed_digit(value[0]): + while value and '0' <= value[0] <= '9': digits += value[0] value = value[1:] if digits[0] == '0' and digits != '0': diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index 2f259e9d1c0dc0..e4a383f259e847 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -2991,7 +2991,6 @@ def mime_parameters_as_value(self, 'foo*0=bar; foo*²=baz', [('foo', 'bar')], [errors.InvalidHeaderDefect]), - } @parameterize From 7cba42f982a73b4ec8892c81dfd17fc9652487d8 Mon Sep 17 00:00:00 2001 From: Matthieu Caneill Date: Sun, 20 Jul 2025 19:20:37 +0200 Subject: [PATCH 5/9] Rephrase News entry --- .../Library/2025-07-20-17-57-39.gh-issue-87112.sKU2V8.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2025-07-20-17-57-39.gh-issue-87112.sKU2V8.rst b/Misc/NEWS.d/next/Library/2025-07-20-17-57-39.gh-issue-87112.sKU2V8.rst index 25fe38a2e36812..cb72b8eea1a3d3 100644 --- a/Misc/NEWS.d/next/Library/2025-07-20-17-57-39.gh-issue-87112.sKU2V8.rst +++ b/Misc/NEWS.d/next/Library/2025-07-20-17-57-39.gh-issue-87112.sKU2V8.rst @@ -1,2 +1,2 @@ -Do not fail when non-0-9 digit (e.g. super-script digit) is used as section -number in MIME parameter. +Ensure that only ASCII digits are accepted as section number in MIME header +parameter. 
From d2d59eee08a60353f622c8de670ef250ea01ef71 Mon Sep 17 00:00:00 2001 From: Matthieu Caneill Date: Sun, 20 Jul 2025 19:35:30 +0200 Subject: [PATCH 6/9] Comments from PR review, improve phrasing --- Lib/email/_header_value_parser.py | 7 +++---- Lib/test/test_email/test__header_value_parser.py | 3 +-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index cff3dd4aa9aec7..ed5224d6349f4f 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -2404,13 +2404,12 @@ def get_section(value): value)) section.append(ValueTerminal('*', 'section-marker')) value = value[1:] - # We don't use str.isdigit because only 0-9 are accepted, not super-script - # and other types of digits. - if not value or not '0' <= value[0] <= '9': + # We don't use str.isdigit because only ASCII digits are allowed. + if not value or not ('0' <= value[0] <= '9'): raise errors.HeaderParseError("Expected section number but " "found {}".format(value)) digits = '' - while value and '0' <= value[0] <= '9': + while value and ('0' <= value[0] <= '9'): digits += value[0] value = value[1:] if digits[0] == '0' and digits != '0': diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index e4a383f259e847..b150196f60527a 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -2983,8 +2983,7 @@ def mime_parameters_as_value(self, [('r', '"')], [errors.InvalidHeaderDefect]*2), - # gh-87112: Unicode super-script digits (and others) are not allowed - # as section numbers. + # gh-87112: Only ASCII digits can be section numbers. 
'non_allowed_digits': ( 'foo*0=bar; foo*²=baz', ' foo="bar"', From 54bbbb752c2ae0b7dc8dba04a66ca5b3df850cd9 Mon Sep 17 00:00:00 2001 From: Matthieu Caneill Date: Sun, 20 Jul 2025 19:50:10 +0200 Subject: [PATCH 7/9] Use separate function --- Lib/email/_header_value_parser.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index ed5224d6349f4f..b8c639e4a70ced 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -2398,18 +2398,21 @@ def get_section(value): The caller should already have dealt with leading CFWS. """ + def is_ascii_digit(d): + # We don't use str.isdigit because only ASCII digits are allowed. + return '0' <= d <= '9' + section = Section() if not value or value[0] != '*': raise errors.HeaderParseError("Expected section but found {}".format( value)) section.append(ValueTerminal('*', 'section-marker')) value = value[1:] - # We don't use str.isdigit because only ASCII digits are allowed. 
- if not value or not ('0' <= value[0] <= '9'): + if not value or not is_ascii_digit(value[0]): raise errors.HeaderParseError("Expected section number but " "found {}".format(value)) digits = '' - while value and ('0' <= value[0] <= '9'): + while value and is_ascii_digit(value[0]): digits += value[0] value = value[1:] if digits[0] == '0' and digits != '0': From f12e424b92a0722258b85c6553ad9e94e218155c Mon Sep 17 00:00:00 2001 From: Matthieu Caneill Date: Sun, 27 Jul 2025 14:16:49 +0200 Subject: [PATCH 8/9] Accept digits that are convertible to int for backwards compatibility --- Lib/email/_header_value_parser.py | 20 ++++++++++++++----- .../test_email/test__header_value_parser.py | 11 ++++++++-- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index b8c639e4a70ced..8684bdb8dfd5c6 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -2398,9 +2398,16 @@ def get_section(value): The caller should already have dealt with leading CFWS. """ - def is_ascii_digit(d): - # We don't use str.isdigit because only ASCII digits are allowed. - return '0' <= d <= '9' + def is_accepted_digit(d): + # While only ASCII digits are allowed by the RFC, we accept any digit + # that can be converted to an int for backwards compatibility purposes. + # We don't use str.isdigit() as some Unicode digits are not convertible + # (e.g. superscript digits). 
+ try: + int(d) + return True + except ValueError: + return False section = Section() if not value or value[0] != '*': @@ -2408,11 +2415,14 @@ def is_ascii_digit(d): value)) section.append(ValueTerminal('*', 'section-marker')) value = value[1:] - if not value or not is_ascii_digit(value[0]): + if not value or not is_accepted_digit(value[0]): raise errors.HeaderParseError("Expected section number but " "found {}".format(value)) digits = '' - while value and is_ascii_digit(value[0]): + while value and is_accepted_digit(value[0]): + if not '0' <= value[0] <= '9': + section.defects.append(errors.InvalidHeaderDefect( + "section number has a non-ASCII digit {}".format(value[0]))) digits += value[0] value = value[1:] if digits[0] == '0' and digits != '0': diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index b150196f60527a..2eaf73bad89afa 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -2983,13 +2983,20 @@ def mime_parameters_as_value(self, [('r', '"')], [errors.InvalidHeaderDefect]*2), - # gh-87112: Only ASCII digits can be section numbers. - 'non_allowed_digits': ( + # gh-87112: Only digits convertible to integers can be section numbers. 
+ 'non_accepted_digit': ( 'foo*0=bar; foo*²=baz', ' foo="bar"', 'foo*0=bar; foo*²=baz', [('foo', 'bar')], [errors.InvalidHeaderDefect]), + + 'non_ascii_digit_backwards_compatibility': ( + 'foo*0=bar; foo*߁=baz', # NKO digit '1' + ' foo="barbaz"', + 'foo*0=bar; foo*߁=baz', + [('foo', 'barbaz')], + [errors.InvalidHeaderDefect]), } @parameterize From b3643399e60bcec00d6756f04e032f95fe3c288e Mon Sep 17 00:00:00 2001 From: Matthieu Caneill Date: Sun, 27 Jul 2025 14:18:24 +0200 Subject: [PATCH 9/9] Update news message --- .../Library/2025-07-20-17-57-39.gh-issue-87112.sKU2V8.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2025-07-20-17-57-39.gh-issue-87112.sKU2V8.rst b/Misc/NEWS.d/next/Library/2025-07-20-17-57-39.gh-issue-87112.sKU2V8.rst index cb72b8eea1a3d3..ba4e72de85e317 100644 --- a/Misc/NEWS.d/next/Library/2025-07-20-17-57-39.gh-issue-87112.sKU2V8.rst +++ b/Misc/NEWS.d/next/Library/2025-07-20-17-57-39.gh-issue-87112.sKU2V8.rst @@ -1,2 +1,2 @@ -Ensure that only ASCII digits are accepted as section number in MIME header -parameter. +Ensure that only digits convertible to integers are accepted as section number +in MIME header parameter. pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy