Skip to content

Commit f3e1136

Browse files
authored
Merge pull request #756 from byroot/utf8-snippets
Ensure parser error snippets are valid UTF-8
2 parents b86a47d + e144793 commit f3e1136

File tree

3 files changed

+30
-3
lines changed

3 files changed

+30
-3
lines changed

CHANGES.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# Changes
22

3+
* Ensure document snippets that are included in parser errors don't include truncated multibyte characters.
4+
35
### 2025-02-10 (2.10.1)
46

57
* Fix a compatibility issue with `MultiJson.dump(obj, pretty: true)`: `no implicit conversion of false into Proc (TypeError)`.

ext/json/ext/parser/parser.c

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -454,15 +454,24 @@ RBIMPL_ATTR_NORETURN()
454454
#endif
455455
static void raise_parse_error(const char *format, const char *start)
456456
{
457-
char buffer[PARSE_ERROR_FRAGMENT_LEN + 1];
457+
unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 1];
458458

459459
size_t len = start ? strnlen(start, PARSE_ERROR_FRAGMENT_LEN) : 0;
460460
const char *ptr = start;
461461

462462
if (len == PARSE_ERROR_FRAGMENT_LEN) {
463463
MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN);
464-
buffer[PARSE_ERROR_FRAGMENT_LEN] = '\0';
465-
ptr = buffer;
464+
465+
while (buffer[len - 1] >= 0x80 && buffer[len - 1] < 0xC0) { // Is continuation byte
466+
len--;
467+
}
468+
469+
if (buffer[len - 1] >= 0xC0) { // multibyte character start
470+
len--;
471+
}
472+
473+
buffer[len] = '\0';
474+
ptr = (const char *)buffer;
466475
}
467476

468477
rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr);

test/json/json_parser_test.rb

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -645,6 +645,22 @@ def test_parse_error_incomplete_hash
645645
end
646646
end
647647

648+
def test_parse_error_snippet
649+
omit "C ext only test" unless RUBY_ENGINE == "ruby"
650+
651+
error = assert_raise(JSON::ParserError) { JSON.parse("あああああああああああああああああああああああ") }
652+
assert_equal "unexpected character: 'ああああああああああ'", error.message
653+
654+
error = assert_raise(JSON::ParserError) { JSON.parse("aあああああああああああああああああああああああ") }
655+
assert_equal "unexpected character: 'aああああああああああ'", error.message
656+
657+
error = assert_raise(JSON::ParserError) { JSON.parse("abあああああああああああああああああああああああ") }
658+
assert_equal "unexpected character: 'abあああああああああ'", error.message
659+
660+
error = assert_raise(JSON::ParserError) { JSON.parse("abcあああああああああああああああああああああああ") }
661+
assert_equal "unexpected character: 'abcあああああああああ'", error.message
662+
end
663+
648664
def test_parse_leading_slash
649665
# ref: https://github.com/ruby/ruby/pull/12598
650666
assert_raise(JSON::ParserError) do

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

pFad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy