Skip to content

Commit cf7da9e

Browse files
committed
bug #61199 [JsonPath] Fix parsing invalid Unicode codepoints (nicolas-grekas)
This PR was merged into the 7.3 branch.

Discussion
----------

[JsonPath] Fix parsing invalid Unicode codepoints

| Q             | A   |
| ------------- | --- |
| Branch?       | 7.3 |
| Bug fix?      | yes |
| New feature?  | no  |
| Deprecations? | no  |
| Issues        | -   |
| License       | MIT |

Commits
-------

66c8a1a [JsonPath] Fix parsing invalid Unicode codepoints
2 parents ba445f4 + 66c8a1a commit cf7da9e

File tree

2 files changed

+24
-20
lines changed

2 files changed

+24
-20
lines changed

src/Symfony/Component/JsonPath/JsonCrawler.php

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
namespace Symfony\Component\JsonPath;
1313

1414
use Symfony\Component\JsonPath\Exception\InvalidArgumentException;
15+
use Symfony\Component\JsonPath\Exception\InvalidJsonPathException;
1516
use Symfony\Component\JsonPath\Exception\InvalidJsonStringInputException;
1617
use Symfony\Component\JsonPath\Exception\JsonCrawlerException;
1718
use Symfony\Component\JsonPath\Tokenizer\JsonPathToken;
@@ -83,7 +84,7 @@ private function evaluate(JsonPath $query): array
8384
return $this->evaluateTokensOnDecodedData($tokens, $data);
8485
} catch (InvalidArgumentException $e) {
8586
throw $e;
86-
} catch (\Throwable $e) {
87+
} catch (InvalidJsonPathException $e) {
8788
throw new JsonCrawlerException($query, $e->getMessage(), previous: $e);
8889
}
8990
}
@@ -329,7 +330,7 @@ private function evaluateBracket(string $expr, mixed $value): array
329330
return \array_key_exists($key, $value) ? [$value[$key]] : [];
330331
}
331332

332-
throw new \LogicException(\sprintf('Unsupported bracket expression "%s".', $expr));
333+
throw new InvalidJsonPathException(\sprintf('Unsupported bracket expression "%s".', $expr));
333334
}
334335

335336
private function evaluateFilter(string $expr, mixed $value): array

src/Symfony/Component/JsonPath/JsonPathUtils.php

Lines changed: 21 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ public static function unescapeString(string $str, string $quoteChar): string
117117
't' => "\t",
118118
'u' => self::unescapeUnicodeSequence($str, $i),
119119
$quoteChar => $quoteChar,
120-
default => throw new JsonCrawlerException('', \sprintf('Invalid escape sequence "\\%s" in %s-quoted string', $str[$i + 1], "'" === $quoteChar ? 'single' : 'double')),
120+
default => throw new JsonCrawlerException('', \sprintf('Invalid escape sequence "\\%s" in %s-quoted string.', $str[$i + 1], "'" === $quoteChar ? 'single' : 'double')),
121121
};
122122

123123
++$i;
@@ -132,30 +132,33 @@ public static function unescapeString(string $str, string $quoteChar): string
132132
private static function unescapeUnicodeSequence(string $str, int &$i): string
133133
{
134134
if (!isset($str[$i + 5]) || !ctype_xdigit(substr($str, $i + 2, 4))) {
135-
throw new JsonCrawlerException('', 'Invalid unicode escape sequence');
135+
throw new JsonCrawlerException('', 'Invalid unicode escape sequence.');
136136
}
137137

138-
$hex = substr($str, $i + 2, 4);
138+
$codepoint = hexdec(substr($str, $i + 2, 4));
139139

140-
$codepoint = hexdec($hex);
141140
// looks like a valid Unicode codepoint, string length is sufficient and it starts with \u
142-
if (0xD800 <= $codepoint && $codepoint <= 0xDBFF && isset($str[$i + 11]) && '\\' === $str[$i + 6] && 'u' === $str[$i + 7]) {
143-
$lowHex = substr($str, $i + 8, 4);
144-
if (ctype_xdigit($lowHex)) {
145-
$lowSurrogate = hexdec($lowHex);
146-
if (0xDC00 <= $lowSurrogate && $lowSurrogate <= 0xDFFF) {
147-
$codepoint = 0x10000 + (($codepoint & 0x3FF) << 10) + ($lowSurrogate & 0x3FF);
148-
$i += 10; // skip surrogate pair
149-
150-
return mb_chr($codepoint, 'UTF-8');
151-
}
152-
}
141+
if (0xD800 <= $codepoint
142+
&& $codepoint <= 0xDBFF
143+
&& isset($str[$i + 11])
144+
&& '\\' === $str[$i + 6]
145+
&& 'u' === $str[$i + 7]
146+
&& ctype_xdigit($lowSurrogate = substr($str, $i + 8, 4))
147+
&& 0xDC00 <= ($lowSurrogate = hexdec($lowSurrogate))
148+
&& $lowSurrogate <= 0xDFFF
149+
) {
150+
$codepoint = 0x10000 + (($codepoint & 0x3FF) << 10) + ($lowSurrogate & 0x3FF);
151+
$i += 10; // skip surrogate pair
152+
} else {
153+
// single Unicode character or invalid surrogate, skip the sequence
154+
$i += 4;
153155
}
154156

155-
// single Unicode character or invalid surrogate, skip the sequence
156-
$i += 4;
157+
if (false === $chr = mb_chr($codepoint, 'UTF-8')) {
158+
throw new JsonCrawlerException('', \sprintf('Invalid Unicode codepoint: U+%04X.', $codepoint));
159+
}
157160

158-
return mb_chr($codepoint, 'UTF-8');
161+
return $chr;
159162
}
160163

161164
/**

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy