Skip to content

Commit 54e35eb

Browse files
[JsonPath] Better handling of Unicode chars in expressions
1 parent 0795d65 commit 54e35eb

File tree

3 files changed

+357
-2
lines changed

3 files changed

+357
-2
lines changed

src/Symfony/Component/JsonPath/JsonCrawler.php

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -230,14 +230,29 @@ private function evaluateBracket(string $expr, mixed $value): array
230230

231231
// quoted strings for object keys
232232
if (preg_match('/^([\'"])(.*)\1$/', $expr, $matches)) {
233-
$key = stripslashes($matches[2]);
233+
$key = $this->unescapeString($matches[2], $matches[1]);
234234

235235
return \array_key_exists($key, $value) ? [$value[$key]] : [];
236236
}
237237

238238
throw new \LogicException(\sprintf('Unsupported bracket expression "%s".', $expr));
239239
}
240240

241+
/**
 * Resolves the escape sequences of a quoted string literal taken from an expression.
 *
 * Double-quoted literals are first handed to json_decode(); if that yields null,
 * or the literal was not double-quoted, JsonPathUtils::unescapeString() is used.
 *
 * @param string $str       the literal's content, without its surrounding quotes
 * @param string $quoteChar the quote character that delimited the literal
 */
private function unescapeString(string $str, string $quoteChar): string
{
    // only double-quoted literals are candidates for JSON decoding
    if ('"' !== $quoteChar) {
        return JsonPathUtils::unescapeString($str);
    }

    // re-wrap the content in double quotes so it can be parsed as a JSON string
    $fromJson = json_decode('"'.$str.'"', true);

    // null means the JSON parser rejected the content: use the manual decoder instead
    return $fromJson ?? JsonPathUtils::unescapeString($str);
}
255+
241256
private function evaluateFilter(string $expr, mixed $value): array
242257
{
243258
if (!\is_array($value)) {
@@ -335,7 +350,7 @@ private function evaluateScalar(string $expr, array $context): mixed
335350

336351
// string literals
337352
if (preg_match('/^([\'"])(.*)\1$/', $expr, $matches)) {
338-
return $matches[2];
353+
return $this->unescapeString($matches[2], $matches[1]);
339354
}
340355

341356
// current node references

src/Symfony/Component/JsonPath/JsonPathUtils.php

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,4 +85,96 @@ public static function findSmallestDeserializableStringAndPath(array $tokens, mi
8585
'tokens' => $remainingTokens,
8686
];
8787
}
88+
89+
/**
 * Decodes the backslash escape sequences found in a string literal.
 *
 * Handled sequences: \" \' \\ \/ \b \f \n \r \t and \uXXXX, including a
 * high/low surrogate pair written as two consecutive \uXXXX escapes.
 * A backslash that does not start one of these sequences (unknown escape,
 * truncated or non-hexadecimal \u sequence, trailing backslash) is kept
 * verbatim and the characters that follow it are processed normally.
 *
 * @param string $str the literal's content, without its surrounding quotes
 *
 * @return string the content with the recognized escape sequences replaced
 */
public static function unescapeString(string $str): string
{
    $result = '';
    $length = \strlen($str);

    for ($i = 0; $i < $length; ++$i) {
        $char = $str[$i];

        // plain characters, and a backslash with nothing after it, are copied as-is
        if ('\\' !== $char || $i + 1 >= $length) {
            $result .= $char;

            continue;
        }

        $nextChar = $str[$i + 1];

        if ('u' === $nextChar) {
            // the 4 hex digits occupy offsets $i + 2 to $i + 5
            $hex = $i + 5 < $length ? substr($str, $i + 2, 4) : '';

            if (!ctype_xdigit($hex)) {
                // truncated or non-hexadecimal sequence: keep the backslash as a literal
                $result .= $char;

                continue;
            }

            $codepoint = hexdec($hex);

            // a high surrogate directly followed by another \uXXXX may form a surrogate pair
            if (
                $codepoint >= 0xD800 && $codepoint <= 0xDBFF
                && $i + 11 < $length
                && '\\' === $str[$i + 6] && 'u' === $str[$i + 7]
            ) {
                $lowHex = substr($str, $i + 8, 4);

                if (ctype_xdigit($lowHex)) {
                    $lowSurrogate = hexdec($lowHex);

                    if ($lowSurrogate >= 0xDC00 && $lowSurrogate <= 0xDFFF) {
                        $pair = mb_chr(0x10000 + (($codepoint & 0x3FF) << 10) + ($lowSurrogate & 0x3FF), 'UTF-8');
                        $result .= false === $pair ? '' : $pair;
                        $i += 11; // skip both escape sequences

                        continue;
                    }
                }
            }

            // single code point; mb_chr() returns false when the code point cannot be
            // encoded (e.g. a lone surrogate), in which case nothing is appended
            $single = mb_chr($codepoint, 'UTF-8');
            $result .= false === $single ? '' : $single;
            $i += 5;

            continue;
        }

        $replacement = match ($nextChar) {
            '"', "'", '\\', '/' => $nextChar,
            // PHP double-quoted strings have no "\b" escape, so the backspace
            // character has to be written with its hexadecimal code
            'b' => "\x08",
            'f' => "\f",
            'n' => "\n",
            'r' => "\r",
            't' => "\t",
            default => null,
        };

        if (null === $replacement) {
            // unknown escape sequence: keep the backslash, the next character
            // is handled by the following iteration
            $result .= $char;

            continue;
        }

        $result .= $replacement;
        ++$i; // skip the escaped character
    }

    return $result;
}
88180
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy