Skip to content

Commit ab0853a

Browse files
[JsonPath] Handle special whitespaces in filters
1 parent cf3b527 commit ab0853a

File tree

5 files changed

+118
-26
lines changed

5 files changed

+118
-26
lines changed

src/Symfony/Component/JsonPath/JsonCrawler.php

Lines changed: 24 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ private function evaluateBracket(string $expr, mixed $value): array
128128
return [];
129129
}
130130

131+
$expr = JsonPathUtils::normalizeWhitespace($expr);
131132
if ('*' === $expr) {
132133
return array_values($value);
133134
}
@@ -150,8 +151,8 @@ private function evaluateBracket(string $expr, mixed $value): array
150151
}
151152

152153
$result = [];
153-
foreach (explode(',', $expr) as $index) {
154-
$index = (int) trim($index);
154+
foreach (explode(',', $expr) as $indexStr) {
155+
$index = (int) trim($indexStr);
155156
if ($index < 0) {
156157
$index = \count($value) + $index;
157158
}
@@ -163,8 +164,7 @@ private function evaluateBracket(string $expr, mixed $value): array
163164
return $result;
164165
}
165166

166-
// start, end and step
167-
if (preg_match('/^(-?\d*):(-?\d*)(?::(-?\d+))?$/', $expr, $matches)) {
167+
if (preg_match('/^(-?\d*+)\s*+:\s*+(-?\d*+)(?:\s*+:\s*+(-?\d++))?$/', $expr, $matches)) {
168168
if (!array_is_list($value)) {
169169
return [];
170170
}
@@ -212,7 +212,7 @@ private function evaluateBracket(string $expr, mixed $value): array
212212

213213
// filter expressions
214214
if (preg_match('/^\?(.*)$/', $expr, $matches)) {
215-
$filterExpr = $matches[1];
215+
$filterExpr = trim($matches[1]);
216216

217217
if (preg_match('/^(\w+)\s*\([^()]*\)\s*([<>=!]+.*)?$/', $filterExpr)) {
218218
$filterExpr = "($filterExpr)";
@@ -260,12 +260,12 @@ private function evaluateFilter(string $expr, mixed $value): array
260260

261261
private function evaluateFilterExpression(string $expr, array $context): bool
262262
{
263-
$expr = trim($expr);
263+
$expr = JsonPathUtils::normalizeWhitespace($expr);
264264

265265
if (str_contains($expr, '&&')) {
266266
$parts = array_map('trim', explode('&&', $expr));
267267
foreach ($parts as $part) {
268-
if (!$this->evaluateFilterExpression($part, $context)) {
268+
if (!$this->evaluateFilterExpression(trim($part), $context)) {
269269
return false;
270270
}
271271
}
@@ -277,7 +277,7 @@ private function evaluateFilterExpression(string $expr, array $context): bool
277277
$parts = array_map('trim', explode('||', $expr));
278278
$result = false;
279279
foreach ($parts as $part) {
280-
$result = $result || $this->evaluateFilterExpression($part, $context);
280+
$result = $result || $this->evaluateFilterExpression(trim($part), $context);
281281
}
282282

283283
return $result;
@@ -301,8 +301,8 @@ private function evaluateFilterExpression(string $expr, array $context): bool
301301
}
302302

303303
// function calls
304-
if (preg_match('/^(\w+)\((.*)\)$/', $expr, $matches)) {
305-
$functionName = $matches[1];
304+
if (preg_match('/^(\w++)\s*+\((.*)\)$/', $expr, $matches)) {
305+
$functionName = trim($matches[1]);
306306
if (!isset(self::RFC9535_FUNCTIONS[$functionName])) {
307307
throw new JsonCrawlerException($expr, \sprintf('invalid function "%s"', $functionName));
308308
}
@@ -317,6 +317,8 @@ private function evaluateFilterExpression(string $expr, array $context): bool
317317

318318
private function evaluateScalar(string $expr, array $context): mixed
319319
{
320+
$expr = JsonPathUtils::normalizeWhitespace($expr);
321+
320322
if (is_numeric($expr)) {
321323
return str_contains($expr, '.') ? (float) $expr : (int) $expr;
322324
}
@@ -346,8 +348,8 @@ private function evaluateScalar(string $expr, array $context): mixed
346348
}
347349

348350
// function calls
349-
if (preg_match('/^(\w+)\((.*)\)$/', $expr, $matches)) {
350-
$functionName = $matches[1];
351+
if (preg_match('/^(\w++)\s*+\((.*)\)$/', $expr, $matches)) {
352+
$functionName = trim($matches[1]);
351353
if (!isset(self::RFC9535_FUNCTIONS[$functionName])) {
352354
throw new JsonCrawlerException($expr, \sprintf('invalid function "%s"', $functionName));
353355
}
@@ -360,12 +362,15 @@ private function evaluateScalar(string $expr, array $context): mixed
360362

361363
private function evaluateFunction(string $name, string $args, array $context): mixed
362364
{
363-
$args = array_map(
364-
fn ($arg) => $this->evaluateScalar(trim($arg), $context),
365-
explode(',', $args)
366-
);
365+
$argList = [];
366+
if (trim($args)) {
367+
$argList = array_map(
368+
fn ($arg) => $this->evaluateScalar(trim($arg), $context),
369+
preg_split('/\s*,\s*/', trim($args))
370+
);
371+
}
367372

368-
$value = $args[0] ?? null;
373+
$value = $argList[0] ?? null;
369374

370375
return match ($name) {
371376
'length' => match (true) {
@@ -375,11 +380,11 @@ private function evaluateFunction(string $name, string $args, array $context): m
375380
},
376381
'count' => \is_array($value) ? \count($value) : 0,
377382
'match' => match (true) {
378-
\is_string($value) && \is_string($args[1] ?? null) => (bool) @preg_match(\sprintf('/^%s$/', $args[1]), $value),
383+
\is_string($value) && \is_string($argList[1] ?? null) => (bool) @preg_match(\sprintf('/^%s$/', $argList[1]), $value),
379384
default => false,
380385
},
381386
'search' => match (true) {
382-
\is_string($value) && \is_string($args[1] ?? null) => (bool) @preg_match("/$args[1]/", $value),
387+
\is_string($value) && \is_string($argList[1] ?? null) => (bool) @preg_match("/{$argList[1]}/", $value),
383388
default => false,
384389
},
385390
'value' => $value,

src/Symfony/Component/JsonPath/JsonPathUtils.php

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,4 +159,18 @@ private static function unescapeUnicodeSequence(string $str, int $length, int &$
159159

160160
return mb_chr($codepoint, 'UTF-8');
161161
}
162+
163+
/**
164+
* @see https://datatracker.ietf.org/doc/rfc9535/, section 2.1.1
165+
*/
166+
public static function normalizeWhitespace(string $input): string
167+
{
168+
$normalized = strtr($input, [
169+
"\t" => ' ',
170+
"\n" => ' ',
171+
"\r" => ' ',
172+
]);
173+
174+
return trim($normalized);
175+
}
162176
}

src/Symfony/Component/JsonPath/Tests/JsonCrawlerTest.php

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -419,6 +419,37 @@ public function testLengthFunctionWithOuterParentheses()
419419
$this->assertSame('J. R. R. Tolkien', $result[1]['author']);
420420
}
421421

422+
public function testFilterWithSpecialWhitespaces()
423+
{
424+
$result = self::getBookstoreCrawler()->find("$.store.book[?(length\n(@.author\t)>\r\n12)]");
425+
426+
$this->assertCount(2, $result);
427+
$this->assertSame('Herman Melville', $result[0]['author']);
428+
$this->assertSame('J. R. R. Tolkien', $result[1]['author']);
429+
}
430+
431+
public function testMatchFunctionWithMultipleSpacesTrimmed()
432+
{
433+
$result = self::getBookstoreCrawler()->find("$.store.book[?(match(@.title, 'Sword of Honour'))]");
434+
435+
$this->assertSame([], $result);
436+
}
437+
438+
public function testFilterMultiline()
439+
{
440+
$result = self::getBookstoreCrawler()->find(
441+
'$
442+
.store
443+
.book[?
444+
length(@.author)>12
445+
]'
446+
);
447+
448+
$this->assertCount(2, $result);
449+
$this->assertSame('Herman Melville', $result[0]['author']);
450+
$this->assertSame('J. R. R. Tolkien', $result[1]['author']);
451+
}
452+
422453
public function testCountFunction()
423454
{
424455
$result = self::getBookstoreCrawler()->find('$.store.book[?count(@.extra) != 0]');

src/Symfony/Component/JsonPath/Tests/Tokenizer/JsonPathTokenizerTest.php

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -355,9 +355,7 @@ public static function provideInvalidUtf8PropertyName(): array
355355
'special char first' => ['#test'],
356356
'start with digit' => ['123test'],
357357
'asterisk' => ['test*test'],
358-
'space not allowed' => [' test'],
359358
'at sign not allowed' => ['@test'],
360-
'start control char' => ["\0test"],
361359
'ending control char' => ["test\xFF\xFA"],
362360
'dash sign' => ['-test'],
363361
];

src/Symfony/Component/JsonPath/Tokenizer/JsonPathTokenizer.php

Lines changed: 49 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
*/
2222
final class JsonPathTokenizer
2323
{
24+
private const RFC9535_WHITESPACE_CHARS = [' ', "\t", "\n", "\r"];
25+
2426
/**
2527
* @return JsonPathToken[]
2628
*/
@@ -42,14 +44,26 @@ public static function tokenize(JsonPath $query): array
4244
throw new InvalidJsonPathException('empty JSONPath expression.');
4345
}
4446

45-
if ('$' !== $chars[0]) {
47+
$i = self::skipWhitespace($chars, 0, $length);
48+
if ($i >= $length || '$' !== $chars[$i]) {
4649
throw new InvalidJsonPathException('expression must start with $.');
4750
}
4851

4952
for ($i = 0; $i < $length; ++$i) {
5053
$char = $chars[$i];
5154
$position = $i;
5255

56+
if (!$inQuote && !$inBracket && self::isWhitespace($char)) {
57+
if ('' !== $current) {
58+
$tokens[] = new JsonPathToken(TokenType::Name, $current);
59+
$current = '';
60+
}
61+
62+
$i = self::skipWhitespace($chars, $i, $length) - 1; // -1 because loop will increment
63+
64+
continue;
65+
}
66+
5367
if (('"' === $char || "'" === $char) && !$inQuote) {
5468
$inQuote = true;
5569
$quoteChar = $char;
@@ -59,7 +73,7 @@ public static function tokenize(JsonPath $query): array
5973

6074
if ($inQuote) {
6175
$current .= $char;
62-
if ($char === $quoteChar && '\\' !== $chars[$i - 1]) {
76+
if ($char === $quoteChar && (0 === $i || '\\' !== $chars[$i - 1])) {
6377
$inQuote = false;
6478
}
6579
if ($i === $length - 1 && $inQuote) {
@@ -80,6 +94,8 @@ public static function tokenize(JsonPath $query): array
8094

8195
$inBracket = true;
8296
++$bracketDepth;
97+
$i = self::skipWhitespace($chars, $i + 1, $length) - 1; // -1 because loop will increment
98+
8399
continue;
84100
}
85101

@@ -94,11 +110,11 @@ public static function tokenize(JsonPath $query): array
94110
}
95111

96112
if (0 === $bracketDepth) {
97-
if ('' === $current) {
113+
if ('' === trim($current)) {
98114
throw new InvalidJsonPathException('empty brackets are not allowed.', $position);
99115
}
100116

101-
$tokens[] = new JsonPathToken(TokenType::Bracket, $current);
117+
$tokens[] = new JsonPathToken(TokenType::Bracket, trim($current));
102118
$current = '';
103119
$inBracket = false;
104120
$inFilter = false;
@@ -108,11 +124,15 @@ public static function tokenize(JsonPath $query): array
108124
}
109125

110126
if ('?' === $char && $inBracket && !$inFilter) {
111-
if ('' !== $current) {
127+
if ('' !== trim($current)) {
112128
throw new InvalidJsonPathException('unexpected characters before filter expression.', $position);
113129
}
130+
131+
$current = '?';
114132
$inFilter = true;
115133
$filterParenthesisDepth = 0;
134+
135+
continue;
116136
}
117137

118138
if ($inFilter) {
@@ -123,6 +143,15 @@ public static function tokenize(JsonPath $query): array
123143
throw new InvalidJsonPathException('unmatched closing parenthesis in filter.', $position);
124144
}
125145
}
146+
$current .= $char;
147+
148+
continue;
149+
}
150+
151+
if ($inBracket && self::isWhitespace($char)) {
152+
$current .= $char;
153+
154+
continue;
126155
}
127156

128157
// recursive descent
@@ -158,6 +187,7 @@ public static function tokenize(JsonPath $query): array
158187
throw new InvalidJsonPathException('unclosed string literal.', $length - 1);
159188
}
160189

190+
$current = trim($current);
161191
if ('' !== $current) {
162192
// final validation of the whole name
163193
if (!preg_match('/^(?:\*|[a-zA-Z_\x{0080}-\x{D7FF}\x{E000}-\x{10FFFF}][a-zA-Z0-9_\x{0080}-\x{D7FF}\x{E000}-\x{10FFFF}]*)$/u', $current)) {
@@ -169,4 +199,18 @@ public static function tokenize(JsonPath $query): array
169199

170200
return $tokens;
171201
}
202+
203+
private static function isWhitespace(string $char): bool
204+
{
205+
return \in_array($char, self::RFC9535_WHITESPACE_CHARS, true);
206+
}
207+
208+
private static function skipWhitespace(array $chars, int $index, int $length): int
209+
{
210+
while ($index < $length && self::isWhitespace($chars[$index])) {
211+
++$index;
212+
}
213+
214+
return $index;
215+
}
172216
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

pFad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy