Skip to content

Commit 419f872

Browse files
[JsonPath] Handle special whitespaces in filters
1 parent c492fc0 commit 419f872

File tree

6 files changed

+543
-414
lines changed

6 files changed

+543
-414
lines changed

src/Symfony/Component/JsonPath/JsonCrawler.php

Lines changed: 134 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,11 @@ private function evaluateBracket(string $expr, mixed $value): array
133133
return [];
134134
}
135135

136-
if ('*' === $expr) {
136+
if (str_contains($expr, ',') && (str_starts_with($trimmed = trim($expr), ',') || str_ends_with($trimmed, ','))) {
137+
throw new JsonCrawlerException($expr, 'Expression cannot have leading or trailing commas');
138+
}
139+
140+
if ('*' === $expr = JsonPathUtils::normalizeWhitespace($expr)) {
137141
return array_values($value);
138142
}
139143

@@ -168,8 +172,7 @@ private function evaluateBracket(string $expr, mixed $value): array
168172
return $result;
169173
}
170174

171-
// start, end and step
172-
if (preg_match('/^(-?\d*):(-?\d*)(?::(-?\d+))?$/', $expr, $matches)) {
175+
if (preg_match('/^(-?\d*+)\s*+:\s*+(-?\d*+)(?:\s*+:\s*+(-?\d++))?$/', $expr, $matches)) {
173176
if (!array_is_list($value)) {
174177
return [];
175178
}
@@ -217,14 +220,12 @@ private function evaluateBracket(string $expr, mixed $value): array
217220

218221
// filter expressions
219222
if (preg_match('/^\?(.*)$/', $expr, $matches)) {
220-
$filterExpr = $matches[1];
221-
222-
if (preg_match('/^(\w+)\s*\([^()]*\)\s*([<>=!]+.*)?$/', $filterExpr)) {
223+
if (preg_match('/^(\w+)\s*\([^()]*\)\s*([<>=!]+.*)?$/', $filterExpr = trim($matches[1]))) {
223224
$filterExpr = "($filterExpr)";
224225
}
225226

226227
if (!str_starts_with($filterExpr, '(')) {
227-
throw new JsonCrawlerException($expr, 'Invalid filter expression');
228+
$filterExpr = "($filterExpr)";
228229
}
229230

230231
// remove outer filter parentheses
@@ -235,30 +236,30 @@ private function evaluateBracket(string $expr, mixed $value): array
235236

236237
// comma-separated values, e.g. `['key1', 'key2', 123]` or `[0, 1, 'key']`
237238
if (str_contains($expr, ',')) {
238-
$parts = $this->parseCommaSeparatedValues($expr);
239+
$parts = JsonPathUtils::parseCommaSeparatedValues($expr);
239240

240241
$result = [];
241-
$keysIndices = array_keys($value);
242-
$isList = array_is_list($value);
243242

244243
foreach ($parts as $part) {
245244
$part = trim($part);
246245

247-
if (preg_match('/^([\'"])(.*)\1$/', $part, $matches)) {
246+
if ('*' === $part) {
247+
$result = array_merge($result, array_values($value));
248+
} elseif (preg_match('/^(-?\d*+)\s*+:\s*+(-?\d*+)(?:\s*+:\s*+(-?\d++))?$/', $part, $matches)) {
249+
// slice notation
250+
$sliceResult = $this->evaluateBracket($part, $value);
251+
$result = array_merge($result, $sliceResult);
252+
} elseif (preg_match('/^([\'"])(.*)\1$/', $part, $matches)) {
248253
$key = JsonPathUtils::unescapeString($matches[2], $matches[1]);
249254

250-
if ($isList) {
255+
if (array_is_list($value)) {
256+
// for arrays, find ALL objects that contain this key
251257
foreach ($value as $item) {
252258
if (\is_array($item) && \array_key_exists($key, $item)) {
253259
$result[] = $item;
254-
break;
255260
}
256261
}
257-
258-
continue; // no results here
259-
}
260-
261-
if (\array_key_exists($key, $value)) {
262+
} elseif (\array_key_exists($key, $value)) { // for objects, get the value for this key
262263
$result[] = $value[$key];
263264
}
264265
} elseif (preg_match('/^-?\d+$/', $part)) {
@@ -268,14 +269,14 @@ private function evaluateBracket(string $expr, mixed $value): array
268269
$index = \count($value) + $index;
269270
}
270271

271-
if ($isList && \array_key_exists($index, $value)) {
272+
if (array_is_list($value) && \array_key_exists($index, $value)) {
272273
$result[] = $value[$index];
273-
continue;
274-
}
275-
276-
// numeric index on a hashmap
277-
if (isset($keysIndices[$index]) && isset($value[$keysIndices[$index]])) {
278-
$result[] = $value[$keysIndices[$index]];
274+
} else {
275+
// numeric index on a hashmap
276+
$keysIndices = array_keys($value);
277+
if (isset($keysIndices[$index]) && isset($value[$keysIndices[$index]])) {
278+
$result[] = $value[$keysIndices[$index]];
279+
}
279280
}
280281
}
281282
}
@@ -310,7 +311,28 @@ private function evaluateFilter(string $expr, mixed $value): array
310311

311312
private function evaluateFilterExpression(string $expr, mixed $context): bool
312313
{
313-
$expr = trim($expr);
314+
$expr = JsonPathUtils::normalizeWhitespace($expr);
315+
316+
// remove outer parentheses if they wrap the entire expression
317+
if (str_starts_with($expr, '(') && str_ends_with($expr, ')')) {
318+
$depth = 0;
319+
$isWrapped = true;
320+
for ($i = 0; $i < \strlen($expr); ++$i) {
321+
if ('(' === $expr[$i]) {
322+
++$depth;
323+
} elseif (')' === $expr[$i] && 0 === --$depth && $i < \strlen($expr) - 1) {
324+
$isWrapped = false;
325+
break;
326+
}
327+
}
328+
if ($isWrapped) {
329+
$expr = trim(substr($expr, 1, -1));
330+
}
331+
}
332+
333+
if (str_starts_with($expr, '!')) {
334+
return !$this->evaluateFilterExpression(trim(substr($expr, 1)), $context);
335+
}
314336

315337
if (str_contains($expr, '&&')) {
316338
$parts = array_map('trim', explode('&&', $expr));
@@ -353,8 +375,8 @@ private function evaluateFilterExpression(string $expr, mixed $context): bool
353375
}
354376

355377
// function calls
356-
if (preg_match('/^(\w+)\((.*)\)$/', $expr, $matches)) {
357-
$functionName = $matches[1];
378+
if (preg_match('/^(\w++)\s*+\((.*)\)$/', $expr, $matches)) {
379+
$functionName = trim($matches[1]);
358380
if (!isset(self::RFC9535_FUNCTIONS[$functionName])) {
359381
throw new JsonCrawlerException($expr, \sprintf('invalid function "%s"', $functionName));
360382
}
@@ -369,8 +391,15 @@ private function evaluateFilterExpression(string $expr, mixed $context): bool
369391

370392
private function evaluateScalar(string $expr, mixed $context): mixed
371393
{
372-
if (is_numeric($expr)) {
373-
return str_contains($expr, '.') ? (float) $expr : (int) $expr;
394+
$expr = JsonPathUtils::normalizeWhitespace($expr);
395+
396+
if (JsonPathUtils::isJsonNumber($expr)) {
397+
return str_contains($expr, '.') || str_contains(strtolower($expr), 'e') ? (float) $expr : (int) $expr;
398+
}
399+
400+
// only validate tokens that look like standalone numbers
401+
if (preg_match('/^[\d+\-.eE]+$/', $expr) && preg_match('/\d/', $expr)) {
402+
throw new JsonCrawlerException($expr, \sprintf('Invalid number format "%s"', $expr));
374403
}
375404

376405
if ('@' === $expr) {
@@ -404,9 +433,8 @@ private function evaluateScalar(string $expr, mixed $context): mixed
404433
}
405434

406435
// function calls
407-
if (preg_match('/^(\w+)\((.*)\)$/', $expr, $matches)) {
408-
$functionName = $matches[1];
409-
if (!isset(self::RFC9535_FUNCTIONS[$functionName])) {
436+
if (preg_match('/^(\w++)\((.*)\)$/', $expr, $matches)) {
437+
if (!isset(self::RFC9535_FUNCTIONS[$functionName = trim($matches[1])])) {
410438
throw new JsonCrawlerException($expr, \sprintf('invalid function "%s"', $functionName));
411439
}
412440

@@ -416,31 +444,60 @@ private function evaluateScalar(string $expr, mixed $context): mixed
416444
return null;
417445
}
418446

419-
/**
 * Evaluates one of the supported filter functions.
 *
 * Every comma-separated argument is resolved to a value plus the size of the
 * nodelist it came from: absolute paths ("$...") go through evaluate(),
 * relative paths ("@...") are resolved against $context, and anything else is
 * handed to evaluateScalar(). Only the first argument's nodelist size is used
 * (by "count" and "value").
 *
 * @param string $name    function name: length, count, match, search or value; any other name yields null
 * @param string $args    raw argument list as written between the parentheses
 * @param mixed  $context value that "@" and "@..." arguments are resolved against
 */
private function evaluateFunction(string $name, string $args, mixed $context): mixed
{
    $argList = [];
    $nodelistSizes = [];

    // compare against '' explicitly: a bare truthiness check would drop the literal argument "0"
    if ('' !== $args = trim($args)) {
        foreach (JsonPathUtils::parseCommaSeparatedValues($args) as $arg) {
            $arg = trim($arg);
            if (str_starts_with($arg, '$')) { // special handling for absolute paths
                $results = $this->evaluate(new JsonPath($arg));
                $argList[] = $results[0] ?? null;
                $nodelistSizes[] = \count($results);
            } elseif (!str_starts_with($arg, '@')) { // literals and nested function calls
                $argList[] = $this->evaluateScalar($arg, $context);
                $nodelistSizes[] = 1;
            } elseif ('@' === $arg) { // the context itself is a single node
                $argList[] = $context;
                $nodelistSizes[] = 1;
            } elseif (!\is_array($context)) { // a scalar context has no children to select
                $argList[] = null;
                $nodelistSizes[] = 0;
            } elseif (str_starts_with($pathPart = substr($arg, 1), '[')) {
                // handle bracket expressions like @['a','d']
                $results = $this->evaluateBracket(substr($pathPart, 1, -1), $context);
                $argList[] = $results;
                $nodelistSizes[] = \count($results);
            } else {
                // handle dot notation like @.a
                $results = $this->evaluateTokensOnDecodedData(JsonPathTokenizer::tokenize(new JsonPath('$'.$pathPart)), $context);
                $argList[] = $results[0] ?? null;
                $nodelistSizes[] = \count($results);
            }
        }
    }

    $value = $argList[0] ?? null;
    $nodelistSize = $nodelistSizes[0] ?? 0;

    return match ($name) {
        'length' => match (true) {
            \is_string($value) => mb_strlen($value),
            \is_array($value) => \count($value),
            default => 0,
        },
        'count' => $nodelistSize,
        'match' => match (true) {
            // the pattern is wrapped in a group so that alternations ("a|b") stay fully anchored,
            // and "D" stops "$" from matching before a trailing newline;
            // "@" is kept on purpose: an invalid pattern simply does not match
            \is_string($value) && \is_string($argList[1] ?? null) => (bool) @preg_match(\sprintf('/^(?:%s)$/Du', $this->transformJsonPathRegex($argList[1])), $value),
            default => false,
        },
        'search' => match (true) {
            \is_string($value) && \is_string($argList[1] ?? null) => (bool) @preg_match("/{$this->transformJsonPathRegex($argList[1])}/u", $value),
            default => false,
        },
        // more than one node => no single value; exactly one node => unwrap a bracket-selection result
        'value' => 1 < $nodelistSize ? null : (1 === $nodelistSize ? (\is_array($value) ? ($value[0] ?? null) : $value) : $value),
        default => null,
    };
}
@@ -474,43 +531,52 @@ private function compare(mixed $left, mixed $right, string $operator): bool
474531
};
475532
}
476533

477-
/*
 * Rewrites a JSONPath regular expression so that an unescaped "." outside a
 * character class no longer matches \r or \n (RFC 9535), while U+2028 and
 * U+2029 remain matchable. Every other byte of the pattern is copied as is.
 */
private function transformJsonPathRegex(string $pattern): string
{
    $rewritten = '';
    $insideClass = false;
    $afterBackslash = false;

    foreach ('' === $pattern ? [] : str_split($pattern) as $byte) {
        // the byte following a backslash is always copied untouched
        if ($afterBackslash) {
            $rewritten .= $byte;
            $afterBackslash = false;
            continue;
        }

        switch ($byte) {
            case '\\':
                $afterBackslash = true;
                break;
            case '[':
                $insideClass = true;
                break;
            case ']':
                $insideClass = false;
                break;
            case '.':
                if (!$insideClass) {
                    $rewritten .= '(?:[^\r\n]|\x{2028}|\x{2029})';
                    continue 2;
                }
                break;
        }

        $rewritten .= $byte;
    }

    return $rewritten;
}
516582
}

src/Symfony/Component/JsonPath/JsonPathUtils.php

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,4 +159,73 @@ private static function unescapeUnicodeSequence(string $str, int $length, int &$
159159

160160
return mb_chr($codepoint, 'UTF-8');
161161
}
162+
163+
/**
 * Turns every tab, line feed and carriage return into a plain space, then
 * trims the result with PHP's default trim() character set.
 *
 * @see https://datatracker.ietf.org/doc/rfc9535/, section 2.1.1
 */
public static function normalizeWhitespace(string $input): string
{
    return trim(str_replace(["\t", "\n", "\r"], ' ', $input));
}
176+
177+
/**
 * Check a number is RFC 9535 compliant using strict JSON number format.
 *
 * Accepted shape: optional "-", an integer part without leading zeros, an
 * optional fraction and an optional exponent. The "D" modifier keeps "$" from
 * matching before a trailing newline, so "1\n" is rejected.
 *
 * @return bool true only when the whole string is a JSON number
 */
public static function isJsonNumber(string $value): bool
{
    // preg_match() returns int|false; compare explicitly so a real bool is returned
    return 1 === preg_match('/^-?(0|[1-9]\d*)(\.\d+)?([eE][+-]?\d+)?$/D', $value);
}
184+
185+
/**
 * Splits an expression on its top-level commas.
 *
 * Commas inside single- or double-quoted strings or inside square brackets do
 * not split, and a backslash keeps the character after it verbatim. Each part
 * is trimmed; a final part is only emitted when something follows the last
 * separator.
 *
 * @return string[]
 */
public static function parseCommaSeparatedValues(string $expr): array
{
    $segments = [];
    $buffer = '';
    $openQuote = null; // the quote character currently open, if any
    $depth = 0;        // nesting level of square brackets outside quotes
    $length = \strlen($expr);
    $offset = 0;

    while ($offset < $length) {
        $char = $expr[$offset++];

        // escape sequence: copy the backslash together with the next character
        if ('\\' === $char && $offset < $length) {
            $buffer .= $char.$expr[$offset++];
            continue;
        }

        if ('"' === $char || "'" === $char) {
            if (null === $openQuote) {
                $openQuote = $char;
            } elseif ($openQuote === $char) {
                $openQuote = null;
            }
        } elseif (null === $openQuote) {
            if ('[' === $char) {
                ++$depth;
            } elseif (']' === $char) {
                --$depth;
            } elseif (',' === $char && 0 === $depth) {
                // top-level separator: close the current segment
                $segments[] = trim($buffer);
                $buffer = '';

                continue;
            }
        }

        $buffer .= $char;
    }

    if ('' !== $buffer) {
        $segments[] = trim($buffer);
    }

    return $segments;
}
162231
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy