Skip to content

Commit a403f71

Browse files
[JsonPath] Handle special whitespaces in filters
1 parent c492fc0 commit a403f71

File tree

6 files changed

+544
-412
lines changed

6 files changed

+544
-412
lines changed

src/Symfony/Component/JsonPath/JsonCrawler.php

Lines changed: 137 additions & 68 deletions
Original file line number | Diff line number | Diff line change
@@ -133,7 +133,11 @@ private function evaluateBracket(string $expr, mixed $value): array
133133
return [];
134134
}
135135

136-
if ('*' === $expr) {
136+
if (str_contains($expr, ',') && (str_starts_with($trimmed = trim($expr), ',') || str_ends_with($trimmed, ','))) {
137+
throw new JsonCrawlerException($expr, 'Expression cannot have leading or trailing commas');
138+
}
139+
140+
if ('*' === $expr = JsonPathUtils::normalizeWhitespace($expr)) {
137141
return array_values($value);
138142
}
139143

@@ -168,8 +172,7 @@ private function evaluateBracket(string $expr, mixed $value): array
168172
return $result;
169173
}
170174

171-
// start, end and step
172-
if (preg_match('/^(-?\d*):(-?\d*)(?::(-?\d+))?$/', $expr, $matches)) {
175+
if (preg_match('/^(-?\d*+)\s*+:\s*+(-?\d*+)(?:\s*+:\s*+(-?\d++))?$/', $expr, $matches)) {
173176
if (!array_is_list($value)) {
174177
return [];
175178
}
@@ -217,14 +220,12 @@ private function evaluateBracket(string $expr, mixed $value): array
217220

218221
// filter expressions
219222
if (preg_match('/^\?(.*)$/', $expr, $matches)) {
220-
$filterExpr = $matches[1];
221-
222-
if (preg_match('/^(\w+)\s*\([^()]*\)\s*([<>=!]+.*)?$/', $filterExpr)) {
223+
if (preg_match('/^(\w+)\s*\([^()]*\)\s*([<>=!]+.*)?$/', $filterExpr = trim($matches[1]))) {
223224
$filterExpr = "($filterExpr)";
224225
}
225226

226227
if (!str_starts_with($filterExpr, '(')) {
227-
throw new JsonCrawlerException($expr, 'Invalid filter expression');
228+
$filterExpr = "($filterExpr)";
228229
}
229230

230231
// remove outer filter parentheses
@@ -235,30 +236,30 @@ private function evaluateBracket(string $expr, mixed $value): array
235236

236237
// comma-separated values, e.g. `['key1', 'key2', 123]` or `[0, 1, 'key']`
237238
if (str_contains($expr, ',')) {
238-
$parts = $this->parseCommaSeparatedValues($expr);
239+
$parts = JsonPathUtils::parseCommaSeparatedValues($expr);
239240

240241
$result = [];
241-
$keysIndices = array_keys($value);
242-
$isList = array_is_list($value);
243242

244243
foreach ($parts as $part) {
245244
$part = trim($part);
246245

247-
if (preg_match('/^([\'"])(.*)\1$/', $part, $matches)) {
246+
if ('*' === $part) {
247+
$result = array_merge($result, array_values($value));
248+
} elseif (preg_match('/^(-?\d*+)\s*+:\s*+(-?\d*+)(?:\s*+:\s*+(-?\d++))?$/', $part, $matches)) {
249+
// slice notation
250+
$sliceResult = $this->evaluateBracket($part, $value);
251+
$result = array_merge($result, $sliceResult);
252+
} elseif (preg_match('/^([\'"])(.*)\1$/', $part, $matches)) {
248253
$key = JsonPathUtils::unescapeString($matches[2], $matches[1]);
249254

250-
if ($isList) {
255+
if (array_is_list($value)) {
256+
// for arrays, find ALL objects that contain this key
251257
foreach ($value as $item) {
252258
if (\is_array($item) && \array_key_exists($key, $item)) {
253259
$result[] = $item;
254-
break;
255260
}
256261
}
257-
258-
continue; // no results here
259-
}
260-
261-
if (\array_key_exists($key, $value)) {
262+
} elseif (\array_key_exists($key, $value)) { // for objects, get the value for this key
262263
$result[] = $value[$key];
263264
}
264265
} elseif (preg_match('/^-?\d+$/', $part)) {
@@ -268,14 +269,14 @@ private function evaluateBracket(string $expr, mixed $value): array
268269
$index = \count($value) + $index;
269270
}
270271

271-
if ($isList && \array_key_exists($index, $value)) {
272+
if (array_is_list($value) && \array_key_exists($index, $value)) {
272273
$result[] = $value[$index];
273-
continue;
274-
}
275-
276-
// numeric index on a hashmap
277-
if (isset($keysIndices[$index]) && isset($value[$keysIndices[$index]])) {
278-
$result[] = $value[$keysIndices[$index]];
274+
} else {
275+
// numeric index on a hashmap
276+
$keysIndices = array_keys($value);
277+
if (isset($keysIndices[$index]) && isset($value[$keysIndices[$index]])) {
278+
$result[] = $value[$keysIndices[$index]];
279+
}
279280
}
280281
}
281282
}
@@ -310,7 +311,31 @@ private function evaluateFilter(string $expr, mixed $value): array
310311

311312
private function evaluateFilterExpression(string $expr, mixed $context): bool
312313
{
313-
$expr = trim($expr);
314+
$expr = JsonPathUtils::normalizeWhitespace($expr);
315+
316+
// remove outer parentheses if they wrap the entire expression
317+
if (str_starts_with($expr, '(') && str_ends_with($expr, ')')) {
318+
$depth = 0;
319+
$isWrapped = true;
320+
for ($i = 0; $i < \strlen($expr); ++$i) {
321+
if ('(' === $expr[$i]) {
322+
++$depth;
323+
} elseif (')' === $expr[$i]) {
324+
--$depth;
325+
if (0 === $depth && $i < \strlen($expr) - 1) {
326+
$isWrapped = false;
327+
break;
328+
}
329+
}
330+
}
331+
if ($isWrapped) {
332+
$expr = trim(substr($expr, 1, -1));
333+
}
334+
}
335+
336+
if (str_starts_with($expr, '!')) {
337+
return !$this->evaluateFilterExpression(trim(substr($expr, 1)), $context);
338+
}
314339

315340
if (str_contains($expr, '&&')) {
316341
$parts = array_map('trim', explode('&&', $expr));
@@ -353,8 +378,8 @@ private function evaluateFilterExpression(string $expr, mixed $context): bool
353378
}
354379

355380
// function calls
356-
if (preg_match('/^(\w+)\((.*)\)$/', $expr, $matches)) {
357-
$functionName = $matches[1];
381+
if (preg_match('/^(\w++)\s*+\((.*)\)$/', $expr, $matches)) {
382+
$functionName = trim($matches[1]);
358383
if (!isset(self::RFC9535_FUNCTIONS[$functionName])) {
359384
throw new JsonCrawlerException($expr, \sprintf('invalid function "%s"', $functionName));
360385
}
@@ -369,8 +394,15 @@ private function evaluateFilterExpression(string $expr, mixed $context): bool
369394

370395
private function evaluateScalar(string $expr, mixed $context): mixed
371396
{
372-
if (is_numeric($expr)) {
373-
return str_contains($expr, '.') ? (float) $expr : (int) $expr;
397+
$expr = JsonPathUtils::normalizeWhitespace($expr);
398+
399+
if (JsonPathUtils::isJsonNumber($expr)) {
400+
return str_contains($expr, '.') || str_contains(strtolower($expr), 'e') ? (float) $expr : (int) $expr;
401+
}
402+
403+
// only validate tokens that look like standalone numbers
404+
if (preg_match('/^[\d+\-.eE]+$/', $expr) && preg_match('/\d/', $expr)) {
405+
throw new JsonCrawlerException($expr, \sprintf('Invalid number format "%s"', $expr));
374406
}
375407

376408
if ('@' === $expr) {
@@ -404,9 +436,8 @@ private function evaluateScalar(string $expr, mixed $context): mixed
404436
}
405437

406438
// function calls
407-
if (preg_match('/^(\w+)\((.*)\)$/', $expr, $matches)) {
408-
$functionName = $matches[1];
409-
if (!isset(self::RFC9535_FUNCTIONS[$functionName])) {
439+
if (preg_match('/^(\w++)\((.*)\)$/', $expr, $matches)) {
440+
if (!isset(self::RFC9535_FUNCTIONS[$functionName = trim($matches[1])])) {
410441
throw new JsonCrawlerException($expr, \sprintf('invalid function "%s"', $functionName));
411442
}
412443

@@ -416,31 +447,60 @@ private function evaluateScalar(string $expr, mixed $context): mixed
416447
return null;
417448
}
418449

419-
private function evaluateFunction(string $name, string $args, array $context): mixed
450+
private function evaluateFunction(string $name, string $args, mixed $context): mixed
420451
{
421-
$args = array_map(
422-
fn ($arg) => $this->evaluateScalar(trim($arg), $context),
423-
explode(',', $args)
424-
);
452+
$argList = [];
453+
$nodelistSizes = [];
454+
if ($args = trim($args)) {
455+
$args = JsonPathUtils::parseCommaSeparatedValues($args);
456+
foreach ($args as $arg) {
457+
$arg = trim($arg);
458+
if (str_starts_with($arg, '$')) { // special handling for absolute paths
459+
$results = $this->evaluate(new JsonPath($arg));
460+
$argList[] = $results[0] ?? null;
461+
$nodelistSizes[] = \count($results);
462+
} elseif (!str_starts_with($arg, '@')) { // special handling for @ to track nodelist size
463+
$argList[] = $this->evaluateScalar($arg, $context);
464+
$nodelistSizes[] = 1;
465+
} elseif ('@' === $arg) {
466+
$argList[] = $context;
467+
$nodelistSizes[] = 1;
468+
} elseif (!\is_array($context)) {
469+
$argList[] = null;
470+
$nodelistSizes[] = 0;
471+
} elseif (str_starts_with($pathPart = substr($arg, 1), '[')) {
472+
// handle bracket expressions like @['a','d']
473+
$results = $this->evaluateBracket(substr($pathPart, 1, -1), $context);
474+
$argList[] = $results;
475+
$nodelistSizes[] = \count($results);
476+
} else {
477+
// handle dot notation like @.a
478+
$results = $this->evaluateTokensOnDecodedData(JsonPathTokenizer::tokenize(new JsonPath('$'.$pathPart)), $context);
479+
$argList[] = $results[0] ?? null;
480+
$nodelistSizes[] = \count($results);
481+
}
482+
}
483+
}
425484

426-
$value = $args[0] ?? null;
485+
$value = $argList[0] ?? null;
486+
$nodelistSize = $nodelistSizes[0] ?? 0;
427487

428488
return match ($name) {
429489
'length' => match (true) {
430490
\is_string($value) => mb_strlen($value),
431491
\is_array($value) => \count($value),
432492
default => 0,
433493
},
434-
'count' => \is_array($value) ? \count($value) : 0,
494+
'count' => $nodelistSize,
435495
'match' => match (true) {
436-
\is_string($value) && \is_string($args[1] ?? null) => (bool) @preg_match(\sprintf('/^%s$/', $args[1]), $value),
496+
\is_string($value) && \is_string($argList[1] ?? null) => (bool) @preg_match(\sprintf('/^%s$/u', $this->transformJsonPathRegex($argList[1])), $value),
437497
default => false,
438498
},
439499
'search' => match (true) {
440-
\is_string($value) && \is_string($args[1] ?? null) => (bool) @preg_match("/$args[1]/", $value),
500+
\is_string($value) && \is_string($argList[1] ?? null) => (bool) @preg_match("/{$this->transformJsonPathRegex($argList[1])}/u", $value),
441501
default => false,
442502
},
443-
'value' => $value,
503+
'value' => 1 < $nodelistSize ? null : (1 === $nodelistSize ? (\is_array($value) ? ($value[0] ?? null) : $value) : $value),
444504
default => null,
445505
};
446506
}
@@ -474,43 +534,52 @@ private function compare(mixed $left, mixed $right, string $operator): bool
474534
};
475535
}
476536

477-
private function parseCommaSeparatedValues(string $expr): array
537+
/*
538+
* Transform JSONPath regex patterns to comply with RFC 9535. The main issue is
539+
* that '.' should not match \r or \n but should match Unicode line
540+
* separators U+2028 and U+2029.
541+
*/
542+
private function transformJsonPathRegex(string $pattern): string
478543
{
479-
$parts = [];
480-
$current = '';
481-
$inQuotes = false;
482-
$quoteChar = null;
544+
$result = '';
545+
$inCharClass = false;
546+
$escaped = false;
547+
$length = \strlen($pattern);
483548

484-
for ($i = 0; $i < \strlen($expr); ++$i) {
485-
$char = $expr[$i];
549+
for ($i = 0; $i < $length; ++$i) {
550+
$char = $pattern[$i];
486551

487-
if ('\\' === $char && $i + 1 < \strlen($expr)) {
488-
$current .= $char.$expr[++$i];
552+
if ($escaped) {
553+
$result .= $char;
554+
$escaped = false;
489555
continue;
490556
}
491557

492-
if ('"' === $char || "'" === $char) {
493-
if (!$inQuotes) {
494-
$inQuotes = true;
495-
$quoteChar = $char;
496-
} elseif ($char === $quoteChar) {
497-
$inQuotes = false;
498-
$quoteChar = null;
499-
}
500-
} elseif (!$inQuotes && ',' === $char) {
501-
$parts[] = trim($current);
502-
$current = '';
558+
if ('\\' === $char) {
559+
$result .= $char;
560+
$escaped = true;
561+
continue;
562+
}
503563

564+
if ('[' === $char && !$inCharClass) {
565+
$inCharClass = true;
566+
$result .= $char;
504567
continue;
505568
}
506569

507-
$current .= $char;
508-
}
570+
if (']' === $char && $inCharClass) {
571+
$inCharClass = false;
572+
$result .= $char;
573+
continue;
574+
}
509575

510-
if ('' !== $current) {
511-
$parts[] = trim($current);
576+
if ('.' === $char && !$inCharClass) {
577+
$result .= '(?:[^\r\n]|\x{2028}|\x{2029})';
578+
} else {
579+
$result .= $char;
580+
}
512581
}
513582

514-
return $parts;
583+
return $result;
515584
}
516585
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy