Skip to content

Commit 0924dfe

Browse files
authored
Fix JDK regex support (#888)
Summary of changes:
- Fix the test resources introduced by #783 by moving the `regex` fields, so that the test framework does not skip them with a "Not a valid test case" message.
- Revert the changes introduced by #815, as those are simply incorrect.
- Extend the test coverage introduced by #815 by (a) updating the test regexes to match their intended semantics and (b) including a few negative test cases.
- Partially revert the change introduced by #783: the use of `Matcher#find()` is correct, but the `hasStartAnchor` and `hasEndAnchor` logic introduces more bugs than it solves.
- Extend the test coverage introduced by #783 by introducing regexes that are not covered by the `hasStartAnchor`/`hasEndAnchor` logic.
- Update the Joni regular expression integration so that it passes more of the test cases.
- Disable the "trailing newline" test cases, as these are currently not handled correctly by either regex implementation.
1 parent 9ed6dc2 commit 0924dfe

File tree

5 files changed

+99
-40
lines changed

5 files changed

+99
-40
lines changed
Lines changed: 2 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,16 @@
11
package com.networknt.schema.regex;
22

3-
import java.util.regex.Matcher;
43
import java.util.regex.Pattern;
54

65
class JDKRegularExpression implements RegularExpression {
76
private final Pattern pattern;
8-
private final boolean hasStartAnchor;
9-
private final boolean hasEndAnchor;
107

118
JDKRegularExpression(String regex) {
12-
// The patterns in JSON Schema are not implicitly anchored so we must
13-
// use Matcher.find(). However, this method does not honor the end
14-
// anchor when immediately preceded by a quantifier (e.g., ?, *, +).
15-
// To make this work in all cases, we wrap the pattern in a group.
16-
this.hasStartAnchor = '^' == regex.charAt(0);
17-
this.hasEndAnchor = '$' == regex.charAt(regex.length() - 1);
18-
String pattern = regex;
19-
if (this.hasEndAnchor) {
20-
pattern = pattern.substring(this.hasStartAnchor ? 1 : 0, pattern.length() - 1);
21-
pattern = '(' + pattern + ")$";
22-
if (this.hasStartAnchor) pattern = '^' + pattern;
23-
}
24-
this.pattern = Pattern.compile(pattern);
9+
this.pattern = Pattern.compile(regex);
2510
}
2611

2712
@Override
2813
public boolean matches(String value) {
29-
Matcher matcher = this.pattern.matcher(value);
30-
return matcher.find() && (!this.hasStartAnchor || 0 == matcher.start()) && (!this.hasEndAnchor || matcher.end() == value.length());
14+
return this.pattern.matcher(value).find();
3115
}
32-
3316
}

src/main/java/com/networknt/schema/regex/JoniRegularExpression.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ class JoniRegularExpression implements RegularExpression {
2121
.replace("\\S", "[^ \\f\\n\\r\\t\\v\\u00a0\\u1680\\u2000-\\u200a\\u2028\\u2029\\u202f\\u205f\\u3000\\ufeff]");
2222

2323
byte[] bytes = s.getBytes(StandardCharsets.UTF_8);
24-
this.pattern = new Regex(bytes, 0, bytes.length, Option.NONE, UTF8Encoding.INSTANCE, Syntax.ECMAScript);
24+
this.pattern = new Regex(bytes, 0, bytes.length, Option.SINGLELINE, UTF8Encoding.INSTANCE, Syntax.ECMAScript);
2525
}
2626

2727
@Override

src/test/java/com/networknt/schema/regex/Issue814Test.java

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,40 +8,62 @@ class Issue814Test {
88

99
@Test
1010
void jdkTypePattern() {
11-
JDKRegularExpression ex = new JDKRegularExpression("^list|date|time|string|enum|int|double|long|boolean|number$");
11+
JDKRegularExpression ex = new JDKRegularExpression("^(list|date|time|string|enum|int|double|long|boolean|number)$");
1212
assertTrue(ex.matches("list"));
1313
assertTrue(ex.matches("string"));
1414
assertTrue(ex.matches("boolean"));
1515
assertTrue(ex.matches("number"));
1616
assertTrue(ex.matches("enum"));
17+
assertFalse(ex.matches("listZ"));
18+
assertFalse(ex.matches("AenumZ"));
19+
assertFalse(ex.matches("Anumber"));
1720
}
1821

1922
@Test
2023
void jdkOptionsPattern() {
21-
JDKRegularExpression ex = new JDKRegularExpression("^\\d*|[a-zA-Z_]+$");
22-
assertTrue(ex.matches("external"));
23-
assertTrue(ex.matches("external_gte"));
24-
assertTrue(ex.matches("force"));
25-
assertTrue(ex.matches("internal"));
24+
JDKRegularExpression ex = new JDKRegularExpression("^\\d|[a-zA-Z_]$");
25+
assertTrue(ex.matches("5"));
26+
assertTrue(ex.matches("55"));
27+
assertTrue(ex.matches("5%"));
28+
assertTrue(ex.matches("a"));
29+
assertTrue(ex.matches("aa"));
30+
assertTrue(ex.matches("%a"));
31+
assertTrue(ex.matches("%_"));
32+
assertTrue(ex.matches("55aa"));
33+
assertTrue(ex.matches("5%%a"));
34+
assertFalse(ex.matches(""));
35+
assertFalse(ex.matches("%"));
36+
assertFalse(ex.matches("a5"));
2637
}
2738

2839
@Test
2940
void joniTypePattern() {
30-
JoniRegularExpression ex = new JoniRegularExpression("^list|date|time|string|enum|int|double|long|boolean|number$");
41+
JoniRegularExpression ex = new JoniRegularExpression("^(list|date|time|string|enum|int|double|long|boolean|number)$");
3142
assertTrue(ex.matches("list"));
3243
assertTrue(ex.matches("string"));
3344
assertTrue(ex.matches("boolean"));
3445
assertTrue(ex.matches("number"));
3546
assertTrue(ex.matches("enum"));
47+
assertFalse(ex.matches("listZ"));
48+
assertFalse(ex.matches("AenumZ"));
49+
assertFalse(ex.matches("Anumber"));
3650
}
3751

3852
@Test
3953
void joniOptionsPattern() {
40-
JoniRegularExpression ex = new JoniRegularExpression("^\\d*|[a-zA-Z_]+$");
41-
assertTrue(ex.matches("internal"));
42-
assertTrue(ex.matches("external"));
43-
assertTrue(ex.matches("external_gte"));
44-
assertTrue(ex.matches("force"));
54+
JoniRegularExpression ex = new JoniRegularExpression("^\\d|[a-zA-Z_]$");
55+
assertTrue(ex.matches("5"));
56+
assertTrue(ex.matches("55"));
57+
assertTrue(ex.matches("5%"));
58+
assertTrue(ex.matches("a"));
59+
assertTrue(ex.matches("aa"));
60+
assertTrue(ex.matches("%a"));
61+
assertTrue(ex.matches("%_"));
62+
assertTrue(ex.matches("55aa"));
63+
assertTrue(ex.matches("5%%a"));
64+
assertFalse(ex.matches(""));
65+
assertFalse(ex.matches("%"));
66+
assertFalse(ex.matches("a5"));
4567
}
4668

4769
}

src/test/resources/draft2020-12/issue495.json

Lines changed: 50 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,61 +1,105 @@
11
[
22
{
33
"description": "issue495 using ECMA-262",
4-
"regex": "ecma-262",
54
"schema": {
65
"$schema": "https://json-schema.org/draft/2020-12/schema",
7-
"pattern": "^[a-z]{1,10}$",
6+
"patternProperties": {
7+
"^[a-z]{1,10}$": true,
8+
"(^1$)": true
9+
},
810
"unevaluatedProperties": false
911
},
1012
"tests": [
1113
{
1214
"description": "an expected property name",
15+
"regex": "ecma-262",
1316
"data": { "aaa": 3 },
1417
"valid": true
1518
},
19+
{
20+
"description": "another expected property name",
21+
"regex": "jdk",
22+
"data": { "1": 3 },
23+
"valid": true
24+
},
1625
{
1726
"description": "trailing newline",
27+
"regex": "ecma-262",
1828
"data": { "aaa\n": 3 },
19-
"valid": false
29+
"valid": false,
30+
"disabled": true,
31+
"comment": "Test fails"
32+
},
33+
{
34+
"description": "another trailing newline",
35+
"regex": "jdk",
36+
"data": { "1\n": 3 },
37+
"valid": false,
38+
"disabled": true,
39+
"comment": "Test fails"
2040
},
2141
{
2242
"description": "embedded newline",
43+
"regex": "ecma-262",
2344
"data": { "aaa\nbbb": 3 },
2445
"valid": false
2546
},
2647
{
2748
"description": "leading newline",
49+
"regex": "ecma-262",
2850
"data": { "\nbbb": 3 },
2951
"valid": false
3052
}
3153
]
3254
},
3355
{
3456
"description": "issue495 using Java Pattern",
35-
"regex": "jdk",
3657
"schema": {
3758
"$schema": "https://json-schema.org/draft/2020-12/schema",
38-
"pattern": "^[a-z]{1,10}$",
59+
"patternProperties": {
60+
"^[a-z]{1,10}$": true,
61+
"(^1$)": true
62+
},
3963
"unevaluatedProperties": false
4064
},
4165
"tests": [
4266
{
4367
"description": "an expected property name",
68+
"regex": "jdk",
4469
"data": { "aaa": 3 },
4570
"valid": true
4671
},
72+
{
73+
"description": "another expected property name",
74+
"regex": "jdk",
75+
"data": { "1": 3 },
76+
"valid": true
77+
},
4778
{
4879
"description": "trailing newline",
80+
"regex": "jdk",
4981
"data": { "aaa\n": 3 },
50-
"valid": false
82+
"valid": false,
83+
"disabled": true,
84+
"comment": "Test fails"
85+
},
86+
{
87+
"description": "another trailing newline",
88+
"regex": "jdk",
89+
"data": { "1\n": 3 },
90+
"valid": false,
91+
"disabled": true,
92+
"comment": "Test fails"
5193
},
5294
{
5395
"description": "embedded newline",
96+
"regex": "jdk",
5497
"data": { "aaa\nbbb": 3 },
5598
"valid": false
5699
},
57100
{
58101
"description": "leading newline",
102+
"regex": "jdk",
59103
"data": { "\nbbb": 3 },
60104
"valid": false
61105
}

src/test/resources/draft2020-12/issue782.json

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
[
22
{
33
"description": "issue782 using ECMA-262",
4-
"regex": "ecma-262",
54
"schema": {
65
"$schema": "https://json-schema.org/draft/2020-12/schema",
76
"patternProperties": {
@@ -14,39 +13,44 @@
1413
"tests": [
1514
{
1615
"description": "regexes may be anchored to the start of the property name, 1",
16+
"regex": "ecma-262",
1717
"data": { "x-api-id": 3 },
1818
"valid": true
1919
},
2020
{
2121
"description": "regexes may be anchored to the start of the property name, 2",
22+
"regex": "ecma-262",
2223
"data": { "ax-api-id": 3 },
2324
"valid": false
2425
},
2526
{
2627
"description": "regexes may be anchored to the end of the property name, 1",
28+
"regex": "ecma-262",
2729
"data": { "api-id-y-": 3 },
2830
"valid": true
2931
},
3032
{
3133
"description": "regexes may be anchored to the end of the property name, 2",
34+
"regex": "ecma-262",
3235
"data": { "y-api-id": 3 },
3336
"valid": false
3437
},
3538
{
3639
"description": "regexes may be anchored to both ends of the property name, 1",
40+
"regex": "ecma-262",
3741
"data": { "z-": 3 },
3842
"valid": true
3943
},
4044
{
4145
"description": "regexes may be anchored to both ends of the property name, 2",
46+
"regex": "ecma-262",
4247
"data": { "az-api-id": 3 },
4348
"valid": false
4449
}
4550
]
4651
},
4752
{
4853
"description": "issue782 using Java Pattern",
49-
"regex": "jdk",
5054
"schema": {
5155
"$schema": "https://json-schema.org/draft/2020-12/schema",
5256
"patternProperties": {
@@ -59,31 +63,37 @@
5963
"tests": [
6064
{
6165
"description": "regexes may be anchored to the start of the property name, 1",
66+
"regex": "jdk",
6267
"data": { "x-api-id": 3 },
6368
"valid": true
6469
},
6570
{
6671
"description": "regexes may be anchored to the start of the property name, 2",
72+
"regex": "jdk",
6773
"data": { "ax-api-id": 3 },
6874
"valid": false
6975
},
7076
{
7177
"description": "regexes may be anchored to the end of the property name, 1",
78+
"regex": "jdk",
7279
"data": { "api-id-y-": 3 },
7380
"valid": true
7481
},
7582
{
7683
"description": "regexes may be anchored to the end of the property name, 2",
84+
"regex": "jdk",
7785
"data": { "y-api-id": 3 },
7886
"valid": false
7987
},
8088
{
8189
"description": "regexes may be anchored to both ends of the property name, 1",
90+
"regex": "jdk",
8291
"data": { "z-": 3 },
8392
"valid": true
8493
},
8594
{
8695
"description": "regexes may be anchored to both ends of the property name, 2",
96+
"regex": "jdk",
8797
"data": { "az-api-id": 3 },
8898
"valid": false
8999
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy