diff --git a/java/ql/lib/change-notes/2024-01-06-regex-flag-parsing.md b/java/ql/lib/change-notes/2024-01-06-regex-flag-parsing.md new file mode 100644 index 000000000000..532ab1a88dc0 --- /dev/null +++ b/java/ql/lib/change-notes/2024-01-06-regex-flag-parsing.md @@ -0,0 +1,4 @@ +--- +category: fix +--- +* Fixed regular expressions containing flags not being parsed correctly in some cases. diff --git a/java/ql/lib/semmle/code/java/regex/regex.qll b/java/ql/lib/semmle/code/java/regex/regex.qll index a131ac0deb5b..f0336c2d0235 100644 --- a/java/ql/lib/semmle/code/java/regex/regex.qll +++ b/java/ql/lib/semmle/code/java/regex/regex.qll @@ -479,7 +479,7 @@ abstract class RegexString extends StringLiteral { private predicate flagGroupStartNoModes(int start, int end) { this.isGroupStart(start) and this.getChar(start + 1) = "?" and - this.getChar(start + 2) in ["i", "m", "s", "u", "x", "U"] and + this.getChar(start + 2) in ["-", "i", "d", "m", "s", "u", "x", "U"] and end = start + 2 } @@ -491,7 +491,7 @@ abstract class RegexString extends StringLiteral { this.flagGroupStartNoModes(start, pos) or this.modeCharacter(start, pos - 1) and - this.getChar(pos) in ["i", "m", "s", "u", "x", "U"] + this.getChar(pos) in ["-", "i", "d", "m", "s", "u", "x", "U"] } /** @@ -499,7 +499,10 @@ abstract class RegexString extends StringLiteral { */ private predicate flagGroupStart(int start, int end) { this.flagGroupStartNoModes(start, _) and - end = max(int i | this.modeCharacter(start, i) | i + 1) + // Check if this is a capturing group with flags, and therefore the `:` should be excluded + exists(int maybeEnd | maybeEnd = max(int i | this.modeCharacter(start, i) | i + 1) | + if this.getChar(maybeEnd) = ":" then end = maybeEnd + 1 else end = maybeEnd + ) } /** @@ -510,9 +513,15 @@ abstract class RegexString extends StringLiteral { * ``` */ private predicate flag(string c) { - exists(int pos | - this.modeCharacter(_, pos) and - this.getChar(pos) = c + exists(int start, int pos | + 
this.modeCharacter(start, pos) and + this.getChar(pos) = c and + // Ignore if flag is disabled; use `<=` to also exclude `-` itself + // This does not properly handle the (contrived) case where a flag is both enabled and + // disabled, e.g. `(?i-i)a+`, in which case the flag seems to act as if it was disabled + not exists(int minusPos | + this.modeCharacter(start, minusPos) and this.getChar(minusPos) = "-" and minusPos <= pos + ) ) } @@ -524,6 +533,8 @@ abstract class RegexString extends StringLiteral { exists(string c | this.flag(c) | c = "i" and result = "IGNORECASE" or + c = "d" and result = "UNIXLINES" + or c = "m" and result = "MULTILINE" or c = "s" and result = "DOTALL" @@ -930,13 +941,13 @@ class Regex extends RegexString { /** * Gets a mode (if any) of this regular expression. Can be any of: - * DEBUG - * IGNORECASE - * MULTILINE - * DOTALL - * UNICODE - * VERBOSE - * UNICODECLASS + * - IGNORECASE + * - UNIXLINES + * - MULTILINE + * - DOTALL + * - UNICODE + * - VERBOSE + * - UNICODECLASS */ string getAMode() { result != "None" and @@ -946,7 +957,7 @@ class Regex extends RegexString { } /** - * Holds if this regex is used to match against a full string, + * Holds if this regex is used to match against a full string, * as though it was implicitly surrounded by ^ and $. */ predicate matchesFullString() { matches_full_string = true } diff --git a/java/ql/test/library-tests/regex/parser/RegexParseTests.expected b/java/ql/test/library-tests/regex/parser/RegexParseTests.expected index ad94d005289c..03dc22616100 100644 --- a/java/ql/test/library-tests/regex/parser/RegexParseTests.expected +++ b/java/ql/test/library-tests/regex/parser/RegexParseTests.expected @@ -1,4 +1,8 @@ parseFailures +modes +| Test.java:17:9:17:37 | "(?i)(?=a)(?!b)(?<=c)(?
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: