6
6
"use strict" ;
7
7
8
8
const astUtils = require ( "./utils/ast-utils" ) ;
9
+ const { RegExpParser, visitRegExpAST } = require ( "@eslint-community/regexpp" ) ;
9
10
11
+ /**
12
+ * @typedef {import('@eslint-community/regexpp').AST.CharacterClass } CharacterClass
13
+ * @typedef {import('@eslint-community/regexpp').AST.ExpressionCharacterClass } ExpressionCharacterClass
14
+ */
10
15
//------------------------------------------------------------------------------
11
16
// Rule Definition
12
17
//------------------------------------------------------------------------------
@@ -28,55 +33,17 @@ const VALID_STRING_ESCAPES = union(new Set("\\nrvtbfux"), astUtils.LINEBREAKS);
28
33
// Characters that are accepted after a backslash inside a character class when the `v` flag is not set;
// also the base set that the other regex escape sets are built from via `union`.
const REGEX_GENERAL_ESCAPES = new Set ( "\\bcdDfnpPrsStvwWxu0123456789]" ) ;
29
34
// Characters that are accepted after a backslash outside of any character class:
// the general escapes plus the additional characters listed here.
const REGEX_NON_CHARCLASS_ESCAPES = union ( REGEX_GENERAL_ESCAPES , new Set ( "^/.$*+?[{}|()Bk" ) ) ;
30
35
31
- /**
32
- * Parses a regular expression into a list of characters with character class info.
33
- * @param {string } regExpText The raw text used to create the regular expression
34
- * @returns {Object[] } A list of characters, each with info on escaping and whether they're in a character class.
35
- * @example
36
- *
37
- * parseRegExp("a\\b[cd-]");
38
- *
39
- * // returns:
40
- * [
41
- * { text: "a", index: 0, escaped: false, inCharClass: false, startsCharClass: false, endsCharClass: false },
42
- * { text: "b", index: 2, escaped: true, inCharClass: false, startsCharClass: false, endsCharClass: false },
43
- * { text: "c", index: 4, escaped: false, inCharClass: true, startsCharClass: true, endsCharClass: false },
44
- * { text: "d", index: 5, escaped: false, inCharClass: true, startsCharClass: false, endsCharClass: false },
45
- * { text: "-", index: 6, escaped: false, inCharClass: true, startsCharClass: false, endsCharClass: false }
46
- * ];
47
- *
36
+ /*
37
+ * Set of characters that are valid to escape in character classes in `unicodeSets` mode.
38
+ * ( ) [ ] { } / - \ | are ClassSetSyntaxCharacter
48
39
*/
49
- function parseRegExp ( regExpText ) {
50
- const charList = [ ] ;
40
+ const REGEX_CLASSSET_CHARACTER_ESCAPES = union ( REGEX_GENERAL_ESCAPES , new Set ( "q/[{}|()-" ) ) ;
51
41
52
- regExpText . split ( "" ) . reduce ( ( state , char , index ) => {
53
- if ( ! state . escapeNextChar ) {
54
- if ( char === "\\" ) {
55
- return Object . assign ( state , { escapeNextChar : true } ) ;
56
- }
57
- if ( char === "[" && ! state . inCharClass ) {
58
- return Object . assign ( state , { inCharClass : true , startingCharClass : true } ) ;
59
- }
60
- if ( char === "]" && state . inCharClass ) {
61
- if ( charList . length && charList [ charList . length - 1 ] . inCharClass ) {
62
- charList [ charList . length - 1 ] . endsCharClass = true ;
63
- }
64
- return Object . assign ( state , { inCharClass : false , startingCharClass : false } ) ;
65
- }
66
- }
67
- charList . push ( {
68
- text : char ,
69
- index,
70
- escaped : state . escapeNextChar ,
71
- inCharClass : state . inCharClass ,
72
- startsCharClass : state . startingCharClass ,
73
- endsCharClass : false
74
- } ) ;
75
- return Object . assign ( state , { escapeNextChar : false , startingCharClass : false } ) ;
76
- } , { escapeNextChar : false , inCharClass : false , startingCharClass : false } ) ;
77
-
78
- return charList ;
79
- }
42
+ /*
43
+ * Set of the single characters that form a ClassSetReservedDoublePunctuator when doubled.
44
+ * && !! ## $$ %% ** ++ ,, .. :: ;; << == >> ?? @@ ^^ `` ~~ are ClassSetReservedDoublePunctuator
45
+ */
46
+ const REGEX_CLASS_SET_RESERVED_DOUBLE_PUNCTUATOR = new Set ( "!#$%&*+,.:;<=>?@^`~" ) ;
80
47
81
48
/** @type {import('../shared/types').Rule } */
82
49
module . exports = {
@@ -103,15 +70,17 @@ module.exports = {
103
70
104
71
create ( context ) {
105
72
const sourceCode = context . sourceCode ;
73
+ const parser = new RegExpParser ( ) ;
106
74
107
75
/**
108
76
* Reports a node
109
77
* @param {ASTNode } node The node to report
110
78
* @param {number } startOffset The backslash's offset from the start of the node
111
79
* @param {string } character The uselessly escaped character (not including the backslash)
80
+ * @param {boolean } [disableEscapeBackslashSuggest] `true` if escapeBackslash suggestion should be turned off.
112
81
* @returns {void }
113
82
*/
114
- function report ( node , startOffset , character ) {
83
+ function report ( node , startOffset , character , disableEscapeBackslashSuggest ) {
115
84
const rangeStart = node . range [ 0 ] + startOffset ;
116
85
const range = [ rangeStart , rangeStart + 1 ] ;
117
86
const start = sourceCode . getLocFromIndex ( rangeStart ) ;
@@ -134,12 +103,16 @@ module.exports = {
134
103
return fixer . removeRange ( range ) ;
135
104
}
136
105
} ,
137
- {
138
- messageId : "escapeBackslash" ,
139
- fix ( fixer ) {
140
- return fixer . insertTextBeforeRange ( range , "\\" ) ;
141
- }
142
- }
106
+ ...disableEscapeBackslashSuggest
107
+ ? [ ]
108
+ : [
109
+ {
110
+ messageId : "escapeBackslash" ,
111
+ fix ( fixer ) {
112
+ return fixer . insertTextBeforeRange ( range , "\\" ) ;
113
+ }
114
+ }
115
+ ]
143
116
]
144
117
} ) ;
145
118
}
@@ -182,6 +155,133 @@ module.exports = {
182
155
}
183
156
}
184
157
158
/**
 * Walks the pattern of a regular expression literal and reports every
 * backslash that escapes a character which did not need escaping.
 * @private
 * @param {ASTNode} node The regular expression literal to inspect.
 * @returns {void}
 */
function validateRegExp(node) {
    const { pattern, flags } = node.regex;
    const unicode = flags.includes("u");
    const unicodeSets = flags.includes("v");
    let patternNode;

    try {
        patternNode = parser.parsePattern(pattern, 0, pattern.length, { unicode, unicodeSets });
    } catch {

        // Patterns that fail to parse are left alone.
        return;
    }

    /**
     * Character classes currently being visited; the innermost one is last.
     * @type {(CharacterClass | ExpressionCharacterClass)[]}
     */
    const enclosingClasses = [];

    const enterClass = classNode => {
        enclosingClasses.push(classNode);
    };
    const leaveClass = () => {
        enclosingClasses.pop();
    };

    /**
     * Decides whether an escape that is not in the allowed set is nevertheless
     * meaningful because of where it sits inside a character class.
     * @param {Object} characterNode The escaped character node.
     * @param {string} escapedChar The character following the backslash.
     * @param {CharacterClass | ExpressionCharacterClass} classNode The innermost enclosing class.
     * @returns {boolean} `true` if the escape must be kept.
     */
    function isEscapeNeededInClass(characterNode, escapedChar, classNode) {
        const firstIndexInClass = classNode.start + 1;

        // An unescaped '^' right after the opening bracket would negate the class.
        if (escapedChar === "^" && characterNode.start === firstIndexInClass) {
            return true;
        }

        if (!unicodeSets) {

            // Without the `v` flag, '-' only needs escaping when it is at neither edge of the class.
            return escapedChar === "-" &&
                characterNode.start !== firstIndexInClass &&
                characterNode.end !== classNode.end - 1;
        }

        // `v` flag: only the halves of a ClassSetReservedDoublePunctuator can still matter.
        if (!REGEX_CLASS_SET_RESERVED_DOUBLE_PUNCTUATOR.has(escapedChar)) {
            return false;
        }

        // The same character follows, so dropping the backslash would create a doubled punctuator.
        if (pattern[characterNode.end] === escapedChar) {
            return true;
        }

        // The same character does not precede either: nothing to protect.
        if (pattern[characterNode.start - 1] !== escapedChar) {
            return false;
        }

        if (escapedChar !== "^") {
            return true;
        }

        // The preceding '^' only makes the escape redundant when it is the class's negation caret.
        if (!classNode.negate) {
            return true;
        }

        return firstIndexInClass < characterNode.start - 1;
    }

    visitRegExpAST(patternNode, {
        onCharacterClassEnter: enterClass,
        onCharacterClassLeave: leaveClass,
        onExpressionCharacterClassEnter: enterClass,
        onExpressionCharacterClassLeave: leaveClass,
        onCharacterEnter(characterNode) {
            const { raw } = characterNode;

            // Only characters written with a leading backslash are of interest.
            if (!raw.startsWith("\\")) {
                return;
            }

            const escapedChar = raw.slice(1);

            // When the escape denotes something other than the character itself, it is a real escape.
            if (String.fromCodePoint(characterNode.value) !== escapedChar) {
                return;
            }

            const insideClass = enclosingClasses.length > 0;
            let allowedEscapes = REGEX_NON_CHARCLASS_ESCAPES;

            if (insideClass) {
                allowedEscapes = unicodeSets ? REGEX_CLASSSET_CHARACTER_ESCAPES : REGEX_GENERAL_ESCAPES;
            }

            if (allowedEscapes.has(escapedChar)) {
                return;
            }

            if (insideClass) {
                const innermostClass = enclosingClasses[enclosingClasses.length - 1];

                if (isEscapeNeededInClass(characterNode, escapedChar, innermostClass)) {
                    return;
                }
            }

            // The "escape the backslash" suggestion is withheld for operands of `&&` / `--` expressions.
            const disableEscapeBackslashSuggest =
                insideClass &&
                unicodeSets &&
                ["ClassIntersection", "ClassSubtraction"].includes(characterNode.parent.type);

            // Offset by one when reporting, relative to the start of `node`.
            report(
                node,
                characterNode.start + 1,
                escapedChar,
                disableEscapeBackslashSuggest
            );
        }
    });
}
284
+
185
285
/**
186
286
* Checks if a node has an escape.
187
287
* @param {ASTNode } node node to check.
@@ -220,32 +320,7 @@ module.exports = {
220
320
validateString ( node , match ) ;
221
321
}
222
322
} else if ( node . regex ) {
223
- parseRegExp ( node . regex . pattern )
224
-
225
- /*
226
- * The '-' character is a special case, because it's only valid to escape it if it's in a character
227
- * class, and is not at either edge of the character class. To account for this, don't consider '-'
228
- * characters to be valid in general, and filter out '-' characters that appear in the middle of a
229
- * character class.
230
- */
231
- . filter ( charInfo => ! ( charInfo . text === "-" && charInfo . inCharClass && ! charInfo . startsCharClass && ! charInfo . endsCharClass ) )
232
-
233
- /*
234
- * The '^' character is also a special case; it must always be escaped outside of character classes, but
235
- * it only needs to be escaped in character classes if it's at the beginning of the character class. To
236
- * account for this, consider it to be a valid escape character outside of character classes, and filter
237
- * out '^' characters that appear at the start of a character class.
238
- */
239
- . filter ( charInfo => ! ( charInfo . text === "^" && charInfo . startsCharClass ) )
240
-
241
- // Filter out characters that aren't escaped.
242
- . filter ( charInfo => charInfo . escaped )
243
-
244
- // Filter out characters that are valid to escape, based on their position in the regular expression.
245
- . filter ( charInfo => ! ( charInfo . inCharClass ? REGEX_GENERAL_ESCAPES : REGEX_NON_CHARCLASS_ESCAPES ) . has ( charInfo . text ) )
246
-
247
- // Report all the remaining characters.
248
- . forEach ( charInfo => report ( node , charInfo . index , charInfo . text ) ) ;
323
+ validateRegExp ( node ) ;
249
324
}
250
325
251
326
}
0 commit comments