Skip to content

Commit 377c177

Browse files
committed
chore: refactor parseAttributes
1 parent 50dd335 commit 377c177

File tree

1 file changed

+110
-145
lines changed

1 file changed

+110
-145
lines changed

src/parser/html.ts

Lines changed: 110 additions & 145 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,52 @@ export type AttributeValueToken = {
1414
end: number;
1515
};
1616

17-
const spacePattern = /\s/;
17+
/** Matches a string that consists of exactly one whitespace character. */
const RE_IS_SPACE = /^\s$/u;

/**
 * Cursor over the source text, shared by the attribute parsing helpers.
 *
 * Invariant: `curr` is always the character at `index` (or `null` when the
 * cursor is at or past the end of `code`). Every method that moves the cursor
 * goes through `moveTo` so that `index` and `curr` can never drift apart.
 */
class State {
  /** Full source text being scanned. */
  public readonly code: string;

  /** Offset of the cursor within `code`. */
  public index: number;

  /** Character at `index`, or `null` when the cursor is at or past the end. */
  public curr: string | null = null;

  /**
   * @param code Source text to scan.
   * @param index Offset at which scanning starts.
   */
  public constructor(code: string, index: number) {
    this.code = code;
    this.index = index;
    this.curr = code[index] ?? null;
  }

  /** Move the cursor forward to the first non-whitespace character (or to the end). */
  public skipSpaces(): void {
    while (this.currIsSpace()) {
      this.advance();
    }
  }

  /** Whether the character under the cursor is whitespace. Always `false` at the end. */
  public currIsSpace(): boolean {
    // Read from `code` at `index`, exactly like `currIs`, so the two
    // predicates cannot disagree about what is under the cursor.
    const char = this.code[this.index];
    return char !== undefined && RE_IS_SPACE.test(char);
  }

  /** Whether the text starting at the cursor begins with `expect`. */
  public currIs(expect: string): boolean {
    return this.code.startsWith(expect, this.index);
  }

  /** Whether the cursor is at or past the end of the source text. */
  public eof(): boolean {
    return this.index >= this.code.length;
  }

  /**
   * Consume `expect` if the text at the cursor starts with it.
   *
   * @returns `expect` when it was consumed, otherwise `null` (cursor unchanged).
   */
  public eat<E extends string>(expect: E): E | null {
    if (!this.currIs(expect)) {
      return null;
    }
    this.moveTo(this.index + expect.length);
    return expect;
  }

  /**
   * Move the cursor one character forward.
   *
   * @returns The new current character, or `null` when the end was reached.
   *   Calling this at the end is a no-op, so `index` never overshoots
   *   `code.length` and token end offsets stay within the source.
   */
  public advance(): string | null {
    if (this.eof()) {
      return (this.curr = null);
    }
    return this.moveTo(this.index + 1);
  }

  /** Set the cursor position and refresh `curr` in one step. */
  private moveTo(index: number): string | null {
    this.index = index;
    return (this.curr = this.code[index] ?? null);
  }
}
1863

1964
/** Parse HTML attributes */
2065
export function parseAttributes(
@@ -23,182 +68,102 @@ export function parseAttributes(
2368
): { attributes: AttributeToken[]; index: number } {
2469
const attributes: AttributeToken[] = [];
2570

26-
let index = startIndex;
27-
while (index < code.length) {
28-
const char = code[index];
29-
if (spacePattern.test(char)) {
30-
index++;
31-
continue;
32-
}
33-
if (char === ">" || (char === "/" && code[index + 1] === ">")) break;
34-
const attrData = parseAttribute(code, index);
35-
attributes.push(attrData.attribute);
36-
index = attrData.index;
71+
const state = new State(code, startIndex);
72+
73+
while (!state.eof()) {
74+
state.skipSpaces();
75+
if (state.currIs(">") || state.currIs("/>") || state.eof()) break;
76+
attributes.push(parseAttribute(state));
3777
}
3878

39-
return { attributes, index };
79+
return { attributes, index: state.index };
4080
}
4181

4282
/** Parse HTML attribute */
43-
function parseAttribute(
44-
code: string,
45-
startIndex: number,
46-
): { attribute: AttributeToken; index: number } {
83+
function parseAttribute(state: State): AttributeToken {
4784
// parse key
48-
const keyData = parseAttributeKey(code, startIndex);
49-
const key = keyData.key;
50-
let index = keyData.index;
51-
if (code[index] !== "=") {
85+
const key = parseAttributeKey(state);
86+
state.skipSpaces();
87+
if (!state.eat("=")) {
5288
return {
53-
attribute: {
54-
key,
55-
value: null,
56-
},
57-
index,
89+
key,
90+
value: null,
5891
};
5992
}
60-
61-
index++;
62-
63-
// skip spaces
64-
while (index < code.length) {
65-
const char = code[index];
66-
if (spacePattern.test(char)) {
67-
index++;
68-
continue;
69-
}
70-
break;
93+
state.skipSpaces();
94+
if (state.eof()) {
95+
return {
96+
key,
97+
value: null,
98+
};
7199
}
72-
73100
// parse value
74-
const valueData = parseAttributeValue(code, index);
75-
101+
const value = parseAttributeValue(state);
76102
return {
77-
attribute: {
78-
key,
79-
value: valueData.value,
80-
},
81-
index: valueData.index,
103+
key,
104+
value,
82105
};
83106
}
84107

85108
/** Parse HTML attribute key */
86-
function parseAttributeKey(
87-
code: string,
88-
startIndex: number,
89-
): { key: AttributeKeyToken; index: number } {
90-
const key: AttributeKeyToken = {
91-
name: code[startIndex],
92-
start: startIndex,
93-
end: startIndex + 1,
94-
};
95-
let index = key.end;
96-
while (index < code.length) {
97-
const char = code[index];
109+
function parseAttributeKey(state: State): AttributeKeyToken {
110+
const start = state.index;
111+
while (state.advance()) {
98112
if (
99-
char === "=" ||
100-
char === ">" ||
101-
(char === "/" && code[index + 1] === ">")
113+
state.currIs("=") ||
114+
state.currIs(">") ||
115+
state.currIs("/>") ||
116+
state.currIsSpace()
102117
) {
103118
break;
104119
}
105-
if (spacePattern.test(char)) {
106-
for (let i = index; i < code.length; i++) {
107-
const c = code[i];
108-
if (c === "=") {
109-
return {
110-
key,
111-
index: i,
112-
};
113-
}
114-
if (spacePattern.test(c)) {
115-
continue;
116-
}
117-
return {
118-
key,
119-
index,
120-
};
121-
}
122-
break;
123-
}
124-
key.name += char;
125-
index++;
126-
key.end = index;
127120
}
121+
const end = state.index;
128122
return {
129-
key,
130-
index,
123+
name: state.code.slice(start, end),
124+
start,
125+
end,
131126
};
132127
}
133128

134129
/** Parse HTML attribute value */
135-
function parseAttributeValue(
136-
code: string,
137-
startIndex: number,
138-
): { value: AttributeValueToken | null; index: number } {
139-
let index = startIndex;
140-
const maybeQuote = code[index];
141-
if (maybeQuote == null) {
142-
return {
143-
value: null,
144-
index,
145-
};
146-
}
147-
const quote = maybeQuote === '"' || maybeQuote === "'" ? maybeQuote : null;
130+
function parseAttributeValue(state: State): AttributeValueToken {
131+
const start = state.index;
132+
const quote = state.eat('"') || state.eat("'");
148133
if (quote) {
149-
index++;
150-
}
151-
const valueFirstChar = code[index];
152-
if (valueFirstChar == null) {
153-
return {
154-
value: {
155-
value: maybeQuote,
134+
if (state.eof()) {
135+
return {
136+
value: quote,
156137
quote: null,
157-
start: startIndex,
158-
end: index,
159-
},
160-
index,
161-
};
162-
}
163-
if (valueFirstChar === quote) {
164-
return {
165-
value: {
166-
value: "",
167-
quote,
168-
start: startIndex,
169-
end: index + 1,
170-
},
171-
index: index + 1,
172-
};
173-
}
174-
const value: AttributeValueToken = {
175-
value: valueFirstChar,
176-
quote,
177-
start: startIndex,
178-
end: index + 1,
179-
};
180-
index = value.end;
181-
while (index < code.length) {
182-
const char = code[index];
183-
if (quote) {
184-
if (quote === char) {
185-
index++;
186-
value.end = index;
138+
start,
139+
end: state.index,
140+
};
141+
}
142+
let c: string | null;
143+
while ((c = state.curr)) {
144+
state.advance();
145+
if (c === quote) {
146+
const end = state.index;
147+
return {
148+
value: state.code.slice(start + 1, end - 1),
149+
quote,
150+
start,
151+
end,
152+
};
153+
}
154+
}
155+
} else {
156+
while (state.advance()) {
157+
if (state.currIsSpace() || state.currIs(">") || state.currIs("/>")) {
187158
break;
188159
}
189-
} else if (
190-
spacePattern.test(char) ||
191-
char === ">" ||
192-
(char === "/" && code[index + 1] === ">")
193-
) {
194-
break;
195160
}
196-
value.value += char;
197-
index++;
198-
value.end = index;
199161
}
162+
const end = state.index;
200163
return {
201-
value,
202-
index,
164+
value: state.code.slice(start, end),
165+
quote: null,
166+
start,
167+
end,
203168
};
204169
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy