Skip to content

Commit dda96f8

Browse files
committed
Get rid of obsolete replacement of unpaired surrogates with U+FFFD.
1 parent 46dae3d commit dda96f8

File tree

2 files changed

+1
-8
lines changed

2 files changed

+1
-8
lines changed

.pytest.expect

-228 Bytes
Binary file not shown.

html5lib/inputstream.py

Lines changed: 1 addition & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -183,14 +183,10 @@ def __init__(self, source):
183183
# Such platforms will have already checked for such
184184
# surrogate errors, so no need to do this checking.
185185
self.reportCharacterErrors = None
186-
self.replaceCharactersRegexp = None
187186
elif len("\U0010FFFF") == 1:
188187
self.reportCharacterErrors = self.characterErrorsUCS4
189-
self.replaceCharactersRegexp = re.compile(eval('"[\\uD800-\\uDFFF]"'))
190188
else:
191189
self.reportCharacterErrors = self.characterErrorsUCS2
192-
self.replaceCharactersRegexp = re.compile(
193-
eval('"([\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|(?<![\\uD800-\\uDBFF])[\\uDC00-\\uDFFF])"'))
194190

195191
# List of where new lines occur
196192
self.newLines = [0]
@@ -288,10 +284,7 @@ def readChunk(self, chunkSize=None):
288284
if self.reportCharacterErrors:
289285
self.reportCharacterErrors(data)
290286

291-
# Replace invalid characters
292-
# Note U+0000 is dealt with in the tokenizer
293-
data = self.replaceCharactersRegexp.sub("\ufffd", data)
294-
287+
# Replace invalid characters
295288
data = data.replace("\r\n", "\n")
296289
data = data.replace("\r", "\n")
297290

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy