Skip to content

Commit 2a332b9

Browse files
committed
Save some calls to len() in the input stream (improves tokeniser performance by maybe 1-2%)
--HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%401234
1 parent 43a2727 commit 2a332b9

File tree

1 file changed

+5 -2 lines changed

1 file changed

+5 -2 lines changed

src/html5lib/inputstream.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -70,6 +70,7 @@ def __init__(self, source, encoding=None, parseMeta=True, chardet=True):
 70  70    'replace')
 71  71
 72  72    self.chunk = u""
     73  + self.chunkSize = 0
 73  74    self.chunkOffset = 0
 74  75    self.ungetBuffer = [] # reversed list of chars from unget()
 75  76    self.readChars = []
@@ -255,7 +256,7 @@ def char(self):
255 256    self.readChars.append(char)
256 257    return char
257 258
258      - if self.chunkOffset >= len(self.chunk):
    259  + if self.chunkOffset >= self.chunkSize:
259 260    if not self.readChunk():
260 261    return EOF
261 262
@@ -267,6 +268,7 @@ def char(self):
267 268
268 269    def readChunk(self, chunkSize=_defaultChunkSize):
269 270    self.chunk = u""
    271  + self.chunkSize = 0
270 272    self.chunkOffset = 0
271 273
272 274    data = self.dataStream.read(chunkSize)
@@ -291,6 +293,7 @@ def readChunk(self, chunkSize=_defaultChunkSize):
291 293
292 294    data = unicode(data)
293 295    self.chunk = data
    296  + self.chunkSize = len(data)
294 297
295 298    self.updatePosition()
296 299    return True
@@ -329,7 +332,7 @@ def charsUntil(self, characters, opposite = False):
329 332    m = chars.match(self.chunk, self.chunkOffset)
330 333    # If not everything matched, return everything up to the part that didn't match
331 334    end = m.end()
332      - if end != len(self.chunk):
    335  + if end != self.chunkSize:
333 336    rv.append(self.chunk[self.chunkOffset:end])
334 337    self.chunkOffset = end
335 338    break

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy