Skip to content

Commit b2e4802

Browse files
committed
Speed up setting attributes on etree implementations
1 parent b075e51 commit b2e4802

File tree

2 files changed

+55
-30
lines changed

2 files changed

+55
-30
lines changed

html5lib/treebuilders/etree.py

Lines changed: 15 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,8 @@
55

66
import re
77

8+
from copy import copy
9+
810
from . import base
911
from .. import _ihatexml
1012
from .. import constants
@@ -61,16 +63,17 @@ def _getAttributes(self):
6163
return self._element.attrib
6264

6365
def _setAttributes(self, attributes):
64-
# Delete existing attributes first
65-
# XXX - there may be a better way to do this...
66-
for key in list(self._element.attrib.keys()):
67-
del self._element.attrib[key]
68-
for key, value in attributes.items():
69-
if isinstance(key, tuple):
70-
name = "{%s}%s" % (key[2], key[1])
71-
else:
72-
name = key
73-
self._element.set(name, value)
66+
el_attrib = self._element.attrib
67+
el_attrib.clear()
68+
if attributes:
69+
# calling .items _always_ allocates, and the above truthy check is cheaper than the
70+
# allocation on average
71+
for key, value in attributes.items():
72+
if isinstance(key, tuple):
73+
name = "{%s}%s" % (key[2], key[1])
74+
else:
75+
name = key
76+
el_attrib[name] = value
7477

7578
attributes = property(_getAttributes, _setAttributes)
7679

@@ -129,8 +132,8 @@ def insertText(self, data, insertBefore=None):
129132

130133
def cloneNode(self):
131134
element = type(self)(self.name, self.namespace)
132-
for name, value in self.attributes.items():
133-
element.attributes[name] = value
135+
if self._element.attrib:
136+
element._element.attrib = copy(self._element.attrib)
134137
return element
135138

136139
def reparentChildren(self, newParent):

html5lib/treebuilders/etree_lxml.py

Lines changed: 40 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,19 @@
1616
import re
1717
import sys
1818

19+
try:
20+
from collections.abc import MutableMapping
21+
except ImportError:
22+
from collections import MutableMapping
23+
1924
from . import base
2025
from ..constants import DataLossWarning
2126
from .. import constants
2227
from . import etree as etree_builders
2328
from .. import _ihatexml
2429

2530
import lxml.etree as etree
31+
from six import PY3, binary_type
2632

2733

2834
fullTree = True
@@ -189,26 +195,37 @@ def __init__(self, namespaceHTMLElements, fullTree=False):
189195
infosetFilter = self.infosetFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True)
190196
self.namespaceHTMLElements = namespaceHTMLElements
191197

192-
class Attributes(dict):
193-
def __init__(self, element, value=None):
194-
if value is None:
195-
value = {}
198+
class Attributes(MutableMapping):
199+
def __init__(self, element):
196200
self._element = element
197-
dict.__init__(self, value) # pylint:disable=non-parent-init-called
198-
for key, value in self.items():
199-
if isinstance(key, tuple):
200-
name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1]))
201-
else:
202-
name = infosetFilter.coerceAttribute(key)
203-
self._element._element.attrib[name] = value
204201

205-
def __setitem__(self, key, value):
206-
dict.__setitem__(self, key, value)
202+
def _coerceKey(self, key):
207203
if isinstance(key, tuple):
208204
name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1]))
209205
else:
210206
name = infosetFilter.coerceAttribute(key)
211-
self._element._element.attrib[name] = value
207+
return name
208+
209+
def __getitem__(self, key):
210+
value = self._element._element.attrib[self._coerceKey(key)]
211+
if not PY3 and isinstance(value, binary_type):
212+
value = value.decode("ascii")
213+
return value
214+
215+
def __setitem__(self, key, value):
216+
self._element._element.attrib[self._coerceKey(key)] = value
217+
218+
def __delitem__(self, key):
219+
del self._element._element.attrib[self._coerceKey(key)]
220+
221+
def __iter__(self):
222+
return iter(self._element._element.attrib)
223+
224+
def __len__(self):
225+
return len(self._element._element.attrib)
226+
227+
def clear(self):
228+
return self._element._element.attrib.clear()
212229

213230
class Element(builder.Element):
214231
def __init__(self, name, namespace):
@@ -229,17 +246,22 @@ def _getName(self):
229246
def _getAttributes(self):
230247
return self._attributes
231248

232-
def _setAttributes(self, attributes):
233-
self._attributes = Attributes(self, attributes)
249+
def _setAttributes(self, value):
250+
attributes = self.attributes
251+
attributes.clear()
252+
attributes.update(value)
234253

235254
attributes = property(_getAttributes, _setAttributes)
236255

237256
def insertText(self, data, insertBefore=None):
238257
data = infosetFilter.coerceCharacters(data)
239258
builder.Element.insertText(self, data, insertBefore)
240259

241-
def appendChild(self, child):
242-
builder.Element.appendChild(self, child)
260+
def cloneNode(self):
261+
element = type(self)(self.name, self.namespace)
262+
if self._element.attrib:
263+
element._element.attrib.update(self._element.attrib)
264+
return element
243265

244266
class Comment(builder.Comment):
245267
def __init__(self, data):

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy