Skip to content

Commit 0c551c9

Browse files
committed
Make lxml tree-builder coerce comments to work with lxml 3.5.
1 parent dae03f6 commit 0c551c9

File tree

2 files changed

+8
-4
lines changed

2 files changed

+8
-4
lines changed

html5lib/ihatexml.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -225,6 +225,9 @@ def coerceComment(self, data):
225225
while "--" in data:
226226
warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning)
227227
data = data.replace("--", "- -")
228+
if data.endswith("-"):
229+
warnings.warn("Comments cannot end in a dash", DataLossWarning)
230+
data += " "
228231
return data
229232

230233
def coerceCharacters(self, data):

html5lib/treebuilders/etree_lxml.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -54,7 +54,7 @@ def _getChildNodes(self):
5454
def testSerializer(element):
5555
rv = []
5656
finalText = None
57-
infosetFilter = ihatexml.InfosetFilter()
57+
infosetFilter = ihatexml.InfosetFilter(preventDoubleDashComments=True)
5858

5959
def serializeElement(element, indent=0):
6060
if not hasattr(element, "tag"):
@@ -189,7 +189,7 @@ class TreeBuilder(_base.TreeBuilder):
189189

190190
def __init__(self, namespaceHTMLElements, fullTree=False):
191191
builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
192-
infosetFilter = self.infosetFilter = ihatexml.InfosetFilter()
192+
infosetFilter = self.infosetFilter = ihatexml.InfosetFilter(preventDoubleDashComments=True)
193193
self.namespaceHTMLElements = namespaceHTMLElements
194194

195195
class Attributes(dict):
@@ -257,7 +257,7 @@ def _getData(self):
257257
data = property(_getData, _setData)
258258

259259
self.elementClass = Element
260-
self.commentClass = builder.Comment
260+
self.commentClass = Comment
261261
# self.fragmentClass = builder.DocumentFragment
262262
_base.TreeBuilder.__init__(self, namespaceHTMLElements)
263263

@@ -344,7 +344,8 @@ def insertRoot(self, token):
344344

345345
# Append the initial comments:
346346
for comment_token in self.initial_comments:
347-
root.addprevious(etree.Comment(comment_token["data"]))
347+
comment = self.commentClass(comment_token["data"])
348+
root.addprevious(comment._element)
348349

349350
# Create the root document and add the ElementTree to it
350351
self.document = self.documentClass()

0 commit comments

Comments (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy