diff --git a/html5lib/tests/expected-failures/tokenizer.dat b/html5lib/tests/expected-failures/tokenizer.dat new file mode 100644 index 00000000..e4fd1b03 --- /dev/null +++ b/html5lib/tests/expected-failures/tokenizer.dat @@ -0,0 +1,40 @@ +#data + + +#data + + +#data +abcd + +#data +ab + +#data + + +#data +ab + +#data +ab + +#data +ab + +#data +A + +#data +ab + +#data + + +#data +ab + +#data + te st + +#data +ab + +#data + te st + +#data +ab + +#data +ab + +#data +A + +#data + + +#data +ab + +#data +ab + +#data +ab diff --git a/html5lib/tests/support.py b/html5lib/tests/support.py index 41f2d2a0..116387f9 100644 --- a/html5lib/tests/support.py +++ b/html5lib/tests/support.py @@ -6,6 +6,8 @@ import glob import xml.sax.handler +from nose.plugins.skip import SkipTest + base_path = os.path.split(__file__)[0] test_dir = os.path.join(base_path, 'testdata') @@ -128,7 +130,7 @@ def convertData(data): def errorMessage(input, expected, actual): msg = ("Input:\n%s\nExpected:\n%s\nRecieved\n%s\n" % (repr(input), repr(expected), repr(actual))) - if sys.version_info.major == 2: + if sys.version_info[0] == 2: msg = msg.encode("ascii", "backslashreplace") return msg @@ -175,3 +177,17 @@ def processingInstruction(self, target, data): def skippedEntity(self, name): self.visited.append(('skippedEntity', name)) + + +def xfail(test): + """Expected fail decorator function""" + def t(*args, **kwargs): + try: + test(*args) + except SkipTest: + raise + except: + return + else: + assert False, "UNEXPECTED PASS" + return t diff --git a/html5lib/tests/test_encoding.py b/html5lib/tests/test_encoding.py index d774ce0f..1653610a 100644 --- a/html5lib/tests/test_encoding.py +++ b/html5lib/tests/test_encoding.py @@ -8,6 +8,8 @@ except AttributeError: unittest.TestCase.assertEqual = unittest.TestCase.assertEquals +from nose.plugins.skip import SkipTest + from .support import get_data_files, TestData, test_dir, errorMessage from html5lib import HTMLParser, inputstream @@ -41,7 +43,7 @@ def runPreScanEncodingTest(data, encoding): # Very crude way to ignore irrelevant tests if len(data) > stream.numBytesMeta: - return + raise SkipTest() assert encoding == stream.charEncoding[0], errorMessage(data, encoding, stream.charEncoding[0]) diff --git a/html5lib/tests/test_parser.py b/html5lib/tests/test_parser.py index 230cdb42..13a52e67 100644 --- a/html5lib/tests/test_parser.py +++ b/html5lib/tests/test_parser.py @@ -8,8 +8,11 @@ warnings.simplefilter("error") +from nose.plugins.skip import SkipTest + from .support import get_data_files from .support import TestData, convert, convertExpected, treeTypes +from .support import xfail from html5lib import html5parser, constants # Run the parse error checks @@ -26,7 +29,11 @@ def convertTreeDump(data): def runParserTest(innerHTML, input, expected, errors, treeClass, - namespaceHTMLElements): + namespaceHTMLElements, scriptingDisabled): + if scriptingDisabled: + # We don't support the scripting disabled case! + raise SkipTest() + with warnings.catch_warnings(record=True) as caughtWarnings: warnings.simplefilter("always") p = html5parser.HTMLParser(tree=treeClass, @@ -46,7 +53,7 @@ def runParserTest(innerHTML, input, expected, errors, treeClass, if not issubclass(x.category, constants.DataLossWarning)] assert len(otherWarnings) == 0, [(x.category, x.message) for x in otherWarnings] if len(caughtWarnings): - return + raise SkipTest() output = convertTreeDump(p.tree.testSerializer(document)) @@ -68,13 +75,27 @@ def runParserTest(innerHTML, input, expected, errors, treeClass, "\nExpected errors (" + str(len(errors)) + "):\n" + "\n".join(errors), "\nActual errors (" + str(len(p.errors)) + "):\n" + "\n".join(errStr)]) if checkParseErrors: - assert len(p.errors) == len(errors), errorMsg2 + assert len(p.errors) == len(errors), errorMsg2 + + +@xfail +def xfailRunParserTest(*args, **kwargs): + return runParserTest(*args, **kwargs) def test_parser(): + # Testin' sys.stderr.write('Testing tree builders ' + " ".join(list(treeTypes.keys())) + "\n") - files = get_data_files('tree-construction') + # Get xfails + filename = os.path.join(os.path.split(__file__)[0], + "expected-failures", + "tree-construction.dat") + xfails = TestData(filename, "data") + xfails = frozenset([x["data"] for x in xfails]) + + # Get the tests + files = get_data_files('tree-construction') for filename in files: testName = os.path.basename(filename).replace(".dat", "") if testName in ("template",): @@ -84,13 +105,25 @@ def test_parser(): for index, test in enumerate(tests): input, errors, innerHTML, expected = [test[key] for key in - ('data', 'errors', + ('data', + 'errors', 'document-fragment', 'document')] + if errors: errors = errors.split("\n") + assert not ("script-off" in test and "script-on" in test), \ + ("The following test has scripting enabled" + + "and disabled all at once: %s in %s" % (input, filename)) + + scriptingDisabled = "script-off" in test + for treeName, treeCls in treeTypes.items(): for namespaceHTMLElements in (True, False): - yield (runParserTest, innerHTML, input, expected, errors, treeCls, - namespaceHTMLElements) + if input in xfails: + testFunc = xfailRunParserTest + else: + testFunc = runParserTest + yield (testFunc, innerHTML, input, expected, errors, treeCls, + namespaceHTMLElements, scriptingDisabled) diff --git a/html5lib/tests/test_tokenizer.py b/html5lib/tests/test_tokenizer.py index 90315ab3..9de5fb70 100644 --- a/html5lib/tests/test_tokenizer.py +++ b/html5lib/tests/test_tokenizer.py @@ -3,8 +3,9 @@ import json import warnings import re +import os -from .support import get_data_files +from .support import get_data_files, TestData, xfail from html5lib.tokenizer import HTMLTokenizer from html5lib import constants @@ -107,6 +108,7 @@ def tokensMatch(expectedTokens, receivedTokens, ignoreErrorOrder, token.pop() if not ignoreErrorOrder and not ignoreErrors: + expectedTokens = concatenateCharacterTokens(expectedTokens) return expectedTokens == receivedTokens else: # Sort the tokens into two groups; non-parse errors and parse errors @@ -119,6 +121,7 @@ def tokensMatch(expectedTokens, receivedTokens, ignoreErrorOrder, else: if not ignoreErrors: tokens[tokenType][1].append(token) + tokens[tokenType][0] = concatenateCharacterTokens(tokens[tokenType][0]) return tokens["expected"] == tokens["received"] @@ -143,13 +146,12 @@ def runTokenizerTest(test): warnings.resetwarnings() warnings.simplefilter("error") - expected = concatenateCharacterTokens(test['output']) + expected = test['output'] if 'lastStartTag' not in test: test['lastStartTag'] = None parser = TokenizerTestParser(test['initialState'], test['lastStartTag']) tokens = parser.parse(test['input']) - tokens = concatenateCharacterTokens(tokens) received = normalizeTokens(tokens) errorMsg = "\n".join(["\n\nInitial state:", test['initialState'], @@ -161,6 +163,11 @@ def runTokenizerTest(test): assert tokensMatch(expected, received, ignoreErrorOrder, True), errorMsg +@xfail +def xfailRunTokenizerTest(*args, **kwargs): + return runTokenizerTest(*args, **kwargs) + + def _doCapitalize(match): return match.group(1).upper() @@ -174,6 +181,14 @@ def capitalize(s): def testTokenizer(): + # Get xfails + filename = os.path.join(os.path.split(__file__)[0], + "expected-failures", + "tokenizer.dat") + xfails = TestData(filename, "data") + xfails = frozenset([x["data"] for x in xfails]) + + # Get tests for filename in get_data_files('tokenizer', '*.test'): with open(filename) as fp: tests = json.load(fp) @@ -185,4 +200,8 @@ def testTokenizer(): test = unescape(test) for initialState in test["initialStates"]: test["initialState"] = capitalize(initialState) - yield runTokenizerTest, test + if test['input'] in xfails: + testFunc = xfailRunTokenizerTest + else: + testFunc = runTokenizerTest + yield testFunc, test diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py index b7756035..4458ea0c 100644 --- a/html5lib/tests/test_treewalkers.py +++ b/html5lib/tests/test_treewalkers.py @@ -11,7 +11,9 @@ except AttributeError: unittest.TestCase.assertEqual = unittest.TestCase.assertEquals -from .support import get_data_files, TestData, convertExpected +from nose.plugins.skip import SkipTest + +from .support import get_data_files, TestData, convertExpected, xfail from html5lib import html5parser, treewalkers, treebuilders, constants @@ -250,7 +252,11 @@ def test_all_tokens(self): self.assertEqual(expectedToken, outputToken) -def runTreewalkerTest(innerHTML, input, expected, errors, treeClass): +def runTreewalkerTest(innerHTML, input, expected, errors, treeClass, scriptingDisabled): + if scriptingDisabled: + # We don't support the scripting disabled case! + raise SkipTest() + warnings.resetwarnings() warnings.simplefilter("error") try: @@ -261,7 +267,7 @@ def runTreewalkerTest(innerHTML, input, expected, errors, treeClass): document = p.parse(input) except constants.DataLossWarning: # Ignore testcases we know we don't pass - return + raise SkipTest() document = treeClass.get("adapter", lambda x: x)(document) try: @@ -278,12 +284,25 @@ def runTreewalkerTest(innerHTML, input, expected, errors, treeClass): "", "Diff:", diff, ]) except NotImplementedError: - pass # Amnesty for those that confess... + raise SkipTest() # Amnesty for those that confess... + + +@xfail +def xfailRunTreewalkerTest(*args, **kwargs): + return runTreewalkerTest(*args, **kwargs) def test_treewalker(): sys.stdout.write('Testing tree walkers ' + " ".join(list(treeTypes.keys())) + "\n") + # Get xfails + filename = os.path.join(os.path.split(__file__)[0], + "expected-failures", + "tree-construction.dat") + xfails = TestData(filename, "data") + xfails = frozenset([x["data"] for x in xfails]) + + # Get the tests for treeName, treeCls in treeTypes.items(): files = get_data_files('tree-construction') for filename in files: @@ -299,7 +318,18 @@ def test_treewalker(): "document-fragment", "document")] errors = errors.split("\n") - yield runTreewalkerTest, innerHTML, input, expected, errors, treeCls + + assert not ("script-off" in test and "script-on" in test), \ + ("The following test has scripting enabled" + + "and disabled all at once: %s in %s" % (input, filename)) + + scriptingDisabled = "script-off" in test + + if input in xfails: + testFunc = xfailRunTreewalkerTest + else: + testFunc = runTreewalkerTest + yield testFunc, innerHTML, input, expected, errors, treeCls, scriptingDisabled def set_attribute_on_first_child(docfrag, name, value, treeName): diff --git a/html5lib/tests/testdata b/html5lib/tests/testdata index a9badff0..11aec478 160000 --- a/html5lib/tests/testdata +++ b/html5lib/tests/testdata @@ -1 +1 @@ -Subproject commit a9badff0cd2fe337170769d42ca2df5e96d30f97 +Subproject commit 11aec478545744fe89ba17bac70fbcacdd76922b diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py index 35d08efa..fed37176 100644 --- a/html5lib/treebuilders/etree_lxml.py +++ b/html5lib/treebuilders/etree_lxml.py @@ -79,7 +79,7 @@ def serializeElement(element, indent=0): next_element = next_element.getnext() elif isinstance(element, str) or isinstance(element, bytes): # Text in a fragment - assert isinstance(element, str) or sys.version_info.major == 2 + assert isinstance(element, str) or sys.version_info[0] == 2 rv.append("|%s\"%s\"" % (' ' * indent, element)) else: # Fragment case pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy