Skip to content

Commit e11ed5a

Browse files
author
James Graham
committed
Add support for logging to parse.py
1 parent d34b631 commit e11ed5a

File tree

1 file changed

+39
-33
lines changed

1 file changed

+39
-33
lines changed

parse.py

Lines changed: 39 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66

77
import sys
88
import os
9+
import traceback
910
from optparse import OptionParser
1011

1112
from html5lib import html5parser, sanitizer
@@ -48,10 +49,7 @@ def parse():
4849
else:
4950
tokenizer = HTMLTokenizer
5051

51-
if opts.log:
52-
html5parser.debug_log = True
53-
54-
p = html5parser.HTMLParser(tree=treebuilder, tokenizer=tokenizer)
52+
p = html5parser.HTMLParser(tree=treebuilder, tokenizer=tokenizer, debug=opts.log)
5553

5654
if opts.fragment:
5755
parseMethod = p.parseFragment
@@ -73,46 +71,54 @@ def parse():
7371
elif opts.time:
7472
import time
7573
t0 = time.time()
76-
document = parseMethod(f, encoding=encoding)
74+
document = run(parseMethod, f, encoding)
7775
t1 = time.time()
7876
printOutput(p, document, opts)
7977
t2 = time.time()
8078
sys.stderr.write("\n\nRun took: %fs (plus %fs to print the output)"%(t1-t0, t2-t1))
8179
else:
82-
document = parseMethod(f, encoding=encoding)
80+
document = run(parseMethod, f, encoding)
8381
printOutput(p, document, opts)
8482

83+
def run(parseMethod, f, encoding):
84+
try:
85+
document = parseMethod(f, encoding=encoding)
86+
except:
87+
document = None
88+
traceback.print_exc()
89+
return document
90+
8591
def printOutput(parser, document, opts):
8692
if opts.encoding:
8793
print "Encoding:", parser.tokenizer.stream.charEncoding
8894

89-
if opts.log:
90-
for item in parser.log:
91-
print item
92-
93-
if opts.xml:
94-
sys.stdout.write(document.toxml("utf-8"))
95-
elif opts.tree:
96-
if not hasattr(document,'__getitem__'):
97-
document = [document]
98-
for fragment in document:
99-
print parser.tree.testSerializer(fragment).encode("utf-8")
100-
elif opts.hilite:
101-
sys.stdout.write(document.hilite("utf-8"))
102-
elif opts.html:
103-
kwargs = {}
104-
for opt in serializer.HTMLSerializer.options:
105-
try:
106-
kwargs[opt] = getattr(opts,opt)
107-
except:
108-
pass
109-
if not kwargs['quote_char']:
110-
del kwargs['quote_char']
111-
112-
tokens = treewalkers.getTreeWalker(opts.treebuilder)(document)
113-
for text in serializer.HTMLSerializer(**kwargs).serialize(tokens, encoding='utf-8'):
114-
sys.stdout.write(text)
115-
if not text.endswith('\n'): sys.stdout.write('\n')
95+
for item in parser.log:
96+
print item
97+
98+
if document is not None:
99+
if opts.xml:
100+
sys.stdout.write(document.toxml("utf-8"))
101+
elif opts.tree:
102+
if not hasattr(document,'__getitem__'):
103+
document = [document]
104+
for fragment in document:
105+
print parser.tree.testSerializer(fragment).encode("utf-8")
106+
elif opts.hilite:
107+
sys.stdout.write(document.hilite("utf-8"))
108+
elif opts.html:
109+
kwargs = {}
110+
for opt in serializer.HTMLSerializer.options:
111+
try:
112+
kwargs[opt] = getattr(opts,opt)
113+
except:
114+
pass
115+
if not kwargs['quote_char']:
116+
del kwargs['quote_char']
117+
118+
tokens = treewalkers.getTreeWalker(opts.treebuilder)(document)
119+
for text in serializer.HTMLSerializer(**kwargs).serialize(tokens, encoding='utf-8'):
120+
sys.stdout.write(text)
121+
if not text.endswith('\n'): sys.stdout.write('\n')
116122
if opts.error:
117123
errList=[]
118124
for pos, errorcode, datavars in parser.errors:

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy