Skip to content

Commit 0fdd819

Browse files
committed
Add some parser benchmarks
These tests are based on two things: some from WPT, and some based on Anolis
1 parent d49afd3 commit 0fdd819

36 files changed

+8220
-0
lines changed

benchmarks/bench_html.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import io
2+
import os
3+
import sys
4+
5+
import pyperf
6+
7+
sys.path[0:0] = [os.path.join(os.path.dirname(__file__), "..")]
8+
import html5lib # noqa: E402
9+
10+
11+
def bench_parse(fh, treebuilder):
12+
fh.seek(0)
13+
html5lib.parse(fh, treebuilder=treebuilder, useChardet=False)
14+
15+
16+
def bench_serialize(loops, fh, treebuilder):
17+
fh.seek(0)
18+
doc = html5lib.parse(fh, treebuilder=treebuilder, useChardet=False)
19+
20+
range_it = range(loops)
21+
t0 = pyperf.perf_counter()
22+
23+
for loops in range_it:
24+
html5lib.serialize(doc, tree=treebuilder, encoding="ascii", inject_meta_charset=False)
25+
26+
return pyperf.perf_counter() - t0
27+
28+
29+
BENCHMARKS = ["parse", "serialize"]
30+
31+
32+
def add_cmdline_args(cmd, args):
33+
if args.benchmark:
34+
cmd.append(args.benchmark)
35+
36+
37+
if __name__ == "__main__":
38+
runner = pyperf.Runner(add_cmdline_args=add_cmdline_args)
39+
runner.metadata["description"] = "Run benchmarks based on Anolis"
40+
runner.argparser.add_argument("benchmark", nargs="?", choices=BENCHMARKS)
41+
42+
args = runner.parse_args()
43+
if args.benchmark:
44+
benchmarks = (args.benchmark,)
45+
else:
46+
benchmarks = BENCHMARKS
47+
48+
with open(os.path.join(os.path.dirname(__file__), "data", "html.html"), "rb") as fh:
49+
source = io.BytesIO(fh.read())
50+
51+
if "parse" in benchmarks:
52+
for tb in ("etree", "dom", "lxml"):
53+
runner.bench_func("html_parse_%s" % tb, bench_parse, source, tb)
54+
55+
if "serialize" in benchmarks:
56+
for tb in ("etree", "dom", "lxml"):
57+
runner.bench_time_func("html_serialize_%s" % tb, bench_serialize, source, tb)

benchmarks/bench_wpt.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
import io
2+
import os
3+
import sys
4+
5+
import pyperf
6+
7+
sys.path[0:0] = [os.path.join(os.path.dirname(__file__), "..")]
8+
import html5lib # noqa: E402
9+
10+
11+
def bench_html5lib(fh):
12+
fh.seek(0)
13+
html5lib.parse(fh, treebuilder="etree", useChardet=False)
14+
15+
16+
def add_cmdline_args(cmd, args):
17+
if args.benchmark:
18+
cmd.append(args.benchmark)
19+
20+
21+
BENCHMARKS = {}
22+
for root, dirs, files in os.walk(os.path.join(os.path.dirname(os.path.abspath(__file__)), "data", "wpt")):
23+
for f in files:
24+
if f.endswith(".html"):
25+
BENCHMARKS[f[: -len(".html")]] = os.path.join(root, f)
26+
27+
28+
if __name__ == "__main__":
29+
runner = pyperf.Runner(add_cmdline_args=add_cmdline_args)
30+
runner.metadata["description"] = "Run parser benchmarks from WPT"
31+
runner.argparser.add_argument("benchmark", nargs="?", choices=sorted(BENCHMARKS))
32+
33+
args = runner.parse_args()
34+
if args.benchmark:
35+
benchmarks = (args.benchmark,)
36+
else:
37+
benchmarks = sorted(BENCHMARKS)
38+
39+
for bench in benchmarks:
40+
name = "wpt_%s" % bench
41+
path = BENCHMARKS[bench]
42+
with open(path, "rb") as fh:
43+
fh2 = io.BytesIO(fh.read())
44+
45+
runner.bench_func(name, bench_html5lib, fh2)

benchmarks/data/README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
The files in this data are derived from:
2+
3+
* `html.html`: from [html](http://github.com/whatwg/html), revision
4+
77db356a293f2b152b648c836b6989d17afe42bb. This is the first 5000 lines of `source`. (This is
5+
representative of the input to [Anolis](https://bitbucket.org/ms2ger/anolis/); first 5000 lines
6+
chosen to make it parse in a reasonable time.)
7+
8+
* `wpt`: see `wpt/README.md`.

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy