diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index 8c6e30f1..315e95bf 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -10,6 +10,10 @@ jobs:
   deploy:
 
     runs-on: ubuntu-latest
+    permissions:
+      # IMPORTANT: this permission is mandatory for trusted publishing
+      id-token: write
+
     steps:
     - uses: actions/checkout@v3
     - name: Set up Python
@@ -27,6 +31,3 @@ jobs:
     - name: Publish distribution 📦 to PyPI
       if: startsWith(github.event.ref, 'refs/tags') || github.event_name == 'release'
       uses: pypa/gh-action-pypi-publish@release/v1
-      with:
-        user: __token__
-        password: ${{ secrets.PYPI_API_TOKEN }}
diff --git a/MANIFEST.in b/MANIFEST.in
index f41e2b99..4db9078b 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -16,5 +16,6 @@ include pyperformance/data-files/benchmarks/bm_*/*.toml
 include pyperformance/data-files/benchmarks/bm_*/*.py
 include pyperformance/data-files/benchmarks/bm_*/requirements.txt
 include pyperformance/data-files/benchmarks/bm_*/*.pem
+recursive-include pyperformance/data-files/benchmarks/bm_*/vendor *
 recursive-include pyperformance/data-files/benchmarks/bm_*/data *
 recursive-exclude pyperformance/tests *
diff --git a/doc/changelog.rst b/doc/changelog.rst
index 17dbb6ed..34eae5ed 100644
--- a/doc/changelog.rst
+++ b/doc/changelog.rst
@@ -1,6 +1,11 @@
 Changelog
 =========
 
+Version 1.0.9 (2023-06-14)
+--------------------------
+* Vendor lib2to3 for Python 3.13+
+* Add TaskGroups variants to async_tree benchmarks
+
 Version 1.0.8 (2023-06-02)
 --------------------------
 
diff --git a/pyperformance/__init__.py b/pyperformance/__init__.py
index 296e6539..92479b50 100644
--- a/pyperformance/__init__.py
+++ b/pyperformance/__init__.py
@@ -2,7 +2,7 @@
 
 import sys
 
 
-VERSION = (1, 0, 8)
+VERSION = (1, 0, 9)
 __version__ = '.'.join(map(str, VERSION))
 
diff --git a/pyperformance/data-files/benchmarks/MANIFEST b/pyperformance/data-files/benchmarks/MANIFEST
index 0a338e44..4420f98b 100644
--- a/pyperformance/data-files/benchmarks/MANIFEST
+++ b/pyperformance/data-files/benchmarks/MANIFEST
@@ -11,6 +11,14 @@ async_tree_eager
 async_tree_eager_cpu_io_mixed
 async_tree_eager_io
 async_tree_eager_memoization
+async_tree_tg
+async_tree_cpu_io_mixed_tg
+async_tree_io_tg
+async_tree_memoization_tg
+async_tree_eager_tg
+async_tree_eager_cpu_io_mixed_tg
+async_tree_eager_io_tg
+async_tree_eager_memoization_tg
 asyncio_tcp
 asyncio_tcp_ssl
 concurrent_imap
@@ -82,6 +90,7 @@
 xml_etree
 
 #[groups]
+#asyncio
 #startup
 #regex
 #serialize
diff --git a/pyperformance/data-files/benchmarks/bm_2to3/pyproject.toml b/pyperformance/data-files/benchmarks/bm_2to3/pyproject.toml
index f35eb568..291c1d69 100644
--- a/pyperformance/data-files/benchmarks/bm_2to3/pyproject.toml
+++ b/pyperformance/data-files/benchmarks/bm_2to3/pyproject.toml
@@ -1,3 +1,7 @@
+[build-system]
+requires = ["setuptools>=61"]
+build-backend = "setuptools.build_meta"
+
 [project]
 name = "pyperformance_bm_2to3"
 requires-python = ">=3.8"
diff --git a/pyperformance/data-files/benchmarks/bm_2to3/run_benchmark.py b/pyperformance/data-files/benchmarks/bm_2to3/run_benchmark.py
index 71d3ee94..93abe364 100644
--- a/pyperformance/data-files/benchmarks/bm_2to3/run_benchmark.py
+++ b/pyperformance/data-files/benchmarks/bm_2to3/run_benchmark.py
@@ -1,6 +1,7 @@
 import glob
 import os.path
 import sys
+import subprocess
 
 import pyperf
 
@@ -13,6 +14,12 @@
 
     datadir = os.path.join(os.path.dirname(__file__), 'data', '2to3')
     pyfiles = glob.glob(os.path.join(datadir, '*.py.txt'))
-    command = [sys.executable, "-m", "lib2to3", "-f", "all"] + pyfiles
+    command = [sys.executable, "-m", "lib2to3", "-f", "all"] + pyfiles
+
+    try:
+        import lib2to3
+    except ModuleNotFoundError:
+        vendor = os.path.join(os.path.dirname(__file__), 'vendor')
+        subprocess.run([sys.executable, "-m", "pip", "install", vendor], check=True)
 
     runner.bench_command('2to3', command)
diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/LICENSE.txt b/pyperformance/data-files/benchmarks/bm_2to3/vendor/LICENSE.txt
new file mode 100644
index 00000000..5fe8a31f
--- /dev/null
+++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/LICENSE.txt
@@ -0,0 +1,48 @@
+PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
+--------------------------------------------
+
+1. This LICENSE AGREEMENT is between the Python Software Foundation
+("PSF"), and the Individual or Organization ("Licensee") accessing and
+otherwise using this software ("Python") in source or binary form and
+its associated documentation.
+
+2. Subject to the terms and conditions of this License Agreement, PSF hereby
+grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce,
+analyze, test, perform and/or display publicly, prepare derivative works,
+distribute, and otherwise use Python alone or in any derivative version,
+provided, however, that PSF's License Agreement and PSF's notice of copyright,
+i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
+2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023 Python Software Foundation;
+All Rights Reserved" are retained in Python alone or in any derivative version
+prepared by Licensee.
+
+3. In the event Licensee prepares a derivative work that is based on
+or incorporates Python or any part thereof, and wants to make
+the derivative work available to others as provided herein, then
+Licensee hereby agrees to include in any such work a brief summary of
+the changes made to Python.
+
+4. PSF is making Python available to Licensee on an "AS IS"
+basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
+IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
+DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
+FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT
+INFRINGE ANY THIRD PARTY RIGHTS.
+
+5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
+FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
+A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON,
+OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
+
+6. This License Agreement will automatically terminate upon a material
+breach of its terms and conditions.
+
+7. Nothing in this License Agreement shall be deemed to create any
+relationship of agency, partnership, or joint venture between PSF and
+Licensee. This License Agreement does not grant permission to use PSF
+trademarks or trade name in a trademark sense to endorse or promote
+products or services of Licensee, or any third party.
+
+8. By copying, installing or otherwise using Python, Licensee
+agrees to be bound by the terms and conditions of this License
+Agreement.
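
A note on the bm_2to3 change above: `runner.bench_command` measures `python -m lib2to3 ...` in a child process, so a `pip install` performed while the benchmark script sets itself up is visible to the measured command. The sketch below is not part of the patch; it just shows the same probe-and-install fallback in isolation, assuming the `vendor/` layout this PR adds:

```python
import os.path
import subprocess
import sys

# Path of the vendored lib2to3 source tree (assumed layout:
# bm_2to3/vendor/pyproject.toml, as added by this patch).
VENDOR = os.path.join(os.path.dirname(__file__), 'vendor')

try:
    import lib2to3  # stdlib copy, available through Python 3.12
except ModuleNotFoundError:
    # Python 3.13+ removed lib2to3 from the stdlib; install the vendored
    # copy so "python -m lib2to3" works in the benchmark subprocess.
    subprocess.run([sys.executable, "-m", "pip", "install", VENDOR], check=True)
```

Because the vendored package's metadata (below) declares `requires-python = ">=3.13.0a0"`, pip will only install it on interpreters where the stdlib copy is actually gone.
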
diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/pyproject.toml b/pyperformance/data-files/benchmarks/bm_2to3/vendor/pyproject.toml new file mode 100644 index 00000000..2559b1e4 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/pyproject.toml @@ -0,0 +1,19 @@ +[build-system] +requires = ["setuptools>=61"] +build-backend = "setuptools.build_meta" + +[project] +name = "lib2to3" +version = "3.12" +description = "lib2to3 for Python 3.13+" +requires-python = ">=3.13.0a0" +license = {text = "PSF-2.0"} +classifiers = [ + "License :: OSI Approved :: Python Software Foundation License", +] + +[tool.setuptools.package-data] +"*" = ["*.txt"] + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/Grammar.txt b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/Grammar.txt new file mode 100644 index 00000000..fa7b1506 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/Grammar.txt @@ -0,0 +1,196 @@ +# Grammar for 2to3. This grammar supports Python 2.x and 3.x. + +# NOTE WELL: You should also follow all the steps listed at +# https://devguide.python.org/grammar/ + +# Start symbols for the grammar: +# file_input is a module or sequence of commands read from an input file; +# single_input is a single interactive statement; +# eval_input is the input for the eval() and input() functions. +# NB: compound_stmt in single_input is followed by extra NEWLINE! +file_input: (NEWLINE | stmt)* ENDMARKER +single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE +eval_input: testlist NEWLINE* ENDMARKER + +decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE +decorators: decorator+ +decorated: decorators (classdef | funcdef | async_funcdef) +async_funcdef: ASYNC funcdef +funcdef: 'def' NAME parameters ['->' test] ':' suite +parameters: '(' [typedargslist] ')' + +# The following definition for typedarglist is equivalent to this set of rules: +# +# arguments = argument (',' argument)* +# argument = tfpdef ['=' test] +# kwargs = '**' tname [','] +# args = '*' [tname] +# kwonly_kwargs = (',' argument)* [',' [kwargs]] +# args_kwonly_kwargs = args kwonly_kwargs | kwargs +# poskeyword_args_kwonly_kwargs = arguments [',' [args_kwonly_kwargs]] +# typedargslist_no_posonly = poskeyword_args_kwonly_kwargs | args_kwonly_kwargs +# typedarglist = arguments ',' '/' [',' [typedargslist_no_posonly]])|(typedargslist_no_posonly)" +# +# It needs to be fully expanded to allow our LL(1) parser to work on it. 
+ +typedargslist: tfpdef ['=' test] (',' tfpdef ['=' test])* ',' '/' [ + ',' [((tfpdef ['=' test] ',')* ('*' [tname] (',' tname ['=' test])* + [',' ['**' tname [',']]] | '**' tname [',']) + | tfpdef ['=' test] (',' tfpdef ['=' test])* [','])] + ] | ((tfpdef ['=' test] ',')* ('*' [tname] (',' tname ['=' test])* + [',' ['**' tname [',']]] | '**' tname [',']) + | tfpdef ['=' test] (',' tfpdef ['=' test])* [',']) + +tname: NAME [':' test] +tfpdef: tname | '(' tfplist ')' +tfplist: tfpdef (',' tfpdef)* [','] + +# The following definition for varargslist is equivalent to this set of rules: +# +# arguments = argument (',' argument )* +# argument = vfpdef ['=' test] +# kwargs = '**' vname [','] +# args = '*' [vname] +# kwonly_kwargs = (',' argument )* [',' [kwargs]] +# args_kwonly_kwargs = args kwonly_kwargs | kwargs +# poskeyword_args_kwonly_kwargs = arguments [',' [args_kwonly_kwargs]] +# vararglist_no_posonly = poskeyword_args_kwonly_kwargs | args_kwonly_kwargs +# varargslist = arguments ',' '/' [','[(vararglist_no_posonly)]] | (vararglist_no_posonly) +# +# It needs to be fully expanded to allow our LL(1) parser to work on it. + +varargslist: vfpdef ['=' test ](',' vfpdef ['=' test])* ',' '/' [',' [ + ((vfpdef ['=' test] ',')* ('*' [vname] (',' vname ['=' test])* + [',' ['**' vname [',']]] | '**' vname [',']) + | vfpdef ['=' test] (',' vfpdef ['=' test])* [',']) + ]] | ((vfpdef ['=' test] ',')* + ('*' [vname] (',' vname ['=' test])* [',' ['**' vname [',']]]| '**' vname [',']) + | vfpdef ['=' test] (',' vfpdef ['=' test])* [',']) + +vname: NAME +vfpdef: vname | '(' vfplist ')' +vfplist: vfpdef (',' vfpdef)* [','] + +stmt: simple_stmt | compound_stmt +simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE +small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt | + import_stmt | global_stmt | exec_stmt | assert_stmt) +expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) | + ('=' (yield_expr|testlist_star_expr))*) +annassign: ':' test ['=' test] +testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] +augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | + '<<=' | '>>=' | '**=' | '//=') +# For normal and annotated assignments, additional restrictions enforced by the interpreter +print_stmt: 'print' ( [ test (',' test)* [','] ] | + '>>' test [ (',' test)+ [','] ] ) +del_stmt: 'del' exprlist +pass_stmt: 'pass' +flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt +break_stmt: 'break' +continue_stmt: 'continue' +return_stmt: 'return' [testlist_star_expr] +yield_stmt: yield_expr +raise_stmt: 'raise' [test ['from' test | ',' test [',' test]]] +import_stmt: import_name | import_from +import_name: 'import' dotted_as_names +import_from: ('from' ('.'* dotted_name | '.'+) + 'import' ('*' | '(' import_as_names ')' | import_as_names)) +import_as_name: NAME ['as' NAME] +dotted_as_name: dotted_name ['as' NAME] +import_as_names: import_as_name (',' import_as_name)* [','] +dotted_as_names: dotted_as_name (',' dotted_as_name)* +dotted_name: NAME ('.' 
NAME)* +global_stmt: ('global' | 'nonlocal') NAME (',' NAME)* +exec_stmt: 'exec' expr ['in' test [',' test]] +assert_stmt: 'assert' test [',' test] + +compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt +async_stmt: ASYNC (funcdef | with_stmt | for_stmt) +if_stmt: 'if' namedexpr_test ':' suite ('elif' namedexpr_test ':' suite)* ['else' ':' suite] +while_stmt: 'while' namedexpr_test ':' suite ['else' ':' suite] +for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] +try_stmt: ('try' ':' suite + ((except_clause ':' suite)+ + ['else' ':' suite] + ['finally' ':' suite] | + 'finally' ':' suite)) +with_stmt: 'with' with_item (',' with_item)* ':' suite +with_item: test ['as' expr] +with_var: 'as' expr +# NB compile.c makes sure that the default except clause is last +except_clause: 'except' [test [(',' | 'as') test]] +suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT + +# Backward compatibility cruft to support: +# [ x for x in lambda: True, lambda: False if x() ] +# even while also allowing: +# lambda x: 5 if x else 2 +# (But not a mix of the two) +testlist_safe: old_test [(',' old_test)+ [',']] +old_test: or_test | old_lambdef +old_lambdef: 'lambda' [varargslist] ':' old_test + +namedexpr_test: test [':=' test] +test: or_test ['if' or_test 'else' test] | lambdef +or_test: and_test ('or' and_test)* +and_test: not_test ('and' not_test)* +not_test: 'not' not_test | comparison +comparison: expr (comp_op expr)* +comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' +star_expr: '*' expr +expr: xor_expr ('|' xor_expr)* +xor_expr: and_expr ('^' and_expr)* +and_expr: shift_expr ('&' shift_expr)* +shift_expr: arith_expr (('<<'|'>>') arith_expr)* +arith_expr: term (('+'|'-') term)* +term: factor (('*'|'@'|'/'|'%'|'//') factor)* +factor: ('+'|'-'|'~') factor | power +power: [AWAIT] atom trailer* ['**' factor] +atom: ('(' [yield_expr|testlist_gexp] ')' | + '[' [listmaker] ']' | + '{' [dictsetmaker] '}' | + '`' testlist1 '`' | + NAME | NUMBER | STRING+ | '.' '.' '.') +listmaker: (namedexpr_test|star_expr) ( comp_for | (',' (namedexpr_test|star_expr))* [','] ) +testlist_gexp: (namedexpr_test|star_expr) ( comp_for | (',' (namedexpr_test|star_expr))* [','] ) +lambdef: 'lambda' [varargslist] ':' test +trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME +subscriptlist: subscript (',' subscript)* [','] +subscript: test | [test] ':' [test] [sliceop] +sliceop: ':' [test] +exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] +testlist: test (',' test)* [','] +dictsetmaker: ( ((test ':' test | '**' expr) + (comp_for | (',' (test ':' test | '**' expr))* [','])) | + ((test | star_expr) + (comp_for | (',' (test | star_expr))* [','])) ) + +classdef: 'class' NAME ['(' [arglist] ')'] ':' suite + +arglist: argument (',' argument)* [','] + +# "test '=' test" is really "keyword '=' test", but we have no such token. +# These need to be in a single rule to avoid grammar that is ambiguous +# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr, +# we explicitly match '*' here, too, to give it proper precedence. +# Illegal combinations and orderings are blocked in ast.c: +# multiple (test comp_for) arguments are blocked; keyword unpackings +# that precede iterable unpackings are blocked; etc. 
+argument: ( test [comp_for] | + test ':=' test | + test '=' test | + '**' test | + '*' test ) + +comp_iter: comp_for | comp_if +comp_for: [ASYNC] 'for' exprlist 'in' testlist_safe [comp_iter] +comp_if: 'if' old_test [comp_iter] + +testlist1: test (',' test)* + +# not used in grammar, but may appear in "node" passed from Parser to Compiler +encoding_decl: NAME + +yield_expr: 'yield' [yield_arg] +yield_arg: 'from' test | testlist_star_expr diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/PatternGrammar.txt b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/PatternGrammar.txt new file mode 100644 index 00000000..36bf8148 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/PatternGrammar.txt @@ -0,0 +1,28 @@ +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +# A grammar to describe tree matching patterns. +# Not shown here: +# - 'TOKEN' stands for any token (leaf node) +# - 'any' stands for any node (leaf or interior) +# With 'any' we can still specify the sub-structure. + +# The start symbol is 'Matcher'. + +Matcher: Alternatives ENDMARKER + +Alternatives: Alternative ('|' Alternative)* + +Alternative: (Unit | NegatedUnit)+ + +Unit: [NAME '='] ( STRING [Repeater] + | NAME [Details] [Repeater] + | '(' Alternatives ')' [Repeater] + | '[' Alternatives ']' + ) + +NegatedUnit: 'not' (STRING | NAME [Details] | '(' Alternatives ')') + +Repeater: '*' | '+' | '{' NUMBER [',' NUMBER] '}' + +Details: '<' Alternatives '>' diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/__init__.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/__main__.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/__main__.py new file mode 100644 index 00000000..80688baf --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/__main__.py @@ -0,0 +1,4 @@ +import sys +from .main import main + +sys.exit(main("lib2to3.fixes")) diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/btm_matcher.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/btm_matcher.py new file mode 100644 index 00000000..3b788680 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/btm_matcher.py @@ -0,0 +1,163 @@ +"""A bottom-up tree matching algorithm implementation meant to speed +up 2to3's matching process. After the tree patterns are reduced to +their rarest linear path, a linear Aho-Corasick automaton is +created. The linear automaton traverses the linear paths from the +leaves to the root of the AST and returns a set of nodes for further +matching. This reduces significantly the number of candidate nodes.""" + +__author__ = "George Boutsioukis " + +import logging +import itertools +from collections import defaultdict + +from . import pytree +from .btm_utils import reduce_tree + +class BMNode(object): + """Class for a node of the Aho-Corasick automaton used in matching""" + count = itertools.count() + def __init__(self): + self.transition_table = {} + self.fixers = [] + self.id = next(BMNode.count) + self.content = '' + +class BottomMatcher(object): + """The main matcher class. 
After instantiating the patterns should + be added using the add_fixer method""" + + def __init__(self): + self.match = set() + self.root = BMNode() + self.nodes = [self.root] + self.fixers = [] + self.logger = logging.getLogger("RefactoringTool") + + def add_fixer(self, fixer): + """Reduces a fixer's pattern tree to a linear path and adds it + to the matcher(a common Aho-Corasick automaton). The fixer is + appended on the matching states and called when they are + reached""" + self.fixers.append(fixer) + tree = reduce_tree(fixer.pattern_tree) + linear = tree.get_linear_subpattern() + match_nodes = self.add(linear, start=self.root) + for match_node in match_nodes: + match_node.fixers.append(fixer) + + def add(self, pattern, start): + "Recursively adds a linear pattern to the AC automaton" + #print("adding pattern", pattern, "to", start) + if not pattern: + #print("empty pattern") + return [start] + if isinstance(pattern[0], tuple): + #alternatives + #print("alternatives") + match_nodes = [] + for alternative in pattern[0]: + #add all alternatives, and add the rest of the pattern + #to each end node + end_nodes = self.add(alternative, start=start) + for end in end_nodes: + match_nodes.extend(self.add(pattern[1:], end)) + return match_nodes + else: + #single token + #not last + if pattern[0] not in start.transition_table: + #transition did not exist, create new + next_node = BMNode() + start.transition_table[pattern[0]] = next_node + else: + #transition exists already, follow + next_node = start.transition_table[pattern[0]] + + if pattern[1:]: + end_nodes = self.add(pattern[1:], start=next_node) + else: + end_nodes = [next_node] + return end_nodes + + def run(self, leaves): + """The main interface with the bottom matcher. The tree is + traversed from the bottom using the constructed + automaton. Nodes are only checked once as the tree is + retraversed. When the automaton fails, we give it one more + shot(in case the above tree matches as a whole with the + rejected leaf), then we break for the next leaf. 
There is the + special case of multiple arguments(see code comments) where we + recheck the nodes + + Args: + The leaves of the AST tree to be matched + + Returns: + A dictionary of node matches with fixers as the keys + """ + current_ac_node = self.root + results = defaultdict(list) + for leaf in leaves: + current_ast_node = leaf + while current_ast_node: + current_ast_node.was_checked = True + for child in current_ast_node.children: + # multiple statements, recheck + if isinstance(child, pytree.Leaf) and child.value == ";": + current_ast_node.was_checked = False + break + if current_ast_node.type == 1: + #name + node_token = current_ast_node.value + else: + node_token = current_ast_node.type + + if node_token in current_ac_node.transition_table: + #token matches + current_ac_node = current_ac_node.transition_table[node_token] + for fixer in current_ac_node.fixers: + results[fixer].append(current_ast_node) + else: + #matching failed, reset automaton + current_ac_node = self.root + if (current_ast_node.parent is not None + and current_ast_node.parent.was_checked): + #the rest of the tree upwards has been checked, next leaf + break + + #recheck the rejected node once from the root + if node_token in current_ac_node.transition_table: + #token matches + current_ac_node = current_ac_node.transition_table[node_token] + for fixer in current_ac_node.fixers: + results[fixer].append(current_ast_node) + + current_ast_node = current_ast_node.parent + return results + + def print_ac(self): + "Prints a graphviz diagram of the BM automaton(for debugging)" + print("digraph g{") + def print_node(node): + for subnode_key in node.transition_table.keys(): + subnode = node.transition_table[subnode_key] + print("%d -> %d [label=%s] //%s" % + (node.id, subnode.id, type_repr(subnode_key), str(subnode.fixers))) + if subnode_key == 1: + print(subnode.content) + print_node(subnode) + print_node(self.root) + print("}") + +# taken from pytree.py for debugging; only used by print_ac +_type_reprs = {} +def type_repr(type_num): + global _type_reprs + if not _type_reprs: + from .pygram import python_symbols + # printing tokens is possible but not as useful + # from .pgen2 import token // token.__dict__.items(): + for name, val in python_symbols.__dict__.items(): + if type(val) == int: _type_reprs[val] = name + return _type_reprs.setdefault(type_num, type_num) diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/btm_utils.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/btm_utils.py new file mode 100644 index 00000000..b61afdba --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/btm_utils.py @@ -0,0 +1,280 @@ +"Utility functions used by the btm_matcher module" + +from . import pytree +from .pgen2 import grammar, token +from .pygram import pattern_symbols, python_symbols + +syms = pattern_symbols +pysyms = python_symbols +tokens = grammar.opmap +token_labels = token + +TYPE_ANY = -1 +TYPE_ALTERNATIVES = -2 +TYPE_GROUP = -3 + +class MinNode(object): + """This class serves as an intermediate representation of the + pattern tree during the conversion to sets of leaf-to-root + subpatterns""" + + def __init__(self, type=None, name=None): + self.type = type + self.name = name + self.children = [] + self.leaf = False + self.parent = None + self.alternatives = [] + self.group = [] + + def __repr__(self): + return str(self.type) + ' ' + str(self.name) + + def leaf_to_root(self): + """Internal method. Returns a characteristic path of the + pattern tree. 
This method must be run for all leaves until the + linear subpatterns are merged into a single""" + node = self + subp = [] + while node: + if node.type == TYPE_ALTERNATIVES: + node.alternatives.append(subp) + if len(node.alternatives) == len(node.children): + #last alternative + subp = [tuple(node.alternatives)] + node.alternatives = [] + node = node.parent + continue + else: + node = node.parent + subp = None + break + + if node.type == TYPE_GROUP: + node.group.append(subp) + #probably should check the number of leaves + if len(node.group) == len(node.children): + subp = get_characteristic_subpattern(node.group) + node.group = [] + node = node.parent + continue + else: + node = node.parent + subp = None + break + + if node.type == token_labels.NAME and node.name: + #in case of type=name, use the name instead + subp.append(node.name) + else: + subp.append(node.type) + + node = node.parent + return subp + + def get_linear_subpattern(self): + """Drives the leaf_to_root method. The reason that + leaf_to_root must be run multiple times is because we need to + reject 'group' matches; for example the alternative form + (a | b c) creates a group [b c] that needs to be matched. Since + matching multiple linear patterns overcomes the automaton's + capabilities, leaf_to_root merges each group into a single + choice based on 'characteristic'ity, + + i.e. (a|b c) -> (a|b) if b more characteristic than c + + Returns: The most 'characteristic'(as defined by + get_characteristic_subpattern) path for the compiled pattern + tree. + """ + + for l in self.leaves(): + subp = l.leaf_to_root() + if subp: + return subp + + def leaves(self): + "Generator that returns the leaves of the tree" + for child in self.children: + yield from child.leaves() + if not self.children: + yield self + +def reduce_tree(node, parent=None): + """ + Internal function. Reduces a compiled pattern tree to an + intermediate representation suitable for feeding the + automaton. This also trims off any optional pattern elements(like + [a], a*). 
+ """ + + new_node = None + #switch on the node type + if node.type == syms.Matcher: + #skip + node = node.children[0] + + if node.type == syms.Alternatives : + #2 cases + if len(node.children) <= 2: + #just a single 'Alternative', skip this node + new_node = reduce_tree(node.children[0], parent) + else: + #real alternatives + new_node = MinNode(type=TYPE_ALTERNATIVES) + #skip odd children('|' tokens) + for child in node.children: + if node.children.index(child)%2: + continue + reduced = reduce_tree(child, new_node) + if reduced is not None: + new_node.children.append(reduced) + elif node.type == syms.Alternative: + if len(node.children) > 1: + + new_node = MinNode(type=TYPE_GROUP) + for child in node.children: + reduced = reduce_tree(child, new_node) + if reduced: + new_node.children.append(reduced) + if not new_node.children: + # delete the group if all of the children were reduced to None + new_node = None + + else: + new_node = reduce_tree(node.children[0], parent) + + elif node.type == syms.Unit: + if (isinstance(node.children[0], pytree.Leaf) and + node.children[0].value == '('): + #skip parentheses + return reduce_tree(node.children[1], parent) + if ((isinstance(node.children[0], pytree.Leaf) and + node.children[0].value == '[') + or + (len(node.children)>1 and + hasattr(node.children[1], "value") and + node.children[1].value == '[')): + #skip whole unit if its optional + return None + + leaf = True + details_node = None + alternatives_node = None + has_repeater = False + repeater_node = None + has_variable_name = False + + for child in node.children: + if child.type == syms.Details: + leaf = False + details_node = child + elif child.type == syms.Repeater: + has_repeater = True + repeater_node = child + elif child.type == syms.Alternatives: + alternatives_node = child + if hasattr(child, 'value') and child.value == '=': # variable name + has_variable_name = True + + #skip variable name + if has_variable_name: + #skip variable name, '=' + name_leaf = node.children[2] + if hasattr(name_leaf, 'value') and name_leaf.value == '(': + # skip parenthesis + name_leaf = node.children[3] + else: + name_leaf = node.children[0] + + #set node type + if name_leaf.type == token_labels.NAME: + #(python) non-name or wildcard + if name_leaf.value == 'any': + new_node = MinNode(type=TYPE_ANY) + else: + if hasattr(token_labels, name_leaf.value): + new_node = MinNode(type=getattr(token_labels, name_leaf.value)) + else: + new_node = MinNode(type=getattr(pysyms, name_leaf.value)) + + elif name_leaf.type == token_labels.STRING: + #(python) name or character; remove the apostrophes from + #the string value + name = name_leaf.value.strip("'") + if name in tokens: + new_node = MinNode(type=tokens[name]) + else: + new_node = MinNode(type=token_labels.NAME, name=name) + elif name_leaf.type == syms.Alternatives: + new_node = reduce_tree(alternatives_node, parent) + + #handle repeaters + if has_repeater: + if repeater_node.children[0].value == '*': + #reduce to None + new_node = None + elif repeater_node.children[0].value == '+': + #reduce to a single occurrence i.e. 
do nothing + pass + else: + #TODO: handle {min, max} repeaters + raise NotImplementedError + + #add children + if details_node and new_node is not None: + for child in details_node.children[1:-1]: + #skip '<', '>' markers + reduced = reduce_tree(child, new_node) + if reduced is not None: + new_node.children.append(reduced) + if new_node: + new_node.parent = parent + return new_node + + +def get_characteristic_subpattern(subpatterns): + """Picks the most characteristic from a list of linear patterns + Current order used is: + names > common_names > common_chars + """ + if not isinstance(subpatterns, list): + return subpatterns + if len(subpatterns)==1: + return subpatterns[0] + + # first pick out the ones containing variable names + subpatterns_with_names = [] + subpatterns_with_common_names = [] + common_names = ['in', 'for', 'if' , 'not', 'None'] + subpatterns_with_common_chars = [] + common_chars = "[]().,:" + for subpattern in subpatterns: + if any(rec_test(subpattern, lambda x: type(x) is str)): + if any(rec_test(subpattern, + lambda x: isinstance(x, str) and x in common_chars)): + subpatterns_with_common_chars.append(subpattern) + elif any(rec_test(subpattern, + lambda x: isinstance(x, str) and x in common_names)): + subpatterns_with_common_names.append(subpattern) + + else: + subpatterns_with_names.append(subpattern) + + if subpatterns_with_names: + subpatterns = subpatterns_with_names + elif subpatterns_with_common_names: + subpatterns = subpatterns_with_common_names + elif subpatterns_with_common_chars: + subpatterns = subpatterns_with_common_chars + # of the remaining subpatterns pick out the longest one + return max(subpatterns, key=len) + +def rec_test(sequence, test_func): + """Tests test_func on all items of sequence and items of included + sub-iterables""" + for x in sequence: + if isinstance(x, (list, tuple)): + yield from rec_test(x, test_func) + else: + yield test_func(x) diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixer_base.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixer_base.py new file mode 100644 index 00000000..df581a4d --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixer_base.py @@ -0,0 +1,186 @@ +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Base class for fixers (optional, but recommended).""" + +# Python imports +import itertools + +# Local imports +from .patcomp import PatternCompiler +from . import pygram +from .fixer_util import does_tree_import + +class BaseFix(object): + + """Optional base class for fixers. + + The subclass name must be FixFooBar where FooBar is the result of + removing underscores and capitalizing the words of the fix name. + For example, the class name for a fixer named 'has_key' should be + FixHasKey. + """ + + PATTERN = None # Most subclasses should override with a string literal + pattern = None # Compiled pattern, set by compile_pattern() + pattern_tree = None # Tree representation of the pattern + options = None # Options object passed to initializer + filename = None # The filename (set by set_filename) + numbers = itertools.count(1) # For new_name() + used_names = set() # A set of all used NAMEs + order = "post" # Does the fixer prefer pre- or post-order traversal + explicit = False # Is this ignored by refactor.py -f all? + run_order = 5 # Fixers will be sorted by run order before execution + # Lower numbers will be run first. 
+ _accept_type = None # [Advanced and not public] This tells RefactoringTool + # which node type to accept when there's not a pattern. + + keep_line_order = False # For the bottom matcher: match with the + # original line order + BM_compatible = False # Compatibility with the bottom matching + # module; every fixer should set this + # manually + + # Shortcut for access to Python grammar symbols + syms = pygram.python_symbols + + def __init__(self, options, log): + """Initializer. Subclass may override. + + Args: + options: a dict containing the options passed to RefactoringTool + that could be used to customize the fixer through the command line. + log: a list to append warnings and other messages to. + """ + self.options = options + self.log = log + self.compile_pattern() + + def compile_pattern(self): + """Compiles self.PATTERN into self.pattern. + + Subclass may override if it doesn't want to use + self.{pattern,PATTERN} in .match(). + """ + if self.PATTERN is not None: + PC = PatternCompiler() + self.pattern, self.pattern_tree = PC.compile_pattern(self.PATTERN, + with_tree=True) + + def set_filename(self, filename): + """Set the filename. + + The main refactoring tool should call this. + """ + self.filename = filename + + def match(self, node): + """Returns match for a given parse tree node. + + Should return a true or false object (not necessarily a bool). + It may return a non-empty dict of matching sub-nodes as + returned by a matching pattern. + + Subclass may override. + """ + results = {"node": node} + return self.pattern.match(node, results) and results + + def transform(self, node, results): + """Returns the transformation for a given parse tree node. + + Args: + node: the root of the parse tree that matched the fixer. + results: a dict mapping symbolic names to part of the match. + + Returns: + None, or a node that is a modified copy of the + argument node. The node argument may also be modified in-place to + effect the same change. + + Subclass *must* override. + """ + raise NotImplementedError() + + def new_name(self, template="xxx_todo_changeme"): + """Return a string suitable for use as an identifier + + The new name is guaranteed not to conflict with other identifiers. + """ + name = template + while name in self.used_names: + name = template + str(next(self.numbers)) + self.used_names.add(name) + return name + + def log_message(self, message): + if self.first_log: + self.first_log = False + self.log.append("### In file %s ###" % self.filename) + self.log.append(message) + + def cannot_convert(self, node, reason=None): + """Warn the user that a given chunk of code is not valid Python 3, + but that it cannot be converted automatically. + + First argument is the top-level node for the code in question. + Optional second argument is why it can't be converted. + """ + lineno = node.get_lineno() + for_output = node.clone() + for_output.prefix = "" + msg = "Line %d: could not convert: %s" + self.log_message(msg % (lineno, for_output)) + if reason: + self.log_message(reason) + + def warning(self, node, reason): + """Used for warning the user about possible uncertainty in the + translation. + + First argument is the top-level node for the code in question. + Optional second argument is why it can't be converted. + """ + lineno = node.get_lineno() + self.log_message("Line %d: %s" % (lineno, reason)) + + def start_tree(self, tree, filename): + """Some fixers need to maintain tree-wide state. + This method is called once, at the start of tree fix-up. 
+ + tree - the root node of the tree to be processed. + filename - the name of the file the tree came from. + """ + self.used_names = tree.used_names + self.set_filename(filename) + self.numbers = itertools.count(1) + self.first_log = True + + def finish_tree(self, tree, filename): + """Some fixers need to maintain tree-wide state. + This method is called once, at the conclusion of tree fix-up. + + tree - the root node of the tree to be processed. + filename - the name of the file the tree came from. + """ + pass + + +class ConditionalFix(BaseFix): + """ Base class for fixers which not execute if an import is found. """ + + # This is the name of the import which, if found, will cause the test to be skipped + skip_on = None + + def start_tree(self, *args): + super(ConditionalFix, self).start_tree(*args) + self._should_skip = None + + def should_skip(self, node): + if self._should_skip is not None: + return self._should_skip + pkg = self.skip_on.split(".") + name = pkg[-1] + pkg = ".".join(pkg[:-1]) + self._should_skip = does_tree_import(pkg, name, node) + return self._should_skip diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixer_util.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixer_util.py new file mode 100644 index 00000000..c2a3a47f --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixer_util.py @@ -0,0 +1,453 @@ +"""Utility functions, node construction macros, etc.""" +# Author: Collin Winter + +# Local imports +from .pgen2 import token +from .pytree import Leaf, Node +from .pygram import python_symbols as syms +from . import patcomp + + +########################################################### +### Common node-construction "macros" +########################################################### + +def KeywordArg(keyword, value): + return Node(syms.argument, + [keyword, Leaf(token.EQUAL, "="), value]) + +def LParen(): + return Leaf(token.LPAR, "(") + +def RParen(): + return Leaf(token.RPAR, ")") + +def Assign(target, source): + """Build an assignment statement""" + if not isinstance(target, list): + target = [target] + if not isinstance(source, list): + source.prefix = " " + source = [source] + + return Node(syms.atom, + target + [Leaf(token.EQUAL, "=", prefix=" ")] + source) + +def Name(name, prefix=None): + """Return a NAME leaf""" + return Leaf(token.NAME, name, prefix=prefix) + +def Attr(obj, attr): + """A node tuple for obj.attr""" + return [obj, Node(syms.trailer, [Dot(), attr])] + +def Comma(): + """A comma leaf""" + return Leaf(token.COMMA, ",") + +def Dot(): + """A period (.) 
leaf""" + return Leaf(token.DOT, ".") + +def ArgList(args, lparen=LParen(), rparen=RParen()): + """A parenthesised argument list, used by Call()""" + node = Node(syms.trailer, [lparen.clone(), rparen.clone()]) + if args: + node.insert_child(1, Node(syms.arglist, args)) + return node + +def Call(func_name, args=None, prefix=None): + """A function call""" + node = Node(syms.power, [func_name, ArgList(args)]) + if prefix is not None: + node.prefix = prefix + return node + +def Newline(): + """A newline literal""" + return Leaf(token.NEWLINE, "\n") + +def BlankLine(): + """A blank line""" + return Leaf(token.NEWLINE, "") + +def Number(n, prefix=None): + return Leaf(token.NUMBER, n, prefix=prefix) + +def Subscript(index_node): + """A numeric or string subscript""" + return Node(syms.trailer, [Leaf(token.LBRACE, "["), + index_node, + Leaf(token.RBRACE, "]")]) + +def String(string, prefix=None): + """A string leaf""" + return Leaf(token.STRING, string, prefix=prefix) + +def ListComp(xp, fp, it, test=None): + """A list comprehension of the form [xp for fp in it if test]. + + If test is None, the "if test" part is omitted. + """ + xp.prefix = "" + fp.prefix = " " + it.prefix = " " + for_leaf = Leaf(token.NAME, "for") + for_leaf.prefix = " " + in_leaf = Leaf(token.NAME, "in") + in_leaf.prefix = " " + inner_args = [for_leaf, fp, in_leaf, it] + if test: + test.prefix = " " + if_leaf = Leaf(token.NAME, "if") + if_leaf.prefix = " " + inner_args.append(Node(syms.comp_if, [if_leaf, test])) + inner = Node(syms.listmaker, [xp, Node(syms.comp_for, inner_args)]) + return Node(syms.atom, + [Leaf(token.LBRACE, "["), + inner, + Leaf(token.RBRACE, "]")]) + +def FromImport(package_name, name_leafs): + """ Return an import statement in the form: + from package import name_leafs""" + # XXX: May not handle dotted imports properly (eg, package_name='foo.bar') + #assert package_name == '.' or '.' not in package_name, "FromImport has "\ + # "not been tested with dotted package names -- use at your own "\ + # "peril!" 
+ + for leaf in name_leafs: + # Pull the leaves out of their old tree + leaf.remove() + + children = [Leaf(token.NAME, "from"), + Leaf(token.NAME, package_name, prefix=" "), + Leaf(token.NAME, "import", prefix=" "), + Node(syms.import_as_names, name_leafs)] + imp = Node(syms.import_from, children) + return imp + +def ImportAndCall(node, results, names): + """Returns an import statement and calls a method + of the module: + + import module + module.name()""" + obj = results["obj"].clone() + if obj.type == syms.arglist: + newarglist = obj.clone() + else: + newarglist = Node(syms.arglist, [obj.clone()]) + after = results["after"] + if after: + after = [n.clone() for n in after] + new = Node(syms.power, + Attr(Name(names[0]), Name(names[1])) + + [Node(syms.trailer, + [results["lpar"].clone(), + newarglist, + results["rpar"].clone()])] + after) + new.prefix = node.prefix + return new + + +########################################################### +### Determine whether a node represents a given literal +########################################################### + +def is_tuple(node): + """Does the node represent a tuple literal?""" + if isinstance(node, Node) and node.children == [LParen(), RParen()]: + return True + return (isinstance(node, Node) + and len(node.children) == 3 + and isinstance(node.children[0], Leaf) + and isinstance(node.children[1], Node) + and isinstance(node.children[2], Leaf) + and node.children[0].value == "(" + and node.children[2].value == ")") + +def is_list(node): + """Does the node represent a list literal?""" + return (isinstance(node, Node) + and len(node.children) > 1 + and isinstance(node.children[0], Leaf) + and isinstance(node.children[-1], Leaf) + and node.children[0].value == "[" + and node.children[-1].value == "]") + + +########################################################### +### Misc +########################################################### + +def parenthesize(node): + return Node(syms.atom, [LParen(), node, RParen()]) + + +consuming_calls = {"sorted", "list", "set", "any", "all", "tuple", "sum", + "min", "max", "enumerate"} + +def attr_chain(obj, attr): + """Follow an attribute chain. + + If you have a chain of objects where a.foo -> b, b.foo-> c, etc, + use this to iterate over all objects in the chain. Iteration is + terminated by getattr(x, attr) is None. + + Args: + obj: the starting object + attr: the name of the chaining attribute + + Yields: + Each successive object in the chain. + """ + next = getattr(obj, attr) + while next: + yield next + next = getattr(next, attr) + +p0 = """for_stmt< 'for' any 'in' node=any ':' any* > + | comp_for< 'for' any 'in' node=any any* > + """ +p1 = """ +power< + ( 'iter' | 'list' | 'tuple' | 'sorted' | 'set' | 'sum' | + 'any' | 'all' | 'enumerate' | (any* trailer< '.' 'join' >) ) + trailer< '(' node=any ')' > + any* +> +""" +p2 = """ +power< + ( 'sorted' | 'enumerate' ) + trailer< '(' arglist ')' > + any* +> +""" +pats_built = False +def in_special_context(node): + """ Returns true if node is in an environment where all that is required + of it is being iterable (ie, it doesn't matter if it returns a list + or an iterator). + See test_map_nochange in test_fixers.py for some examples and tests. 
+ """ + global p0, p1, p2, pats_built + if not pats_built: + p0 = patcomp.compile_pattern(p0) + p1 = patcomp.compile_pattern(p1) + p2 = patcomp.compile_pattern(p2) + pats_built = True + patterns = [p0, p1, p2] + for pattern, parent in zip(patterns, attr_chain(node, "parent")): + results = {} + if pattern.match(parent, results) and results["node"] is node: + return True + return False + +def is_probably_builtin(node): + """ + Check that something isn't an attribute or function name etc. + """ + prev = node.prev_sibling + if prev is not None and prev.type == token.DOT: + # Attribute lookup. + return False + parent = node.parent + if parent.type in (syms.funcdef, syms.classdef): + return False + if parent.type == syms.expr_stmt and parent.children[0] is node: + # Assignment. + return False + if parent.type == syms.parameters or \ + (parent.type == syms.typedargslist and ( + (prev is not None and prev.type == token.COMMA) or + parent.children[0] is node + )): + # The name of an argument. + return False + return True + +def find_indentation(node): + """Find the indentation of *node*.""" + while node is not None: + if node.type == syms.suite and len(node.children) > 2: + indent = node.children[1] + if indent.type == token.INDENT: + return indent.value + node = node.parent + return "" + +########################################################### +### The following functions are to find bindings in a suite +########################################################### + +def make_suite(node): + if node.type == syms.suite: + return node + node = node.clone() + parent, node.parent = node.parent, None + suite = Node(syms.suite, [node]) + suite.parent = parent + return suite + +def find_root(node): + """Find the top level namespace.""" + # Scamper up to the top level namespace + while node.type != syms.file_input: + node = node.parent + if not node: + raise ValueError("root found before file_input node was found.") + return node + +def does_tree_import(package, name, node): + """ Returns true if name is imported from package at the + top level of the tree which node belongs to. + To cover the case of an import like 'import foo', use + None for the package and 'foo' for the name. """ + binding = find_binding(name, find_root(node), package) + return bool(binding) + +def is_import(node): + """Returns true if the node is an import statement.""" + return node.type in (syms.import_name, syms.import_from) + +def touch_import(package, name, node): + """ Works like `does_tree_import` but adds an import statement + if it was not imported. """ + def is_import_stmt(node): + return (node.type == syms.simple_stmt and node.children and + is_import(node.children[0])) + + root = find_root(node) + + if does_tree_import(package, name, root): + return + + # figure out where to insert the new import. First try to find + # the first import and then skip to the last one. + insert_pos = offset = 0 + for idx, node in enumerate(root.children): + if not is_import_stmt(node): + continue + for offset, node2 in enumerate(root.children[idx:]): + if not is_import_stmt(node2): + break + insert_pos = idx + offset + break + + # if there are no imports where we can insert, find the docstring. 
+ # if that also fails, we stick to the beginning of the file + if insert_pos == 0: + for idx, node in enumerate(root.children): + if (node.type == syms.simple_stmt and node.children and + node.children[0].type == token.STRING): + insert_pos = idx + 1 + break + + if package is None: + import_ = Node(syms.import_name, [ + Leaf(token.NAME, "import"), + Leaf(token.NAME, name, prefix=" ") + ]) + else: + import_ = FromImport(package, [Leaf(token.NAME, name, prefix=" ")]) + + children = [import_, Newline()] + root.insert_child(insert_pos, Node(syms.simple_stmt, children)) + + +_def_syms = {syms.classdef, syms.funcdef} +def find_binding(name, node, package=None): + """ Returns the node which binds variable name, otherwise None. + If optional argument package is supplied, only imports will + be returned. + See test cases for examples.""" + for child in node.children: + ret = None + if child.type == syms.for_stmt: + if _find(name, child.children[1]): + return child + n = find_binding(name, make_suite(child.children[-1]), package) + if n: ret = n + elif child.type in (syms.if_stmt, syms.while_stmt): + n = find_binding(name, make_suite(child.children[-1]), package) + if n: ret = n + elif child.type == syms.try_stmt: + n = find_binding(name, make_suite(child.children[2]), package) + if n: + ret = n + else: + for i, kid in enumerate(child.children[3:]): + if kid.type == token.COLON and kid.value == ":": + # i+3 is the colon, i+4 is the suite + n = find_binding(name, make_suite(child.children[i+4]), package) + if n: ret = n + elif child.type in _def_syms and child.children[1].value == name: + ret = child + elif _is_import_binding(child, name, package): + ret = child + elif child.type == syms.simple_stmt: + ret = find_binding(name, child, package) + elif child.type == syms.expr_stmt: + if _find(name, child.children[0]): + ret = child + + if ret: + if not package: + return ret + if is_import(ret): + return ret + return None + +_block_syms = {syms.funcdef, syms.classdef, syms.trailer} +def _find(name, node): + nodes = [node] + while nodes: + node = nodes.pop() + if node.type > 256 and node.type not in _block_syms: + nodes.extend(node.children) + elif node.type == token.NAME and node.value == name: + return node + return None + +def _is_import_binding(node, name, package=None): + """ Will return node if node will import name, or node + will import * from package. None is returned otherwise. + See test cases for examples. """ + + if node.type == syms.import_name and not package: + imp = node.children[1] + if imp.type == syms.dotted_as_names: + for child in imp.children: + if child.type == syms.dotted_as_name: + if child.children[2].value == name: + return node + elif child.type == token.NAME and child.value == name: + return node + elif imp.type == syms.dotted_as_name: + last = imp.children[-1] + if last.type == token.NAME and last.value == name: + return node + elif imp.type == token.NAME and imp.value == name: + return node + elif node.type == syms.import_from: + # str(...) is used to make life easier here, because + # from a.b import parses to ['import', ['a', '.', 'b'], ...] 
+        if package and str(node.children[1]).strip() != package:
+            return None
+        n = node.children[3]
+        if package and _find("as", n):
+            # See test_from_import_as for explanation
+            return None
+        elif n.type == syms.import_as_names and _find(name, n):
+            return node
+        elif n.type == syms.import_as_name:
+            child = n.children[2]
+            if child.type == token.NAME and child.value == name:
+                return node
+        elif n.type == token.NAME and n.value == name:
+            return node
+        elif package and n.type == token.STAR:
+            return node
+    return None
diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/__init__.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/__init__.py
new file mode 100644
index 00000000..b93054b3
--- /dev/null
+++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/__init__.py
@@ -0,0 +1 @@
+# Dummy file to make this directory a package.
diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_apply.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_apply.py
new file mode 100644
index 00000000..6408582c
--- /dev/null
+++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_apply.py
@@ -0,0 +1,69 @@
+# Copyright 2006 Google, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""Fixer for apply().
+
+This converts apply(func, v, k) into (func)(*v, **k)."""
+
+# Local imports
+from .. import pytree
+from ..pgen2 import token
+from .. import fixer_base
+from ..fixer_util import Call, Comma, parenthesize
+
+class FixApply(fixer_base.BaseFix):
+    BM_compatible = True
+
+    PATTERN = """
+    power< 'apply'
+        trailer<
+            '('
+            arglist<
+                (not argument<NAME '=' any>)
+                func=any ','
+                (not argument<NAME '=' any>) args=any [','
+                (not argument<NAME '=' any>) kwds=any] [',']
+            >
+            ')'
+        >
+    >
+    """
+
+    def transform(self, node, results):
+        syms = self.syms
+        assert results
+        func = results["func"]
+        args = results["args"]
+        kwds = results.get("kwds")
+        # I feel like we should be able to express this logic in the
+        # PATTERN above but I don't know how to do it so...
+        if args:
+            if (args.type == self.syms.argument and
+                args.children[0].value in {'**', '*'}):
+                return # Make no change.
+        if kwds and (kwds.type == self.syms.argument and
+                     kwds.children[0].value == '**'):
+            return # Make no change.
+        prefix = node.prefix
+        func = func.clone()
+        if (func.type not in (token.NAME, syms.atom) and
+            (func.type != syms.power or
+             func.children[-2].type == token.DOUBLESTAR)):
+            # Need to parenthesize
+            func = parenthesize(func)
+        func.prefix = ""
+        args = args.clone()
+        args.prefix = ""
+        if kwds is not None:
+            kwds = kwds.clone()
+            kwds.prefix = ""
+        l_newargs = [pytree.Leaf(token.STAR, "*"), args]
+        if kwds is not None:
+            l_newargs.extend([Comma(),
+                              pytree.Leaf(token.DOUBLESTAR, "**"),
+                              kwds])
+            l_newargs[-2].prefix = " " # that's the ** token
+        # XXX Sometimes we could be cleverer, e.g. apply(f, (x, y) + t)
+        # can be translated into f(x, y, *t) instead of f(*(x, y) + t)
+        #new = pytree.Node(syms.power, (func, ArgList(l_newargs)))
+        return Call(func, l_newargs, prefix=prefix)
diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_asserts.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_asserts.py
new file mode 100644
index 00000000..5bcec885
--- /dev/null
+++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_asserts.py
@@ -0,0 +1,34 @@
+"""Fixer that replaces deprecated unittest method names."""
+
+# Author: Ezio Melotti <ezio.melotti@gmail.com>
+
+from ..fixer_base import BaseFix
+from ..fixer_util import Name
+
+NAMES = dict(
+    assert_="assertTrue",
+    assertEquals="assertEqual",
+    assertNotEquals="assertNotEqual",
+    assertAlmostEquals="assertAlmostEqual",
+    assertNotAlmostEquals="assertNotAlmostEqual",
+    assertRegexpMatches="assertRegex",
+    assertRaisesRegexp="assertRaisesRegex",
+    failUnlessEqual="assertEqual",
+    failIfEqual="assertNotEqual",
+    failUnlessAlmostEqual="assertAlmostEqual",
+    failIfAlmostEqual="assertNotAlmostEqual",
+    failUnless="assertTrue",
+    failUnlessRaises="assertRaises",
+    failIf="assertFalse",
+)
+
+
+class FixAsserts(BaseFix):
+
+    PATTERN = """
+    power< any+ trailer< '.' meth=(%s)> any* >
+    """ % '|'.join(map(repr, NAMES))
+
+    def transform(self, node, results):
+        name = results["meth"][0]
+        name.replace(Name(NAMES[str(name)], prefix=name.prefix))
diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_basestring.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_basestring.py
new file mode 100644
index 00000000..5fe69a0f
--- /dev/null
+++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_basestring.py
@@ -0,0 +1,14 @@
+"""Fixer for basestring -> str."""
+# Author: Christian Heimes <c.heimes@cheimes.de>
+
+# Local imports
+from .. import fixer_base
+from ..fixer_util import Name
+
+class FixBasestring(fixer_base.BaseFix):
+    BM_compatible = True
+
+    PATTERN = "'basestring'"
+
+    def transform(self, node, results):
+        return Name("str", prefix=node.prefix)
diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_buffer.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_buffer.py
new file mode 100644
index 00000000..f9a1958a
--- /dev/null
+++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_buffer.py
@@ -0,0 +1,22 @@
+# Copyright 2007 Google, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""Fixer that changes buffer(...) into memoryview(...)."""
+
+# Local imports
+from .. import fixer_base
+from ..fixer_util import Name
+
+
+class FixBuffer(fixer_base.BaseFix):
+    BM_compatible = True
+
+    explicit = True # The user must ask for this fixer
+
+    PATTERN = """
+    power< name='buffer' trailer< '(' [any] ')' > any* >
+    """
+
+    def transform(self, node, results):
+        name = results["name"]
+        name.replace(Name("memoryview", prefix=name.prefix))
diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_dict.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_dict.py
new file mode 100644
index 00000000..d3655c9f
--- /dev/null
+++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_dict.py
@@ -0,0 +1,106 @@
+# Copyright 2007 Google, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""Fixer for dict methods.
+ +d.keys() -> list(d.keys()) +d.items() -> list(d.items()) +d.values() -> list(d.values()) + +d.iterkeys() -> iter(d.keys()) +d.iteritems() -> iter(d.items()) +d.itervalues() -> iter(d.values()) + +d.viewkeys() -> d.keys() +d.viewitems() -> d.items() +d.viewvalues() -> d.values() + +Except in certain very specific contexts: the iter() can be dropped +when the context is list(), sorted(), iter() or for...in; the list() +can be dropped when the context is list() or sorted() (but not iter() +or for...in!). Special contexts that apply to both: list(), sorted(), tuple() +set(), any(), all(), sum(). + +Note: iter(d.keys()) could be written as iter(d) but since the +original d.iterkeys() was also redundant we don't fix this. And there +are (rare) contexts where it makes a difference (e.g. when passing it +as an argument to a function that introspects the argument). +""" + +# Local imports +from .. import pytree +from .. import patcomp +from .. import fixer_base +from ..fixer_util import Name, Call, Dot +from .. import fixer_util + + +iter_exempt = fixer_util.consuming_calls | {"iter"} + + +class FixDict(fixer_base.BaseFix): + BM_compatible = True + + PATTERN = """ + power< head=any+ + trailer< '.' method=('keys'|'items'|'values'| + 'iterkeys'|'iteritems'|'itervalues'| + 'viewkeys'|'viewitems'|'viewvalues') > + parens=trailer< '(' ')' > + tail=any* + > + """ + + def transform(self, node, results): + head = results["head"] + method = results["method"][0] # Extract node for method name + tail = results["tail"] + syms = self.syms + method_name = method.value + isiter = method_name.startswith("iter") + isview = method_name.startswith("view") + if isiter or isview: + method_name = method_name[4:] + assert method_name in ("keys", "items", "values"), repr(method) + head = [n.clone() for n in head] + tail = [n.clone() for n in tail] + special = not tail and self.in_special_context(node, isiter) + args = head + [pytree.Node(syms.trailer, + [Dot(), + Name(method_name, + prefix=method.prefix)]), + results["parens"].clone()] + new = pytree.Node(syms.power, args) + if not (special or isview): + new.prefix = "" + new = Call(Name("iter" if isiter else "list"), [new]) + if tail: + new = pytree.Node(syms.power, [new] + tail) + new.prefix = node.prefix + return new + + P1 = "power< func=NAME trailer< '(' node=any ')' > any* >" + p1 = patcomp.compile_pattern(P1) + + P2 = """for_stmt< 'for' any 'in' node=any ':' any* > + | comp_for< 'for' any 'in' node=any any* > + """ + p2 = patcomp.compile_pattern(P2) + + def in_special_context(self, node, isiter): + if node.parent is None: + return False + results = {} + if (node.parent.parent is not None and + self.p1.match(node.parent.parent, results) and + results["node"] is node): + if isiter: + # iter(d.iterkeys()) -> iter(d.keys()), etc. + return results["func"].value in iter_exempt + else: + # list(d.keys()) -> list(d.keys()), etc. + return results["func"].value in fixer_util.consuming_calls + if not isiter: + return False + # for ... in d.iterkeys() -> for ... in d.keys(), etc. + return self.p2.match(node.parent, results) and results["node"] is node diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_except.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_except.py new file mode 100644 index 00000000..49bd3d5a --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_except.py @@ -0,0 +1,93 @@ +"""Fixer for except statements with named exceptions. 
+ +The following cases will be converted: + +- "except E, T:" where T is a name: + + except E as T: + +- "except E, T:" where T is not a name, tuple or list: + + except E as t: + T = t + + This is done because the target of an "except" clause must be a + name. + +- "except E, T:" where T is a tuple or list literal: + + except E as t: + T = t.args +""" +# Author: Collin Winter + +# Local imports +from .. import pytree +from ..pgen2 import token +from .. import fixer_base +from ..fixer_util import Assign, Attr, Name, is_tuple, is_list, syms + +def find_excepts(nodes): + for i, n in enumerate(nodes): + if n.type == syms.except_clause: + if n.children[0].value == 'except': + yield (n, nodes[i+2]) + +class FixExcept(fixer_base.BaseFix): + BM_compatible = True + + PATTERN = """ + try_stmt< 'try' ':' (simple_stmt | suite) + cleanup=(except_clause ':' (simple_stmt | suite))+ + tail=(['except' ':' (simple_stmt | suite)] + ['else' ':' (simple_stmt | suite)] + ['finally' ':' (simple_stmt | suite)]) > + """ + + def transform(self, node, results): + syms = self.syms + + tail = [n.clone() for n in results["tail"]] + + try_cleanup = [ch.clone() for ch in results["cleanup"]] + for except_clause, e_suite in find_excepts(try_cleanup): + if len(except_clause.children) == 4: + (E, comma, N) = except_clause.children[1:4] + comma.replace(Name("as", prefix=" ")) + + if N.type != token.NAME: + # Generate a new N for the except clause + new_N = Name(self.new_name(), prefix=" ") + target = N.clone() + target.prefix = "" + N.replace(new_N) + new_N = new_N.clone() + + # Insert "old_N = new_N" as the first statement in + # the except body. This loop skips leading whitespace + # and indents + #TODO(cwinter) suite-cleanup + suite_stmts = e_suite.children + for i, stmt in enumerate(suite_stmts): + if isinstance(stmt, pytree.Node): + break + + # The assignment is different if old_N is a tuple or list + # In that case, the assignment is old_N = new_N.args + if is_tuple(N) or is_list(N): + assign = Assign(target, Attr(new_N, Name('args'))) + else: + assign = Assign(target, new_N) + + #TODO(cwinter) stopgap until children becomes a smart list + for child in reversed(suite_stmts[:i]): + e_suite.insert_child(0, child) + e_suite.insert_child(i, assign) + elif N.prefix == "": + # No space after a comma is legal; no space after "as", + # not so much. + N.prefix = " " + + #TODO(cwinter) fix this when children becomes a smart list + children = [c.clone() for c in node.children[:3]] + try_cleanup + tail + return pytree.Node(node.type, children) diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_exec.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_exec.py new file mode 100644 index 00000000..ab921ee8 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_exec.py @@ -0,0 +1,39 @@ +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Fixer for exec. + +This converts usages of the exec statement into calls to a built-in +exec() function. + +exec code in ns1, ns2 -> exec(code, ns1, ns2) +""" + +# Local imports +from .. 
import fixer_base +from ..fixer_util import Comma, Name, Call + + +class FixExec(fixer_base.BaseFix): + BM_compatible = True + + PATTERN = """ + exec_stmt< 'exec' a=any 'in' b=any [',' c=any] > + | + exec_stmt< 'exec' (not atom<'(' [any] ')'>) a=any > + """ + + def transform(self, node, results): + assert results + syms = self.syms + a = results["a"] + b = results.get("b") + c = results.get("c") + args = [a.clone()] + args[0].prefix = "" + if b is not None: + args.extend([Comma(), b.clone()]) + if c is not None: + args.extend([Comma(), c.clone()]) + + return Call(Name("exec"), args, prefix=node.prefix) diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_execfile.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_execfile.py new file mode 100644 index 00000000..b6c786fd --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_execfile.py @@ -0,0 +1,53 @@ +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Fixer for execfile. + +This converts usages of the execfile function into calls to the built-in +exec() function. +""" + +from .. import fixer_base +from ..fixer_util import (Comma, Name, Call, LParen, RParen, Dot, Node, + ArgList, String, syms) + + +class FixExecfile(fixer_base.BaseFix): + BM_compatible = True + + PATTERN = """ + power< 'execfile' trailer< '(' arglist< filename=any [',' globals=any [',' locals=any ] ] > ')' > > + | + power< 'execfile' trailer< '(' filename=any ')' > > + """ + + def transform(self, node, results): + assert results + filename = results["filename"] + globals = results.get("globals") + locals = results.get("locals") + + # Copy over the prefix from the right parentheses end of the execfile + # call. + execfile_paren = node.children[-1].children[-1].clone() + # Construct open().read(). + open_args = ArgList([filename.clone(), Comma(), String('"rb"', ' ')], + rparen=execfile_paren) + open_call = Node(syms.power, [Name("open"), open_args]) + read = [Node(syms.trailer, [Dot(), Name('read')]), + Node(syms.trailer, [LParen(), RParen()])] + open_expr = [open_call] + read + # Wrap the open call in a compile call. This is so the filename will be + # preserved in the execed code. + filename_arg = filename.clone() + filename_arg.prefix = " " + exec_str = String("'exec'", " ") + compile_args = open_expr + [Comma(), filename_arg, Comma(), exec_str] + compile_call = Call(Name("compile"), compile_args, "") + # Finally, replace the execfile call with an exec call. + args = [compile_call] + if globals is not None: + args.extend([Comma(), globals.clone()]) + if locals is not None: + args.extend([Comma(), locals.clone()]) + return Call(Name("exec"), args, prefix=node.prefix) diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_exitfunc.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_exitfunc.py new file mode 100644 index 00000000..2e47887a --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_exitfunc.py @@ -0,0 +1,72 @@ +""" +Convert use of sys.exitfunc to use the atexit module. 
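+
+For example, "sys.exitfunc = func" becomes "atexit.register(func)".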
+""" + +# Author: Benjamin Peterson + +from lib2to3 import pytree, fixer_base +from lib2to3.fixer_util import Name, Attr, Call, Comma, Newline, syms + + +class FixExitfunc(fixer_base.BaseFix): + keep_line_order = True + BM_compatible = True + + PATTERN = """ + ( + sys_import=import_name<'import' + ('sys' + | + dotted_as_names< (any ',')* 'sys' (',' any)* > + ) + > + | + expr_stmt< + power< 'sys' trailer< '.' 'exitfunc' > > + '=' func=any > + ) + """ + + def __init__(self, *args): + super(FixExitfunc, self).__init__(*args) + + def start_tree(self, tree, filename): + super(FixExitfunc, self).start_tree(tree, filename) + self.sys_import = None + + def transform(self, node, results): + # First, find the sys import. We'll just hope it's global scope. + if "sys_import" in results: + if self.sys_import is None: + self.sys_import = results["sys_import"] + return + + func = results["func"].clone() + func.prefix = "" + register = pytree.Node(syms.power, + Attr(Name("atexit"), Name("register")) + ) + call = Call(register, [func], node.prefix) + node.replace(call) + + if self.sys_import is None: + # That's interesting. + self.warning(node, "Can't find sys import; Please add an atexit " + "import at the top of your file.") + return + + # Now add an atexit import after the sys import. + names = self.sys_import.children[1] + if names.type == syms.dotted_as_names: + names.append_child(Comma()) + names.append_child(Name("atexit", " ")) + else: + containing_stmt = self.sys_import.parent + position = containing_stmt.children.index(self.sys_import) + stmt_container = containing_stmt.parent + new_import = pytree.Node(syms.import_name, + [Name("import"), Name("atexit", " ")] + ) + new = pytree.Node(syms.simple_stmt, [new_import]) + containing_stmt.insert_child(position + 1, Newline()) + containing_stmt.insert_child(position + 2, new) diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_filter.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_filter.py new file mode 100644 index 00000000..38e9078f --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_filter.py @@ -0,0 +1,94 @@ +# Copyright 2007 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Fixer that changes filter(F, X) into list(filter(F, X)). + +We avoid the transformation if the filter() call is directly contained +in iter(<>), list(<>), tuple(<>), sorted(<>), ...join(<>), or +for V in <>:. + +NOTE: This is still not correct if the original code was depending on +filter(F, X) to return a string if X is a string and a tuple if X is a +tuple. That would require type inference, which we don't do. Let +Python 2.6 figure it out. +""" + +# Local imports +from .. 
import fixer_base +from ..pytree import Node +from ..pygram import python_symbols as syms +from ..fixer_util import Name, ArgList, ListComp, in_special_context, parenthesize + + +class FixFilter(fixer_base.ConditionalFix): + BM_compatible = True + + PATTERN = """ + filter_lambda=power< + 'filter' + trailer< + '(' + arglist< + lambdef< 'lambda' + (fp=NAME | vfpdef< '(' fp=NAME ')'> ) ':' xp=any + > + ',' + it=any + > + ')' + > + [extra_trailers=trailer*] + > + | + power< + 'filter' + trailer< '(' arglist< none='None' ',' seq=any > ')' > + [extra_trailers=trailer*] + > + | + power< + 'filter' + args=trailer< '(' [any] ')' > + [extra_trailers=trailer*] + > + """ + + skip_on = "future_builtins.filter" + + def transform(self, node, results): + if self.should_skip(node): + return + + trailers = [] + if 'extra_trailers' in results: + for t in results['extra_trailers']: + trailers.append(t.clone()) + + if "filter_lambda" in results: + xp = results.get("xp").clone() + if xp.type == syms.test: + xp.prefix = "" + xp = parenthesize(xp) + + new = ListComp(results.get("fp").clone(), + results.get("fp").clone(), + results.get("it").clone(), xp) + new = Node(syms.power, [new] + trailers, prefix="") + + elif "none" in results: + new = ListComp(Name("_f"), + Name("_f"), + results["seq"].clone(), + Name("_f")) + new = Node(syms.power, [new] + trailers, prefix="") + + else: + if in_special_context(node): + return None + + args = results['args'].clone() + new = Node(syms.power, [Name("filter"), args], prefix="") + new = Node(syms.power, [Name("list"), ArgList([new])] + trailers) + new.prefix = "" + new.prefix = node.prefix + return new diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_funcattrs.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_funcattrs.py new file mode 100644 index 00000000..67f3e18e --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_funcattrs.py @@ -0,0 +1,21 @@ +"""Fix function attribute names (f.func_x -> f.__x__).""" +# Author: Collin Winter + +# Local imports +from .. import fixer_base +from ..fixer_util import Name + + +class FixFuncattrs(fixer_base.BaseFix): + BM_compatible = True + + PATTERN = """ + power< any+ trailer< '.' attr=('func_closure' | 'func_doc' | 'func_globals' + | 'func_name' | 'func_defaults' | 'func_code' + | 'func_dict') > any* > + """ + + def transform(self, node, results): + attr = results["attr"][0] + attr.replace(Name(("__%s__" % attr.value[5:]), + prefix=attr.prefix)) diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_future.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_future.py new file mode 100644 index 00000000..fbcb86af --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_future.py @@ -0,0 +1,22 @@ +"""Remove __future__ imports + +from __future__ import foo is replaced with an empty line. +""" +# Author: Christian Heimes + +# Local imports +from .. 
import fixer_base
+from ..fixer_util import BlankLine
+
+class FixFuture(fixer_base.BaseFix):
+    BM_compatible = True
+
+    PATTERN = """import_from< 'from' module_name="__future__" 'import' any >"""
+
+    # This should be run last -- some things check for the import
+    run_order = 10
+
+    def transform(self, node, results):
+        new = BlankLine()
+        new.prefix = node.prefix
+        return new
diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_getcwdu.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_getcwdu.py
new file mode 100644
index 00000000..087eaedc
--- /dev/null
+++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_getcwdu.py
@@ -0,0 +1,19 @@
+"""
+Fixer that changes os.getcwdu() to os.getcwd().
+"""
+# Author: Victor Stinner
+
+# Local imports
+from .. import fixer_base
+from ..fixer_util import Name
+
+class FixGetcwdu(fixer_base.BaseFix):
+    BM_compatible = True
+
+    PATTERN = """
+              power< 'os' trailer< dot='.' name='getcwdu' > any* >
+              """
+
+    def transform(self, node, results):
+        name = results["name"]
+        name.replace(Name("getcwd", prefix=name.prefix))
diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_has_key.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_has_key.py
new file mode 100644
index 00000000..439708c9
--- /dev/null
+++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_has_key.py
@@ -0,0 +1,109 @@
+# Copyright 2006 Google, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""Fixer for has_key().
+
+Calls to .has_key() methods are expressed in terms of the 'in'
+operator:
+
+    d.has_key(k) -> k in d
+
+CAVEATS:
+1) While the primary target of this fixer is dict.has_key(), the
+   fixer will change any has_key() method call, regardless of its
+   class.
+
+2) Cases like this will not be converted:
+
+        m = d.has_key
+        if m(k):
+            ...
+
+   Only *calls* to has_key() are converted. While it is possible to
+   convert the above to something like
+
+        m = d.__contains__
+        if m(k):
+            ...
+
+   this is currently not done.
+"""
+
+# Local imports
+from .. import pytree
+from .. import fixer_base
+from ..fixer_util import Name, parenthesize
+
+
+class FixHasKey(fixer_base.BaseFix):
+    BM_compatible = True
+
+    PATTERN = """
+    anchor=power<
+        before=any+
+        trailer< '.' 'has_key' >
+        trailer<
+            '('
+            ( not(arglist | argument<any '=' any>) arg=any
+            | arglist<(not argument<any '=' any>) arg=any ','>
+            )
+            ')'
+        >
+        after=any*
+    >
+    |
+    negation=not_test<
+        'not'
+        anchor=power<
+            before=any+
+            trailer< '.' 'has_key' >
+            trailer<
+                '('
+                ( not(arglist | argument<any '=' any>) arg=any
+                | arglist<(not argument<any '=' any>) arg=any ','>
+                )
+                ')'
+            >
+        >
+    >
+    """
+
+    def transform(self, node, results):
+        assert results
+        syms = self.syms
+        if (node.parent.type == syms.not_test and
+            self.pattern.match(node.parent)):
+            # Don't transform a node matching the first alternative of the
+            # pattern when its parent matches the second alternative
+            return None
+        negation = results.get("negation")
+        anchor = results["anchor"]
+        prefix = node.prefix
+        before = [n.clone() for n in results["before"]]
+        arg = results["arg"].clone()
+        after = results.get("after")
+        if after:
+            after = [n.clone() for n in after]
+        if arg.type in (syms.comparison, syms.not_test, syms.and_test,
+                        syms.or_test, syms.test, syms.lambdef,
+                        syms.argument):
+            arg = parenthesize(arg)
+        if len(before) == 1:
+            before = before[0]
+        else:
+            before = pytree.Node(syms.power, before)
+        before.prefix = " "
+        n_op = Name("in", prefix=" ")
+        if negation:
+            n_not = Name("not", prefix=" ")
+            n_op = pytree.Node(syms.comp_op, (n_not, n_op))
+        new = pytree.Node(syms.comparison, (arg, n_op, before))
+        if after:
+            new = parenthesize(new)
+            new = pytree.Node(syms.power, (new,) + tuple(after))
+        if node.parent.type in (syms.comparison, syms.expr, syms.xor_expr,
+                                syms.and_expr, syms.shift_expr,
+                                syms.arith_expr, syms.term,
+                                syms.factor, syms.power):
+            new = parenthesize(new)
+        new.prefix = prefix
+        return new
diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_idioms.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_idioms.py
new file mode 100644
index 00000000..6905913d
--- /dev/null
+++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_idioms.py
@@ -0,0 +1,152 @@
+"""Adjust some old Python 2 idioms to their modern counterparts.
+
+* Change some type comparisons to isinstance() calls:
+    type(x) == T -> isinstance(x, T)
+    type(x) is T -> isinstance(x, T)
+    type(x) != T -> not isinstance(x, T)
+    type(x) is not T -> not isinstance(x, T)
+
+* Change "while 1:" into "while True:".
+
+* Change both
+
+    v = list(EXPR)
+    v.sort()
+    foo(v)
+
+and the more general
+
+    v = EXPR
+    v.sort()
+    foo(v)
+
+into
+
+    v = sorted(EXPR)
+    foo(v)
+"""
+# Author: Jacques Frechet, Collin Winter
+
+# Local imports
+from .. import fixer_base
+from ..fixer_util import Call, Comma, Name, Node, BlankLine, syms
+
+CMP = "(n='!=' | '==' | 'is' | n=comp_op< 'is' 'not' >)"
+TYPE = "power< 'type' trailer< '(' x=any ')' > >"
+
+class FixIdioms(fixer_base.BaseFix):
+    explicit = True  # The user must ask for this fixer
+
+    PATTERN = r"""
+        isinstance=comparison< %s %s T=any >
+        |
+        isinstance=comparison< T=any %s %s >
+        |
+        while_stmt< 'while' while='1' ':' any+ >
+        |
+        sorted=any<
+            any*
+            simple_stmt<
+              expr_stmt< id1=any '='
+                         power< list='list' trailer< '(' (not arglist<any+>) any ')' > >
+              >
+              '\n'
+            >
+            sort=
+            simple_stmt<
+              power< id2=any
+                     trailer< '.' 'sort' > trailer< '(' ')' >
+              >
+              '\n'
+            >
+            next=any*
+        >
+        |
+        sorted=any<
+            any*
+            simple_stmt< expr_stmt< id1=any '=' expr=any > '\n' >
+            sort=
+            simple_stmt<
+              power< id2=any
+                     trailer< '.' 'sort' > trailer< '(' ')' >
+              >
+              '\n'
+            >
+            next=any*
+        >
+    """ % (TYPE, CMP, CMP, TYPE)
+
+    def match(self, node):
+        r = super(FixIdioms, self).match(node)
+        # If we've matched one of the sort/sorted subpatterns above, we
+        # want to reject matches where the initial assignment and the
+        # subsequent .sort() call involve different identifiers.
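+        # For example, "v = list(t)" followed by "w.sort()" must not be
+        # rewritten to "v = sorted(t)", hence the id1 == id2 check below.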
+ if r and "sorted" in r: + if r["id1"] == r["id2"]: + return r + return None + return r + + def transform(self, node, results): + if "isinstance" in results: + return self.transform_isinstance(node, results) + elif "while" in results: + return self.transform_while(node, results) + elif "sorted" in results: + return self.transform_sort(node, results) + else: + raise RuntimeError("Invalid match") + + def transform_isinstance(self, node, results): + x = results["x"].clone() # The thing inside of type() + T = results["T"].clone() # The type being compared against + x.prefix = "" + T.prefix = " " + test = Call(Name("isinstance"), [x, Comma(), T]) + if "n" in results: + test.prefix = " " + test = Node(syms.not_test, [Name("not"), test]) + test.prefix = node.prefix + return test + + def transform_while(self, node, results): + one = results["while"] + one.replace(Name("True", prefix=one.prefix)) + + def transform_sort(self, node, results): + sort_stmt = results["sort"] + next_stmt = results["next"] + list_call = results.get("list") + simple_expr = results.get("expr") + + if list_call: + list_call.replace(Name("sorted", prefix=list_call.prefix)) + elif simple_expr: + new = simple_expr.clone() + new.prefix = "" + simple_expr.replace(Call(Name("sorted"), [new], + prefix=simple_expr.prefix)) + else: + raise RuntimeError("should not have reached here") + sort_stmt.remove() + + btwn = sort_stmt.prefix + # Keep any prefix lines between the sort_stmt and the list_call and + # shove them right after the sorted() call. + if "\n" in btwn: + if next_stmt: + # The new prefix should be everything from the sort_stmt's + # prefix up to the last newline, then the old prefix after a new + # line. + prefix_lines = (btwn.rpartition("\n")[0], next_stmt[0].prefix) + next_stmt[0].prefix = "\n".join(prefix_lines) + else: + assert list_call.parent + assert list_call.next_sibling is None + # Put a blank line after list_call and set its prefix. + end_line = BlankLine() + list_call.parent.append_child(end_line) + assert list_call.next_sibling is end_line + # The new prefix should be everything up to the first new line + # of sort_stmt's prefix. + end_line.prefix = btwn.rpartition("\n")[0] diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_import.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_import.py new file mode 100644 index 00000000..734ca294 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_import.py @@ -0,0 +1,99 @@ +"""Fixer for import statements. +If spam is being imported from the local directory, this import: + from spam import eggs +Becomes: + from .spam import eggs + +And this import: + import spam +Becomes: + from . import spam +""" + +# Local imports +from .. import fixer_base +from os.path import dirname, join, exists, sep +from ..fixer_util import FromImport, syms, token + + +def traverse_imports(names): + """ + Walks over all the names imported in a dotted_as_names node. 
+ """ + pending = [names] + while pending: + node = pending.pop() + if node.type == token.NAME: + yield node.value + elif node.type == syms.dotted_name: + yield "".join([ch.value for ch in node.children]) + elif node.type == syms.dotted_as_name: + pending.append(node.children[0]) + elif node.type == syms.dotted_as_names: + pending.extend(node.children[::-2]) + else: + raise AssertionError("unknown node type") + + +class FixImport(fixer_base.BaseFix): + BM_compatible = True + + PATTERN = """ + import_from< 'from' imp=any 'import' ['('] any [')'] > + | + import_name< 'import' imp=any > + """ + + def start_tree(self, tree, name): + super(FixImport, self).start_tree(tree, name) + self.skip = "absolute_import" in tree.future_features + + def transform(self, node, results): + if self.skip: + return + imp = results['imp'] + + if node.type == syms.import_from: + # Some imps are top-level (eg: 'import ham') + # some are first level (eg: 'import ham.eggs') + # some are third level (eg: 'import ham.eggs as spam') + # Hence, the loop + while not hasattr(imp, 'value'): + imp = imp.children[0] + if self.probably_a_local_import(imp.value): + imp.value = "." + imp.value + imp.changed() + else: + have_local = False + have_absolute = False + for mod_name in traverse_imports(imp): + if self.probably_a_local_import(mod_name): + have_local = True + else: + have_absolute = True + if have_absolute: + if have_local: + # We won't handle both sibling and absolute imports in the + # same statement at the moment. + self.warning(node, "absolute and local imports together") + return + + new = FromImport(".", [imp]) + new.prefix = node.prefix + return new + + def probably_a_local_import(self, imp_name): + if imp_name.startswith("."): + # Relative imports are certainly not local imports. + return False + imp_name = imp_name.split(".", 1)[0] + base_path = dirname(self.filename) + base_path = join(base_path, imp_name) + # If there is no __init__.py next to the file its not in a package + # so can't be a relative import. + if not exists(join(dirname(base_path), "__init__.py")): + return False + for ext in [".py", sep, ".pyc", ".so", ".sl", ".pyd"]: + if exists(base_path + ext): + return True + return False diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_imports.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_imports.py new file mode 100644 index 00000000..aaf4f2f6 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_imports.py @@ -0,0 +1,145 @@ +"""Fix incompatible imports and module references.""" +# Authors: Collin Winter, Nick Edds + +# Local imports +from .. 
import fixer_base +from ..fixer_util import Name, attr_chain + +MAPPING = {'StringIO': 'io', + 'cStringIO': 'io', + 'cPickle': 'pickle', + '__builtin__' : 'builtins', + 'copy_reg': 'copyreg', + 'Queue': 'queue', + 'SocketServer': 'socketserver', + 'ConfigParser': 'configparser', + 'repr': 'reprlib', + 'FileDialog': 'tkinter.filedialog', + 'tkFileDialog': 'tkinter.filedialog', + 'SimpleDialog': 'tkinter.simpledialog', + 'tkSimpleDialog': 'tkinter.simpledialog', + 'tkColorChooser': 'tkinter.colorchooser', + 'tkCommonDialog': 'tkinter.commondialog', + 'Dialog': 'tkinter.dialog', + 'Tkdnd': 'tkinter.dnd', + 'tkFont': 'tkinter.font', + 'tkMessageBox': 'tkinter.messagebox', + 'ScrolledText': 'tkinter.scrolledtext', + 'Tkconstants': 'tkinter.constants', + 'Tix': 'tkinter.tix', + 'ttk': 'tkinter.ttk', + 'Tkinter': 'tkinter', + 'markupbase': '_markupbase', + '_winreg': 'winreg', + 'thread': '_thread', + 'dummy_thread': '_dummy_thread', + # anydbm and whichdb are handled by fix_imports2 + 'dbhash': 'dbm.bsd', + 'dumbdbm': 'dbm.dumb', + 'dbm': 'dbm.ndbm', + 'gdbm': 'dbm.gnu', + 'xmlrpclib': 'xmlrpc.client', + 'DocXMLRPCServer': 'xmlrpc.server', + 'SimpleXMLRPCServer': 'xmlrpc.server', + 'httplib': 'http.client', + 'htmlentitydefs' : 'html.entities', + 'HTMLParser' : 'html.parser', + 'Cookie': 'http.cookies', + 'cookielib': 'http.cookiejar', + 'BaseHTTPServer': 'http.server', + 'SimpleHTTPServer': 'http.server', + 'CGIHTTPServer': 'http.server', + #'test.test_support': 'test.support', + 'commands': 'subprocess', + 'UserString' : 'collections', + 'UserList' : 'collections', + 'urlparse' : 'urllib.parse', + 'robotparser' : 'urllib.robotparser', +} + + +def alternates(members): + return "(" + "|".join(map(repr, members)) + ")" + + +def build_pattern(mapping=MAPPING): + mod_list = ' | '.join(["module_name='%s'" % key for key in mapping]) + bare_names = alternates(mapping.keys()) + + yield """name_import=import_name< 'import' ((%s) | + multiple_imports=dotted_as_names< any* (%s) any* >) > + """ % (mod_list, mod_list) + yield """import_from< 'from' (%s) 'import' ['('] + ( any | import_as_name< any 'as' any > | + import_as_names< any* >) [')'] > + """ % mod_list + yield """import_name< 'import' (dotted_as_name< (%s) 'as' any > | + multiple_imports=dotted_as_names< + any* dotted_as_name< (%s) 'as' any > any* >) > + """ % (mod_list, mod_list) + + # Find usages of module members in code e.g. thread.foo(bar) + yield "power< bare_with_attr=(%s) trailer<'.' any > any* >" % bare_names + + +class FixImports(fixer_base.BaseFix): + + BM_compatible = True + keep_line_order = True + # This is overridden in fix_imports2. + mapping = MAPPING + + # We want to run this fixer late, so fix_import doesn't try to make stdlib + # renames into relative imports. + run_order = 6 + + def build_pattern(self): + return "|".join(build_pattern(self.mapping)) + + def compile_pattern(self): + # We override this, so MAPPING can be pragmatically altered and the + # changes will be reflected in PATTERN. + self.PATTERN = self.build_pattern() + super(FixImports, self).compile_pattern() + + # Don't match the node if it's within another match. + def match(self, node): + match = super(FixImports, self).match + results = match(node) + if results: + # Module usage could be in the trailer of an attribute lookup, so we + # might have nested matches when "bare_with_attr" is present. 
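+            # Import-statement matches nested inside a matching ancestor are
+            # dropped; bare module references are kept even when they nest.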
+ if "bare_with_attr" not in results and \ + any(match(obj) for obj in attr_chain(node, "parent")): + return False + return results + return False + + def start_tree(self, tree, filename): + super(FixImports, self).start_tree(tree, filename) + self.replace = {} + + def transform(self, node, results): + import_mod = results.get("module_name") + if import_mod: + mod_name = import_mod.value + new_name = self.mapping[mod_name] + import_mod.replace(Name(new_name, prefix=import_mod.prefix)) + if "name_import" in results: + # If it's not a "from x import x, y" or "import x as y" import, + # marked its usage to be replaced. + self.replace[mod_name] = new_name + if "multiple_imports" in results: + # This is a nasty hack to fix multiple imports on a line (e.g., + # "import StringIO, urlparse"). The problem is that I can't + # figure out an easy way to make a pattern recognize the keys of + # MAPPING randomly sprinkled in an import statement. + results = self.match(node) + if results: + self.transform(node, results) + else: + # Replace usage of the module. + bare_name = results["bare_with_attr"][0] + new_name = self.replace.get(bare_name.value) + if new_name: + bare_name.replace(Name(new_name, prefix=bare_name.prefix)) diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_imports2.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_imports2.py new file mode 100644 index 00000000..9a33c67b --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_imports2.py @@ -0,0 +1,16 @@ +"""Fix incompatible imports and module references that must be fixed after +fix_imports.""" +from . import fix_imports + + +MAPPING = { + 'whichdb': 'dbm', + 'anydbm': 'dbm', + } + + +class FixImports2(fix_imports.FixImports): + + run_order = 7 + + mapping = MAPPING diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_input.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_input.py new file mode 100644 index 00000000..9cf9a48c --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_input.py @@ -0,0 +1,26 @@ +"""Fixer that changes input(...) into eval(input(...)).""" +# Author: Andre Roberge + +# Local imports +from .. import fixer_base +from ..fixer_util import Call, Name +from .. import patcomp + + +context = patcomp.compile_pattern("power< 'eval' trailer< '(' any ')' > >") + + +class FixInput(fixer_base.BaseFix): + BM_compatible = True + PATTERN = """ + power< 'input' args=trailer< '(' [any] ')' > > + """ + + def transform(self, node, results): + # If we're already wrapped in an eval() call, we're done. + if context.match(node.parent.parent): + return + + new = node.clone() + new.prefix = "" + return Call(Name("eval"), [new], prefix=node.prefix) diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_intern.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_intern.py new file mode 100644 index 00000000..d7528430 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_intern.py @@ -0,0 +1,39 @@ +# Copyright 2006 Georg Brandl. +# Licensed to PSF under a Contributor Agreement. + +"""Fixer for intern(). + +intern(s) -> sys.intern(s)""" + +# Local imports +from .. 
import fixer_base
+from ..fixer_util import ImportAndCall, touch_import
+
+
+class FixIntern(fixer_base.BaseFix):
+    BM_compatible = True
+    order = "pre"
+
+    PATTERN = """
+    power< 'intern'
+           trailer< lpar='('
+                    ( not(arglist | argument<any '=' any>) obj=any
+                      | obj=arglist<(not argument<any '=' any>) any ','> )
+                    rpar=')' >
+             after=any*
+           >
+    """
+
+    def transform(self, node, results):
+        if results:
+            # I feel like we should be able to express this logic in the
+            # PATTERN above but I don't know how to do it so...
+            obj = results['obj']
+            if obj:
+                if (obj.type == self.syms.argument and
+                        obj.children[0].value in {'**', '*'}):
+                    return  # Make no change.
+        names = ('sys', 'intern')
+        new = ImportAndCall(node, results, names)
+        touch_import(None, 'sys', node)
+        return new
diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_isinstance.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_isinstance.py
new file mode 100644
index 00000000..bebb1de1
--- /dev/null
+++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_isinstance.py
@@ -0,0 +1,52 @@
+# Copyright 2008 Armin Ronacher.
+# Licensed to PSF under a Contributor Agreement.
+
+"""Fixer that cleans up a tuple argument to isinstance after the tokens
+in it were fixed. This is mainly used to remove double occurrences of
+tokens as a leftover of the long -> int / unicode -> str conversion.
+
+eg. isinstance(x, (int, long)) -> isinstance(x, (int, int))
+       -> isinstance(x, int)
+"""
+
+from .. import fixer_base
+from ..fixer_util import token
+
+
+class FixIsinstance(fixer_base.BaseFix):
+    BM_compatible = True
+    PATTERN = """
+    power<
+        'isinstance'
+        trailer< '(' arglist< any ',' atom< '('
+            args=testlist_gexp< any+ >
+        ')' > > ')' >
+    >
+    """
+
+    run_order = 6
+
+    def transform(self, node, results):
+        names_inserted = set()
+        testlist = results["args"]
+        args = testlist.children
+        new_args = []
+        iterator = enumerate(args)
+        for idx, arg in iterator:
+            if arg.type == token.NAME and arg.value in names_inserted:
+                if idx < len(args) - 1 and args[idx + 1].type == token.COMMA:
+                    next(iterator)
+                continue
+            else:
+                new_args.append(arg)
+                if arg.type == token.NAME:
+                    names_inserted.add(arg.value)
+        if new_args and new_args[-1].type == token.COMMA:
+            del new_args[-1]
+        if len(new_args) == 1:
+            atom = testlist.parent
+            new_args[0].prefix = atom.prefix
+            atom.replace(new_args[0])
+        else:
+            args[:] = new_args
+            node.changed()
diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_itertools.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_itertools.py
new file mode 100644
index 00000000..8e78d6c6
--- /dev/null
+++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_itertools.py
@@ -0,0 +1,43 @@
+""" Fixer for itertools.(imap|ifilter|izip) --> (map|filter|zip) and
+    itertools.ifilterfalse --> itertools.filterfalse (bugs 2360-2363)
+
+    imports from itertools are fixed in fix_itertools_import.py
+
+    If itertools is imported as something else (ie: import itertools as it;
+    it.izip(spam, eggs)) method calls will not get fixed.
+    """
+
+# Local imports
+from .. import fixer_base
+from ..fixer_util import Name
+
+class FixItertools(fixer_base.BaseFix):
+    BM_compatible = True
+    it_funcs = "('imap'|'ifilter'|'izip'|'izip_longest'|'ifilterfalse')"
+    PATTERN = """
+              power< it='itertools'
+                  trailer<
+                     dot='.'
func=%(it_funcs)s > trailer< '(' [any] ')' > > + | + power< func=%(it_funcs)s trailer< '(' [any] ')' > > + """ %(locals()) + + # Needs to be run after fix_(map|zip|filter) + run_order = 6 + + def transform(self, node, results): + prefix = None + func = results['func'][0] + if ('it' in results and + func.value not in ('ifilterfalse', 'izip_longest')): + dot, it = (results['dot'], results['it']) + # Remove the 'itertools' + prefix = it.prefix + it.remove() + # Replace the node which contains ('.', 'function') with the + # function (to be consistent with the second part of the pattern) + dot.remove() + func.parent.replace(func) + + prefix = prefix or func.prefix + func.replace(Name(func.value[1:], prefix=prefix)) diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_itertools_imports.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_itertools_imports.py new file mode 100644 index 00000000..0ddbc7b8 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_itertools_imports.py @@ -0,0 +1,57 @@ +""" Fixer for imports of itertools.(imap|ifilter|izip|ifilterfalse) """ + +# Local imports +from lib2to3 import fixer_base +from lib2to3.fixer_util import BlankLine, syms, token + + +class FixItertoolsImports(fixer_base.BaseFix): + BM_compatible = True + PATTERN = """ + import_from< 'from' 'itertools' 'import' imports=any > + """ %(locals()) + + def transform(self, node, results): + imports = results['imports'] + if imports.type == syms.import_as_name or not imports.children: + children = [imports] + else: + children = imports.children + for child in children[::2]: + if child.type == token.NAME: + member = child.value + name_node = child + elif child.type == token.STAR: + # Just leave the import as is. + return + else: + assert child.type == syms.import_as_name + name_node = child.children[0] + member_name = name_node.value + if member_name in ('imap', 'izip', 'ifilter'): + child.value = None + child.remove() + elif member_name in ('ifilterfalse', 'izip_longest'): + node.changed() + name_node.value = ('filterfalse' if member_name[1] == 'f' + else 'zip_longest') + + # Make sure the import statement is still sane + children = imports.children[:] or [imports] + remove_comma = True + for child in children: + if remove_comma and child.type == token.COMMA: + child.remove() + else: + remove_comma ^= True + + while children and children[-1].type == token.COMMA: + children.pop().remove() + + # If there are no imports left, just get rid of the entire statement + if (not (imports.children or getattr(imports, 'value', None)) or + imports.parent is None): + p = node.prefix + node = BlankLine() + node.prefix = p + return node diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_long.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_long.py new file mode 100644 index 00000000..f227c9f4 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_long.py @@ -0,0 +1,19 @@ +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Fixer that turns 'long' into 'int' everywhere. 
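+
+For example, "x = long(5)" becomes "x = int(5)" when "long" refers to
+the builtin rather than a user-defined name.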
+""" + +# Local imports +from lib2to3 import fixer_base +from lib2to3.fixer_util import is_probably_builtin + + +class FixLong(fixer_base.BaseFix): + BM_compatible = True + PATTERN = "'long'" + + def transform(self, node, results): + if is_probably_builtin(node): + node.value = "int" + node.changed() diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_map.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_map.py new file mode 100644 index 00000000..78cf81c6 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_map.py @@ -0,0 +1,110 @@ +# Copyright 2007 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Fixer that changes map(F, ...) into list(map(F, ...)) unless there +exists a 'from future_builtins import map' statement in the top-level +namespace. + +As a special case, map(None, X) is changed into list(X). (This is +necessary because the semantics are changed in this case -- the new +map(None, X) is equivalent to [(x,) for x in X].) + +We avoid the transformation (except for the special case mentioned +above) if the map() call is directly contained in iter(<>), list(<>), +tuple(<>), sorted(<>), ...join(<>), or for V in <>:. + +NOTE: This is still not correct if the original code was depending on +map(F, X, Y, ...) to go on until the longest argument is exhausted, +substituting None for missing values -- like zip(), it now stops as +soon as the shortest argument is exhausted. +""" + +# Local imports +from ..pgen2 import token +from .. import fixer_base +from ..fixer_util import Name, ArgList, Call, ListComp, in_special_context +from ..pygram import python_symbols as syms +from ..pytree import Node + + +class FixMap(fixer_base.ConditionalFix): + BM_compatible = True + + PATTERN = """ + map_none=power< + 'map' + trailer< '(' arglist< 'None' ',' arg=any [','] > ')' > + [extra_trailers=trailer*] + > + | + map_lambda=power< + 'map' + trailer< + '(' + arglist< + lambdef< 'lambda' + (fp=NAME | vfpdef< '(' fp=NAME ')'> ) ':' xp=any + > + ',' + it=any + > + ')' + > + [extra_trailers=trailer*] + > + | + power< + 'map' args=trailer< '(' [any] ')' > + [extra_trailers=trailer*] + > + """ + + skip_on = 'future_builtins.map' + + def transform(self, node, results): + if self.should_skip(node): + return + + trailers = [] + if 'extra_trailers' in results: + for t in results['extra_trailers']: + trailers.append(t.clone()) + + if node.parent.type == syms.simple_stmt: + self.warning(node, "You should use a for loop here") + new = node.clone() + new.prefix = "" + new = Call(Name("list"), [new]) + elif "map_lambda" in results: + new = ListComp(results["xp"].clone(), + results["fp"].clone(), + results["it"].clone()) + new = Node(syms.power, [new] + trailers, prefix="") + + else: + if "map_none" in results: + new = results["arg"].clone() + new.prefix = "" + else: + if "args" in results: + args = results["args"] + if args.type == syms.trailer and \ + args.children[1].type == syms.arglist and \ + args.children[1].children[0].type == token.NAME and \ + args.children[1].children[0].value == "None": + self.warning(node, "cannot convert map(None, ...) 
" + "with multiple arguments because map() " + "now truncates to the shortest sequence") + return + + new = Node(syms.power, [Name("map"), args.clone()]) + new.prefix = "" + + if in_special_context(node): + return None + + new = Node(syms.power, [Name("list"), ArgList([new])] + trailers) + new.prefix = "" + + new.prefix = node.prefix + return new diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_metaclass.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_metaclass.py new file mode 100644 index 00000000..fe547b22 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_metaclass.py @@ -0,0 +1,228 @@ +"""Fixer for __metaclass__ = X -> (metaclass=X) methods. + + The various forms of classef (inherits nothing, inherits once, inherits + many) don't parse the same in the CST so we look at ALL classes for + a __metaclass__ and if we find one normalize the inherits to all be + an arglist. + + For one-liner classes ('class X: pass') there is no indent/dedent so + we normalize those into having a suite. + + Moving the __metaclass__ into the classdef can also cause the class + body to be empty so there is some special casing for that as well. + + This fixer also tries very hard to keep original indenting and spacing + in all those corner cases. + +""" +# Author: Jack Diederich + +# Local imports +from .. import fixer_base +from ..pygram import token +from ..fixer_util import syms, Node, Leaf + + +def has_metaclass(parent): + """ we have to check the cls_node without changing it. + There are two possibilities: + 1) clsdef => suite => simple_stmt => expr_stmt => Leaf('__meta') + 2) clsdef => simple_stmt => expr_stmt => Leaf('__meta') + """ + for node in parent.children: + if node.type == syms.suite: + return has_metaclass(node) + elif node.type == syms.simple_stmt and node.children: + expr_node = node.children[0] + if expr_node.type == syms.expr_stmt and expr_node.children: + left_side = expr_node.children[0] + if isinstance(left_side, Leaf) and \ + left_side.value == '__metaclass__': + return True + return False + + +def fixup_parse_tree(cls_node): + """ one-line classes don't get a suite in the parse tree so we add + one to normalize the tree + """ + for node in cls_node.children: + if node.type == syms.suite: + # already in the preferred format, do nothing + return + + # !%@#! one-liners have no suite node, we have to fake one up + for i, node in enumerate(cls_node.children): + if node.type == token.COLON: + break + else: + raise ValueError("No class suite and no ':'!") + + # move everything into a suite node + suite = Node(syms.suite, []) + while cls_node.children[i+1:]: + move_node = cls_node.children[i+1] + suite.append_child(move_node.clone()) + move_node.remove() + cls_node.append_child(suite) + node = suite + + +def fixup_simple_stmt(parent, i, stmt_node): + """ if there is a semi-colon all the parts count as part of the same + simple_stmt. 
We just want the __metaclass__ part so we move + everything after the semi-colon into its own simple_stmt node + """ + for semi_ind, node in enumerate(stmt_node.children): + if node.type == token.SEMI: # *sigh* + break + else: + return + + node.remove() # kill the semicolon + new_expr = Node(syms.expr_stmt, []) + new_stmt = Node(syms.simple_stmt, [new_expr]) + while stmt_node.children[semi_ind:]: + move_node = stmt_node.children[semi_ind] + new_expr.append_child(move_node.clone()) + move_node.remove() + parent.insert_child(i, new_stmt) + new_leaf1 = new_stmt.children[0].children[0] + old_leaf1 = stmt_node.children[0].children[0] + new_leaf1.prefix = old_leaf1.prefix + + +def remove_trailing_newline(node): + if node.children and node.children[-1].type == token.NEWLINE: + node.children[-1].remove() + + +def find_metas(cls_node): + # find the suite node (Mmm, sweet nodes) + for node in cls_node.children: + if node.type == syms.suite: + break + else: + raise ValueError("No class suite!") + + # look for simple_stmt[ expr_stmt[ Leaf('__metaclass__') ] ] + for i, simple_node in list(enumerate(node.children)): + if simple_node.type == syms.simple_stmt and simple_node.children: + expr_node = simple_node.children[0] + if expr_node.type == syms.expr_stmt and expr_node.children: + # Check if the expr_node is a simple assignment. + left_node = expr_node.children[0] + if isinstance(left_node, Leaf) and \ + left_node.value == '__metaclass__': + # We found an assignment to __metaclass__. + fixup_simple_stmt(node, i, simple_node) + remove_trailing_newline(simple_node) + yield (node, i, simple_node) + + +def fixup_indent(suite): + """ If an INDENT is followed by a thing with a prefix then nuke the prefix + Otherwise we get in trouble when removing __metaclass__ at suite start + """ + kids = suite.children[::-1] + # find the first indent + while kids: + node = kids.pop() + if node.type == token.INDENT: + break + + # find the first Leaf + while kids: + node = kids.pop() + if isinstance(node, Leaf) and node.type != token.DEDENT: + if node.prefix: + node.prefix = '' + return + else: + kids.extend(node.children[::-1]) + + +class FixMetaclass(fixer_base.BaseFix): + BM_compatible = True + + PATTERN = """ + classdef + """ + + def transform(self, node, results): + if not has_metaclass(node): + return + + fixup_parse_tree(node) + + # find metaclasses, keep the last one + last_metaclass = None + for suite, i, stmt in find_metas(node): + last_metaclass = stmt + stmt.remove() + + text_type = node.children[0].type # always Leaf(nnn, 'class') + + # figure out what kind of classdef we have + if len(node.children) == 7: + # Node(classdef, ['class', 'name', '(', arglist, ')', ':', suite]) + # 0 1 2 3 4 5 6 + if node.children[3].type == syms.arglist: + arglist = node.children[3] + # Node(classdef, ['class', 'name', '(', 'Parent', ')', ':', suite]) + else: + parent = node.children[3].clone() + arglist = Node(syms.arglist, [parent]) + node.set_child(3, arglist) + elif len(node.children) == 6: + # Node(classdef, ['class', 'name', '(', ')', ':', suite]) + # 0 1 2 3 4 5 + arglist = Node(syms.arglist, []) + node.insert_child(3, arglist) + elif len(node.children) == 4: + # Node(classdef, ['class', 'name', ':', suite]) + # 0 1 2 3 + arglist = Node(syms.arglist, []) + node.insert_child(2, Leaf(token.RPAR, ')')) + node.insert_child(2, arglist) + node.insert_child(2, Leaf(token.LPAR, '(')) + else: + raise ValueError("Unexpected class definition") + + # now stick the metaclass in the arglist + meta_txt = 
last_metaclass.children[0].children[0] + meta_txt.value = 'metaclass' + orig_meta_prefix = meta_txt.prefix + + if arglist.children: + arglist.append_child(Leaf(token.COMMA, ',')) + meta_txt.prefix = ' ' + else: + meta_txt.prefix = '' + + # compact the expression "metaclass = Meta" -> "metaclass=Meta" + expr_stmt = last_metaclass.children[0] + assert expr_stmt.type == syms.expr_stmt + expr_stmt.children[1].prefix = '' + expr_stmt.children[2].prefix = '' + + arglist.append_child(last_metaclass) + + fixup_indent(suite) + + # check for empty suite + if not suite.children: + # one-liner that was just __metaclass_ + suite.remove() + pass_leaf = Leaf(text_type, 'pass') + pass_leaf.prefix = orig_meta_prefix + node.append_child(pass_leaf) + node.append_child(Leaf(token.NEWLINE, '\n')) + + elif len(suite.children) > 1 and \ + (suite.children[-2].type == token.INDENT and + suite.children[-1].type == token.DEDENT): + # there was only one line in the class body and it was __metaclass__ + pass_leaf = Leaf(text_type, 'pass') + suite.insert_child(-1, pass_leaf) + suite.insert_child(-1, Leaf(token.NEWLINE, '\n')) diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_methodattrs.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_methodattrs.py new file mode 100644 index 00000000..7f9004f0 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_methodattrs.py @@ -0,0 +1,24 @@ +"""Fix bound method attributes (method.im_? -> method.__?__). +""" +# Author: Christian Heimes + +# Local imports +from .. import fixer_base +from ..fixer_util import Name + +MAP = { + "im_func" : "__func__", + "im_self" : "__self__", + "im_class" : "__self__.__class__" + } + +class FixMethodattrs(fixer_base.BaseFix): + BM_compatible = True + PATTERN = """ + power< any+ trailer< '.' attr=('im_func' | 'im_self' | 'im_class') > any* > + """ + + def transform(self, node, results): + attr = results["attr"][0] + new = MAP[attr.value] + attr.replace(Name(new, prefix=attr.prefix)) diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_ne.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_ne.py new file mode 100644 index 00000000..e3ee10f4 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_ne.py @@ -0,0 +1,23 @@ +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Fixer that turns <> into !=.""" + +# Local imports +from .. import pytree +from ..pgen2 import token +from .. import fixer_base + + +class FixNe(fixer_base.BaseFix): + # This is so simple that we don't need the pattern compiler. 
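+    # For example, "if a <> b:" is rewritten to "if a != b:".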
+ + _accept_type = token.NOTEQUAL + + def match(self, node): + # Override + return node.value == "<>" + + def transform(self, node, results): + new = pytree.Leaf(token.NOTEQUAL, "!=", prefix=node.prefix) + return new diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_next.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_next.py new file mode 100644 index 00000000..9f6305e1 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_next.py @@ -0,0 +1,103 @@ +"""Fixer for it.next() -> next(it), per PEP 3114.""" +# Author: Collin Winter + +# Things that currently aren't covered: +# - listcomp "next" names aren't warned +# - "with" statement targets aren't checked + +# Local imports +from ..pgen2 import token +from ..pygram import python_symbols as syms +from .. import fixer_base +from ..fixer_util import Name, Call, find_binding + +bind_warning = "Calls to builtin next() possibly shadowed by global binding" + + +class FixNext(fixer_base.BaseFix): + BM_compatible = True + PATTERN = """ + power< base=any+ trailer< '.' attr='next' > trailer< '(' ')' > > + | + power< head=any+ trailer< '.' attr='next' > not trailer< '(' ')' > > + | + classdef< 'class' any+ ':' + suite< any* + funcdef< 'def' + name='next' + parameters< '(' NAME ')' > any+ > + any* > > + | + global=global_stmt< 'global' any* 'next' any* > + """ + + order = "pre" # Pre-order tree traversal + + def start_tree(self, tree, filename): + super(FixNext, self).start_tree(tree, filename) + + n = find_binding('next', tree) + if n: + self.warning(n, bind_warning) + self.shadowed_next = True + else: + self.shadowed_next = False + + def transform(self, node, results): + assert results + + base = results.get("base") + attr = results.get("attr") + name = results.get("name") + + if base: + if self.shadowed_next: + attr.replace(Name("__next__", prefix=attr.prefix)) + else: + base = [n.clone() for n in base] + base[0].prefix = "" + node.replace(Call(Name("next", prefix=node.prefix), base)) + elif name: + n = Name("__next__", prefix=name.prefix) + name.replace(n) + elif attr: + # We don't do this transformation if we're assigning to "x.next". + # Unfortunately, it doesn't seem possible to do this in PATTERN, + # so it's being done here. + if is_assign_target(node): + head = results["head"] + if "".join([str(n) for n in head]).strip() == '__builtin__': + self.warning(node, bind_warning) + return + attr.replace(Name("__next__")) + elif "global" in results: + self.warning(node, bind_warning) + self.shadowed_next = True + + +### The following functions help test if node is part of an assignment +### target. 
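+### For example, "foo.next = 42" is left unchanged, while a bare
+### "foo.next" attribute reference becomes "foo.__next__".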
+ +def is_assign_target(node): + assign = find_assign(node) + if assign is None: + return False + + for child in assign.children: + if child.type == token.EQUAL: + return False + elif is_subtree(child, node): + return True + return False + +def find_assign(node): + if node.type == syms.expr_stmt: + return node + if node.type == syms.simple_stmt or node.parent is None: + return None + return find_assign(node.parent) + +def is_subtree(root, node): + if root == node: + return True + return any(is_subtree(c, node) for c in root.children) diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_nonzero.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_nonzero.py new file mode 100644 index 00000000..c2295969 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_nonzero.py @@ -0,0 +1,21 @@ +"""Fixer for __nonzero__ -> __bool__ methods.""" +# Author: Collin Winter + +# Local imports +from .. import fixer_base +from ..fixer_util import Name + +class FixNonzero(fixer_base.BaseFix): + BM_compatible = True + PATTERN = """ + classdef< 'class' any+ ':' + suite< any* + funcdef< 'def' name='__nonzero__' + parameters< '(' NAME ')' > any+ > + any* > > + """ + + def transform(self, node, results): + name = results["name"] + new = Name("__bool__", prefix=name.prefix) + name.replace(new) diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_numliterals.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_numliterals.py new file mode 100644 index 00000000..79207d4a --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_numliterals.py @@ -0,0 +1,28 @@ +"""Fixer that turns 1L into 1, 0755 into 0o755. +""" +# Copyright 2007 Georg Brandl. +# Licensed to PSF under a Contributor Agreement. + +# Local imports +from ..pgen2 import token +from .. import fixer_base +from ..fixer_util import Number + + +class FixNumliterals(fixer_base.BaseFix): + # This is so simple that we don't need the pattern compiler. + + _accept_type = token.NUMBER + + def match(self, node): + # Override + return (node.value.startswith("0") or node.value[-1] in "Ll") + + def transform(self, node, results): + val = node.value + if val[-1] in 'Ll': + val = val[:-1] + elif val.startswith('0') and val.isdigit() and len(set(val)) > 1: + val = "0o" + val[1:] + + return Number(val, prefix=node.prefix) diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_operator.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_operator.py new file mode 100644 index 00000000..d303cd20 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_operator.py @@ -0,0 +1,97 @@ +"""Fixer for operator functions. 
+ +operator.isCallable(obj) -> callable(obj) +operator.sequenceIncludes(obj) -> operator.contains(obj) +operator.isSequenceType(obj) -> isinstance(obj, collections.abc.Sequence) +operator.isMappingType(obj) -> isinstance(obj, collections.abc.Mapping) +operator.isNumberType(obj) -> isinstance(obj, numbers.Number) +operator.repeat(obj, n) -> operator.mul(obj, n) +operator.irepeat(obj, n) -> operator.imul(obj, n) +""" + +import collections.abc + +# Local imports +from lib2to3 import fixer_base +from lib2to3.fixer_util import Call, Name, String, touch_import + + +def invocation(s): + def dec(f): + f.invocation = s + return f + return dec + + +class FixOperator(fixer_base.BaseFix): + BM_compatible = True + order = "pre" + + methods = """ + method=('isCallable'|'sequenceIncludes' + |'isSequenceType'|'isMappingType'|'isNumberType' + |'repeat'|'irepeat') + """ + obj = "'(' obj=any ')'" + PATTERN = """ + power< module='operator' + trailer< '.' %(methods)s > trailer< %(obj)s > > + | + power< %(methods)s trailer< %(obj)s > > + """ % dict(methods=methods, obj=obj) + + def transform(self, node, results): + method = self._check_method(node, results) + if method is not None: + return method(node, results) + + @invocation("operator.contains(%s)") + def _sequenceIncludes(self, node, results): + return self._handle_rename(node, results, "contains") + + @invocation("callable(%s)") + def _isCallable(self, node, results): + obj = results["obj"] + return Call(Name("callable"), [obj.clone()], prefix=node.prefix) + + @invocation("operator.mul(%s)") + def _repeat(self, node, results): + return self._handle_rename(node, results, "mul") + + @invocation("operator.imul(%s)") + def _irepeat(self, node, results): + return self._handle_rename(node, results, "imul") + + @invocation("isinstance(%s, collections.abc.Sequence)") + def _isSequenceType(self, node, results): + return self._handle_type2abc(node, results, "collections.abc", "Sequence") + + @invocation("isinstance(%s, collections.abc.Mapping)") + def _isMappingType(self, node, results): + return self._handle_type2abc(node, results, "collections.abc", "Mapping") + + @invocation("isinstance(%s, numbers.Number)") + def _isNumberType(self, node, results): + return self._handle_type2abc(node, results, "numbers", "Number") + + def _handle_rename(self, node, results, name): + method = results["method"][0] + method.value = name + method.changed() + + def _handle_type2abc(self, node, results, module, abc): + touch_import(None, module, node) + obj = results["obj"] + args = [obj.clone(), String(", " + ".".join([module, abc]))] + return Call(Name("isinstance"), args, prefix=node.prefix) + + def _check_method(self, node, results): + method = getattr(self, "_" + results["method"][0].value) + if isinstance(method, collections.abc.Callable): + if "module" in results: + return method + else: + sub = (str(results["obj"]),) + invocation_str = method.invocation % sub + self.warning(node, "You should use '%s' here." % invocation_str) + return None diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_paren.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_paren.py new file mode 100644 index 00000000..df3da5f5 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_paren.py @@ -0,0 +1,44 @@ +"""Fixer that adds parentheses where they are required + +This converts ``[x for x in 1, 2]`` to ``[x for x in (1, 2)]``.""" + +# By Taek Joo Kim and Benjamin Peterson + +# Local imports +from .. 
import fixer_base +from ..fixer_util import LParen, RParen + +# XXX This doesn't support nested for loops like [x for x in 1, 2 for x in 1, 2] +class FixParen(fixer_base.BaseFix): + BM_compatible = True + + PATTERN = """ + atom< ('[' | '(') + (listmaker< any + comp_for< + 'for' NAME 'in' + target=testlist_safe< any (',' any)+ [','] + > + [any] + > + > + | + testlist_gexp< any + comp_for< + 'for' NAME 'in' + target=testlist_safe< any (',' any)+ [','] + > + [any] + > + >) + (']' | ')') > + """ + + def transform(self, node, results): + target = results["target"] + + lparen = LParen() + lparen.prefix = target.prefix + target.prefix = "" # Make it hug the parentheses + target.insert_child(0, lparen) + target.append_child(RParen()) diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_print.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_print.py new file mode 100644 index 00000000..87803222 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_print.py @@ -0,0 +1,87 @@ +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Fixer for print. + +Change: + 'print' into 'print()' + 'print ...' into 'print(...)' + 'print ... ,' into 'print(..., end=" ")' + 'print >>x, ...' into 'print(..., file=x)' + +No changes are applied if print_function is imported from __future__ + +""" + +# Local imports +from .. import patcomp +from .. import pytree +from ..pgen2 import token +from .. import fixer_base +from ..fixer_util import Name, Call, Comma, String + + +parend_expr = patcomp.compile_pattern( + """atom< '(' [atom|STRING|NAME] ')' >""" + ) + + +class FixPrint(fixer_base.BaseFix): + + BM_compatible = True + + PATTERN = """ + simple_stmt< any* bare='print' any* > | print_stmt + """ + + def transform(self, node, results): + assert results + + bare_print = results.get("bare") + + if bare_print: + # Special-case print all by itself + bare_print.replace(Call(Name("print"), [], + prefix=bare_print.prefix)) + return + assert node.children[0] == Name("print") + args = node.children[1:] + if len(args) == 1 and parend_expr.match(args[0]): + # We don't want to keep sticking parens around an + # already-parenthesised expression. + return + + sep = end = file = None + if args and args[-1] == Comma(): + args = args[:-1] + end = " " + if args and args[0] == pytree.Leaf(token.RIGHTSHIFT, ">>"): + assert len(args) >= 2 + file = args[1].clone() + args = args[3:] # Strip a possible comma after the file expression + # Now synthesize a print(args, sep=..., end=..., file=...) node. 
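
To make the synthesis step concrete, this is the rewrite FixPrint is building here, exercised through the stdlib lib2to3 API (an illustrative sketch, not part of the diff; "<example>" is just a display name):

    from lib2to3.refactor import RefactoringTool

    tool = RefactoringTool(["lib2to3.fixes.fix_print"])
    # A trailing comma becomes end=" "; the '>>stream' form becomes file=stream.
    print(tool.refactor_string('print >>sys.stderr, "oops",\n', "<example>"), end="")
    # -> print("oops", end=" ", file=sys.stderr)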
+ l_args = [arg.clone() for arg in args] + if l_args: + l_args[0].prefix = "" + if sep is not None or end is not None or file is not None: + if sep is not None: + self.add_kwarg(l_args, "sep", String(repr(sep))) + if end is not None: + self.add_kwarg(l_args, "end", String(repr(end))) + if file is not None: + self.add_kwarg(l_args, "file", file) + n_stmt = Call(Name("print"), l_args) + n_stmt.prefix = node.prefix + return n_stmt + + def add_kwarg(self, l_nodes, s_kwd, n_expr): + # XXX All this prefix-setting may lose comments (though rarely) + n_expr.prefix = "" + n_argument = pytree.Node(self.syms.argument, + (Name(s_kwd), + pytree.Leaf(token.EQUAL, "="), + n_expr)) + if l_nodes: + l_nodes.append(Comma()) + n_argument.prefix = " " + l_nodes.append(n_argument) diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_raise.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_raise.py new file mode 100644 index 00000000..05aa21e7 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_raise.py @@ -0,0 +1,90 @@ +"""Fixer for 'raise E, V, T' + +raise -> raise +raise E -> raise E +raise E, V -> raise E(V) +raise E, V, T -> raise E(V).with_traceback(T) +raise E, None, T -> raise E.with_traceback(T) + +raise (((E, E'), E''), E'''), V -> raise E(V) +raise "foo", V, T -> warns about string exceptions + + +CAVEATS: +1) "raise E, V" will be incorrectly translated if V is an exception + instance. The correct Python 3 idiom is + + raise E from V + + but since we can't detect instance-hood by syntax alone and since + any client code would have to be changed as well, we don't automate + this. +""" +# Author: Collin Winter + +# Local imports +from .. import pytree +from ..pgen2 import token +from .. import fixer_base +from ..fixer_util import Name, Call, Attr, ArgList, is_tuple + +class FixRaise(fixer_base.BaseFix): + + BM_compatible = True + PATTERN = """ + raise_stmt< 'raise' exc=any [',' val=any [',' tb=any]] > + """ + + def transform(self, node, results): + syms = self.syms + + exc = results["exc"].clone() + if exc.type == token.STRING: + msg = "Python 3 does not support string exceptions" + self.cannot_convert(node, msg) + return + + # Python 2 supports + # raise ((((E1, E2), E3), E4), E5), V + # as a synonym for + # raise E1, V + # Since Python 3 will not support this, we recurse down any tuple + # literals, always taking the first element. + if is_tuple(exc): + while is_tuple(exc): + # exc.children[1:-1] is the unparenthesized tuple + # exc.children[1].children[0] is the first element of the tuple + exc = exc.children[1].children[0].clone() + exc.prefix = " " + + if "val" not in results: + # One-argument raise + new = pytree.Node(syms.raise_stmt, [Name("raise"), exc]) + new.prefix = node.prefix + return new + + val = results["val"].clone() + if is_tuple(val): + args = [c.clone() for c in val.children[1:-1]] + else: + val.prefix = "" + args = [val] + + if "tb" in results: + tb = results["tb"].clone() + tb.prefix = "" + + e = exc + # If there's a traceback and None is passed as the value, then don't + # add a call, since the user probably just wants to add a + # traceback. See issue #9661. 
+            if val.type != token.NAME or val.value != "None":
+                e = Call(exc, args)
+            with_tb = Attr(e, Name('with_traceback')) + [ArgList([tb])]
+            new = pytree.Node(syms.simple_stmt, [Name("raise")] + with_tb)
+            new.prefix = node.prefix
+            return new
+        else:
+            return pytree.Node(syms.raise_stmt,
+                               [Name("raise"), Call(exc, args)],
+                               prefix=node.prefix)
diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_raw_input.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_raw_input.py
new file mode 100644
index 00000000..a51bb694
--- /dev/null
+++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_raw_input.py
@@ -0,0 +1,17 @@
+"""Fixer that changes raw_input(...) into input(...)."""
+# Author: Andre Roberge
+
+# Local imports
+from .. import fixer_base
+from ..fixer_util import Name
+
+class FixRawInput(fixer_base.BaseFix):
+
+    BM_compatible = True
+    PATTERN = """
+              power< name='raw_input' trailer< '(' [any] ')' > any* >
+              """
+
+    def transform(self, node, results):
+        name = results["name"]
+        name.replace(Name("input", prefix=name.prefix))
diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_reduce.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_reduce.py
new file mode 100644
index 00000000..00e5aa1c
--- /dev/null
+++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_reduce.py
@@ -0,0 +1,35 @@
+# Copyright 2008 Armin Ronacher.
+# Licensed to PSF under a Contributor Agreement.
+
+"""Fixer for reduce().
+
+Makes sure reduce() is imported from the functools module if reduce is
+used in that module.
+"""
+
+from lib2to3 import fixer_base
+from lib2to3.fixer_util import touch_import
+
+
+
+class FixReduce(fixer_base.BaseFix):
+
+    BM_compatible = True
+    order = "pre"
+
+    PATTERN = """
+    power< 'reduce'
+        trailer< '('
+            arglist< (
+                (not(argument<any '=' any>) any ','
+                 not(argument<any '=' any>) any) |
+                (not(argument<any '=' any>) any ','
+                 not(argument<any '=' any>) any ','
+                 not(argument<any '=' any>) any)
+            ) >
+        ')' >
+    >
+    """
+
+    def transform(self, node, results):
+        touch_import('functools', 'reduce', node)
diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_reload.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_reload.py
new file mode 100644
index 00000000..b3084113
--- /dev/null
+++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_reload.py
@@ -0,0 +1,36 @@
+"""Fixer for reload().
+
+reload(s) -> importlib.reload(s)"""
+
+# Local imports
+from .. import fixer_base
+from ..fixer_util import ImportAndCall, touch_import
+
+
+class FixReload(fixer_base.BaseFix):
+    BM_compatible = True
+    order = "pre"
+
+    PATTERN = """
+    power< 'reload'
+           trailer< lpar='('
+                    ( not(arglist | argument<any '=' any>) obj=any
+                      | obj=arglist<(not argument<any '=' any>) any ','> )
+                    rpar=')' >
+           after=any*
+    >
+    """
+
+    def transform(self, node, results):
+        if results:
+            # I feel like we should be able to express this logic in the
+            # PATTERN above but I don't know how to do it so...
+            obj = results['obj']
+            if obj:
+                if (obj.type == self.syms.argument and
+                        obj.children[0].value in {'**', '*'}):
+                    return  # Make no change.
+ names = ('importlib', 'reload') + new = ImportAndCall(node, results, names) + touch_import(None, 'importlib', node) + return new diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_renames.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_renames.py new file mode 100644 index 00000000..c0e3705a --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_renames.py @@ -0,0 +1,70 @@ +"""Fix incompatible renames + +Fixes: + * sys.maxint -> sys.maxsize +""" +# Author: Christian Heimes +# based on Collin Winter's fix_import + +# Local imports +from .. import fixer_base +from ..fixer_util import Name, attr_chain + +MAPPING = {"sys": {"maxint" : "maxsize"}, + } +LOOKUP = {} + +def alternates(members): + return "(" + "|".join(map(repr, members)) + ")" + + +def build_pattern(): + #bare = set() + for module, replace in list(MAPPING.items()): + for old_attr, new_attr in list(replace.items()): + LOOKUP[(module, old_attr)] = new_attr + #bare.add(module) + #bare.add(old_attr) + #yield """ + # import_name< 'import' (module=%r + # | dotted_as_names< any* module=%r any* >) > + # """ % (module, module) + yield """ + import_from< 'from' module_name=%r 'import' + ( attr_name=%r | import_as_name< attr_name=%r 'as' any >) > + """ % (module, old_attr, old_attr) + yield """ + power< module_name=%r trailer< '.' attr_name=%r > any* > + """ % (module, old_attr) + #yield """bare_name=%s""" % alternates(bare) + + +class FixRenames(fixer_base.BaseFix): + BM_compatible = True + PATTERN = "|".join(build_pattern()) + + order = "pre" # Pre-order tree traversal + + # Don't match the node if it's within another match + def match(self, node): + match = super(FixRenames, self).match + results = match(node) + if results: + if any(match(obj) for obj in attr_chain(node, "parent")): + return False + return results + return False + + #def start_tree(self, tree, filename): + # super(FixRenames, self).start_tree(tree, filename) + # self.replace = {} + + def transform(self, node, results): + mod_name = results.get("module_name") + attr_name = results.get("attr_name") + #bare_name = results.get("bare_name") + #import_mod = results.get("module") + + if mod_name and attr_name: + new_attr = LOOKUP[(mod_name.value, attr_name.value)] + attr_name.replace(Name(new_attr, prefix=attr_name.prefix)) diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_repr.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_repr.py new file mode 100644 index 00000000..1150bb8b --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_repr.py @@ -0,0 +1,23 @@ +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Fixer that transforms `xyzzy` into repr(xyzzy).""" + +# Local imports +from .. 
import fixer_base +from ..fixer_util import Call, Name, parenthesize + + +class FixRepr(fixer_base.BaseFix): + + BM_compatible = True + PATTERN = """ + atom < '`' expr=any '`' > + """ + + def transform(self, node, results): + expr = results["expr"].clone() + + if expr.type == self.syms.testlist1: + expr = parenthesize(expr) + return Call(Name("repr"), [expr], prefix=node.prefix) diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_set_literal.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_set_literal.py new file mode 100644 index 00000000..762550cf --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_set_literal.py @@ -0,0 +1,53 @@ +""" +Optional fixer to transform set() calls to set literals. +""" + +# Author: Benjamin Peterson + +from lib2to3 import fixer_base, pytree +from lib2to3.fixer_util import token, syms + + + +class FixSetLiteral(fixer_base.BaseFix): + + BM_compatible = True + explicit = True + + PATTERN = """power< 'set' trailer< '(' + (atom=atom< '[' (items=listmaker< any ((',' any)* [',']) > + | + single=any) ']' > + | + atom< '(' items=testlist_gexp< any ((',' any)* [',']) > ')' > + ) + ')' > > + """ + + def transform(self, node, results): + single = results.get("single") + if single: + # Make a fake listmaker + fake = pytree.Node(syms.listmaker, [single.clone()]) + single.replace(fake) + items = fake + else: + items = results["items"] + + # Build the contents of the literal + literal = [pytree.Leaf(token.LBRACE, "{")] + literal.extend(n.clone() for n in items.children) + literal.append(pytree.Leaf(token.RBRACE, "}")) + # Set the prefix of the right brace to that of the ')' or ']' + literal[-1].prefix = items.next_sibling.prefix + maker = pytree.Node(syms.dictsetmaker, literal) + maker.prefix = node.prefix + + # If the original was a one tuple, we need to remove the extra comma. + if len(maker.children) == 4: + n = maker.children[2] + n.remove() + maker.children[-1].prefix = n.prefix + + # Finally, replace the set call with our shiny new literal. + return maker diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_standarderror.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_standarderror.py new file mode 100644 index 00000000..dc742167 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_standarderror.py @@ -0,0 +1,18 @@ +# Copyright 2007 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Fixer for StandardError -> Exception.""" + +# Local imports +from .. import fixer_base +from ..fixer_util import Name + + +class FixStandarderror(fixer_base.BaseFix): + BM_compatible = True + PATTERN = """ + 'StandardError' + """ + + def transform(self, node, results): + return Name("Exception", prefix=node.prefix) diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_sys_exc.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_sys_exc.py new file mode 100644 index 00000000..f6039690 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_sys_exc.py @@ -0,0 +1,30 @@ +"""Fixer for sys.exc_{type, value, traceback} + +sys.exc_type -> sys.exc_info()[0] +sys.exc_value -> sys.exc_info()[1] +sys.exc_traceback -> sys.exc_info()[2] +""" + +# By Jeff Balogh and Benjamin Peterson + +# Local imports +from .. 
import fixer_base +from ..fixer_util import Attr, Call, Name, Number, Subscript, Node, syms + +class FixSysExc(fixer_base.BaseFix): + # This order matches the ordering of sys.exc_info(). + exc_info = ["exc_type", "exc_value", "exc_traceback"] + BM_compatible = True + PATTERN = """ + power< 'sys' trailer< dot='.' attribute=(%s) > > + """ % '|'.join("'%s'" % e for e in exc_info) + + def transform(self, node, results): + sys_attr = results["attribute"][0] + index = Number(self.exc_info.index(sys_attr.value)) + + call = Call(Name("exc_info"), prefix=sys_attr.prefix) + attr = Attr(Name("sys"), call) + attr[1].children[0].prefix = results["dot"].prefix + attr.append(Subscript(index)) + return Node(syms.power, attr, prefix=node.prefix) diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_throw.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_throw.py new file mode 100644 index 00000000..aac29169 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_throw.py @@ -0,0 +1,56 @@ +"""Fixer for generator.throw(E, V, T). + +g.throw(E) -> g.throw(E) +g.throw(E, V) -> g.throw(E(V)) +g.throw(E, V, T) -> g.throw(E(V).with_traceback(T)) + +g.throw("foo"[, V[, T]]) will warn about string exceptions.""" +# Author: Collin Winter + +# Local imports +from .. import pytree +from ..pgen2 import token +from .. import fixer_base +from ..fixer_util import Name, Call, ArgList, Attr, is_tuple + +class FixThrow(fixer_base.BaseFix): + BM_compatible = True + PATTERN = """ + power< any trailer< '.' 'throw' > + trailer< '(' args=arglist< exc=any ',' val=any [',' tb=any] > ')' > + > + | + power< any trailer< '.' 'throw' > trailer< '(' exc=any ')' > > + """ + + def transform(self, node, results): + syms = self.syms + + exc = results["exc"].clone() + if exc.type is token.STRING: + self.cannot_convert(node, "Python 3 does not support string exceptions") + return + + # Leave "g.throw(E)" alone + val = results.get("val") + if val is None: + return + + val = val.clone() + if is_tuple(val): + args = [c.clone() for c in val.children[1:-1]] + else: + val.prefix = "" + args = [val] + + throw_args = results["args"] + + if "tb" in results: + tb = results["tb"].clone() + tb.prefix = "" + + e = Call(exc, args) + with_tb = Attr(e, Name('with_traceback')) + [ArgList([tb])] + throw_args.replace(pytree.Node(syms.power, with_tb)) + else: + throw_args.replace(Call(exc, args)) diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_tuple_params.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_tuple_params.py new file mode 100644 index 00000000..cad755ff --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_tuple_params.py @@ -0,0 +1,175 @@ +"""Fixer for function definitions with tuple parameters. + +def func(((a, b), c), d): + ... + + -> + +def func(x, d): + ((a, b), c) = x + ... + +It will also support lambdas: + + lambda (x, y): x + y -> lambda t: t[0] + t[1] + + # The parens are a syntax error in Python 3 + lambda (x): x + y -> lambda x: x + y +""" +# Author: Collin Winter + +# Local imports +from .. import pytree +from ..pgen2 import token +from .. 
import fixer_base +from ..fixer_util import Assign, Name, Newline, Number, Subscript, syms + +def is_docstring(stmt): + return isinstance(stmt, pytree.Node) and \ + stmt.children[0].type == token.STRING + +class FixTupleParams(fixer_base.BaseFix): + run_order = 4 #use a lower order since lambda is part of other + #patterns + BM_compatible = True + + PATTERN = """ + funcdef< 'def' any parameters< '(' args=any ')' > + ['->' any] ':' suite=any+ > + | + lambda= + lambdef< 'lambda' args=vfpdef< '(' inner=any ')' > + ':' body=any + > + """ + + def transform(self, node, results): + if "lambda" in results: + return self.transform_lambda(node, results) + + new_lines = [] + suite = results["suite"] + args = results["args"] + # This crap is so "def foo(...): x = 5; y = 7" is handled correctly. + # TODO(cwinter): suite-cleanup + if suite[0].children[1].type == token.INDENT: + start = 2 + indent = suite[0].children[1].value + end = Newline() + else: + start = 0 + indent = "; " + end = pytree.Leaf(token.INDENT, "") + + # We need access to self for new_name(), and making this a method + # doesn't feel right. Closing over self and new_lines makes the + # code below cleaner. + def handle_tuple(tuple_arg, add_prefix=False): + n = Name(self.new_name()) + arg = tuple_arg.clone() + arg.prefix = "" + stmt = Assign(arg, n.clone()) + if add_prefix: + n.prefix = " " + tuple_arg.replace(n) + new_lines.append(pytree.Node(syms.simple_stmt, + [stmt, end.clone()])) + + if args.type == syms.tfpdef: + handle_tuple(args) + elif args.type == syms.typedargslist: + for i, arg in enumerate(args.children): + if arg.type == syms.tfpdef: + # Without add_prefix, the emitted code is correct, + # just ugly. + handle_tuple(arg, add_prefix=(i > 0)) + + if not new_lines: + return + + # This isn't strictly necessary, but it plays nicely with other fixers. + # TODO(cwinter) get rid of this when children becomes a smart list + for line in new_lines: + line.parent = suite[0] + + # TODO(cwinter) suite-cleanup + after = start + if start == 0: + new_lines[0].prefix = " " + elif is_docstring(suite[0].children[start]): + new_lines[0].prefix = indent + after = start + 1 + + for line in new_lines: + line.parent = suite[0] + suite[0].children[after:after] = new_lines + for i in range(after+1, after+len(new_lines)+1): + suite[0].children[i].prefix = indent + suite[0].changed() + + def transform_lambda(self, node, results): + args = results["args"] + body = results["body"] + inner = simplify_args(results["inner"]) + + # Replace lambda ((((x)))): x with lambda x: x + if inner.type == token.NAME: + inner = inner.clone() + inner.prefix = " " + args.replace(inner) + return + + params = find_params(args) + to_index = map_to_index(params) + tup_name = self.new_name(tuple_name(params)) + + new_param = Name(tup_name, prefix=" ") + args.replace(new_param.clone()) + for n in body.post_order(): + if n.type == token.NAME and n.value in to_index: + subscripts = [c.clone() for c in to_index[n.value]] + new = pytree.Node(syms.power, + [new_param.clone()] + subscripts) + new.prefix = n.prefix + n.replace(new) + + +### Helper functions for transform_lambda() + +def simplify_args(node): + if node.type in (syms.vfplist, token.NAME): + return node + elif node.type == syms.vfpdef: + # These look like vfpdef< '(' x ')' > where x is NAME + # or another vfpdef instance (leading to recursion). 
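
For reference, the lambda path above produces rewrites like the following (illustrative only, via the stdlib lib2to3 API; the fixer derives the synthetic parameter name from the tuple elements, see tuple_name below):

    from lib2to3.refactor import RefactoringTool

    tool = RefactoringTool(["lib2to3.fixes.fix_tuple_params"])
    print(tool.refactor_string("f = lambda (x, y): x + y\n", "<example>"), end="")
    # -> f = lambda x_y: x_y[0] + x_y[1]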
+ while node.type == syms.vfpdef: + node = node.children[1] + return node + raise RuntimeError("Received unexpected node %s" % node) + +def find_params(node): + if node.type == syms.vfpdef: + return find_params(node.children[1]) + elif node.type == token.NAME: + return node.value + return [find_params(c) for c in node.children if c.type != token.COMMA] + +def map_to_index(param_list, prefix=[], d=None): + if d is None: + d = {} + for i, obj in enumerate(param_list): + trailer = [Subscript(Number(str(i)))] + if isinstance(obj, list): + map_to_index(obj, trailer, d=d) + else: + d[obj] = prefix + trailer + return d + +def tuple_name(param_list): + l = [] + for obj in param_list: + if isinstance(obj, list): + l.append(tuple_name(obj)) + else: + l.append(obj) + return "_".join(l) diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_types.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_types.py new file mode 100644 index 00000000..67bf51f2 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_types.py @@ -0,0 +1,61 @@ +# Copyright 2007 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Fixer for removing uses of the types module. + +These work for only the known names in the types module. The forms above +can include types. or not. ie, It is assumed the module is imported either as: + + import types + from types import ... # either * or specific types + +The import statements are not modified. + +There should be another fixer that handles at least the following constants: + + type([]) -> list + type(()) -> tuple + type('') -> str + +""" + +# Local imports +from .. import fixer_base +from ..fixer_util import Name + +_TYPE_MAPPING = { + 'BooleanType' : 'bool', + 'BufferType' : 'memoryview', + 'ClassType' : 'type', + 'ComplexType' : 'complex', + 'DictType': 'dict', + 'DictionaryType' : 'dict', + 'EllipsisType' : 'type(Ellipsis)', + #'FileType' : 'io.IOBase', + 'FloatType': 'float', + 'IntType': 'int', + 'ListType': 'list', + 'LongType': 'int', + 'ObjectType' : 'object', + 'NoneType': 'type(None)', + 'NotImplementedType' : 'type(NotImplemented)', + 'SliceType' : 'slice', + 'StringType': 'bytes', # XXX ? + 'StringTypes' : '(str,)', # XXX ? + 'TupleType': 'tuple', + 'TypeType' : 'type', + 'UnicodeType': 'str', + 'XRangeType' : 'range', + } + +_pats = ["power< 'types' trailer< '.' name='%s' > >" % t for t in _TYPE_MAPPING] + +class FixTypes(fixer_base.BaseFix): + BM_compatible = True + PATTERN = '|'.join(_pats) + + def transform(self, node, results): + new_value = _TYPE_MAPPING.get(results["name"].value) + if new_value: + return Name(new_value, prefix=node.prefix) + return None diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_unicode.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_unicode.py new file mode 100644 index 00000000..c7982c2b --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_unicode.py @@ -0,0 +1,42 @@ +r"""Fixer for unicode. + +* Changes unicode to str and unichr to chr. + +* If "...\u..." is not unicode literal change it into "...\\u...". + +* Change u"..." into "...". + +""" + +from ..pgen2 import token +from .. 
import fixer_base + +_mapping = {"unichr" : "chr", "unicode" : "str"} + +class FixUnicode(fixer_base.BaseFix): + BM_compatible = True + PATTERN = "STRING | 'unicode' | 'unichr'" + + def start_tree(self, tree, filename): + super(FixUnicode, self).start_tree(tree, filename) + self.unicode_literals = 'unicode_literals' in tree.future_features + + def transform(self, node, results): + if node.type == token.NAME: + new = node.clone() + new.value = _mapping[node.value] + return new + elif node.type == token.STRING: + val = node.value + if not self.unicode_literals and val[0] in '\'"' and '\\' in val: + val = r'\\'.join([ + v.replace('\\u', r'\\u').replace('\\U', r'\\U') + for v in val.split(r'\\') + ]) + if val[0] in 'uU': + val = val[1:] + if val == node.value: + return node + new = node.clone() + new.value = val + return new diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_urllib.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_urllib.py new file mode 100644 index 00000000..ab892bc5 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_urllib.py @@ -0,0 +1,196 @@ +"""Fix changes imports of urllib which are now incompatible. + This is rather similar to fix_imports, but because of the more + complex nature of the fixing for urllib, it has its own fixer. +""" +# Author: Nick Edds + +# Local imports +from lib2to3.fixes.fix_imports import alternates, FixImports +from lib2to3.fixer_util import (Name, Comma, FromImport, Newline, + find_indentation, Node, syms) + +MAPPING = {"urllib": [ + ("urllib.request", + ["URLopener", "FancyURLopener", "urlretrieve", + "_urlopener", "urlopen", "urlcleanup", + "pathname2url", "url2pathname", "getproxies"]), + ("urllib.parse", + ["quote", "quote_plus", "unquote", "unquote_plus", + "urlencode", "splitattr", "splithost", "splitnport", + "splitpasswd", "splitport", "splitquery", "splittag", + "splittype", "splituser", "splitvalue", ]), + ("urllib.error", + ["ContentTooShortError"])], + "urllib2" : [ + ("urllib.request", + ["urlopen", "install_opener", "build_opener", + "Request", "OpenerDirector", "BaseHandler", + "HTTPDefaultErrorHandler", "HTTPRedirectHandler", + "HTTPCookieProcessor", "ProxyHandler", + "HTTPPasswordMgr", + "HTTPPasswordMgrWithDefaultRealm", + "AbstractBasicAuthHandler", + "HTTPBasicAuthHandler", "ProxyBasicAuthHandler", + "AbstractDigestAuthHandler", + "HTTPDigestAuthHandler", "ProxyDigestAuthHandler", + "HTTPHandler", "HTTPSHandler", "FileHandler", + "FTPHandler", "CacheFTPHandler", + "UnknownHandler"]), + ("urllib.error", + ["URLError", "HTTPError"]), + ] +} + +# Duplicate the url parsing functions for urllib2. +MAPPING["urllib2"].append(MAPPING["urllib"][1]) + + +def build_pattern(): + bare = set() + for old_module, changes in MAPPING.items(): + for change in changes: + new_module, members = change + members = alternates(members) + yield """import_name< 'import' (module=%r + | dotted_as_names< any* module=%r any* >) > + """ % (old_module, old_module) + yield """import_from< 'from' mod_member=%r 'import' + ( member=%s | import_as_name< member=%s 'as' any > | + import_as_names< members=any* >) > + """ % (old_module, members, members) + yield """import_from< 'from' module_star=%r 'import' star='*' > + """ % old_module + yield """import_name< 'import' + dotted_as_name< module_as=%r 'as' any > > + """ % old_module + # bare_with_attr has a special significance for FixImports.match(). + yield """power< bare_with_attr=%r trailer< '.' 
member=%s > any* > + """ % (old_module, members) + + +class FixUrllib(FixImports): + + def build_pattern(self): + return "|".join(build_pattern()) + + def transform_import(self, node, results): + """Transform for the basic import case. Replaces the old + import name with a comma separated list of its + replacements. + """ + import_mod = results.get("module") + pref = import_mod.prefix + + names = [] + + # create a Node list of the replacement modules + for name in MAPPING[import_mod.value][:-1]: + names.extend([Name(name[0], prefix=pref), Comma()]) + names.append(Name(MAPPING[import_mod.value][-1][0], prefix=pref)) + import_mod.replace(names) + + def transform_member(self, node, results): + """Transform for imports of specific module elements. Replaces + the module to be imported from with the appropriate new + module. + """ + mod_member = results.get("mod_member") + pref = mod_member.prefix + member = results.get("member") + + # Simple case with only a single member being imported + if member: + # this may be a list of length one, or just a node + if isinstance(member, list): + member = member[0] + new_name = None + for change in MAPPING[mod_member.value]: + if member.value in change[1]: + new_name = change[0] + break + if new_name: + mod_member.replace(Name(new_name, prefix=pref)) + else: + self.cannot_convert(node, "This is an invalid module element") + + # Multiple members being imported + else: + # a dictionary for replacements, order matters + modules = [] + mod_dict = {} + members = results["members"] + for member in members: + # we only care about the actual members + if member.type == syms.import_as_name: + as_name = member.children[2].value + member_name = member.children[0].value + else: + member_name = member.value + as_name = None + if member_name != ",": + for change in MAPPING[mod_member.value]: + if member_name in change[1]: + if change[0] not in mod_dict: + modules.append(change[0]) + mod_dict.setdefault(change[0], []).append(member) + + new_nodes = [] + indentation = find_indentation(node) + first = True + def handle_name(name, prefix): + if name.type == syms.import_as_name: + kids = [Name(name.children[0].value, prefix=prefix), + name.children[1].clone(), + name.children[2].clone()] + return [Node(syms.import_as_name, kids)] + return [Name(name.value, prefix=prefix)] + for module in modules: + elts = mod_dict[module] + names = [] + for elt in elts[:-1]: + names.extend(handle_name(elt, pref)) + names.append(Comma()) + names.extend(handle_name(elts[-1], pref)) + new = FromImport(module, names) + if not first or node.parent.prefix.endswith(indentation): + new.prefix = indentation + new_nodes.append(new) + first = False + if new_nodes: + nodes = [] + for new_node in new_nodes[:-1]: + nodes.extend([new_node, Newline()]) + nodes.append(new_nodes[-1]) + node.replace(nodes) + else: + self.cannot_convert(node, "All module elements are invalid") + + def transform_dot(self, node, results): + """Transform for calls to module members in code.""" + module_dot = results.get("bare_with_attr") + member = results.get("member") + new_name = None + if isinstance(member, list): + member = member[0] + for change in MAPPING[module_dot.value]: + if member.value in change[1]: + new_name = change[0] + break + if new_name: + module_dot.replace(Name(new_name, + prefix=module_dot.prefix)) + else: + self.cannot_convert(node, "This is an invalid module element") + + def transform(self, node, results): + if results.get("module"): + self.transform_import(node, results) + elif results.get("mod_member"): 
+ self.transform_member(node, results) + elif results.get("bare_with_attr"): + self.transform_dot(node, results) + # Renaming and star imports are not supported for these modules. + elif results.get("module_star"): + self.cannot_convert(node, "Cannot handle star imports.") + elif results.get("module_as"): + self.cannot_convert(node, "This module is now multiple modules") diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_ws_comma.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_ws_comma.py new file mode 100644 index 00000000..a54a376c --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_ws_comma.py @@ -0,0 +1,39 @@ +"""Fixer that changes 'a ,b' into 'a, b'. + +This also changes '{a :b}' into '{a: b}', but does not touch other +uses of colons. It does not touch other uses of whitespace. + +""" + +from .. import pytree +from ..pgen2 import token +from .. import fixer_base + +class FixWsComma(fixer_base.BaseFix): + + explicit = True # The user must ask for this fixers + + PATTERN = """ + any<(not(',') any)+ ',' ((not(',') any)+ ',')* [not(',') any]> + """ + + COMMA = pytree.Leaf(token.COMMA, ",") + COLON = pytree.Leaf(token.COLON, ":") + SEPS = (COMMA, COLON) + + def transform(self, node, results): + new = node.clone() + comma = False + for child in new.children: + if child in self.SEPS: + prefix = child.prefix + if prefix.isspace() and "\n" not in prefix: + child.prefix = "" + comma = True + else: + if comma: + prefix = child.prefix + if not prefix: + child.prefix = " " + comma = False + return new diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_xrange.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_xrange.py new file mode 100644 index 00000000..1e491e16 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_xrange.py @@ -0,0 +1,73 @@ +# Copyright 2007 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Fixer that changes xrange(...) into range(...).""" + +# Local imports +from .. import fixer_base +from ..fixer_util import Name, Call, consuming_calls +from .. import patcomp + + +class FixXrange(fixer_base.BaseFix): + BM_compatible = True + PATTERN = """ + power< + (name='range'|name='xrange') trailer< '(' args=any ')' > + rest=any* > + """ + + def start_tree(self, tree, filename): + super(FixXrange, self).start_tree(tree, filename) + self.transformed_xranges = set() + + def finish_tree(self, tree, filename): + self.transformed_xranges = None + + def transform(self, node, results): + name = results["name"] + if name.value == "xrange": + return self.transform_xrange(node, results) + elif name.value == "range": + return self.transform_range(node, results) + else: + raise ValueError(repr(name)) + + def transform_xrange(self, node, results): + name = results["name"] + name.replace(Name("range", prefix=name.prefix)) + # This prevents the new range call from being wrapped in a list later. + self.transformed_xranges.add(id(node)) + + def transform_range(self, node, results): + if (id(node) not in self.transformed_xranges and + not self.in_special_context(node)): + range_call = Call(Name("range"), [results["args"].clone()]) + # Encase the range call in list(). + list_call = Call(Name("list"), [range_call], + prefix=node.prefix) + # Put things that were after the range() call after the list call. 
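
A before/after sketch of the two branches above (my example via the stdlib lib2to3 API, not part of the vendored sources):

    from lib2to3.refactor import RefactoringTool

    tool = RefactoringTool(["lib2to3.fixes.fix_xrange"])
    print(tool.refactor_string("xs = xrange(3)\n", "<example>"), end="")
    # -> xs = range(3)
    print(tool.refactor_string("r = range(3)\n", "<example>"), end="")
    # -> r = list(range(3)); bare range() is wrapped so it still yields a
    #    list, unless it appears in a consuming or special context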
+ for n in results["rest"]: + list_call.append_child(n) + return list_call + + P1 = "power< func=NAME trailer< '(' node=any ')' > any* >" + p1 = patcomp.compile_pattern(P1) + + P2 = """for_stmt< 'for' any 'in' node=any ':' any* > + | comp_for< 'for' any 'in' node=any any* > + | comparison< any 'in' node=any any*> + """ + p2 = patcomp.compile_pattern(P2) + + def in_special_context(self, node): + if node.parent is None: + return False + results = {} + if (node.parent.parent is not None and + self.p1.match(node.parent.parent, results) and + results["node"] is node): + # list(d.keys()) -> list(d.keys()), etc. + return results["func"].value in consuming_calls + # for ... in d.iterkeys() -> for ... in d.keys(), etc. + return self.p2.match(node.parent, results) and results["node"] is node diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_xreadlines.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_xreadlines.py new file mode 100644 index 00000000..3e3f71ab --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_xreadlines.py @@ -0,0 +1,25 @@ +"""Fix "for x in f.xreadlines()" -> "for x in f". + +This fixer will also convert g(f.xreadlines) into g(f.__iter__).""" +# Author: Collin Winter + +# Local imports +from .. import fixer_base +from ..fixer_util import Name + + +class FixXreadlines(fixer_base.BaseFix): + BM_compatible = True + PATTERN = """ + power< call=any+ trailer< '.' 'xreadlines' > trailer< '(' ')' > > + | + power< any+ trailer< '.' no_call='xreadlines' > > + """ + + def transform(self, node, results): + no_call = results.get("no_call") + + if no_call: + no_call.replace(Name("__iter__", prefix=no_call.prefix)) + else: + node.replace([x.clone() for x in results["call"]]) diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_zip.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_zip.py new file mode 100644 index 00000000..52c28df6 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/fixes/fix_zip.py @@ -0,0 +1,46 @@ +""" +Fixer that changes zip(seq0, seq1, ...) into list(zip(seq0, seq1, ...) +unless there exists a 'from future_builtins import zip' statement in the +top-level namespace. + +We avoid the transformation if the zip() call is directly contained in +iter(<>), list(<>), tuple(<>), sorted(<>), ...join(<>), or for V in <>:. +""" + +# Local imports +from .. 
import fixer_base +from ..pytree import Node +from ..pygram import python_symbols as syms +from ..fixer_util import Name, ArgList, in_special_context + + +class FixZip(fixer_base.ConditionalFix): + + BM_compatible = True + PATTERN = """ + power< 'zip' args=trailer< '(' [any] ')' > [trailers=trailer*] + > + """ + + skip_on = "future_builtins.zip" + + def transform(self, node, results): + if self.should_skip(node): + return + + if in_special_context(node): + return None + + args = results['args'].clone() + args.prefix = "" + + trailers = [] + if 'trailers' in results: + trailers = [n.clone() for n in results['trailers']] + for n in trailers: + n.prefix = "" + + new = Node(syms.power, [Name("zip"), args], prefix="") + new = Node(syms.power, [Name("list"), ArgList([new])] + trailers) + new.prefix = node.prefix + return new diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/main.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/main.py new file mode 100644 index 00000000..f2849fd6 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/main.py @@ -0,0 +1,273 @@ +""" +Main program for 2to3. +""" + +from __future__ import with_statement, print_function + +import sys +import os +import difflib +import logging +import shutil +import optparse + +from . import refactor + + +def diff_texts(a, b, filename): + """Return a unified diff of two strings.""" + a = a.splitlines() + b = b.splitlines() + return difflib.unified_diff(a, b, filename, filename, + "(original)", "(refactored)", + lineterm="") + + +class StdoutRefactoringTool(refactor.MultiprocessRefactoringTool): + """ + A refactoring tool that can avoid overwriting its input files. + Prints output to stdout. + + Output files can optionally be written to a different directory and or + have an extra file suffix appended to their name for use in situations + where you do not want to replace the input files. + """ + + def __init__(self, fixers, options, explicit, nobackups, show_diffs, + input_base_dir='', output_dir='', append_suffix=''): + """ + Args: + fixers: A list of fixers to import. + options: A dict with RefactoringTool configuration. + explicit: A list of fixers to run even if they are explicit. + nobackups: If true no backup '.bak' files will be created for those + files that are being refactored. + show_diffs: Should diffs of the refactoring be printed to stdout? + input_base_dir: The base directory for all input files. This class + will strip this path prefix off of filenames before substituting + it with output_dir. Only meaningful if output_dir is supplied. + All files processed by refactor() must start with this path. + output_dir: If supplied, all converted files will be written into + this directory tree instead of input_base_dir. + append_suffix: If supplied, all files output by this tool will have + this appended to their filename. Useful for changing .py to + .py3 for example by passing append_suffix='3'. 
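
A hedged construction sketch for the class documented here (paths such as "src/" and "build/" are hypothetical placeholders):

    from lib2to3 import refactor
    from lib2to3.main import StdoutRefactoringTool

    fixers = sorted(refactor.get_fixers_from_package("lib2to3.fixes"))
    tool = StdoutRefactoringTool(
        fixers, options={}, explicit=[], nobackups=True, show_diffs=False,
        input_base_dir="src/", output_dir="build/", append_suffix="3")
    # Converted copies of files under src/ land under build/ as *.py3.
    tool.refactor(["src/legacy.py"], write=True)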
+ """ + self.nobackups = nobackups + self.show_diffs = show_diffs + if input_base_dir and not input_base_dir.endswith(os.sep): + input_base_dir += os.sep + self._input_base_dir = input_base_dir + self._output_dir = output_dir + self._append_suffix = append_suffix + super(StdoutRefactoringTool, self).__init__(fixers, options, explicit) + + def log_error(self, msg, *args, **kwargs): + self.errors.append((msg, args, kwargs)) + self.logger.error(msg, *args, **kwargs) + + def write_file(self, new_text, filename, old_text, encoding): + orig_filename = filename + if self._output_dir: + if filename.startswith(self._input_base_dir): + filename = os.path.join(self._output_dir, + filename[len(self._input_base_dir):]) + else: + raise ValueError('filename %s does not start with the ' + 'input_base_dir %s' % ( + filename, self._input_base_dir)) + if self._append_suffix: + filename += self._append_suffix + if orig_filename != filename: + output_dir = os.path.dirname(filename) + if not os.path.isdir(output_dir) and output_dir: + os.makedirs(output_dir) + self.log_message('Writing converted %s to %s.', orig_filename, + filename) + if not self.nobackups: + # Make backup + backup = filename + ".bak" + if os.path.lexists(backup): + try: + os.remove(backup) + except OSError: + self.log_message("Can't remove backup %s", backup) + try: + os.rename(filename, backup) + except OSError: + self.log_message("Can't rename %s to %s", filename, backup) + # Actually write the new file + write = super(StdoutRefactoringTool, self).write_file + write(new_text, filename, old_text, encoding) + if not self.nobackups: + shutil.copymode(backup, filename) + if orig_filename != filename: + # Preserve the file mode in the new output directory. + shutil.copymode(orig_filename, filename) + + def print_output(self, old, new, filename, equal): + if equal: + self.log_message("No changes to %s", filename) + else: + self.log_message("Refactored %s", filename) + if self.show_diffs: + diff_lines = diff_texts(old, new, filename) + try: + if self.output_lock is not None: + with self.output_lock: + for line in diff_lines: + print(line) + sys.stdout.flush() + else: + for line in diff_lines: + print(line) + except UnicodeEncodeError: + warn("couldn't encode %s's diff for your terminal" % + (filename,)) + return + +def warn(msg): + print("WARNING: %s" % (msg,), file=sys.stderr) + + +def main(fixer_pkg, args=None): + """Main program. + + Args: + fixer_pkg: the name of a package where the fixers are located. + args: optional; a list of command line arguments. If omitted, + sys.argv[1:] is used. + + Returns a suggested exit status (0, 1, 2). 
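
In practice the 2to3 console script is just a thin wrapper around this function (the "src/" path is a hypothetical target):

    import sys
    from lib2to3.main import main

    sys.exit(main("lib2to3.fixes", ["-w", "src/"]))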
+ """ + # Set up option parser + parser = optparse.OptionParser(usage="2to3 [options] file|dir ...") + parser.add_option("-d", "--doctests_only", action="store_true", + help="Fix up doctests only") + parser.add_option("-f", "--fix", action="append", default=[], + help="Each FIX specifies a transformation; default: all") + parser.add_option("-j", "--processes", action="store", default=1, + type="int", help="Run 2to3 concurrently") + parser.add_option("-x", "--nofix", action="append", default=[], + help="Prevent a transformation from being run") + parser.add_option("-l", "--list-fixes", action="store_true", + help="List available transformations") + parser.add_option("-p", "--print-function", action="store_true", + help="Modify the grammar so that print() is a function") + parser.add_option("-e", "--exec-function", action="store_true", + help="Modify the grammar so that exec() is a function") + parser.add_option("-v", "--verbose", action="store_true", + help="More verbose logging") + parser.add_option("--no-diffs", action="store_true", + help="Don't show diffs of the refactoring") + parser.add_option("-w", "--write", action="store_true", + help="Write back modified files") + parser.add_option("-n", "--nobackups", action="store_true", default=False, + help="Don't write backups for modified files") + parser.add_option("-o", "--output-dir", action="store", type="str", + default="", help="Put output files in this directory " + "instead of overwriting the input files. Requires -n.") + parser.add_option("-W", "--write-unchanged-files", action="store_true", + help="Also write files even if no changes were required" + " (useful with --output-dir); implies -w.") + parser.add_option("--add-suffix", action="store", type="str", default="", + help="Append this string to all output filenames." + " Requires -n if non-empty. " + "ex: --add-suffix='3' will generate .py3 files.") + + # Parse command line arguments + refactor_stdin = False + flags = {} + options, args = parser.parse_args(args) + if options.write_unchanged_files: + flags["write_unchanged_files"] = True + if not options.write: + warn("--write-unchanged-files/-W implies -w.") + options.write = True + # If we allowed these, the original files would be renamed to backup names + # but not replaced. 
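
For example, the flag combination below converts files into a separate tree, which is exactly what the checks that follow enforce (-o and --add-suffix both demand -n; "build" and "src/" are placeholder paths):

    from lib2to3.main import main

    main("lib2to3.fixes",
         ["-w", "-n", "-o", "build", "--add-suffix=3", "src/"])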
+ if options.output_dir and not options.nobackups: + parser.error("Can't use --output-dir/-o without -n.") + if options.add_suffix and not options.nobackups: + parser.error("Can't use --add-suffix without -n.") + + if not options.write and options.no_diffs: + warn("not writing files and not printing diffs; that's not very useful") + if not options.write and options.nobackups: + parser.error("Can't use -n without -w") + if options.list_fixes: + print("Available transformations for the -f/--fix option:") + for fixname in refactor.get_all_fix_names(fixer_pkg): + print(fixname) + if not args: + return 0 + if not args: + print("At least one file or directory argument required.", file=sys.stderr) + print("Use --help to show usage.", file=sys.stderr) + return 2 + if "-" in args: + refactor_stdin = True + if options.write: + print("Can't write to stdin.", file=sys.stderr) + return 2 + if options.print_function: + flags["print_function"] = True + + if options.exec_function: + flags["exec_function"] = True + + # Set up logging handler + level = logging.DEBUG if options.verbose else logging.INFO + logging.basicConfig(format='%(name)s: %(message)s', level=level) + logger = logging.getLogger('lib2to3.main') + + # Initialize the refactoring tool + avail_fixes = set(refactor.get_fixers_from_package(fixer_pkg)) + unwanted_fixes = set(fixer_pkg + ".fix_" + fix for fix in options.nofix) + explicit = set() + if options.fix: + all_present = False + for fix in options.fix: + if fix == "all": + all_present = True + else: + explicit.add(fixer_pkg + ".fix_" + fix) + requested = avail_fixes.union(explicit) if all_present else explicit + else: + requested = avail_fixes.union(explicit) + fixer_names = requested.difference(unwanted_fixes) + input_base_dir = os.path.commonprefix(args) + if (input_base_dir and not input_base_dir.endswith(os.sep) + and not os.path.isdir(input_base_dir)): + # One or more similar names were passed, their directory is the base. + # os.path.commonprefix() is ignorant of path elements, this corrects + # for that weird API. + input_base_dir = os.path.dirname(input_base_dir) + if options.output_dir: + input_base_dir = input_base_dir.rstrip(os.sep) + logger.info('Output in %r will mirror the input directory %r layout.', + options.output_dir, input_base_dir) + rt = StdoutRefactoringTool( + sorted(fixer_names), flags, sorted(explicit), + options.nobackups, not options.no_diffs, + input_base_dir=input_base_dir, + output_dir=options.output_dir, + append_suffix=options.add_suffix) + + # Refactor all files and directories passed as arguments + if not rt.errors: + if refactor_stdin: + rt.refactor_stdin() + else: + try: + rt.refactor(args, options.write, options.doctests_only, + options.processes) + except refactor.MultiprocessingUnsupported: + assert options.processes > 1 + print("Sorry, -j isn't supported on this platform.", + file=sys.stderr) + return 1 + rt.summarize() + + # Return error status (0 if rt.errors is zero) + return int(bool(rt.errors)) diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/patcomp.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/patcomp.py new file mode 100644 index 00000000..f57f4954 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/patcomp.py @@ -0,0 +1,204 @@ +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Pattern compiler. + +The grammar is taken from PatternGrammar.txt. 
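
A minimal sketch of what this module enables, assuming the stdlib lib2to3 is importable (the pattern string follows the same grammar the fixers above use):

    from lib2to3 import patcomp, pygram, pytree
    from lib2to3.pgen2 import driver

    d = driver.Driver(pygram.python_grammar, convert=pytree.convert)
    tree = d.parse_string("len(x)\n")
    pat = patcomp.compile_pattern("power< 'len' trailer< '(' args=any ')' > >")
    for node in tree.pre_order():
        results = {}
        if pat.match(node, results):
            print("matched, args =", results["args"])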
+ +The compiler compiles a pattern to a pytree.*Pattern instance. +""" + +__author__ = "Guido van Rossum " + +# Python imports +import io + +# Fairly local imports +from .pgen2 import driver, literals, token, tokenize, parse, grammar + +# Really local imports +from . import pytree +from . import pygram + + +class PatternSyntaxError(Exception): + pass + + +def tokenize_wrapper(input): + """Tokenizes a string suppressing significant whitespace.""" + skip = {token.NEWLINE, token.INDENT, token.DEDENT} + tokens = tokenize.generate_tokens(io.StringIO(input).readline) + for quintuple in tokens: + type, value, start, end, line_text = quintuple + if type not in skip: + yield quintuple + + +class PatternCompiler(object): + + def __init__(self, grammar_file=None): + """Initializer. + + Takes an optional alternative filename for the pattern grammar. + """ + if grammar_file is None: + self.grammar = pygram.pattern_grammar + self.syms = pygram.pattern_symbols + else: + self.grammar = driver.load_grammar(grammar_file) + self.syms = pygram.Symbols(self.grammar) + self.pygrammar = pygram.python_grammar + self.pysyms = pygram.python_symbols + self.driver = driver.Driver(self.grammar, convert=pattern_convert) + + def compile_pattern(self, input, debug=False, with_tree=False): + """Compiles a pattern string to a nested pytree.*Pattern object.""" + tokens = tokenize_wrapper(input) + try: + root = self.driver.parse_tokens(tokens, debug=debug) + except parse.ParseError as e: + raise PatternSyntaxError(str(e)) from None + if with_tree: + return self.compile_node(root), root + else: + return self.compile_node(root) + + def compile_node(self, node): + """Compiles a node, recursively. + + This is one big switch on the node type. + """ + # XXX Optimize certain Wildcard-containing-Wildcard patterns + # that can be merged + if node.type == self.syms.Matcher: + node = node.children[0] # Avoid unneeded recursion + + if node.type == self.syms.Alternatives: + # Skip the odd children since they are just '|' tokens + alts = [self.compile_node(ch) for ch in node.children[::2]] + if len(alts) == 1: + return alts[0] + p = pytree.WildcardPattern([[a] for a in alts], min=1, max=1) + return p.optimize() + + if node.type == self.syms.Alternative: + units = [self.compile_node(ch) for ch in node.children] + if len(units) == 1: + return units[0] + p = pytree.WildcardPattern([units], min=1, max=1) + return p.optimize() + + if node.type == self.syms.NegatedUnit: + pattern = self.compile_basic(node.children[1:]) + p = pytree.NegatedPattern(pattern) + return p.optimize() + + assert node.type == self.syms.Unit + + name = None + nodes = node.children + if len(nodes) >= 3 and nodes[1].type == token.EQUAL: + name = nodes[0].value + nodes = nodes[2:] + repeat = None + if len(nodes) >= 2 and nodes[-1].type == self.syms.Repeater: + repeat = nodes[-1] + nodes = nodes[:-1] + + # Now we've reduced it to: STRING | NAME [Details] | (...) | [...] 
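
The repeater handling that follows maps pattern suffixes onto WildcardPattern bounds; a small probe of the resulting pattern types (illustrative only):

    from lib2to3 import patcomp

    # '*' -> min=0, max=HUGE; '+' -> min=1; '{n}' or '{m,n}' -> fixed bounds
    print(type(patcomp.compile_pattern("any*")).__name__)    # WildcardPattern
    print(type(patcomp.compile_pattern("'x'")).__name__)     # LeafPattern
    print(type(patcomp.compile_pattern("any{2}")).__name__)  # WildcardPattern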
+ pattern = self.compile_basic(nodes, repeat) + + if repeat is not None: + assert repeat.type == self.syms.Repeater + children = repeat.children + child = children[0] + if child.type == token.STAR: + min = 0 + max = pytree.HUGE + elif child.type == token.PLUS: + min = 1 + max = pytree.HUGE + elif child.type == token.LBRACE: + assert children[-1].type == token.RBRACE + assert len(children) in (3, 5) + min = max = self.get_int(children[1]) + if len(children) == 5: + max = self.get_int(children[3]) + else: + assert False + if min != 1 or max != 1: + pattern = pattern.optimize() + pattern = pytree.WildcardPattern([[pattern]], min=min, max=max) + + if name is not None: + pattern.name = name + return pattern.optimize() + + def compile_basic(self, nodes, repeat=None): + # Compile STRING | NAME [Details] | (...) | [...] + assert len(nodes) >= 1 + node = nodes[0] + if node.type == token.STRING: + value = str(literals.evalString(node.value)) + return pytree.LeafPattern(_type_of_literal(value), value) + elif node.type == token.NAME: + value = node.value + if value.isupper(): + if value not in TOKEN_MAP: + raise PatternSyntaxError("Invalid token: %r" % value) + if nodes[1:]: + raise PatternSyntaxError("Can't have details for token") + return pytree.LeafPattern(TOKEN_MAP[value]) + else: + if value == "any": + type = None + elif not value.startswith("_"): + type = getattr(self.pysyms, value, None) + if type is None: + raise PatternSyntaxError("Invalid symbol: %r" % value) + if nodes[1:]: # Details present + content = [self.compile_node(nodes[1].children[1])] + else: + content = None + return pytree.NodePattern(type, content) + elif node.value == "(": + return self.compile_node(nodes[1]) + elif node.value == "[": + assert repeat is None + subpattern = self.compile_node(nodes[1]) + return pytree.WildcardPattern([[subpattern]], min=0, max=1) + assert False, node + + def get_int(self, node): + assert node.type == token.NUMBER + return int(node.value) + + +# Map named tokens to the type value for a LeafPattern +TOKEN_MAP = {"NAME": token.NAME, + "STRING": token.STRING, + "NUMBER": token.NUMBER, + "TOKEN": None} + + +def _type_of_literal(value): + if value[0].isalpha(): + return token.NAME + elif value in grammar.opmap: + return grammar.opmap[value] + else: + return None + + +def pattern_convert(grammar, raw_node_info): + """Converts raw node information to a Node or Leaf instance.""" + type, value, context, children = raw_node_info + if children or type in grammar.number2symbol: + return pytree.Node(type, children, context=context) + else: + return pytree.Leaf(type, value, context=context) + + +def compile_pattern(pattern): + return PatternCompiler().compile_pattern(pattern) diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/pgen2/__init__.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/pgen2/__init__.py new file mode 100644 index 00000000..af390484 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/pgen2/__init__.py @@ -0,0 +1,4 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. 
+ +"""The pgen2 package.""" diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/pgen2/conv.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/pgen2/conv.py new file mode 100644 index 00000000..ed0cac53 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/pgen2/conv.py @@ -0,0 +1,257 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Convert graminit.[ch] spit out by pgen to Python code. + +Pgen is the Python parser generator. It is useful to quickly create a +parser from a grammar file in Python's grammar notation. But I don't +want my parsers to be written in C (yet), so I'm translating the +parsing tables to Python data structures and writing a Python parse +engine. + +Note that the token numbers are constants determined by the standard +Python tokenizer. The standard token module defines these numbers and +their names (the names are not used much). The token numbers are +hardcoded into the Python tokenizer and into pgen. A Python +implementation of the Python tokenizer is also available, in the +standard tokenize module. + +On the other hand, symbol numbers (representing the grammar's +non-terminals) are assigned by pgen based on the actual grammar +input. + +Note: this module is pretty much obsolete; the pgen module generates +equivalent grammar tables directly from the Grammar.txt input file +without having to invoke the Python pgen C program. + +""" + +# Python imports +import re + +# Local imports +from pgen2 import grammar, token + + +class Converter(grammar.Grammar): + """Grammar subclass that reads classic pgen output files. + + The run() method reads the tables as produced by the pgen parser + generator, typically contained in two C files, graminit.h and + graminit.c. The other methods are for internal use only. + + See the base class for more documentation. + + """ + + def run(self, graminit_h, graminit_c): + """Load the grammar tables from the text files written by pgen.""" + self.parse_graminit_h(graminit_h) + self.parse_graminit_c(graminit_c) + self.finish_off() + + def parse_graminit_h(self, filename): + """Parse the .h file written by pgen. (Internal) + + This file is a sequence of #define statements defining the + nonterminals of the grammar as numbers. We build two tables + mapping the numbers to names and back. + + """ + try: + f = open(filename) + except OSError as err: + print("Can't open %s: %s" % (filename, err)) + return False + self.symbol2number = {} + self.number2symbol = {} + lineno = 0 + for line in f: + lineno += 1 + mo = re.match(r"^#define\s+(\w+)\s+(\d+)$", line) + if not mo and line.strip(): + print("%s(%s): can't parse %s" % (filename, lineno, + line.strip())) + else: + symbol, number = mo.groups() + number = int(number) + assert symbol not in self.symbol2number + assert number not in self.number2symbol + self.symbol2number[symbol] = number + self.number2symbol[number] = symbol + return True + + def parse_graminit_c(self, filename): + """Parse the .c file written by pgen. (Internal) + + The file looks as follows. The first two lines are always this: + + #include "pgenheaders.h" + #include "grammar.h" + + After that come four blocks: + + 1) one or more state definitions + 2) a table defining dfas + 3) a table defining labels + 4) a struct defining the grammar + + A state definition has the following form: + - one or more arc arrays, each of the form: + static arc arcs__[] = { + {, }, + ... 
+ }; + - followed by a state array, of the form: + static state states_[] = { + {, arcs__}, + ... + }; + + """ + try: + f = open(filename) + except OSError as err: + print("Can't open %s: %s" % (filename, err)) + return False + # The code below essentially uses f's iterator-ness! + lineno = 0 + + # Expect the two #include lines + lineno, line = lineno+1, next(f) + assert line == '#include "pgenheaders.h"\n', (lineno, line) + lineno, line = lineno+1, next(f) + assert line == '#include "grammar.h"\n', (lineno, line) + + # Parse the state definitions + lineno, line = lineno+1, next(f) + allarcs = {} + states = [] + while line.startswith("static arc "): + while line.startswith("static arc "): + mo = re.match(r"static arc arcs_(\d+)_(\d+)\[(\d+)\] = {$", + line) + assert mo, (lineno, line) + n, m, k = list(map(int, mo.groups())) + arcs = [] + for _ in range(k): + lineno, line = lineno+1, next(f) + mo = re.match(r"\s+{(\d+), (\d+)},$", line) + assert mo, (lineno, line) + i, j = list(map(int, mo.groups())) + arcs.append((i, j)) + lineno, line = lineno+1, next(f) + assert line == "};\n", (lineno, line) + allarcs[(n, m)] = arcs + lineno, line = lineno+1, next(f) + mo = re.match(r"static state states_(\d+)\[(\d+)\] = {$", line) + assert mo, (lineno, line) + s, t = list(map(int, mo.groups())) + assert s == len(states), (lineno, line) + state = [] + for _ in range(t): + lineno, line = lineno+1, next(f) + mo = re.match(r"\s+{(\d+), arcs_(\d+)_(\d+)},$", line) + assert mo, (lineno, line) + k, n, m = list(map(int, mo.groups())) + arcs = allarcs[n, m] + assert k == len(arcs), (lineno, line) + state.append(arcs) + states.append(state) + lineno, line = lineno+1, next(f) + assert line == "};\n", (lineno, line) + lineno, line = lineno+1, next(f) + self.states = states + + # Parse the dfas + dfas = {} + mo = re.match(r"static dfa dfas\[(\d+)\] = {$", line) + assert mo, (lineno, line) + ndfas = int(mo.group(1)) + for i in range(ndfas): + lineno, line = lineno+1, next(f) + mo = re.match(r'\s+{(\d+), "(\w+)", (\d+), (\d+), states_(\d+),$', + line) + assert mo, (lineno, line) + symbol = mo.group(2) + number, x, y, z = list(map(int, mo.group(1, 3, 4, 5))) + assert self.symbol2number[symbol] == number, (lineno, line) + assert self.number2symbol[number] == symbol, (lineno, line) + assert x == 0, (lineno, line) + state = states[z] + assert y == len(state), (lineno, line) + lineno, line = lineno+1, next(f) + mo = re.match(r'\s+("(?:\\\d\d\d)*")},$', line) + assert mo, (lineno, line) + first = {} + rawbitset = eval(mo.group(1)) + for i, c in enumerate(rawbitset): + byte = ord(c) + for j in range(8): + if byte & (1<= os.path.getmtime(b) + + +def load_packaged_grammar(package, grammar_source): + """Normally, loads a pickled grammar by doing + pkgutil.get_data(package, pickled_grammar) + where *pickled_grammar* is computed from *grammar_source* by adding the + Python version and using a ``.pickle`` extension. + + However, if *grammar_source* is an extant file, load_grammar(grammar_source) + is called instead. This facilitates using a packaged grammar file when needed + but preserves load_grammar's automatic regeneration behavior when possible. + + """ + if os.path.isfile(grammar_source): + return load_grammar(grammar_source) + pickled_name = _generate_pickle_name(os.path.basename(grammar_source)) + data = pkgutil.get_data(package, pickled_name) + g = grammar.Grammar() + g.loads(data) + return g + + +def main(*args): + """Main program, when run as a script: produce grammar pickle files. 
+ + Calls load_grammar for each argument, a path to a grammar text file. + """ + if not args: + args = sys.argv[1:] + logging.basicConfig(level=logging.INFO, stream=sys.stdout, + format='%(message)s') + for gt in args: + load_grammar(gt, save=True, force=True) + return True + +if __name__ == "__main__": + sys.exit(int(not main())) diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/pgen2/grammar.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/pgen2/grammar.py new file mode 100644 index 00000000..5d550aeb --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/pgen2/grammar.py @@ -0,0 +1,189 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""This module defines the data structures used to represent a grammar. + +These are a bit arcane because they are derived from the data +structures used by Python's 'pgen' parser generator. + +There's also a table here mapping operators to their names in the +token module; the Python tokenize module reports all operators as the +fallback token code OP, but the parser needs the actual token code. + +""" + +# Python imports +import pickle + +# Local imports +from . import token + + +class Grammar(object): + """Pgen parsing tables conversion class. + + Once initialized, this class supplies the grammar tables for the + parsing engine implemented by parse.py. The parsing engine + accesses the instance variables directly. The class here does not + provide initialization of the tables; several subclasses exist to + do this (see the conv and pgen modules). + + The load() method reads the tables from a pickle file, which is + much faster than the other ways offered by subclasses. The pickle + file is written by calling dump() (after loading the grammar + tables using a subclass). The report() method prints a readable + representation of the tables to stdout, for debugging. + + The instance variables are as follows: + + symbol2number -- a dict mapping symbol names to numbers. Symbol + numbers are always 256 or higher, to distinguish + them from token numbers, which are between 0 and + 255 (inclusive). + + number2symbol -- a dict mapping numbers to symbol names; + these two are each other's inverse. + + states -- a list of DFAs, where each DFA is a list of + states, each state is a list of arcs, and each + arc is a (i, j) pair where i is a label and j is + a state number. The DFA number is the index into + this list. (This name is slightly confusing.) + Final states are represented by a special arc of + the form (0, j) where j is its own state number. + + dfas -- a dict mapping symbol numbers to (DFA, first) + pairs, where DFA is an item from the states list + above, and first is a set of tokens that can + begin this grammar rule (represented by a dict + whose values are always 1). + + labels -- a list of (x, y) pairs where x is either a token + number or a symbol number, and y is either None + or a string; the strings are keywords. The label + number is the index in this list; label numbers + are used to mark state transitions (arcs) in the + DFAs. + + start -- the number of the grammar's start symbol. + + keywords -- a dict mapping keyword strings to arc labels. + + tokens -- a dict mapping token numbers to arc labels. 
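To make the table shapes just listed concrete, a hypothetical miniature
(hand-written, not derived from any real grammar; import paths assume the
vendored package):

    from lib2to3.pgen2 import grammar, token

    g = grammar.Grammar()
    g.symbol2number["expr"] = 256
    g.number2symbol[256] = "expr"            # exact inverse of symbol2number
    g.labels.append((token.NAME, None))      # label 1 matches any NAME token
    g.tokens[token.NAME] = 1
    g.states.append([[(1, 1)], [(0, 1)]])    # arc (0, 1) marks state 1 final
    g.dfas[256] = (g.states[0], {1: 1})      # first set: 'expr' starts with label 1
    g.start = 256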
+ + """ + + def __init__(self): + self.symbol2number = {} + self.number2symbol = {} + self.states = [] + self.dfas = {} + self.labels = [(0, "EMPTY")] + self.keywords = {} + self.tokens = {} + self.symbol2label = {} + self.start = 256 + + def dump(self, filename): + """Dump the grammar tables to a pickle file.""" + with open(filename, "wb") as f: + pickle.dump(self.__dict__, f, pickle.HIGHEST_PROTOCOL) + + def load(self, filename): + """Load the grammar tables from a pickle file.""" + with open(filename, "rb") as f: + d = pickle.load(f) + self.__dict__.update(d) + + def loads(self, pkl): + """Load the grammar tables from a pickle bytes object.""" + self.__dict__.update(pickle.loads(pkl)) + + def copy(self): + """ + Copy the grammar. + """ + new = self.__class__() + for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords", + "tokens", "symbol2label"): + setattr(new, dict_attr, getattr(self, dict_attr).copy()) + new.labels = self.labels[:] + new.states = self.states[:] + new.start = self.start + return new + + def report(self): + """Dump the grammar tables to standard output, for debugging.""" + from pprint import pprint + print("s2n") + pprint(self.symbol2number) + print("n2s") + pprint(self.number2symbol) + print("states") + pprint(self.states) + print("dfas") + pprint(self.dfas) + print("labels") + pprint(self.labels) + print("start", self.start) + + +# Map from operator to number (since tokenize doesn't do this) + +opmap_raw = """ +( LPAR +) RPAR +[ LSQB +] RSQB +: COLON +, COMMA +; SEMI ++ PLUS +- MINUS +* STAR +/ SLASH +| VBAR +& AMPER +< LESS +> GREATER += EQUAL +. DOT +% PERCENT +` BACKQUOTE +{ LBRACE +} RBRACE +@ AT +@= ATEQUAL +== EQEQUAL +!= NOTEQUAL +<> NOTEQUAL +<= LESSEQUAL +>= GREATEREQUAL +~ TILDE +^ CIRCUMFLEX +<< LEFTSHIFT +>> RIGHTSHIFT +** DOUBLESTAR ++= PLUSEQUAL +-= MINEQUAL +*= STAREQUAL +/= SLASHEQUAL +%= PERCENTEQUAL +&= AMPEREQUAL +|= VBAREQUAL +^= CIRCUMFLEXEQUAL +<<= LEFTSHIFTEQUAL +>>= RIGHTSHIFTEQUAL +**= DOUBLESTAREQUAL +// DOUBLESLASH +//= DOUBLESLASHEQUAL +-> RARROW +:= COLONEQUAL +""" + +opmap = {} +for line in opmap_raw.splitlines(): + if line: + op, name = line.split() + opmap[op] = getattr(token, name) +del line, op, name diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/pgen2/literals.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/pgen2/literals.py new file mode 100644 index 00000000..b9b63e6e --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/pgen2/literals.py @@ -0,0 +1,60 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. 
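Returning to grammar.py above: dump() and load() are a plain pickle of the
instance __dict__, so a round trip restores identical tables. A small sketch
(hypothetical file name; the grammar would normally be filled in by pgen or
conv):

    import os, tempfile
    from lib2to3.pgen2 import grammar

    g = grammar.Grammar()
    g.symbol2number["expr"] = 256
    fn = os.path.join(tempfile.mkdtemp(), "g.pickle")
    g.dump(fn)

    g2 = grammar.Grammar()
    g2.load(fn)
    assert g2.symbol2number == {"expr": 256}
    assert g2.labels == [(0, "EMPTY")]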
+ +"""Safely evaluate Python string literals without using eval().""" + +import re + +simple_escapes = {"a": "\a", + "b": "\b", + "f": "\f", + "n": "\n", + "r": "\r", + "t": "\t", + "v": "\v", + "'": "'", + '"': '"', + "\\": "\\"} + +def escape(m): + all, tail = m.group(0, 1) + assert all.startswith("\\") + esc = simple_escapes.get(tail) + if esc is not None: + return esc + if tail.startswith("x"): + hexes = tail[1:] + if len(hexes) < 2: + raise ValueError("invalid hex string escape ('\\%s')" % tail) + try: + i = int(hexes, 16) + except ValueError: + raise ValueError("invalid hex string escape ('\\%s')" % tail) from None + else: + try: + i = int(tail, 8) + except ValueError: + raise ValueError("invalid octal string escape ('\\%s')" % tail) from None + return chr(i) + +def evalString(s): + assert s.startswith("'") or s.startswith('"'), repr(s[:1]) + q = s[0] + if s[:3] == q*3: + q = q*3 + assert s.endswith(q), repr(s[-len(q):]) + assert len(s) >= 2*len(q) + s = s[len(q):-len(q)] + return re.sub(r"\\(\'|\"|\\|[abfnrtv]|x.{0,2}|[0-7]{1,3})", escape, s) + +def test(): + for i in range(256): + c = chr(i) + s = repr(c) + e = evalString(s) + if e != c: + print(i, c, s, e) + + +if __name__ == "__main__": + test() diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/pgen2/parse.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/pgen2/parse.py new file mode 100644 index 00000000..cf3fcf7e --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/pgen2/parse.py @@ -0,0 +1,204 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Parser engine for the grammar tables generated by pgen. + +The grammar table must be loaded first. + +See Parser/parser.c in the Python distribution for additional info on +how this parsing engine works. + +""" + +# Local imports +from . import token + +class ParseError(Exception): + """Exception to signal the parser is stuck.""" + + def __init__(self, msg, type, value, context): + Exception.__init__(self, "%s: type=%r, value=%r, context=%r" % + (msg, type, value, context)) + self.msg = msg + self.type = type + self.value = value + self.context = context + + def __reduce__(self): + return type(self), (self.msg, self.type, self.value, self.context) + +class Parser(object): + """Parser engine. + + The proper usage sequence is: + + p = Parser(grammar, [converter]) # create instance + p.setup([start]) # prepare for parsing + : + if p.addtoken(...): # parse a token; may raise ParseError + break + root = p.rootnode # root of abstract syntax tree + + A Parser instance may be reused by calling setup() repeatedly. + + A Parser instance contains state pertaining to the current token + sequence, and should not be used concurrently by different threads + to parse separate token sequences. + + See driver.py for how to get input tokens by tokenizing a file or + string. + + Parsing is complete when addtoken() returns True; the root of the + abstract syntax tree can then be retrieved from the rootnode + instance variable. When a syntax error occurs, addtoken() raises + the ParseError exception. There is no error recovery; the parser + cannot be used after a syntax error was reported (but it can be + reinitialized by calling setup()). + + """ + + def __init__(self, grammar, convert=None): + """Constructor. + + The grammar argument is a grammar.Grammar instance; see the + grammar module for more information. 
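Before moving on: the evalString() helper just defined in literals.py is easy
to exercise directly (hypothetical import path for the vendored module):

    from lib2to3.pgen2.literals import evalString

    print(repr(evalString(r"'a\x41\101\n'")))   # 'aAA\n'
    print(repr(evalString('"""quoted"""')))     # 'quoted'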
+ + The parser is not ready yet for parsing; you must call the + setup() method to get it started. + + The optional convert argument is a function mapping concrete + syntax tree nodes to abstract syntax tree nodes. If not + given, no conversion is done and the syntax tree produced is + the concrete syntax tree. If given, it must be a function of + two arguments, the first being the grammar (a grammar.Grammar + instance), and the second being the concrete syntax tree node + to be converted. The syntax tree is converted from the bottom + up. + + A concrete syntax tree node is a (type, value, context, nodes) + tuple, where type is the node type (a token or symbol number), + value is None for symbols and a string for tokens, context is + None or an opaque value used for error reporting (typically a + (lineno, offset) pair), and nodes is a list of children for + symbols, and None for tokens. + + An abstract syntax tree node may be anything; this is entirely + up to the converter function. + + """ + self.grammar = grammar + self.convert = convert or (lambda grammar, node: node) + + def setup(self, start=None): + """Prepare for parsing. + + This *must* be called before starting to parse. + + The optional argument is an alternative start symbol; it + defaults to the grammar's start symbol. + + You can use a Parser instance to parse any number of programs; + each time you call setup() the parser is reset to an initial + state determined by the (implicit or explicit) start symbol. + + """ + if start is None: + start = self.grammar.start + # Each stack entry is a tuple: (dfa, state, node). + # A node is a tuple: (type, value, context, children), + # where children is a list of nodes or None, and context may be None. + newnode = (start, None, None, []) + stackentry = (self.grammar.dfas[start], 0, newnode) + self.stack = [stackentry] + self.rootnode = None + self.used_names = set() # Aliased to self.rootnode.used_names in pop() + + def addtoken(self, type, value, context): + """Add a token; return True iff this is the end of the program.""" + # Map from token to label + ilabel = self.classify(type, value, context) + # Loop until the token is shifted; may raise exceptions + while True: + dfa, state, node = self.stack[-1] + states, first = dfa + arcs = states[state] + # Look for a state with this label + for i, newstate in arcs: + t, v = self.grammar.labels[i] + if ilabel == i: + # Look it up in the list of labels + assert t < 256 + # Shift a token; we're done with it + self.shift(type, value, newstate, context) + # Pop while we are in an accept-only state + state = newstate + while states[state] == [(0, state)]: + self.pop() + if not self.stack: + # Done parsing! + return True + dfa, state, node = self.stack[-1] + states, first = dfa + # Done with this token + return False + elif t >= 256: + # See if it's a symbol and if we're in its first set + itsdfa = self.grammar.dfas[t] + itsstates, itsfirst = itsdfa + if ilabel in itsfirst: + # Push a symbol + self.push(t, self.grammar.dfas[t], newstate, context) + break # To continue the outer while loop + else: + if (0, state) in arcs: + # An accepting state, pop it and try something else + self.pop() + if not self.stack: + # Done parsing, but another token is input + raise ParseError("too much input", + type, value, context) + else: + # No success finding a transition + raise ParseError("bad input", type, value, context) + + def classify(self, type, value, context): + """Turn a token into a label. 
(Internal)""" + if type == token.NAME: + # Keep a listing of all used names + self.used_names.add(value) + # Check for reserved words + ilabel = self.grammar.keywords.get(value) + if ilabel is not None: + return ilabel + ilabel = self.grammar.tokens.get(type) + if ilabel is None: + raise ParseError("bad token", type, value, context) + return ilabel + + def shift(self, type, value, newstate, context): + """Shift a token. (Internal)""" + dfa, state, node = self.stack[-1] + newnode = (type, value, context, None) + newnode = self.convert(self.grammar, newnode) + if newnode is not None: + node[-1].append(newnode) + self.stack[-1] = (dfa, newstate, node) + + def push(self, type, newdfa, newstate, context): + """Push a nonterminal. (Internal)""" + dfa, state, node = self.stack[-1] + newnode = (type, None, context, []) + self.stack[-1] = (dfa, newstate, node) + self.stack.append((newdfa, 0, newnode)) + + def pop(self): + """Pop a nonterminal. (Internal)""" + popdfa, popstate, popnode = self.stack.pop() + newnode = self.convert(self.grammar, popnode) + if newnode is not None: + if self.stack: + dfa, state, node = self.stack[-1] + node[-1].append(newnode) + else: + self.rootnode = newnode + self.rootnode.used_names = self.used_names diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/pgen2/pgen.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/pgen2/pgen.py new file mode 100644 index 00000000..7abd5cef --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/pgen2/pgen.py @@ -0,0 +1,386 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +# Pgen imports +from . import grammar, token, tokenize + +class PgenGrammar(grammar.Grammar): + pass + +class ParserGenerator(object): + + def __init__(self, filename, stream=None): + close_stream = None + if stream is None: + stream = open(filename, encoding="utf-8") + close_stream = stream.close + self.filename = filename + self.stream = stream + self.generator = tokenize.generate_tokens(stream.readline) + self.gettoken() # Initialize lookahead + self.dfas, self.startsymbol = self.parse() + if close_stream is not None: + close_stream() + self.first = {} # map from symbol name to set of tokens + self.addfirstsets() + + def make_grammar(self): + c = PgenGrammar() + names = list(self.dfas.keys()) + names.sort() + names.remove(self.startsymbol) + names.insert(0, self.startsymbol) + for name in names: + i = 256 + len(c.symbol2number) + c.symbol2number[name] = i + c.number2symbol[i] = name + for name in names: + dfa = self.dfas[name] + states = [] + for state in dfa: + arcs = [] + for label, next in sorted(state.arcs.items()): + arcs.append((self.make_label(c, label), dfa.index(next))) + if state.isfinal: + arcs.append((0, dfa.index(state))) + states.append(arcs) + c.states.append(states) + c.dfas[c.symbol2number[name]] = (states, self.make_first(c, name)) + c.start = c.symbol2number[self.startsymbol] + return c + + def make_first(self, c, name): + rawfirst = self.first[name] + first = {} + for label in sorted(rawfirst): + ilabel = self.make_label(c, label) + ##assert ilabel not in first # XXX failed on <> ... != + first[ilabel] = 1 + return first + + def make_label(self, c, label): + # XXX Maybe this should be a method on a subclass of converter? 
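+ # (the index of a newly created label is always len(c.labels), since
+ # labels are append-only; each branch below first checks for an existing
+ # equivalent label and returns its index instead of appending)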
+ ilabel = len(c.labels) + if label[0].isalpha(): + # Either a symbol name or a named token + if label in c.symbol2number: + # A symbol name (a non-terminal) + if label in c.symbol2label: + return c.symbol2label[label] + else: + c.labels.append((c.symbol2number[label], None)) + c.symbol2label[label] = ilabel + return ilabel + else: + # A named token (NAME, NUMBER, STRING) + itoken = getattr(token, label, None) + assert isinstance(itoken, int), label + assert itoken in token.tok_name, label + if itoken in c.tokens: + return c.tokens[itoken] + else: + c.labels.append((itoken, None)) + c.tokens[itoken] = ilabel + return ilabel + else: + # Either a keyword or an operator + assert label[0] in ('"', "'"), label + value = eval(label) + if value[0].isalpha(): + # A keyword + if value in c.keywords: + return c.keywords[value] + else: + c.labels.append((token.NAME, value)) + c.keywords[value] = ilabel + return ilabel + else: + # An operator (any non-numeric token) + itoken = grammar.opmap[value] # Fails if unknown token + if itoken in c.tokens: + return c.tokens[itoken] + else: + c.labels.append((itoken, None)) + c.tokens[itoken] = ilabel + return ilabel + + def addfirstsets(self): + names = list(self.dfas.keys()) + names.sort() + for name in names: + if name not in self.first: + self.calcfirst(name) + #print name, self.first[name].keys() + + def calcfirst(self, name): + dfa = self.dfas[name] + self.first[name] = None # dummy to detect left recursion + state = dfa[0] + totalset = {} + overlapcheck = {} + for label, next in state.arcs.items(): + if label in self.dfas: + if label in self.first: + fset = self.first[label] + if fset is None: + raise ValueError("recursion for rule %r" % name) + else: + self.calcfirst(label) + fset = self.first[label] + totalset.update(fset) + overlapcheck[label] = fset + else: + totalset[label] = 1 + overlapcheck[label] = {label: 1} + inverse = {} + for label, itsfirst in overlapcheck.items(): + for symbol in itsfirst: + if symbol in inverse: + raise ValueError("rule %s is ambiguous; %s is in the" + " first sets of %s as well as %s" % + (name, symbol, label, inverse[symbol])) + inverse[symbol] = label + self.first[name] = totalset + + def parse(self): + dfas = {} + startsymbol = None + # MSTART: (NEWLINE | RULE)* ENDMARKER + while self.type != token.ENDMARKER: + while self.type == token.NEWLINE: + self.gettoken() + # RULE: NAME ':' RHS NEWLINE + name = self.expect(token.NAME) + self.expect(token.OP, ":") + a, z = self.parse_rhs() + self.expect(token.NEWLINE) + #self.dump_nfa(name, a, z) + dfa = self.make_dfa(a, z) + #self.dump_dfa(name, dfa) + oldlen = len(dfa) + self.simplify_dfa(dfa) + newlen = len(dfa) + dfas[name] = dfa + #print name, oldlen, newlen + if startsymbol is None: + startsymbol = name + return dfas, startsymbol + + def make_dfa(self, start, finish): + # To turn an NFA into a DFA, we define the states of the DFA + # to correspond to *sets* of states of the NFA. Then do some + # state reduction. Let's represent sets as dicts with 1 for + # values. 
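+ # (illustration: the DFA start state is the epsilon-closure of the NFA
+ # start state -- every state reachable via None-labeled arcs -- and each
+ # distinct closure computed below becomes one DFAState)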
+ assert isinstance(start, NFAState) + assert isinstance(finish, NFAState) + def closure(state): + base = {} + addclosure(state, base) + return base + def addclosure(state, base): + assert isinstance(state, NFAState) + if state in base: + return + base[state] = 1 + for label, next in state.arcs: + if label is None: + addclosure(next, base) + states = [DFAState(closure(start), finish)] + for state in states: # NB states grows while we're iterating + arcs = {} + for nfastate in state.nfaset: + for label, next in nfastate.arcs: + if label is not None: + addclosure(next, arcs.setdefault(label, {})) + for label, nfaset in sorted(arcs.items()): + for st in states: + if st.nfaset == nfaset: + break + else: + st = DFAState(nfaset, finish) + states.append(st) + state.addarc(st, label) + return states # List of DFAState instances; first one is start + + def dump_nfa(self, name, start, finish): + print("Dump of NFA for", name) + todo = [start] + for i, state in enumerate(todo): + print(" State", i, state is finish and "(final)" or "") + for label, next in state.arcs: + if next in todo: + j = todo.index(next) + else: + j = len(todo) + todo.append(next) + if label is None: + print(" -> %d" % j) + else: + print(" %s -> %d" % (label, j)) + + def dump_dfa(self, name, dfa): + print("Dump of DFA for", name) + for i, state in enumerate(dfa): + print(" State", i, state.isfinal and "(final)" or "") + for label, next in sorted(state.arcs.items()): + print(" %s -> %d" % (label, dfa.index(next))) + + def simplify_dfa(self, dfa): + # This is not theoretically optimal, but works well enough. + # Algorithm: repeatedly look for two states that have the same + # set of arcs (same labels pointing to the same nodes) and + # unify them, until things stop changing. + + # dfa is a list of DFAState instances + changes = True + while changes: + changes = False + for i, state_i in enumerate(dfa): + for j in range(i+1, len(dfa)): + state_j = dfa[j] + if state_i == state_j: + #print " unify", i, j + del dfa[j] + for state in dfa: + state.unifystate(state_j, state_i) + changes = True + break + + def parse_rhs(self): + # RHS: ALT ('|' ALT)* + a, z = self.parse_alt() + if self.value != "|": + return a, z + else: + aa = NFAState() + zz = NFAState() + aa.addarc(a) + z.addarc(zz) + while self.value == "|": + self.gettoken() + a, z = self.parse_alt() + aa.addarc(a) + z.addarc(zz) + return aa, zz + + def parse_alt(self): + # ALT: ITEM+ + a, b = self.parse_item() + while (self.value in ("(", "[") or + self.type in (token.NAME, token.STRING)): + c, d = self.parse_item() + b.addarc(c) + b = d + return a, b + + def parse_item(self): + # ITEM: '[' RHS ']' | ATOM ['+' | '*'] + if self.value == "[": + self.gettoken() + a, z = self.parse_rhs() + self.expect(token.OP, "]") + a.addarc(z) + return a, z + else: + a, z = self.parse_atom() + value = self.value + if value not in ("+", "*"): + return a, z + self.gettoken() + z.addarc(a) + if value == "+": + return a, z + else: + return a, a + + def parse_atom(self): + # ATOM: '(' RHS ')' | NAME | STRING + if self.value == "(": + self.gettoken() + a, z = self.parse_rhs() + self.expect(token.OP, ")") + return a, z + elif self.type in (token.NAME, token.STRING): + a = NFAState() + z = NFAState() + a.addarc(z, self.value) + self.gettoken() + return a, z + else: + self.raise_error("expected (...) 
or NAME or STRING, got %s/%s", + self.type, self.value) + + def expect(self, type, value=None): + if self.type != type or (value is not None and self.value != value): + self.raise_error("expected %s/%s, got %s/%s", + type, value, self.type, self.value) + value = self.value + self.gettoken() + return value + + def gettoken(self): + tup = next(self.generator) + while tup[0] in (tokenize.COMMENT, tokenize.NL): + tup = next(self.generator) + self.type, self.value, self.begin, self.end, self.line = tup + #print token.tok_name[self.type], repr(self.value) + + def raise_error(self, msg, *args): + if args: + try: + msg = msg % args + except: + msg = " ".join([msg] + list(map(str, args))) + raise SyntaxError(msg, (self.filename, self.end[0], + self.end[1], self.line)) + +class NFAState(object): + + def __init__(self): + self.arcs = [] # list of (label, NFAState) pairs + + def addarc(self, next, label=None): + assert label is None or isinstance(label, str) + assert isinstance(next, NFAState) + self.arcs.append((label, next)) + +class DFAState(object): + + def __init__(self, nfaset, final): + assert isinstance(nfaset, dict) + assert isinstance(next(iter(nfaset)), NFAState) + assert isinstance(final, NFAState) + self.nfaset = nfaset + self.isfinal = final in nfaset + self.arcs = {} # map from label to DFAState + + def addarc(self, next, label): + assert isinstance(label, str) + assert label not in self.arcs + assert isinstance(next, DFAState) + self.arcs[label] = next + + def unifystate(self, old, new): + for label, next in self.arcs.items(): + if next is old: + self.arcs[label] = new + + def __eq__(self, other): + # Equality test -- ignore the nfaset instance variable + assert isinstance(other, DFAState) + if self.isfinal != other.isfinal: + return False + # Can't just return self.arcs == other.arcs, because that + # would invoke this method recursively, with cycles... + if len(self.arcs) != len(other.arcs): + return False + for label, next in self.arcs.items(): + if next is not other.arcs.get(label): + return False + return True + + __hash__ = None # For Py3 compatibility. + +def generate_grammar(filename="Grammar.txt"): + p = ParserGenerator(filename) + return p.make_grammar() diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/pgen2/token.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/pgen2/token.py new file mode 100755 index 00000000..2a55138e --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/pgen2/token.py @@ -0,0 +1,86 @@ +#! 
/usr/bin/env python3 + +"""Token constants (from "token.h").""" + +# Taken from Python (r53757) and modified to include some tokens +# originally monkeypatched in by pgen2.tokenize + +#--start constants-- +ENDMARKER = 0 +NAME = 1 +NUMBER = 2 +STRING = 3 +NEWLINE = 4 +INDENT = 5 +DEDENT = 6 +LPAR = 7 +RPAR = 8 +LSQB = 9 +RSQB = 10 +COLON = 11 +COMMA = 12 +SEMI = 13 +PLUS = 14 +MINUS = 15 +STAR = 16 +SLASH = 17 +VBAR = 18 +AMPER = 19 +LESS = 20 +GREATER = 21 +EQUAL = 22 +DOT = 23 +PERCENT = 24 +BACKQUOTE = 25 +LBRACE = 26 +RBRACE = 27 +EQEQUAL = 28 +NOTEQUAL = 29 +LESSEQUAL = 30 +GREATEREQUAL = 31 +TILDE = 32 +CIRCUMFLEX = 33 +LEFTSHIFT = 34 +RIGHTSHIFT = 35 +DOUBLESTAR = 36 +PLUSEQUAL = 37 +MINEQUAL = 38 +STAREQUAL = 39 +SLASHEQUAL = 40 +PERCENTEQUAL = 41 +AMPEREQUAL = 42 +VBAREQUAL = 43 +CIRCUMFLEXEQUAL = 44 +LEFTSHIFTEQUAL = 45 +RIGHTSHIFTEQUAL = 46 +DOUBLESTAREQUAL = 47 +DOUBLESLASH = 48 +DOUBLESLASHEQUAL = 49 +AT = 50 +ATEQUAL = 51 +OP = 52 +COMMENT = 53 +NL = 54 +RARROW = 55 +AWAIT = 56 +ASYNC = 57 +ERRORTOKEN = 58 +COLONEQUAL = 59 +N_TOKENS = 60 +NT_OFFSET = 256 +#--end constants-- + +tok_name = {} +for _name, _value in list(globals().items()): + if isinstance(_value, int): + tok_name[_value] = _name + + +def ISTERMINAL(x): + return x < NT_OFFSET + +def ISNONTERMINAL(x): + return x >= NT_OFFSET + +def ISEOF(x): + return x == ENDMARKER diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/pgen2/tokenize.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/pgen2/tokenize.py new file mode 100644 index 00000000..099dfa77 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/pgen2/tokenize.py @@ -0,0 +1,564 @@ +# Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Python Software Foundation. +# All rights reserved. + +"""Tokenization help for Python programs. + +generate_tokens(readline) is a generator that breaks a stream of +text into Python tokens. It accepts a readline-like method which is called +repeatedly to get the next line of input (or "" for EOF). It generates +5-tuples with these members: + + the token type (see token.py) + the token (a string) + the starting (row, column) indices of the token (a 2-tuple of ints) + the ending (row, column) indices of the token (a 2-tuple of ints) + the original line (string) + +It is designed to match the working of the Python tokenizer exactly, except +that it produces COMMENT tokens for comments and gives type OP for all +operators + +Older entry points + tokenize_loop(readline, tokeneater) + tokenize(readline, tokeneater=printtoken) +are the same, except instead of generating tokens, tokeneater is a callback +function to which the 5 fields described above are passed as 5 arguments, +each time a new token is found.""" + +__author__ = 'Ka-Ping Yee ' +__credits__ = \ + 'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro' + +import string, re +from codecs import BOM_UTF8, lookup +from lib2to3.pgen2.token import * + +from . import token +__all__ = [x for x in dir(token) if x[0] != '_'] + ["tokenize", + "generate_tokens", "untokenize"] +del token + +try: + bytes +except NameError: + # Support bytes type in Python <= 2.5, so 2to3 turns itself into + # valid Python 3 code. + bytes = str + +def group(*choices): return '(' + '|'.join(choices) + ')' +def any(*choices): return group(*choices) + '*' +def maybe(*choices): return group(*choices) + '?' 
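The three helpers above just assemble alternation groups; a quick
illustration (assuming they are in scope) of how the token regexes below are
pieced together:

    print(group('a', 'b'))      # (a|b)
    print(any(r'\d'))           # (\d)*
    print(maybe('yes', 'no'))   # (yes|no)?

    import re
    Exponent = r'[eE][-+]?\d+(?:_\d+)*'
    Pointfloat = group(r'\d+(?:_\d+)*\.(?:\d+(?:_\d+)*)?',
                       r'\.\d+(?:_\d+)*') + maybe(Exponent)
    assert re.fullmatch(Pointfloat, "3.14e-10")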
+def _combinations(*l): + return set( + x + y for x in l for y in l + ("",) if x.casefold() != y.casefold() + ) + +Whitespace = r'[ \f\t]*' +Comment = r'#[^\r\n]*' +Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment) +Name = r'\w+' + +Binnumber = r'0[bB]_?[01]+(?:_[01]+)*' +Hexnumber = r'0[xX]_?[\da-fA-F]+(?:_[\da-fA-F]+)*[lL]?' +Octnumber = r'0[oO]?_?[0-7]+(?:_[0-7]+)*[lL]?' +Decnumber = group(r'[1-9]\d*(?:_\d+)*[lL]?', '0[lL]?') +Intnumber = group(Binnumber, Hexnumber, Octnumber, Decnumber) +Exponent = r'[eE][-+]?\d+(?:_\d+)*' +Pointfloat = group(r'\d+(?:_\d+)*\.(?:\d+(?:_\d+)*)?', r'\.\d+(?:_\d+)*') + maybe(Exponent) +Expfloat = r'\d+(?:_\d+)*' + Exponent +Floatnumber = group(Pointfloat, Expfloat) +Imagnumber = group(r'\d+(?:_\d+)*[jJ]', Floatnumber + r'[jJ]') +Number = group(Imagnumber, Floatnumber, Intnumber) + +# Tail end of ' string. +Single = r"[^'\\]*(?:\\.[^'\\]*)*'" +# Tail end of " string. +Double = r'[^"\\]*(?:\\.[^"\\]*)*"' +# Tail end of ''' string. +Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''" +# Tail end of """ string. +Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""' +_litprefix = r"(?:[uUrRbBfF]|[rR][fFbB]|[fFbBuU][rR])?" +Triple = group(_litprefix + "'''", _litprefix + '"""') +# Single-line ' or " string. +String = group(_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'", + _litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"') + +# Because of leftmost-then-longest match semantics, be sure to put the +# longest operators first (e.g., if = came before ==, == would get +# recognized as two instances of =). +Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"<>", r"!=", + r"//=?", r"->", + r"[+\-*/%&@|^=<>]=?", + r"~") + +Bracket = '[][(){}]' +Special = group(r'\r?\n', r':=', r'[:;.,`@]') +Funny = group(Operator, Bracket, Special) + +PlainToken = group(Number, Funny, String, Name) +Token = Ignore + PlainToken + +# First (or only) line of ' or " string. +ContStr = group(_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" + + group("'", r'\\\r?\n'), + _litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' + + group('"', r'\\\r?\n')) +PseudoExtras = group(r'\\\r?\n', Comment, Triple) +PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name) + +tokenprog, pseudoprog, single3prog, double3prog = map( + re.compile, (Token, PseudoToken, Single3, Double3)) + +_strprefixes = ( + _combinations('r', 'R', 'f', 'F') | + _combinations('r', 'R', 'b', 'B') | + {'u', 'U', 'ur', 'uR', 'Ur', 'UR'} +) + +endprogs = {"'": re.compile(Single), '"': re.compile(Double), + "'''": single3prog, '"""': double3prog, + **{f"{prefix}'''": single3prog for prefix in _strprefixes}, + **{f'{prefix}"""': double3prog for prefix in _strprefixes}, + **{prefix: None for prefix in _strprefixes}} + +triple_quoted = ( + {"'''", '"""'} | + {f"{prefix}'''" for prefix in _strprefixes} | + {f'{prefix}"""' for prefix in _strprefixes} +) +single_quoted = ( + {"'", '"'} | + {f"{prefix}'" for prefix in _strprefixes} | + {f'{prefix}"' for prefix in _strprefixes} +) + +tabsize = 8 + +class TokenError(Exception): pass + +class StopTokenizing(Exception): pass + +def printtoken(type, token, xxx_todo_changeme, xxx_todo_changeme1, line): # for testing + (srow, scol) = xxx_todo_changeme + (erow, ecol) = xxx_todo_changeme1 + print("%d,%d-%d,%d:\t%s\t%s" % \ + (srow, scol, erow, ecol, tok_name[type], repr(token))) + +def tokenize(readline, tokeneater=printtoken): + """ + The tokenize() function accepts two parameters: one representing the + input stream, and one providing an output mechanism for tokenize(). 
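A hypothetical tokeneater callback (the two parameters are described in the
rest of this docstring) that collects NAME tokens:

    import io

    names = []
    def eater(type, token, start, end, line):
        if type == NAME:
            names.append(token)

    tokenize(io.StringIO("x = y + 1\n").readline, eater)
    print(names)   # ['x', 'y']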
+ + The first parameter, readline, must be a callable object which provides + the same interface as the readline() method of built-in file objects. + Each call to the function should return one line of input as a string. + + The second parameter, tokeneater, must also be a callable object. It is + called once for each token, with five arguments, corresponding to the + tuples generated by generate_tokens(). + """ + try: + tokenize_loop(readline, tokeneater) + except StopTokenizing: + pass + +# backwards compatible interface +def tokenize_loop(readline, tokeneater): + for token_info in generate_tokens(readline): + tokeneater(*token_info) + +class Untokenizer: + + def __init__(self): + self.tokens = [] + self.prev_row = 1 + self.prev_col = 0 + + def add_whitespace(self, start): + row, col = start + assert row <= self.prev_row + col_offset = col - self.prev_col + if col_offset: + self.tokens.append(" " * col_offset) + + def untokenize(self, iterable): + for t in iterable: + if len(t) == 2: + self.compat(t, iterable) + break + tok_type, token, start, end, line = t + self.add_whitespace(start) + self.tokens.append(token) + self.prev_row, self.prev_col = end + if tok_type in (NEWLINE, NL): + self.prev_row += 1 + self.prev_col = 0 + return "".join(self.tokens) + + def compat(self, token, iterable): + startline = False + indents = [] + toks_append = self.tokens.append + toknum, tokval = token + if toknum in (NAME, NUMBER): + tokval += ' ' + if toknum in (NEWLINE, NL): + startline = True + for tok in iterable: + toknum, tokval = tok[:2] + + if toknum in (NAME, NUMBER, ASYNC, AWAIT): + tokval += ' ' + + if toknum == INDENT: + indents.append(tokval) + continue + elif toknum == DEDENT: + indents.pop() + continue + elif toknum in (NEWLINE, NL): + startline = True + elif startline and indents: + toks_append(indents[-1]) + startline = False + toks_append(tokval) + +cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII) +blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII) + +def _get_normal_name(orig_enc): + """Imitates get_normal_name in tokenizer.c.""" + # Only care about the first 12 characters. + enc = orig_enc[:12].lower().replace("_", "-") + if enc == "utf-8" or enc.startswith("utf-8-"): + return "utf-8" + if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \ + enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")): + return "iso-8859-1" + return orig_enc + +def detect_encoding(readline): + """ + The detect_encoding() function is used to detect the encoding that should + be used to decode a Python source file. It requires one argument, readline, + in the same way as the tokenize() generator. + + It will call readline a maximum of twice, and return the encoding used + (as a string) and a list of any lines (left as bytes) it has read + in. + + It detects the encoding from the presence of a utf-8 bom or an encoding + cookie as specified in pep-0263. If both a bom and a cookie are present, but + disagree, a SyntaxError will be raised. If the encoding cookie is an invalid + charset, raise a SyntaxError. Note that if a utf-8 bom is found, + 'utf-8-sig' is returned. + + If no encoding is specified, then the default of 'utf-8' will be returned. 
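A sketch of detect_encoding() on an in-memory stream (hypothetical input):

    import io

    src = b"# -*- coding: iso-8859-1 -*-\nx = 1\n"
    encoding, lines = detect_encoding(io.BytesIO(src).readline)
    print(encoding)    # iso-8859-1
    print(len(lines))  # 1 -- only the cookie line was consumed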
+ """ + bom_found = False + encoding = None + default = 'utf-8' + def read_or_stop(): + try: + return readline() + except StopIteration: + return bytes() + + def find_cookie(line): + try: + line_string = line.decode('ascii') + except UnicodeDecodeError: + return None + match = cookie_re.match(line_string) + if not match: + return None + encoding = _get_normal_name(match.group(1)) + try: + codec = lookup(encoding) + except LookupError: + # This behaviour mimics the Python interpreter + raise SyntaxError("unknown encoding: " + encoding) + + if bom_found: + if codec.name != 'utf-8': + # This behaviour mimics the Python interpreter + raise SyntaxError('encoding problem: utf-8') + encoding += '-sig' + return encoding + + first = read_or_stop() + if first.startswith(BOM_UTF8): + bom_found = True + first = first[3:] + default = 'utf-8-sig' + if not first: + return default, [] + + encoding = find_cookie(first) + if encoding: + return encoding, [first] + if not blank_re.match(first): + return default, [first] + + second = read_or_stop() + if not second: + return default, [first] + + encoding = find_cookie(second) + if encoding: + return encoding, [first, second] + + return default, [first, second] + +def untokenize(iterable): + """Transform tokens back into Python source code. + + Each element returned by the iterable must be a token sequence + with at least two elements, a token number and token value. If + only two tokens are passed, the resulting output is poor. + + Round-trip invariant for full input: + Untokenized source will match input source exactly + + Round-trip invariant for limited input: + # Output text will tokenize the back to the input + t1 = [tok[:2] for tok in generate_tokens(f.readline)] + newcode = untokenize(t1) + readline = iter(newcode.splitlines(1)).next + t2 = [tok[:2] for tokin generate_tokens(readline)] + assert t1 == t2 + """ + ut = Untokenizer() + return ut.untokenize(iterable) + +def generate_tokens(readline): + """ + The generate_tokens() generator requires one argument, readline, which + must be a callable object which provides the same interface as the + readline() method of built-in file objects. Each call to the function + should return one line of input as a string. Alternately, readline + can be a callable function terminating with StopIteration: + readline = open(myfile).next # Example of alternate readline + + The generator produces 5-tuples with these members: the token type; the + token string; a 2-tuple (srow, scol) of ints specifying the row and + column where the token begins in the source; a 2-tuple (erow, ecol) of + ints specifying the row and column where the token ends in the source; + and the line on which the token was found. The line passed is the + physical line. 
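Concretely, a hypothetical full-input round trip through generate_tokens()
and untokenize(), matching the invariants documented above:

    import io

    src = "def f(x):\n    return x + 1\n"
    toks = list(generate_tokens(io.StringIO(src).readline))
    print(tok_name[toks[0][0]], repr(toks[0][1]))   # NAME 'def'
    assert untokenize(toks) == src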
+ """ + lnum = parenlev = continued = 0 + contstr, needcont = '', 0 + contline = None + indents = [0] + + # 'stashed' and 'async_*' are used for async/await parsing + stashed = None + async_def = False + async_def_indent = 0 + async_def_nl = False + + while 1: # loop over lines in stream + try: + line = readline() + except StopIteration: + line = '' + lnum = lnum + 1 + pos, max = 0, len(line) + + if contstr: # continued string + if not line: + raise TokenError("EOF in multi-line string", strstart) + endmatch = endprog.match(line) + if endmatch: + pos = end = endmatch.end(0) + yield (STRING, contstr + line[:end], + strstart, (lnum, end), contline + line) + contstr, needcont = '', 0 + contline = None + elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n': + yield (ERRORTOKEN, contstr + line, + strstart, (lnum, len(line)), contline) + contstr = '' + contline = None + continue + else: + contstr = contstr + line + contline = contline + line + continue + + elif parenlev == 0 and not continued: # new statement + if not line: break + column = 0 + while pos < max: # measure leading whitespace + if line[pos] == ' ': column = column + 1 + elif line[pos] == '\t': column = (column//tabsize + 1)*tabsize + elif line[pos] == '\f': column = 0 + else: break + pos = pos + 1 + if pos == max: break + + if stashed: + yield stashed + stashed = None + + if line[pos] in '#\r\n': # skip comments or blank lines + if line[pos] == '#': + comment_token = line[pos:].rstrip('\r\n') + nl_pos = pos + len(comment_token) + yield (COMMENT, comment_token, + (lnum, pos), (lnum, pos + len(comment_token)), line) + yield (NL, line[nl_pos:], + (lnum, nl_pos), (lnum, len(line)), line) + else: + yield ((NL, COMMENT)[line[pos] == '#'], line[pos:], + (lnum, pos), (lnum, len(line)), line) + continue + + if column > indents[-1]: # count indents or dedents + indents.append(column) + yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line) + while column < indents[-1]: + if column not in indents: + raise IndentationError( + "unindent does not match any outer indentation level", + ("", lnum, pos, line)) + indents = indents[:-1] + + if async_def and async_def_indent >= indents[-1]: + async_def = False + async_def_nl = False + async_def_indent = 0 + + yield (DEDENT, '', (lnum, pos), (lnum, pos), line) + + if async_def and async_def_nl and async_def_indent >= indents[-1]: + async_def = False + async_def_nl = False + async_def_indent = 0 + + else: # continued statement + if not line: + raise TokenError("EOF in multi-line statement", (lnum, 0)) + continued = 0 + + while pos < max: + pseudomatch = pseudoprog.match(line, pos) + if pseudomatch: # scan for tokens + start, end = pseudomatch.span(1) + spos, epos, pos = (lnum, start), (lnum, end), end + token, initial = line[start:end], line[start] + + if initial in string.digits or \ + (initial == '.' 
and token != '.'): # ordinary number + yield (NUMBER, token, spos, epos, line) + elif initial in '\r\n': + newline = NEWLINE + if parenlev > 0: + newline = NL + elif async_def: + async_def_nl = True + if stashed: + yield stashed + stashed = None + yield (newline, token, spos, epos, line) + + elif initial == '#': + assert not token.endswith("\n") + if stashed: + yield stashed + stashed = None + yield (COMMENT, token, spos, epos, line) + elif token in triple_quoted: + endprog = endprogs[token] + endmatch = endprog.match(line, pos) + if endmatch: # all on one line + pos = endmatch.end(0) + token = line[start:pos] + if stashed: + yield stashed + stashed = None + yield (STRING, token, spos, (lnum, pos), line) + else: + strstart = (lnum, start) # multiple lines + contstr = line[start:] + contline = line + break + elif initial in single_quoted or \ + token[:2] in single_quoted or \ + token[:3] in single_quoted: + if token[-1] == '\n': # continued string + strstart = (lnum, start) + endprog = (endprogs[initial] or endprogs[token[1]] or + endprogs[token[2]]) + contstr, needcont = line[start:], 1 + contline = line + break + else: # ordinary string + if stashed: + yield stashed + stashed = None + yield (STRING, token, spos, epos, line) + elif initial.isidentifier(): # ordinary name + if token in ('async', 'await'): + if async_def: + yield (ASYNC if token == 'async' else AWAIT, + token, spos, epos, line) + continue + + tok = (NAME, token, spos, epos, line) + if token == 'async' and not stashed: + stashed = tok + continue + + if token in ('def', 'for'): + if (stashed + and stashed[0] == NAME + and stashed[1] == 'async'): + + if token == 'def': + async_def = True + async_def_indent = indents[-1] + + yield (ASYNC, stashed[1], + stashed[2], stashed[3], + stashed[4]) + stashed = None + + if stashed: + yield stashed + stashed = None + + yield tok + elif initial == '\\': # continued stmt + # This yield is new; needed for better idempotency: + if stashed: + yield stashed + stashed = None + yield (NL, token, spos, (lnum, pos), line) + continued = 1 + else: + if initial in '([{': parenlev = parenlev + 1 + elif initial in ')]}': parenlev = parenlev - 1 + if stashed: + yield stashed + stashed = None + yield (OP, token, spos, epos, line) + else: + yield (ERRORTOKEN, line[pos], + (lnum, pos), (lnum, pos+1), line) + pos = pos + 1 + + if stashed: + yield stashed + stashed = None + + for indent in indents[1:]: # pop remaining indent levels + yield (DEDENT, '', (lnum, 0), (lnum, 0), '') + yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '') + +if __name__ == '__main__': # testing + import sys + if len(sys.argv) > 1: tokenize(open(sys.argv[1]).readline) + else: tokenize(sys.stdin.readline) diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/pygram.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/pygram.py new file mode 100644 index 00000000..24d9db92 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/pygram.py @@ -0,0 +1,43 @@ +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Export the Python grammar and symbols.""" + +# Python imports +import os + +# Local imports +from .pgen2 import token +from .pgen2 import driver +from . 
import pytree + +# The grammar file +_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), "Grammar.txt") +_PATTERN_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), + "PatternGrammar.txt") + + +class Symbols(object): + + def __init__(self, grammar): + """Initializer. + + Creates an attribute for each grammar symbol (nonterminal), + whose value is the symbol's type (an int >= 256). + """ + for name, symbol in grammar.symbol2number.items(): + setattr(self, name, symbol) + + +python_grammar = driver.load_packaged_grammar("lib2to3", _GRAMMAR_FILE) + +python_symbols = Symbols(python_grammar) + +python_grammar_no_print_statement = python_grammar.copy() +del python_grammar_no_print_statement.keywords["print"] + +python_grammar_no_print_and_exec_statement = python_grammar_no_print_statement.copy() +del python_grammar_no_print_and_exec_statement.keywords["exec"] + +pattern_grammar = driver.load_packaged_grammar("lib2to3", _PATTERN_GRAMMAR_FILE) +pattern_symbols = Symbols(pattern_grammar) diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/pytree.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/pytree.py new file mode 100644 index 00000000..729023df --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/pytree.py @@ -0,0 +1,853 @@ +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +""" +Python parse tree definitions. + +This is a very concrete parse tree; we need to keep every token and +even the comments and whitespace between tokens. + +There's also a pattern matching implementation here. +""" + +__author__ = "Guido van Rossum " + +import sys +from io import StringIO + +HUGE = 0x7FFFFFFF # maximum repeat count, default max + +_type_reprs = {} +def type_repr(type_num): + global _type_reprs + if not _type_reprs: + from .pygram import python_symbols + # printing tokens is possible but not as useful + # from .pgen2 import token // token.__dict__.items(): + for name, val in python_symbols.__dict__.items(): + if type(val) == int: _type_reprs[val] = name + return _type_reprs.setdefault(type_num, type_num) + +class Base(object): + + """ + Abstract base class for Node and Leaf. + + This provides some default functionality and boilerplate using the + template pattern. + + A node may be a subnode of at most one parent. + """ + + # Default values for instance variables + type = None # int: token number (< 256) or symbol number (>= 256) + parent = None # Parent node pointer, or None + children = () # Tuple of subnodes + was_changed = False + was_checked = False + + def __new__(cls, *args, **kwds): + """Constructor that prevents Base from being instantiated.""" + assert cls is not Base, "Cannot instantiate Base" + return object.__new__(cls) + + def __eq__(self, other): + """ + Compare two nodes for equality. + + This calls the method _eq(). + """ + if self.__class__ is not other.__class__: + return NotImplemented + return self._eq(other) + + __hash__ = None # For Py3 compatibility. + + def _eq(self, other): + """ + Compare two nodes for equality. + + This is called by __eq__ and __ne__. It is only called if the two nodes + have the same type. This must be implemented by the concrete subclass. + Nodes should be considered equal if they have the same structure, + ignoring the prefix string and other context information. + """ + raise NotImplementedError + + def clone(self): + """ + Return a cloned (deep) copy of self. + + This must be implemented by the concrete subclass. 
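Stepping back to pygram above: the Symbols wrapper just turns the
symbol2number table into attributes, so grammar symbols are plain ints. A
hypothetical usage, assuming the vendored package is importable:

    from lib2to3.pygram import python_grammar, python_symbols

    print(python_symbols.funcdef >= 256)                          # True
    print(python_grammar.number2symbol[python_symbols.funcdef])   # funcdef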
+ """ + raise NotImplementedError + + def post_order(self): + """ + Return a post-order iterator for the tree. + + This must be implemented by the concrete subclass. + """ + raise NotImplementedError + + def pre_order(self): + """ + Return a pre-order iterator for the tree. + + This must be implemented by the concrete subclass. + """ + raise NotImplementedError + + def replace(self, new): + """Replace this node with a new one in the parent.""" + assert self.parent is not None, str(self) + assert new is not None + if not isinstance(new, list): + new = [new] + l_children = [] + found = False + for ch in self.parent.children: + if ch is self: + assert not found, (self.parent.children, self, new) + if new is not None: + l_children.extend(new) + found = True + else: + l_children.append(ch) + assert found, (self.children, self, new) + self.parent.changed() + self.parent.children = l_children + for x in new: + x.parent = self.parent + self.parent = None + + def get_lineno(self): + """Return the line number which generated the invocant node.""" + node = self + while not isinstance(node, Leaf): + if not node.children: + return + node = node.children[0] + return node.lineno + + def changed(self): + if self.parent: + self.parent.changed() + self.was_changed = True + + def remove(self): + """ + Remove the node from the tree. Returns the position of the node in its + parent's children before it was removed. + """ + if self.parent: + for i, node in enumerate(self.parent.children): + if node is self: + self.parent.changed() + del self.parent.children[i] + self.parent = None + return i + + @property + def next_sibling(self): + """ + The node immediately following the invocant in their parent's children + list. If the invocant does not have a next sibling, it is None + """ + if self.parent is None: + return None + + # Can't use index(); we need to test by identity + for i, child in enumerate(self.parent.children): + if child is self: + try: + return self.parent.children[i+1] + except IndexError: + return None + + @property + def prev_sibling(self): + """ + The node immediately preceding the invocant in their parent's children + list. If the invocant does not have a previous sibling, it is None. + """ + if self.parent is None: + return None + + # Can't use index(); we need to test by identity + for i, child in enumerate(self.parent.children): + if child is self: + if i == 0: + return None + return self.parent.children[i-1] + + def leaves(self): + for child in self.children: + yield from child.leaves() + + def depth(self): + if self.parent is None: + return 0 + return 1 + self.parent.depth() + + def get_suffix(self): + """ + Return the string immediately following the invocant node. This is + effectively equivalent to node.next_sibling.prefix + """ + next_sib = self.next_sibling + if next_sib is None: + return "" + return next_sib.prefix + + if sys.version_info < (3, 0): + def __str__(self): + return str(self).encode("ascii") + +class Node(Base): + + """Concrete implementation for interior nodes.""" + + def __init__(self,type, children, + context=None, + prefix=None, + fixers_applied=None): + """ + Initializer. + + Takes a type constant (a symbol number >= 256), a sequence of + child nodes, and an optional context keyword argument. + + As a side effect, the parent pointers of the children are updated. 
+ """ + assert type >= 256, type + self.type = type + self.children = list(children) + for ch in self.children: + assert ch.parent is None, repr(ch) + ch.parent = self + if prefix is not None: + self.prefix = prefix + if fixers_applied: + self.fixers_applied = fixers_applied[:] + else: + self.fixers_applied = None + + def __repr__(self): + """Return a canonical string representation.""" + return "%s(%s, %r)" % (self.__class__.__name__, + type_repr(self.type), + self.children) + + def __unicode__(self): + """ + Return a pretty string representation. + + This reproduces the input source exactly. + """ + return "".join(map(str, self.children)) + + if sys.version_info > (3, 0): + __str__ = __unicode__ + + def _eq(self, other): + """Compare two nodes for equality.""" + return (self.type, self.children) == (other.type, other.children) + + def clone(self): + """Return a cloned (deep) copy of self.""" + return Node(self.type, [ch.clone() for ch in self.children], + fixers_applied=self.fixers_applied) + + def post_order(self): + """Return a post-order iterator for the tree.""" + for child in self.children: + yield from child.post_order() + yield self + + def pre_order(self): + """Return a pre-order iterator for the tree.""" + yield self + for child in self.children: + yield from child.pre_order() + + @property + def prefix(self): + """ + The whitespace and comments preceding this node in the input. + """ + if not self.children: + return "" + return self.children[0].prefix + + @prefix.setter + def prefix(self, prefix): + if self.children: + self.children[0].prefix = prefix + + def set_child(self, i, child): + """ + Equivalent to 'node.children[i] = child'. This method also sets the + child's parent attribute appropriately. + """ + child.parent = self + self.children[i].parent = None + self.children[i] = child + self.changed() + + def insert_child(self, i, child): + """ + Equivalent to 'node.children.insert(i, child)'. This method also sets + the child's parent attribute appropriately. + """ + child.parent = self + self.children.insert(i, child) + self.changed() + + def append_child(self, child): + """ + Equivalent to 'node.children.append(child)'. This method also sets the + child's parent attribute appropriately. + """ + child.parent = self + self.children.append(child) + self.changed() + + +class Leaf(Base): + + """Concrete implementation for leaf nodes.""" + + # Default values for instance variables + _prefix = "" # Whitespace and comments preceding this token in the input + lineno = 0 # Line where this token starts in the input + column = 0 # Column where this token tarts in the input + + def __init__(self, type, value, + context=None, + prefix=None, + fixers_applied=[]): + """ + Initializer. + + Takes a type constant (a token number < 256), a string value, and an + optional context keyword argument. + """ + assert 0 <= type < 256, type + if context is not None: + self._prefix, (self.lineno, self.column) = context + self.type = type + self.value = value + if prefix is not None: + self._prefix = prefix + self.fixers_applied = fixers_applied[:] + + def __repr__(self): + """Return a canonical string representation.""" + return "%s(%r, %r)" % (self.__class__.__name__, + self.type, + self.value) + + def __unicode__(self): + """ + Return a pretty string representation. + + This reproduces the input source exactly. 
+ """ + return self.prefix + str(self.value) + + if sys.version_info > (3, 0): + __str__ = __unicode__ + + def _eq(self, other): + """Compare two nodes for equality.""" + return (self.type, self.value) == (other.type, other.value) + + def clone(self): + """Return a cloned (deep) copy of self.""" + return Leaf(self.type, self.value, + (self.prefix, (self.lineno, self.column)), + fixers_applied=self.fixers_applied) + + def leaves(self): + yield self + + def post_order(self): + """Return a post-order iterator for the tree.""" + yield self + + def pre_order(self): + """Return a pre-order iterator for the tree.""" + yield self + + @property + def prefix(self): + """ + The whitespace and comments preceding this token in the input. + """ + return self._prefix + + @prefix.setter + def prefix(self, prefix): + self.changed() + self._prefix = prefix + +def convert(gr, raw_node): + """ + Convert raw node information to a Node or Leaf instance. + + This is passed to the parser driver which calls it whenever a reduction of a + grammar rule produces a new complete node, so that the tree is build + strictly bottom-up. + """ + type, value, context, children = raw_node + if children or type in gr.number2symbol: + # If there's exactly one child, return that child instead of + # creating a new node. + if len(children) == 1: + return children[0] + return Node(type, children, context=context) + else: + return Leaf(type, value, context=context) + + +class BasePattern(object): + + """ + A pattern is a tree matching pattern. + + It looks for a specific node type (token or symbol), and + optionally for a specific content. + + This is an abstract base class. There are three concrete + subclasses: + + - LeafPattern matches a single leaf node; + - NodePattern matches a single node (usually non-leaf); + - WildcardPattern matches a sequence of nodes of variable length. + """ + + # Defaults for instance variables + type = None # Node type (token if < 256, symbol if >= 256) + content = None # Optional content matching pattern + name = None # Optional name used to store match in results dict + + def __new__(cls, *args, **kwds): + """Constructor that prevents BasePattern from being instantiated.""" + assert cls is not BasePattern, "Cannot instantiate BasePattern" + return object.__new__(cls) + + def __repr__(self): + args = [type_repr(self.type), self.content, self.name] + while args and args[-1] is None: + del args[-1] + return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args))) + + def optimize(self): + """ + A subclass can define this as a hook for optimizations. + + Returns either self or another node with the same effect. + """ + return self + + def match(self, node, results=None): + """ + Does this pattern exactly match a node? + + Returns True if it matches, False if not. + + If results is not None, it must be a dict which will be + updated with the nodes matching named subpatterns. + + Default implementation for non-wildcard patterns. + """ + if self.type is not None and node.type != self.type: + return False + if self.content is not None: + r = None + if results is not None: + r = {} + if not self._submatch(node, r): + return False + if r: + results.update(r) + if results is not None and self.name: + results[self.name] = node + return True + + def match_seq(self, nodes, results=None): + """ + Does this pattern exactly match a sequence of nodes? + + Default implementation for non-wildcard patterns. 
+ """ + if len(nodes) != 1: + return False + return self.match(nodes[0], results) + + def generate_matches(self, nodes): + """ + Generator yielding all matches for this pattern. + + Default implementation for non-wildcard patterns. + """ + r = {} + if nodes and self.match(nodes[0], r): + yield 1, r + + +class LeafPattern(BasePattern): + + def __init__(self, type=None, content=None, name=None): + """ + Initializer. Takes optional type, content, and name. + + The type, if given must be a token type (< 256). If not given, + this matches any *leaf* node; the content may still be required. + + The content, if given, must be a string. + + If a name is given, the matching node is stored in the results + dict under that key. + """ + if type is not None: + assert 0 <= type < 256, type + if content is not None: + assert isinstance(content, str), repr(content) + self.type = type + self.content = content + self.name = name + + def match(self, node, results=None): + """Override match() to insist on a leaf node.""" + if not isinstance(node, Leaf): + return False + return BasePattern.match(self, node, results) + + def _submatch(self, node, results=None): + """ + Match the pattern's content to the node's children. + + This assumes the node type matches and self.content is not None. + + Returns True if it matches, False if not. + + If results is not None, it must be a dict which will be + updated with the nodes matching named subpatterns. + + When returning False, the results dict may still be updated. + """ + return self.content == node.value + + +class NodePattern(BasePattern): + + wildcards = False + + def __init__(self, type=None, content=None, name=None): + """ + Initializer. Takes optional type, content, and name. + + The type, if given, must be a symbol type (>= 256). If the + type is None this matches *any* single node (leaf or not), + except if content is not None, in which it only matches + non-leaf nodes that also match the content pattern. + + The content, if not None, must be a sequence of Patterns that + must match the node's children exactly. If the content is + given, the type must not be None. + + If a name is given, the matching node is stored in the results + dict under that key. + """ + if type is not None: + assert type >= 256, type + if content is not None: + assert not isinstance(content, str), repr(content) + content = list(content) + for i, item in enumerate(content): + assert isinstance(item, BasePattern), (i, item) + if isinstance(item, WildcardPattern): + self.wildcards = True + self.type = type + self.content = content + self.name = name + + def _submatch(self, node, results=None): + """ + Match the pattern's content to the node's children. + + This assumes the node type matches and self.content is not None. + + Returns True if it matches, False if not. + + If results is not None, it must be a dict which will be + updated with the nodes matching named subpatterns. + + When returning False, the results dict may still be updated. + """ + if self.wildcards: + for c, r in generate_matches(self.content, node.children): + if c == len(node.children): + if results is not None: + results.update(r) + return True + return False + if len(self.content) != len(node.children): + return False + for subpattern, child in zip(self.content, node.children): + if not subpattern.match(child, results): + return False + return True + + +class WildcardPattern(BasePattern): + + """ + A wildcard pattern can match zero or more nodes. 
+ + This has all the flexibility needed to implement patterns like: + + .* .+ .? .{m,n} + (a b c | d e | f) + (...)* (...)+ (...)? (...){m,n} + + except it always uses non-greedy matching. + """ + + def __init__(self, content=None, min=0, max=HUGE, name=None): + """ + Initializer. + + Args: + content: optional sequence of subsequences of patterns; + if absent, matches one node; + if present, each subsequence is an alternative [*] + min: optional minimum number of times to match, default 0 + max: optional maximum number of times to match, default HUGE + name: optional name assigned to this match + + [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is + equivalent to (a b c | d e | f g h); if content is None, + this is equivalent to '.' in regular expression terms. + The min and max parameters work as follows: + min=0, max=maxint: .* + min=1, max=maxint: .+ + min=0, max=1: .? + min=1, max=1: . + If content is not None, replace the dot with the parenthesized + list of alternatives, e.g. (a b c | d e | f g h)* + """ + assert 0 <= min <= max <= HUGE, (min, max) + if content is not None: + content = tuple(map(tuple, content)) # Protect against alterations + # Check sanity of alternatives + assert len(content), repr(content) # Can't have zero alternatives + for alt in content: + assert len(alt), repr(alt) # Can have empty alternatives + self.content = content + self.min = min + self.max = max + self.name = name + + def optimize(self): + """Optimize certain stacked wildcard patterns.""" + subpattern = None + if (self.content is not None and + len(self.content) == 1 and len(self.content[0]) == 1): + subpattern = self.content[0][0] + if self.min == 1 and self.max == 1: + if self.content is None: + return NodePattern(name=self.name) + if subpattern is not None and self.name == subpattern.name: + return subpattern.optimize() + if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and + subpattern.min <= 1 and self.name == subpattern.name): + return WildcardPattern(subpattern.content, + self.min*subpattern.min, + self.max*subpattern.max, + subpattern.name) + return self + + def match(self, node, results=None): + """Does this pattern exactly match a node?""" + return self.match_seq([node], results) + + def match_seq(self, nodes, results=None): + """Does this pattern exactly match a sequence of nodes?""" + for c, r in self.generate_matches(nodes): + if c == len(nodes): + if results is not None: + results.update(r) + if self.name: + results[self.name] = list(nodes) + return True + return False + + def generate_matches(self, nodes): + """ + Generator yielding matches for a sequence of nodes. + + Args: + nodes: sequence of nodes + + Yields: + (count, results) tuples where: + count: the match comprises nodes[:count]; + results: dict containing named submatches. + """ + if self.content is None: + # Shortcut for special case (see __init__.__doc__) + for count in range(self.min, 1 + min(len(nodes), self.max)): + r = {} + if self.name: + r[self.name] = nodes[:count] + yield count, r + elif self.name == "bare_name": + yield self._bare_name_matches(nodes) + else: + # The reason for this is that hitting the recursion limit usually + # results in some ugly messages about how RuntimeErrors are being + # ignored. We only have to do this on CPython, though, because other + # implementations don't have this nasty bug in the first place. 
+ if hasattr(sys, "getrefcount"): + save_stderr = sys.stderr + sys.stderr = StringIO() + try: + for count, r in self._recursive_matches(nodes, 0): + if self.name: + r[self.name] = nodes[:count] + yield count, r + except RuntimeError: + # Fall back to the iterative pattern matching scheme if the + # recursive scheme hits the recursion limit (RecursionError). + for count, r in self._iterative_matches(nodes): + if self.name: + r[self.name] = nodes[:count] + yield count, r + finally: + if hasattr(sys, "getrefcount"): + sys.stderr = save_stderr + + def _iterative_matches(self, nodes): + """Helper to iteratively yield the matches.""" + nodelen = len(nodes) + if 0 >= self.min: + yield 0, {} + + results = [] + # generate matches that use just one alt from self.content + for alt in self.content: + for c, r in generate_matches(alt, nodes): + yield c, r + results.append((c, r)) + + # for each match, iterate down the nodes + while results: + new_results = [] + for c0, r0 in results: + # stop if the entire set of nodes has been matched + if c0 < nodelen and c0 <= self.max: + for alt in self.content: + for c1, r1 in generate_matches(alt, nodes[c0:]): + if c1 > 0: + r = {} + r.update(r0) + r.update(r1) + yield c0 + c1, r + new_results.append((c0 + c1, r)) + results = new_results + + def _bare_name_matches(self, nodes): + """Special optimized matcher for bare_name.""" + count = 0 + r = {} + done = False + max = len(nodes) + while not done and count < max: + done = True + for leaf in self.content: + if leaf[0].match(nodes[count], r): + count += 1 + done = False + break + r[self.name] = nodes[:count] + return count, r + + def _recursive_matches(self, nodes, count): + """Helper to recursively yield the matches.""" + assert self.content is not None + if count >= self.min: + yield 0, {} + if count < self.max: + for alt in self.content: + for c0, r0 in generate_matches(alt, nodes): + for c1, r1 in self._recursive_matches(nodes[c0:], count+1): + r = {} + r.update(r0) + r.update(r1) + yield c0 + c1, r + + +class NegatedPattern(BasePattern): + + def __init__(self, content=None): + """ + Initializer. + + The argument is either a pattern or None. If it is None, this + only matches an empty sequence (effectively '$' in regex + lingo). If it is not None, this matches whenever the argument + pattern doesn't have any matches. + """ + if content is not None: + assert isinstance(content, BasePattern), repr(content) + self.content = content + + def match(self, node): + # We never match a node in its entirety + return False + + def match_seq(self, nodes): + # We only match an empty sequence of nodes in its entirety + return len(nodes) == 0 + + def generate_matches(self, nodes): + if self.content is None: + # Return a match if there is an empty sequence + if len(nodes) == 0: + yield 0, {} + else: + # Return a match if the argument pattern has no matches + for c, r in self.content.generate_matches(nodes): + return + yield 0, {} + + +def generate_matches(patterns, nodes): + """ + Generator yielding matches for a sequence of patterns and nodes. + + Args: + patterns: a sequence of patterns + nodes: a sequence of nodes + + Yields: + (count, results) tuples where: + count: the entire sequence of patterns matches nodes[:count]; + results: dict containing named submatches. 
+ """ + if not patterns: + yield 0, {} + else: + p, rest = patterns[0], patterns[1:] + for c0, r0 in p.generate_matches(nodes): + if not rest: + yield c0, r0 + else: + for c1, r1 in generate_matches(rest, nodes[c0:]): + r = {} + r.update(r0) + r.update(r1) + yield c0 + c1, r diff --git a/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/refactor.py b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/refactor.py new file mode 100644 index 00000000..3a5aafff --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_2to3/vendor/src/lib2to3/refactor.py @@ -0,0 +1,732 @@ +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Refactoring framework. + +Used as a main program, this can refactor any number of files and/or +recursively descend down directories. Imported as a module, this +provides infrastructure to write your own refactoring tool. +""" + +__author__ = "Guido van Rossum " + + +# Python imports +import io +import os +import pkgutil +import sys +import logging +import operator +import collections +from itertools import chain + +# Local imports +from .pgen2 import driver, tokenize, token +from .fixer_util import find_root +from . import pytree, pygram +from . import btm_matcher as bm + + +def get_all_fix_names(fixer_pkg, remove_prefix=True): + """Return a sorted list of all available fix names in the given package.""" + pkg = __import__(fixer_pkg, [], [], ["*"]) + fix_names = [] + for finder, name, ispkg in pkgutil.iter_modules(pkg.__path__): + if name.startswith("fix_"): + if remove_prefix: + name = name[4:] + fix_names.append(name) + return fix_names + + +class _EveryNode(Exception): + pass + + +def _get_head_types(pat): + """ Accepts a pytree Pattern Node and returns a set + of the pattern types which will match first. """ + + if isinstance(pat, (pytree.NodePattern, pytree.LeafPattern)): + # NodePatters must either have no type and no content + # or a type and content -- so they don't get any farther + # Always return leafs + if pat.type is None: + raise _EveryNode + return {pat.type} + + if isinstance(pat, pytree.NegatedPattern): + if pat.content: + return _get_head_types(pat.content) + raise _EveryNode # Negated Patterns don't have a type + + if isinstance(pat, pytree.WildcardPattern): + # Recurse on each node in content + r = set() + for p in pat.content: + for x in p: + r.update(_get_head_types(x)) + return r + + raise Exception("Oh no! I don't understand pattern %s" %(pat)) + + +def _get_headnode_dict(fixer_list): + """ Accepts a list of fixers and returns a dictionary + of head node type --> fixer list. """ + head_nodes = collections.defaultdict(list) + every = [] + for fixer in fixer_list: + if fixer.pattern: + try: + heads = _get_head_types(fixer.pattern) + except _EveryNode: + every.append(fixer) + else: + for node_type in heads: + head_nodes[node_type].append(fixer) + else: + if fixer._accept_type is not None: + head_nodes[fixer._accept_type].append(fixer) + else: + every.append(fixer) + for node_type in chain(pygram.python_grammar.symbol2number.values(), + pygram.python_grammar.tokens): + head_nodes[node_type].extend(every) + return dict(head_nodes) + + +def get_fixers_from_package(pkg_name): + """ + Return the fully qualified names for fixers in the package pkg_name. + """ + return [pkg_name + "." 
+ fix_name + for fix_name in get_all_fix_names(pkg_name, False)] + +def _identity(obj): + return obj + + +def _detect_future_features(source): + have_docstring = False + gen = tokenize.generate_tokens(io.StringIO(source).readline) + def advance(): + tok = next(gen) + return tok[0], tok[1] + ignore = frozenset({token.NEWLINE, tokenize.NL, token.COMMENT}) + features = set() + try: + while True: + tp, value = advance() + if tp in ignore: + continue + elif tp == token.STRING: + if have_docstring: + break + have_docstring = True + elif tp == token.NAME and value == "from": + tp, value = advance() + if tp != token.NAME or value != "__future__": + break + tp, value = advance() + if tp != token.NAME or value != "import": + break + tp, value = advance() + if tp == token.OP and value == "(": + tp, value = advance() + while tp == token.NAME: + features.add(value) + tp, value = advance() + if tp != token.OP or value != ",": + break + tp, value = advance() + else: + break + except StopIteration: + pass + return frozenset(features) + + +class FixerError(Exception): + """A fixer could not be loaded.""" + + +class RefactoringTool(object): + + _default_options = {"print_function" : False, + "exec_function": False, + "write_unchanged_files" : False} + + CLASS_PREFIX = "Fix" # The prefix for fixer classes + FILE_PREFIX = "fix_" # The prefix for modules with a fixer within + + def __init__(self, fixer_names, options=None, explicit=None): + """Initializer. + + Args: + fixer_names: a list of fixers to import + options: a dict with configuration. + explicit: a list of fixers to run even if they are explicit. + """ + self.fixers = fixer_names + self.explicit = explicit or [] + self.options = self._default_options.copy() + if options is not None: + self.options.update(options) + self.grammar = pygram.python_grammar.copy() + + if self.options['print_function']: + del self.grammar.keywords["print"] + elif self.options['exec_function']: + del self.grammar.keywords["exec"] + + # When this is True, the refactor*() methods will call write_file() for + # files processed even if they were not changed during refactoring. If + # and only if the refactor method's write parameter was True. + self.write_unchanged_files = self.options.get("write_unchanged_files") + self.errors = [] + self.logger = logging.getLogger("RefactoringTool") + self.fixer_log = [] + self.wrote = False + self.driver = driver.Driver(self.grammar, + convert=pytree.convert, + logger=self.logger) + self.pre_order, self.post_order = self.get_fixers() + + + self.files = [] # List of files that were or should be modified + + self.BM = bm.BottomMatcher() + self.bmi_pre_order = [] # Bottom Matcher incompatible fixers + self.bmi_post_order = [] + + for fixer in chain(self.post_order, self.pre_order): + if fixer.BM_compatible: + self.BM.add_fixer(fixer) + # remove fixers that will be handled by the bottom-up + # matcher + elif fixer in self.pre_order: + self.bmi_pre_order.append(fixer) + elif fixer in self.post_order: + self.bmi_post_order.append(fixer) + + self.bmi_pre_order_heads = _get_headnode_dict(self.bmi_pre_order) + self.bmi_post_order_heads = _get_headnode_dict(self.bmi_post_order) + + + + def get_fixers(self): + """Inspects the options to load the requested patterns and handlers. + + Returns: + (pre_order, post_order), where pre_order is the list of fixers that + want a pre-order AST traversal, and post_order is the list that want + post-order traversal. 
+ """ + pre_order_fixers = [] + post_order_fixers = [] + for fix_mod_path in self.fixers: + mod = __import__(fix_mod_path, {}, {}, ["*"]) + fix_name = fix_mod_path.rsplit(".", 1)[-1] + if fix_name.startswith(self.FILE_PREFIX): + fix_name = fix_name[len(self.FILE_PREFIX):] + parts = fix_name.split("_") + class_name = self.CLASS_PREFIX + "".join([p.title() for p in parts]) + try: + fix_class = getattr(mod, class_name) + except AttributeError: + raise FixerError("Can't find %s.%s" % (fix_name, class_name)) from None + fixer = fix_class(self.options, self.fixer_log) + if fixer.explicit and self.explicit is not True and \ + fix_mod_path not in self.explicit: + self.log_message("Skipping optional fixer: %s", fix_name) + continue + + self.log_debug("Adding transformation: %s", fix_name) + if fixer.order == "pre": + pre_order_fixers.append(fixer) + elif fixer.order == "post": + post_order_fixers.append(fixer) + else: + raise FixerError("Illegal fixer order: %r" % fixer.order) + + key_func = operator.attrgetter("run_order") + pre_order_fixers.sort(key=key_func) + post_order_fixers.sort(key=key_func) + return (pre_order_fixers, post_order_fixers) + + def log_error(self, msg, *args, **kwds): + """Called when an error occurs.""" + raise + + def log_message(self, msg, *args): + """Hook to log a message.""" + if args: + msg = msg % args + self.logger.info(msg) + + def log_debug(self, msg, *args): + if args: + msg = msg % args + self.logger.debug(msg) + + def print_output(self, old_text, new_text, filename, equal): + """Called with the old version, new version, and filename of a + refactored file.""" + pass + + def refactor(self, items, write=False, doctests_only=False): + """Refactor a list of files and directories.""" + + for dir_or_file in items: + if os.path.isdir(dir_or_file): + self.refactor_dir(dir_or_file, write, doctests_only) + else: + self.refactor_file(dir_or_file, write, doctests_only) + + def refactor_dir(self, dir_name, write=False, doctests_only=False): + """Descends down a directory and refactor every Python file found. + + Python files are assumed to have a .py extension. + + Files and subdirectories starting with '.' are skipped. + """ + py_ext = os.extsep + "py" + for dirpath, dirnames, filenames in os.walk(dir_name): + self.log_debug("Descending into %s", dirpath) + dirnames.sort() + filenames.sort() + for name in filenames: + if (not name.startswith(".") and + os.path.splitext(name)[1] == py_ext): + fullname = os.path.join(dirpath, name) + self.refactor_file(fullname, write, doctests_only) + # Modify dirnames in-place to remove subdirs with leading dots + dirnames[:] = [dn for dn in dirnames if not dn.startswith(".")] + + def _read_python_source(self, filename): + """ + Do our best to decode a Python source file correctly. + """ + try: + f = open(filename, "rb") + except OSError as err: + self.log_error("Can't open %s: %s", filename, err) + return None, None + try: + encoding = tokenize.detect_encoding(f.readline)[0] + finally: + f.close() + with io.open(filename, "r", encoding=encoding, newline='') as f: + return f.read(), encoding + + def refactor_file(self, filename, write=False, doctests_only=False): + """Refactors a file.""" + input, encoding = self._read_python_source(filename) + if input is None: + # Reading the file failed. 
return + input += "\n" # Silence certain parse errors + if doctests_only: + self.log_debug("Refactoring doctests in %s", filename) + output = self.refactor_docstring(input, filename) + if self.write_unchanged_files or output != input: + self.processed_file(output, filename, input, write, encoding) + else: + self.log_debug("No doctest changes in %s", filename) + else: + tree = self.refactor_string(input, filename) + if self.write_unchanged_files or (tree and tree.was_changed): + # The [:-1] is to take off the \n we added earlier + self.processed_file(str(tree)[:-1], filename, + write=write, encoding=encoding) + else: + self.log_debug("No changes in %s", filename) + + def refactor_string(self, data, name): + """Refactor a given input string. + + Args: + data: a string holding the code to be refactored. + name: a human-readable name for use in error/log messages. + + Returns: + An AST corresponding to the refactored input stream; None if + there were errors during the parse. + """ + features = _detect_future_features(data) + if "print_function" in features: + self.driver.grammar = pygram.python_grammar_no_print_statement + try: + tree = self.driver.parse_string(data) + except Exception as err: + self.log_error("Can't parse %s: %s: %s", + name, err.__class__.__name__, err) + return + finally: + self.driver.grammar = self.grammar + tree.future_features = features + self.log_debug("Refactoring %s", name) + self.refactor_tree(tree, name) + return tree + + def refactor_stdin(self, doctests_only=False): + input = sys.stdin.read() + if doctests_only: + self.log_debug("Refactoring doctests in stdin") + output = self.refactor_docstring(input, "<stdin>") + if self.write_unchanged_files or output != input: + self.processed_file(output, "<stdin>", input) + else: + self.log_debug("No doctest changes in stdin") + else: + tree = self.refactor_string(input, "<stdin>") + if self.write_unchanged_files or (tree and tree.was_changed): + self.processed_file(str(tree), "<stdin>", input) + else: + self.log_debug("No changes in stdin") + + def refactor_tree(self, tree, name): + """Refactors a parse tree (modifying the tree in place). + + For compatible patterns the bottom matcher module is + used. Otherwise the tree is traversed node-to-node for + matches. + + Args: + tree: a pytree.Node instance representing the root of the tree + to be refactored. + name: a human-readable name for this tree. + + Returns: + True if the tree was modified, False otherwise.
+ """ + + for fixer in chain(self.pre_order, self.post_order): + fixer.start_tree(tree, name) + + #use traditional matching for the incompatible fixers + self.traverse_by(self.bmi_pre_order_heads, tree.pre_order()) + self.traverse_by(self.bmi_post_order_heads, tree.post_order()) + + # obtain a set of candidate nodes + match_set = self.BM.run(tree.leaves()) + + while any(match_set.values()): + for fixer in self.BM.fixers: + if fixer in match_set and match_set[fixer]: + #sort by depth; apply fixers from bottom(of the AST) to top + match_set[fixer].sort(key=pytree.Base.depth, reverse=True) + + if fixer.keep_line_order: + #some fixers(eg fix_imports) must be applied + #with the original file's line order + match_set[fixer].sort(key=pytree.Base.get_lineno) + + for node in list(match_set[fixer]): + if node in match_set[fixer]: + match_set[fixer].remove(node) + + try: + find_root(node) + except ValueError: + # this node has been cut off from a + # previous transformation ; skip + continue + + if node.fixers_applied and fixer in node.fixers_applied: + # do not apply the same fixer again + continue + + results = fixer.match(node) + + if results: + new = fixer.transform(node, results) + if new is not None: + node.replace(new) + #new.fixers_applied.append(fixer) + for node in new.post_order(): + # do not apply the fixer again to + # this or any subnode + if not node.fixers_applied: + node.fixers_applied = [] + node.fixers_applied.append(fixer) + + # update the original match set for + # the added code + new_matches = self.BM.run(new.leaves()) + for fxr in new_matches: + if not fxr in match_set: + match_set[fxr]=[] + + match_set[fxr].extend(new_matches[fxr]) + + for fixer in chain(self.pre_order, self.post_order): + fixer.finish_tree(tree, name) + return tree.was_changed + + def traverse_by(self, fixers, traversal): + """Traverse an AST, applying a set of fixers to each node. + + This is a helper method for refactor_tree(). + + Args: + fixers: a list of fixer instances. + traversal: a generator that yields AST nodes. + + Returns: + None + """ + if not fixers: + return + for node in traversal: + for fixer in fixers[node.type]: + results = fixer.match(node) + if results: + new = fixer.transform(node, results) + if new is not None: + node.replace(new) + node = new + + def processed_file(self, new_text, filename, old_text=None, write=False, + encoding=None): + """ + Called when a file has been refactored and there may be changes. + """ + self.files.append(filename) + if old_text is None: + old_text = self._read_python_source(filename)[0] + if old_text is None: + return + equal = old_text == new_text + self.print_output(old_text, new_text, filename, equal) + if equal: + self.log_debug("No changes to %s", filename) + if not self.write_unchanged_files: + return + if write: + self.write_file(new_text, filename, old_text, encoding) + else: + self.log_debug("Not writing changes to %s", filename) + + def write_file(self, new_text, filename, old_text, encoding=None): + """Writes a string to a file. + + It first shows a unified diff between the old text and the new text, and + then rewrites the file; the latter is only done if the write option is + set. 
+ """ + try: + fp = io.open(filename, "w", encoding=encoding, newline='') + except OSError as err: + self.log_error("Can't create %s: %s", filename, err) + return + + with fp: + try: + fp.write(new_text) + except OSError as err: + self.log_error("Can't write %s: %s", filename, err) + self.log_debug("Wrote changes to %s", filename) + self.wrote = True + + PS1 = ">>> " + PS2 = "... " + + def refactor_docstring(self, input, filename): + """Refactors a docstring, looking for doctests. + + This returns a modified version of the input string. It looks + for doctests, which start with a ">>>" prompt, and may be + continued with "..." prompts, as long as the "..." is indented + the same as the ">>>". + + (Unfortunately we can't use the doctest module's parser, + since, like most parsers, it is not geared towards preserving + the original source.) + """ + result = [] + block = None + block_lineno = None + indent = None + lineno = 0 + for line in input.splitlines(keepends=True): + lineno += 1 + if line.lstrip().startswith(self.PS1): + if block is not None: + result.extend(self.refactor_doctest(block, block_lineno, + indent, filename)) + block_lineno = lineno + block = [line] + i = line.find(self.PS1) + indent = line[:i] + elif (indent is not None and + (line.startswith(indent + self.PS2) or + line == indent + self.PS2.rstrip() + "\n")): + block.append(line) + else: + if block is not None: + result.extend(self.refactor_doctest(block, block_lineno, + indent, filename)) + block = None + indent = None + result.append(line) + if block is not None: + result.extend(self.refactor_doctest(block, block_lineno, + indent, filename)) + return "".join(result) + + def refactor_doctest(self, block, lineno, indent, filename): + """Refactors one doctest. + + A doctest is given as a block of lines, the first of which starts + with ">>>" (possibly indented), while the remaining lines start + with "..." (identically indented). + + """ + try: + tree = self.parse_block(block, lineno, indent) + except Exception as err: + if self.logger.isEnabledFor(logging.DEBUG): + for line in block: + self.log_debug("Source: %s", line.rstrip("\n")) + self.log_error("Can't parse docstring in %s line %s: %s: %s", + filename, lineno, err.__class__.__name__, err) + return block + if self.refactor_tree(tree, filename): + new = str(tree).splitlines(keepends=True) + # Undo the adjustment of the line numbers in wrap_toks() below. + clipped, new = new[:lineno-1], new[lineno-1:] + assert clipped == ["\n"] * (lineno-1), clipped + if not new[-1].endswith("\n"): + new[-1] += "\n" + block = [indent + self.PS1 + new.pop(0)] + if new: + block += [indent + self.PS2 + line for line in new] + return block + + def summarize(self): + if self.wrote: + were = "were" + else: + were = "need to be" + if not self.files: + self.log_message("No files %s modified.", were) + else: + self.log_message("Files that %s modified:", were) + for file in self.files: + self.log_message(file) + if self.fixer_log: + self.log_message("Warnings/messages while refactoring:") + for message in self.fixer_log: + self.log_message(message) + if self.errors: + if len(self.errors) == 1: + self.log_message("There was 1 error:") + else: + self.log_message("There were %d errors:", len(self.errors)) + for msg, args, kwds in self.errors: + self.log_message(msg, *args, **kwds) + + def parse_block(self, block, lineno, indent): + """Parses a block into a tree. + + This is necessary to get correct line number / offset information + in the parser diagnostics and embedded into the parse tree. 
+ """ + tree = self.driver.parse_tokens(self.wrap_toks(block, lineno, indent)) + tree.future_features = frozenset() + return tree + + def wrap_toks(self, block, lineno, indent): + """Wraps a tokenize stream to systematically modify start/end.""" + tokens = tokenize.generate_tokens(self.gen_lines(block, indent).__next__) + for type, value, (line0, col0), (line1, col1), line_text in tokens: + line0 += lineno - 1 + line1 += lineno - 1 + # Don't bother updating the columns; this is too complicated + # since line_text would also have to be updated and it would + # still break for tokens spanning lines. Let the user guess + # that the column numbers for doctests are relative to the + # end of the prompt string (PS1 or PS2). + yield type, value, (line0, col0), (line1, col1), line_text + + + def gen_lines(self, block, indent): + """Generates lines as expected by tokenize from a list of lines. + + This strips the first len(indent + self.PS1) characters off each line. + """ + prefix1 = indent + self.PS1 + prefix2 = indent + self.PS2 + prefix = prefix1 + for line in block: + if line.startswith(prefix): + yield line[len(prefix):] + elif line == prefix.rstrip() + "\n": + yield "\n" + else: + raise AssertionError("line=%r, prefix=%r" % (line, prefix)) + prefix = prefix2 + while True: + yield "" + + +class MultiprocessingUnsupported(Exception): + pass + + +class MultiprocessRefactoringTool(RefactoringTool): + + def __init__(self, *args, **kwargs): + super(MultiprocessRefactoringTool, self).__init__(*args, **kwargs) + self.queue = None + self.output_lock = None + + def refactor(self, items, write=False, doctests_only=False, + num_processes=1): + if num_processes == 1: + return super(MultiprocessRefactoringTool, self).refactor( + items, write, doctests_only) + try: + import multiprocessing + except ImportError: + raise MultiprocessingUnsupported + if self.queue is not None: + raise RuntimeError("already doing multiple processes") + self.queue = multiprocessing.JoinableQueue() + self.output_lock = multiprocessing.Lock() + processes = [multiprocessing.Process(target=self._child) + for i in range(num_processes)] + try: + for p in processes: + p.start() + super(MultiprocessRefactoringTool, self).refactor(items, write, + doctests_only) + finally: + self.queue.join() + for i in range(num_processes): + self.queue.put(None) + for p in processes: + if p.is_alive(): + p.join() + self.queue = None + + def _child(self): + task = self.queue.get() + while task is not None: + args, kwargs = task + try: + super(MultiprocessRefactoringTool, self).refactor_file( + *args, **kwargs) + finally: + self.queue.task_done() + task = self.queue.get() + + def refactor_file(self, *args, **kwargs): + if self.queue is not None: + self.queue.put((args, kwargs)) + else: + return super(MultiprocessRefactoringTool, self).refactor_file( + *args, **kwargs) diff --git a/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_cpu_io_mixed_tg.toml b/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_cpu_io_mixed_tg.toml new file mode 100644 index 00000000..c1fb778a --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_cpu_io_mixed_tg.toml @@ -0,0 +1,7 @@ +[project] +requires-python = ">=3.11" +dynamic = ["version"] + +[tool.pyperformance] +name = "async_tree_cpu_io_mixed_tg" +extra_opts = ["cpu_io_mixed", "--task-groups"] diff --git a/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_eager.toml b/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_eager.toml index 
09d16ee8..e4d5fd2f 100644 --- a/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_eager.toml +++ b/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_eager.toml @@ -1,3 +1,7 @@ +[project] +requires-python = ">=3.12" +dynamic = ["version"] + [tool.pyperformance] name = "async_tree_eager" extra_opts = ["eager"] diff --git a/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_eager_cpu_io_mixed.toml b/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_eager_cpu_io_mixed.toml index 4766cb23..22e36fba 100644 --- a/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_eager_cpu_io_mixed.toml +++ b/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_eager_cpu_io_mixed.toml @@ -1,3 +1,7 @@ +[project] +requires-python = ">=3.12" +dynamic = ["version"] + [tool.pyperformance] name = "async_tree_eager_cpu_io_mixed" extra_opts = ["eager_cpu_io_mixed"] diff --git a/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_eager_cpu_io_mixed_tg.toml b/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_eager_cpu_io_mixed_tg.toml new file mode 100644 index 00000000..5d1bfeed --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_eager_cpu_io_mixed_tg.toml @@ -0,0 +1,7 @@ +[project] +requires-python = ">=3.12" +dynamic = ["version"] + +[tool.pyperformance] +name = "async_tree_eager_cpu_io_mixed_tg" +extra_opts = ["eager_cpu_io_mixed", "--task-groups"] diff --git a/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_eager_io.toml b/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_eager_io.toml index de1dfb2a..57bf2da1 100644 --- a/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_eager_io.toml +++ b/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_eager_io.toml @@ -1,3 +1,7 @@ +[project] +requires-python = ">=3.12" +dynamic = ["version"] + [tool.pyperformance] name = "async_tree_eager_io" extra_opts = ["eager_io"] diff --git a/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_eager_io_tg.toml b/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_eager_io_tg.toml new file mode 100644 index 00000000..e586e87d --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_eager_io_tg.toml @@ -0,0 +1,7 @@ +[project] +requires-python = ">=3.12" +dynamic = ["version"] + +[tool.pyperformance] +name = "async_tree_eager_io_tg" +extra_opts = ["eager_io", "--task-groups"] diff --git a/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_eager_memoization.toml b/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_eager_memoization.toml index ec199382..4a8c6e63 100644 --- a/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_eager_memoization.toml +++ b/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_eager_memoization.toml @@ -1,3 +1,7 @@ +[project] +requires-python = ">=3.12" +dynamic = ["version"] + [tool.pyperformance] name = "async_tree_eager_memoization" extra_opts = ["eager_memoization"] diff --git a/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_eager_memoization_tg.toml b/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_eager_memoization_tg.toml new file mode 100644 index 00000000..d24a3fa6 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_eager_memoization_tg.toml @@ -0,0 +1,7 @@ +[project] +requires-python = ">=3.12" +dynamic = ["version"] + +[tool.pyperformance] +name = 
"async_tree_eager_memoization_tg" +extra_opts = ["eager_memoization", "--task-groups"] diff --git a/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_eager_tg.toml b/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_eager_tg.toml new file mode 100644 index 00000000..868414c9 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_eager_tg.toml @@ -0,0 +1,7 @@ +[project] +requires-python = ">=3.12" +dynamic = ["version"] + +[tool.pyperformance] +name = "async_tree_eager_tg" +extra_opts = ["eager", "--task-groups"] diff --git a/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_io_tg.toml b/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_io_tg.toml new file mode 100644 index 00000000..7a3e22d9 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_io_tg.toml @@ -0,0 +1,7 @@ +[project] +requires-python = ">=3.11" +dynamic = ["version"] + +[tool.pyperformance] +name = "async_tree_io_tg" +extra_opts = ["io", "--task-groups"] diff --git a/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_memoization_tg.toml b/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_memoization_tg.toml new file mode 100644 index 00000000..7963305d --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_memoization_tg.toml @@ -0,0 +1,7 @@ +[project] +requires-python = ">=3.11" +dynamic = ["version"] + +[tool.pyperformance] +name = "async_tree_memoization_tg" +extra_opts = ["memoization", "--task-groups"] diff --git a/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_tg.toml b/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_tg.toml new file mode 100644 index 00000000..0c7495be --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_async_tree/bm_async_tree_tg.toml @@ -0,0 +1,7 @@ +[project] +requires-python = ">=3.11" +dynamic = ["version"] + +[tool.pyperformance] +name = "async_tree_tg" +extra_opts = ["none", "--task-groups"] diff --git a/pyperformance/data-files/benchmarks/bm_async_tree/pyproject.toml b/pyperformance/data-files/benchmarks/bm_async_tree/pyproject.toml index c1f73aac..565512a0 100644 --- a/pyperformance/data-files/benchmarks/bm_async_tree/pyproject.toml +++ b/pyperformance/data-files/benchmarks/bm_async_tree/pyproject.toml @@ -7,4 +7,5 @@ dynamic = ["version"] [tool.pyperformance] name = "async_tree" +tags = "asyncio" extra_opts = ["none"] diff --git a/pyperformance/data-files/benchmarks/bm_async_tree/run_benchmark.py b/pyperformance/data-files/benchmarks/bm_async_tree/run_benchmark.py index 72fc917c..131e59c1 100644 --- a/pyperformance/data-files/benchmarks/bm_async_tree/run_benchmark.py +++ b/pyperformance/data-files/benchmarks/bm_async_tree/run_benchmark.py @@ -12,8 +12,8 @@ the other half simulate the same workload as the "memoization" variant. -All variants also have an "eager" flavor that uses -the asyncio eager task factory (if available). +All variants also have an "eager" flavor that uses the asyncio eager task +factory (if available), and a "tg" variant that uses TaskGroups. """ @@ -34,8 +34,9 @@ class AsyncTree: - def __init__(self): + def __init__(self, use_task_groups=False): self.cache = {} + self.use_task_groups = use_task_groups # set to deterministic random, so that the results are reproducible random.seed(RANDOM_SEED) @@ -47,17 +48,31 @@ async def workload_func(self): "To be implemented by each variant's derived class." 
) - async def recurse(self, recurse_level): + async def recurse_with_gather(self, recurse_level): if recurse_level == 0: await self.workload_func() return await asyncio.gather( - *[self.recurse(recurse_level - 1) for _ in range(NUM_RECURSE_BRANCHES)] + *[self.recurse_with_gather(recurse_level - 1) + for _ in range(NUM_RECURSE_BRANCHES)] ) + async def recurse_with_task_group(self, recurse_level): + if recurse_level == 0: + await self.workload_func() + return + + async with asyncio.TaskGroup() as tg: + for _ in range(NUM_RECURSE_BRANCHES): + tg.create_task( + self.recurse_with_task_group(recurse_level - 1)) + async def run(self): - await self.recurse(NUM_RECURSE_LEVELS) + if self.use_task_groups: + await self.recurse_with_task_group(NUM_RECURSE_LEVELS) + else: + await self.recurse_with_gather(NUM_RECURSE_LEVELS) class EagerMixin: @@ -132,6 +147,8 @@ def add_metadata(runner): def add_cmdline_args(cmd, args): cmd.append(args.benchmark) + if args.task_groups: + cmd.append("--task-groups") def add_parser_args(parser): @@ -149,6 +166,12 @@ def add_parser_args(parser): "memoization" variant. """, ) + parser.add_argument( + "--task-groups", + action="store_true", + default=False, + help="Use TaskGroups instead of gather.", + ) BENCHMARKS = { @@ -171,5 +194,8 @@ def add_parser_args(parser): benchmark = args.benchmark async_tree_class = BENCHMARKS[benchmark] - async_tree = async_tree_class() - runner.bench_async_func(f"async_tree_{benchmark}", async_tree.run) + async_tree = async_tree_class(use_task_groups=args.task_groups) + bench_name = f"async_tree_{benchmark}" + if args.task_groups: + bench_name += "_tg" + runner.bench_async_func(bench_name, async_tree.run)
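The vendored pygram/pytree modules above can be exercised directly once the vendor tree is importable. A minimal sketch, assuming the vendor/src path from this diff is on sys.path and the vendored package behaves like stock lib2to3; illustrative only, not part of the patch:

# Sketch: parse a snippet with the vendored lib2to3 and match a LeafPattern.
# The sys.path entry below is an assumption (the vendor dir from this diff).
import sys
sys.path.insert(0, "pyperformance/data-files/benchmarks/bm_2to3/vendor/src")

from lib2to3 import pygram, pytree
from lib2to3.pgen2 import driver, token

d = driver.Driver(pygram.python_grammar_no_print_statement,
                  convert=pytree.convert)
tree = d.parse_string("x = 1\n")  # lib2to3 expects a trailing newline

# LeafPattern/match come from pytree.py above: record any NAME leaf.
name_pattern = pytree.LeafPattern(token.NAME, name="target")
for node in tree.pre_order():
    results = {}
    if name_pattern.match(node, results):
        print("NAME leaf:", results["target"].value)  # -> x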
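refactor.py's _detect_future_features tokenizes only the module prologue (an optional docstring plus __future__ imports) and stops at the first real statement. A small illustration under the same sys.path assumption as above; note it is a private helper, so this is a sketch rather than supported API:

from lib2to3.refactor import _detect_future_features  # private helper

src = "'''doc'''\nfrom __future__ import print_function\nx = 1\n"
print(_detect_future_features(src))  # -> frozenset({'print_function'})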
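The RefactoringTool defined above is also usable as a library entry point. A hedged sketch using the stock fixer package name lib2to3.fixes (again assuming the vendored tree is importable as lib2to3):

from lib2to3.refactor import RefactoringTool, get_fixers_from_package

rt = RefactoringTool(get_fixers_from_package("lib2to3.fixes"))
tree = rt.refactor_string("print 'hello'\n", "<example>")
print(tree, end="")  # -> print('hello')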
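The run_benchmark.py hunk above reduces to a gather-versus-TaskGroup fan-out. A standalone sketch with shrunken, illustrative constants (the real benchmark uses NUM_RECURSE_LEVELS, NUM_RECURSE_BRANCHES, and each variant's workload_func):

import asyncio

LEVELS = 3    # stand-in for NUM_RECURSE_LEVELS
BRANCHES = 2  # stand-in for NUM_RECURSE_BRANCHES

async def workload():
    await asyncio.sleep(0)  # stand-in for a variant's workload_func()

async def recurse_with_gather(level):
    if level == 0:
        await workload()
        return
    await asyncio.gather(*[recurse_with_gather(level - 1)
                           for _ in range(BRANCHES)])

async def recurse_with_task_group(level):
    if level == 0:
        await workload()
        return
    async with asyncio.TaskGroup() as tg:  # requires Python 3.11+
        for _ in range(BRANCHES):
            tg.create_task(recurse_with_task_group(level - 1))

asyncio.run(recurse_with_gather(LEVELS))
asyncio.run(recurse_with_task_group(LEVELS))

With the _tg TOML entries above, each variant should then be selectable by name, e.g. "pyperformance run -b async_tree_tg" (assuming the usual -b/--benchmarks selection).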