From d20fbb8d16cba39e41b457b340f0ad2c2ffa1858 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 2 Aug 2023 17:52:29 +0800 Subject: [PATCH 01/48] gh-107557: Tier 2 abstract interpreter barebones --- .gitattributes | 1 + Include/internal/pycore_optimizer.h | 16 + Makefile.pre.in | 9 +- PCbuild/_freeze_module.vcxproj | 1 + PCbuild/_freeze_module.vcxproj.filters | 3 + PCbuild/pythoncore.vcxproj | 3 + PCbuild/pythoncore.vcxproj.filters | 9 + Python/abstract_interp_cases.c.h | 1085 +++++++++++++++++++++++ Python/optimizer.c | 8 +- Python/optimizer_analysis.c | 21 + Tools/cases_generator/generate_cases.py | 47 +- Tools/cases_generator/instructions.py | 31 + 12 files changed, 1229 insertions(+), 5 deletions(-) create mode 100644 Include/internal/pycore_optimizer.h create mode 100644 Python/abstract_interp_cases.c.h create mode 100644 Python/optimizer_analysis.c diff --git a/.gitattributes b/.gitattributes index 5d5558da711b17..e8b6b5bd7fa54f 100644 --- a/.gitattributes +++ b/.gitattributes @@ -87,6 +87,7 @@ Programs/test_frozenmain.h generated Python/Python-ast.c generated Python/executor_cases.c.h generated Python/generated_cases.c.h generated +Python/abstract_interp_cases.c.h generated Python/opcode_targets.h generated Python/stdlib_module_names.h generated Tools/peg_generator/pegen/grammar_parser.py generated diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h new file mode 100644 index 00000000000000..3b5fa634e13972 --- /dev/null +++ b/Include/internal/pycore_optimizer.h @@ -0,0 +1,16 @@ +#ifndef Py_INTERNAL_OPTIMIZER_H +#define Py_INTERNAL_OPTIMIZER_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +extern int uop_analyze_and_optimize(struct _PyUOpInstruction *trace, int trace_len); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_OPTIMIZER_H */ diff --git a/Makefile.pre.in b/Makefile.pre.in 
index 12409774746a30..a334aae9dec4fc 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -405,6 +405,7 @@ PYTHON_OBJS= \ Python/mysnprintf.o \ Python/mystrtoul.o \ Python/optimizer.o \ + Python/optimizer_analysis.o \ Python/pathconfig.o \ Python/preconfig.o \ Python/pyarena.o \ @@ -1562,6 +1563,7 @@ Python/ceval.o: \ Python/executor.o: \ $(srcdir)/Include/internal/pycore_opcode_metadata.h \ + $(srcdir)/Include/internal/pycore_optimizer.h \ $(srcdir)/Python/ceval_macros.h \ $(srcdir)/Python/executor_cases.c.h @@ -1570,7 +1572,12 @@ Python/flowgraph.o: \ Python/optimizer.o: \ $(srcdir)/Python/executor_cases.c.h \ - $(srcdir)/Include/internal/pycore_opcode_metadata.h + $(srcdir)/Include/internal/pycore_opcode_metadata.h \ + $(srcdir)/Include/internal/pycore_optimizer.h + +Python/optimizer_analysis.o: \ + $(srcdir)/Include/internal/pycore_opcode_metadata.h \ + $(srcdir)/Include/internal/pycore_optimizer.h Python/frozen.o: $(FROZEN_FILES_OUT) diff --git a/PCbuild/_freeze_module.vcxproj b/PCbuild/_freeze_module.vcxproj index e247637a0dfe5c..bdcf29ba44dab5 100644 --- a/PCbuild/_freeze_module.vcxproj +++ b/PCbuild/_freeze_module.vcxproj @@ -218,6 +218,7 @@ + diff --git a/PCbuild/_freeze_module.vcxproj.filters b/PCbuild/_freeze_module.vcxproj.filters index 2a0e009308022b..45333fa97f1c64 100644 --- a/PCbuild/_freeze_module.vcxproj.filters +++ b/PCbuild/_freeze_module.vcxproj.filters @@ -283,6 +283,9 @@ Source Files + + Source Files + Source Files diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index bfe59acf12a69d..b0e62864421e17 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -248,6 +248,7 @@ + @@ -279,6 +280,7 @@ + @@ -548,6 +550,7 @@ + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index 0a8b0c3faf51e1..d5f61e9c5d7c89 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -648,6 +648,9 @@ Include\internal + + Include\internal + Include\internal @@ 
-732,6 +735,9 @@ Include\internal + + Include\internal + Modules\zlib @@ -1223,6 +1229,9 @@ Python + + Python + Python diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h new file mode 100644 index 00000000000000..4b05477da45e3a --- /dev/null +++ b/Python/abstract_interp_cases.c.h @@ -0,0 +1,1085 @@ +// This file is generated by Tools/cases_generator/generate_cases.py +// from: +// Python\bytecodes.c +// Do not edit! + + case NOP: { + break; + } + + case RESUME: { + break; + } + + case INSTRUMENTED_RESUME: { + break; + } + + case LOAD_FAST_CHECK: { + STACK_GROW(1); + break; + } + + case LOAD_FAST: { + STACK_GROW(1); + break; + } + + case LOAD_FAST_AND_CLEAR: { + STACK_GROW(1); + break; + } + + case LOAD_FAST_LOAD_FAST: { + STACK_GROW(2); + break; + } + + case LOAD_CONST: { + STACK_GROW(1); + break; + } + + case STORE_FAST: { + STACK_SHRINK(1); + break; + } + + case STORE_FAST_LOAD_FAST: { + break; + } + + case STORE_FAST_STORE_FAST: { + STACK_SHRINK(2); + break; + } + + case POP_TOP: { + STACK_SHRINK(1); + break; + } + + case PUSH_NULL: { + STACK_GROW(1); + break; + } + + case INSTRUMENTED_END_FOR: { + STACK_SHRINK(2); + break; + } + + case END_SEND: { + STACK_SHRINK(1); + break; + } + + case INSTRUMENTED_END_SEND: { + STACK_SHRINK(1); + break; + } + + case UNARY_NEGATIVE: { + break; + } + + case UNARY_NOT: { + break; + } + + case TO_BOOL: { + break; + } + + case TO_BOOL_BOOL: { + break; + } + + case TO_BOOL_INT: { + break; + } + + case TO_BOOL_LIST: { + break; + } + + case TO_BOOL_NONE: { + break; + } + + case TO_BOOL_STR: { + break; + } + + case TO_BOOL_ALWAYS_TRUE: { + break; + } + + case UNARY_INVERT: { + break; + } + + case _GUARD_BOTH_INT: { + break; + } + + case _BINARY_OP_MULTIPLY_INT: { + STACK_SHRINK(1); + break; + } + + case _BINARY_OP_ADD_INT: { + STACK_SHRINK(1); + break; + } + + case _BINARY_OP_SUBTRACT_INT: { + STACK_SHRINK(1); + break; + } + + case _GUARD_BOTH_FLOAT: { + break; + } + + case _BINARY_OP_MULTIPLY_FLOAT: { + 
STACK_SHRINK(1); + break; + } + + case _BINARY_OP_ADD_FLOAT: { + STACK_SHRINK(1); + break; + } + + case _BINARY_OP_SUBTRACT_FLOAT: { + STACK_SHRINK(1); + break; + } + + case _GUARD_BOTH_UNICODE: { + break; + } + + case _BINARY_OP_ADD_UNICODE: { + STACK_SHRINK(1); + break; + } + + case _BINARY_OP_INPLACE_ADD_UNICODE: { + STACK_SHRINK(2); + break; + } + + case BINARY_SUBSCR: { + STACK_SHRINK(1); + break; + } + + case BINARY_SLICE: { + STACK_SHRINK(2); + break; + } + + case STORE_SLICE: { + STACK_SHRINK(4); + break; + } + + case BINARY_SUBSCR_LIST_INT: { + STACK_SHRINK(1); + break; + } + + case BINARY_SUBSCR_TUPLE_INT: { + STACK_SHRINK(1); + break; + } + + case BINARY_SUBSCR_DICT: { + STACK_SHRINK(1); + break; + } + + case BINARY_SUBSCR_GETITEM: { + STACK_SHRINK(1); + break; + } + + case LIST_APPEND: { + STACK_SHRINK(1); + break; + } + + case SET_ADD: { + STACK_SHRINK(1); + break; + } + + case STORE_SUBSCR: { + STACK_SHRINK(3); + break; + } + + case STORE_SUBSCR_LIST_INT: { + STACK_SHRINK(3); + break; + } + + case STORE_SUBSCR_DICT: { + STACK_SHRINK(3); + break; + } + + case DELETE_SUBSCR: { + STACK_SHRINK(2); + break; + } + + case CALL_INTRINSIC_1: { + break; + } + + case CALL_INTRINSIC_2: { + STACK_SHRINK(1); + break; + } + + case RAISE_VARARGS: { + STACK_SHRINK(oparg); + break; + } + + case INTERPRETER_EXIT: { + STACK_SHRINK(1); + break; + } + + case RETURN_VALUE: { + STACK_SHRINK(1); + break; + } + + case INSTRUMENTED_RETURN_VALUE: { + STACK_SHRINK(1); + break; + } + + case RETURN_CONST: { + break; + } + + case INSTRUMENTED_RETURN_CONST: { + break; + } + + case GET_AITER: { + break; + } + + case GET_ANEXT: { + STACK_GROW(1); + break; + } + + case GET_AWAITABLE: { + break; + } + + case SEND: { + break; + } + + case SEND_GEN: { + break; + } + + case INSTRUMENTED_YIELD_VALUE: { + break; + } + + case YIELD_VALUE: { + break; + } + + case POP_EXCEPT: { + STACK_SHRINK(1); + break; + } + + case RERAISE: { + STACK_SHRINK(1); + break; + } + + case END_ASYNC_FOR: { + 
STACK_SHRINK(2); + break; + } + + case CLEANUP_THROW: { + STACK_SHRINK(1); + break; + } + + case LOAD_ASSERTION_ERROR: { + STACK_GROW(1); + break; + } + + case LOAD_BUILD_CLASS: { + STACK_GROW(1); + break; + } + + case STORE_NAME: { + STACK_SHRINK(1); + break; + } + + case DELETE_NAME: { + break; + } + + case UNPACK_SEQUENCE: { + STACK_SHRINK(1); + STACK_GROW(oparg); + break; + } + + case UNPACK_SEQUENCE_TWO_TUPLE: { + STACK_SHRINK(1); + STACK_GROW(oparg); + break; + } + + case UNPACK_SEQUENCE_TUPLE: { + STACK_SHRINK(1); + STACK_GROW(oparg); + break; + } + + case UNPACK_SEQUENCE_LIST: { + STACK_SHRINK(1); + STACK_GROW(oparg); + break; + } + + case UNPACK_EX: { + STACK_GROW((oparg & 0xFF) + (oparg >> 8)); + break; + } + + case STORE_ATTR: { + STACK_SHRINK(2); + break; + } + + case DELETE_ATTR: { + STACK_SHRINK(1); + break; + } + + case STORE_GLOBAL: { + STACK_SHRINK(1); + break; + } + + case DELETE_GLOBAL: { + break; + } + + case _LOAD_LOCALS: { + STACK_GROW(1); + break; + } + + case _LOAD_FROM_DICT_OR_GLOBALS: { + break; + } + + case LOAD_GLOBAL: { + STACK_GROW(1); + STACK_GROW(((oparg & 1) ? 1 : 0)); + break; + } + + case _GUARD_GLOBALS_VERSION: { + break; + } + + case _GUARD_BUILTINS_VERSION: { + break; + } + + case _LOAD_GLOBAL_MODULE: { + STACK_GROW(1); + STACK_GROW(((oparg & 1) ? 1 : 0)); + break; + } + + case _LOAD_GLOBAL_BUILTINS: { + STACK_GROW(1); + STACK_GROW(((oparg & 1) ? 
1 : 0)); + break; + } + + case DELETE_FAST: { + break; + } + + case MAKE_CELL: { + break; + } + + case DELETE_DEREF: { + break; + } + + case LOAD_FROM_DICT_OR_DEREF: { + break; + } + + case LOAD_DEREF: { + STACK_GROW(1); + break; + } + + case STORE_DEREF: { + STACK_SHRINK(1); + break; + } + + case COPY_FREE_VARS: { + break; + } + + case BUILD_STRING: { + STACK_SHRINK(oparg); + STACK_GROW(1); + break; + } + + case BUILD_TUPLE: { + STACK_SHRINK(oparg); + STACK_GROW(1); + break; + } + + case BUILD_LIST: { + STACK_SHRINK(oparg); + STACK_GROW(1); + break; + } + + case LIST_EXTEND: { + STACK_SHRINK(1); + break; + } + + case SET_UPDATE: { + STACK_SHRINK(1); + break; + } + + case BUILD_SET: { + STACK_SHRINK(oparg); + STACK_GROW(1); + break; + } + + case BUILD_MAP: { + STACK_SHRINK(oparg*2); + STACK_GROW(1); + break; + } + + case SETUP_ANNOTATIONS: { + break; + } + + case BUILD_CONST_KEY_MAP: { + STACK_SHRINK(oparg); + break; + } + + case DICT_UPDATE: { + STACK_SHRINK(1); + break; + } + + case DICT_MERGE: { + STACK_SHRINK(1); + break; + } + + case MAP_ADD: { + STACK_SHRINK(2); + break; + } + + case INSTRUMENTED_LOAD_SUPER_ATTR: { + STACK_SHRINK(2); + STACK_GROW(((oparg & 1) ? 1 : 0)); + break; + } + + case LOAD_SUPER_ATTR: { + STACK_SHRINK(2); + STACK_GROW(((oparg & 1) ? 1 : 0)); + break; + } + + case LOAD_SUPER_ATTR_ATTR: { + STACK_SHRINK(2); + STACK_GROW(((oparg & 1) ? 1 : 0)); + break; + } + + case LOAD_SUPER_ATTR_METHOD: { + STACK_SHRINK(1); + break; + } + + case LOAD_ATTR: { + STACK_GROW(((oparg & 1) ? 1 : 0)); + break; + } + + case _GUARD_TYPE_VERSION: { + break; + } + + case _CHECK_MANAGED_OBJECT_HAS_VALUES: { + break; + } + + case _LOAD_ATTR_INSTANCE_VALUE: { + STACK_GROW(((oparg & 1) ? 1 : 0)); + break; + } + + case LOAD_ATTR_MODULE: { + STACK_GROW(((oparg & 1) ? 1 : 0)); + break; + } + + case LOAD_ATTR_WITH_HINT: { + STACK_GROW(((oparg & 1) ? 1 : 0)); + break; + } + + case LOAD_ATTR_SLOT: { + STACK_GROW(((oparg & 1) ? 
1 : 0)); + break; + } + + case LOAD_ATTR_CLASS: { + STACK_GROW(((oparg & 1) ? 1 : 0)); + break; + } + + case LOAD_ATTR_PROPERTY: { + STACK_GROW(((oparg & 1) ? 1 : 0)); + break; + } + + case LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN: { + STACK_GROW(((oparg & 1) ? 1 : 0)); + break; + } + + case STORE_ATTR_INSTANCE_VALUE: { + STACK_SHRINK(2); + break; + } + + case STORE_ATTR_WITH_HINT: { + STACK_SHRINK(2); + break; + } + + case STORE_ATTR_SLOT: { + STACK_SHRINK(2); + break; + } + + case COMPARE_OP: { + STACK_SHRINK(1); + break; + } + + case COMPARE_OP_FLOAT: { + STACK_SHRINK(1); + break; + } + + case COMPARE_OP_INT: { + STACK_SHRINK(1); + break; + } + + case COMPARE_OP_STR: { + STACK_SHRINK(1); + break; + } + + case IS_OP: { + STACK_SHRINK(1); + break; + } + + case CONTAINS_OP: { + STACK_SHRINK(1); + break; + } + + case CHECK_EG_MATCH: { + break; + } + + case CHECK_EXC_MATCH: { + break; + } + + case IMPORT_NAME: { + STACK_SHRINK(1); + break; + } + + case IMPORT_FROM: { + STACK_GROW(1); + break; + } + + case JUMP_FORWARD: { + break; + } + + case JUMP_BACKWARD: { + break; + } + + case ENTER_EXECUTOR: { + break; + } + + case POP_JUMP_IF_FALSE: { + STACK_SHRINK(1); + break; + } + + case POP_JUMP_IF_TRUE: { + STACK_SHRINK(1); + break; + } + + case IS_NONE: { + break; + } + + case JUMP_BACKWARD_NO_INTERRUPT: { + break; + } + + case GET_LEN: { + STACK_GROW(1); + break; + } + + case MATCH_CLASS: { + STACK_SHRINK(2); + break; + } + + case MATCH_MAPPING: { + STACK_GROW(1); + break; + } + + case MATCH_SEQUENCE: { + STACK_GROW(1); + break; + } + + case MATCH_KEYS: { + STACK_GROW(1); + break; + } + + case GET_ITER: { + break; + } + + case GET_YIELD_FROM_ITER: { + break; + } + + case FOR_ITER: { + STACK_GROW(1); + break; + } + + case INSTRUMENTED_FOR_ITER: { + break; + } + + case _ITER_CHECK_LIST: { + break; + } + + case _ITER_JUMP_LIST: { + break; + } + + case _IS_ITER_EXHAUSTED_LIST: { + STACK_GROW(1); + break; + } + + case _ITER_NEXT_LIST: { + STACK_GROW(1); + break; + } + + case 
_ITER_CHECK_TUPLE: { + break; + } + + case _ITER_JUMP_TUPLE: { + break; + } + + case _IS_ITER_EXHAUSTED_TUPLE: { + STACK_GROW(1); + break; + } + + case _ITER_NEXT_TUPLE: { + STACK_GROW(1); + break; + } + + case _ITER_CHECK_RANGE: { + break; + } + + case _ITER_JUMP_RANGE: { + break; + } + + case _IS_ITER_EXHAUSTED_RANGE: { + STACK_GROW(1); + break; + } + + case _ITER_NEXT_RANGE: { + STACK_GROW(1); + break; + } + + case FOR_ITER_GEN: { + STACK_GROW(1); + break; + } + + case BEFORE_ASYNC_WITH: { + STACK_GROW(1); + break; + } + + case BEFORE_WITH: { + STACK_GROW(1); + break; + } + + case WITH_EXCEPT_START: { + STACK_GROW(1); + break; + } + + case PUSH_EXC_INFO: { + STACK_GROW(1); + break; + } + + case LOAD_ATTR_METHOD_WITH_VALUES: { + STACK_GROW(1); + break; + } + + case LOAD_ATTR_METHOD_NO_DICT: { + STACK_GROW(1); + break; + } + + case LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: { + break; + } + + case LOAD_ATTR_NONDESCRIPTOR_NO_DICT: { + break; + } + + case LOAD_ATTR_METHOD_LAZY_DICT: { + STACK_GROW(1); + break; + } + + case KW_NAMES: { + break; + } + + case INSTRUMENTED_CALL: { + break; + } + + case CALL: { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + CHECK_EVAL_BREAKER(); + break; + } + + case CALL_BOUND_METHOD_EXACT_ARGS: { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + break; + } + + case CALL_PY_EXACT_ARGS: { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + break; + } + + case CALL_PY_WITH_DEFAULTS: { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + break; + } + + case CALL_NO_KW_TYPE_1: { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + break; + } + + case CALL_NO_KW_STR_1: { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + CHECK_EVAL_BREAKER(); + break; + } + + case CALL_NO_KW_TUPLE_1: { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + CHECK_EVAL_BREAKER(); + break; + } + + case CALL_NO_KW_ALLOC_AND_ENTER_INIT: { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + break; + } + + case EXIT_INIT_CHECK: { + STACK_SHRINK(1); + break; + } + + case CALL_BUILTIN_CLASS: { + STACK_SHRINK(oparg); + 
STACK_SHRINK(1); + CHECK_EVAL_BREAKER(); + break; + } + + case CALL_NO_KW_BUILTIN_O: { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + CHECK_EVAL_BREAKER(); + break; + } + + case CALL_NO_KW_BUILTIN_FAST: { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + CHECK_EVAL_BREAKER(); + break; + } + + case CALL_BUILTIN_FAST_WITH_KEYWORDS: { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + CHECK_EVAL_BREAKER(); + break; + } + + case CALL_NO_KW_LEN: { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + break; + } + + case CALL_NO_KW_ISINSTANCE: { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + break; + } + + case CALL_NO_KW_LIST_APPEND: { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + break; + } + + case CALL_NO_KW_METHOD_DESCRIPTOR_O: { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + CHECK_EVAL_BREAKER(); + break; + } + + case CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS: { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + CHECK_EVAL_BREAKER(); + break; + } + + case CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS: { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + CHECK_EVAL_BREAKER(); + break; + } + + case CALL_NO_KW_METHOD_DESCRIPTOR_FAST: { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + CHECK_EVAL_BREAKER(); + break; + } + + case INSTRUMENTED_CALL_FUNCTION_EX: { + break; + } + + case CALL_FUNCTION_EX: { + STACK_SHRINK(((oparg & 1) ? 1 : 0)); + STACK_SHRINK(2); + CHECK_EVAL_BREAKER(); + break; + } + + case MAKE_FUNCTION: { + break; + } + + case SET_FUNCTION_ATTRIBUTE: { + STACK_SHRINK(1); + break; + } + + case RETURN_GENERATOR: { + break; + } + + case BUILD_SLICE: { + STACK_SHRINK(((oparg == 3) ? 
1 : 0)); + STACK_SHRINK(1); + break; + } + + case CONVERT_VALUE: { + break; + } + + case FORMAT_SIMPLE: { + break; + } + + case FORMAT_WITH_SPEC: { + STACK_SHRINK(1); + break; + } + + case COPY: { + STACK_GROW(1); + break; + } + + case BINARY_OP: { + STACK_SHRINK(1); + break; + } + + case SWAP: { + break; + } + + case INSTRUMENTED_INSTRUCTION: { + break; + } + + case INSTRUMENTED_JUMP_FORWARD: { + break; + } + + case INSTRUMENTED_JUMP_BACKWARD: { + break; + } + + case INSTRUMENTED_POP_JUMP_IF_TRUE: { + break; + } + + case INSTRUMENTED_POP_JUMP_IF_FALSE: { + break; + } + + case INSTRUMENTED_POP_JUMP_IF_NONE: { + break; + } + + case INSTRUMENTED_POP_JUMP_IF_NOT_NONE: { + break; + } + + case EXTENDED_ARG: { + break; + } + + case CACHE: { + break; + } + + case RESERVED: { + break; + } + + case _POP_JUMP_IF_FALSE: { + STACK_SHRINK(1); + break; + } + + case _POP_JUMP_IF_TRUE: { + STACK_SHRINK(1); + break; + } + + case JUMP_TO_TOP: { + CHECK_EVAL_BREAKER(); + break; + } + + case SAVE_IP: { + break; + } + + case EXIT_TRACE: { + break; + } diff --git a/Python/optimizer.c b/Python/optimizer.c index 238ab02d09faa7..79280bb18448ff 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -4,6 +4,7 @@ #include "pycore_opcode.h" #include "pycore_opcode_metadata.h" #include "pycore_opcode_utils.h" +#include "pycore_optimizer.h" #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_uops.h" #include "cpython/optimizer.h" @@ -704,10 +705,11 @@ uop_optimize( return -1; } executor->base.execute = _PyUopExecute; + trace_length = uop_analyze_and_optimize(trace, trace_length); memcpy(executor->trace, trace, trace_length * sizeof(_PyUOpInstruction)); - if (trace_length < _Py_UOP_MAX_TRACE_LENGTH) { - executor->trace[trace_length].opcode = 0; // Sentinel - } + if (trace_length < _Py_UOP_MAX_TRACE_LENGTH) { + executor->trace[trace_length].opcode = 0; // Sentinel + } *exec_ptr = (_PyExecutorObject *)executor; return 1; } diff --git a/Python/optimizer_analysis.c 
b/Python/optimizer_analysis.c new file mode 100644 index 00000000000000..d489fb802fca33 --- /dev/null +++ b/Python/optimizer_analysis.c @@ -0,0 +1,21 @@ +#include "Python.h" +#include "opcode.h" +#include "pycore_interp.h" +#include "pycore_opcode.h" +#include "pycore_opcode_metadata.h" +#include "pycore_opcode_utils.h" +#include "pycore_pystate.h" // _PyInterpreterState_GET() +#include "pycore_uops.h" +#include "cpython/optimizer.h" +#include +#include +#include +#include "pycore_optimizer.h" + +int +uop_analyze_and_optimize( + _PyUOpInstruction *trace, + int trace_len) +{ + return trace_len; +} diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 967e1e2f5b63bb..4407410664c104 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -15,6 +15,7 @@ from flags import InstructionFlags, variable_used from instructions import ( AnyInstruction, + AbstractInstruction, Component, Instruction, MacroInstruction, @@ -43,6 +44,9 @@ DEFAULT_EXECUTOR_OUTPUT = os.path.relpath( os.path.join(ROOT, "Python/executor_cases.c.h") ) +DEFAULT_ABSTRACT_INTERPRETER_OUTPUT = os.path.relpath( + os.path.join(ROOT, "Python/abstract_interp_cases.c.h") +) # Constants used instead of size for macro expansions. # Note: 1, 2, 4 must match actual cache entry sizes. 
@@ -91,7 +95,13 @@ help="Write executor cases to this file", default=DEFAULT_EXECUTOR_OUTPUT, ) - +arg_parser.add_argument( + "-a", + "--abstract-interpreter-cases", + type=str, + help="Write abstract interpreter cases to this file", + default=DEFAULT_ABSTRACT_INTERPRETER_OUTPUT, +) class Generator(Analyzer): def get_stack_effect_info( @@ -620,6 +630,39 @@ def write_executor_instructions( file=sys.stderr, ) + def write_abstract_interpreter_instructions( + self, abstract_interpreter_filename: str, emit_line_directives: bool + ) -> None: + """Generate cases for the Tier 2 abstract interpreter/analzyer.""" + with open(abstract_interpreter_filename, "w") as f: + self.out = Formatter(f, 8, emit_line_directives) + self.write_provenance_header() + for thing in self.everything: + match thing: + case OverriddenInstructionPlaceHolder(): + # TODO: Is this helpful? + self.write_overridden_instr_place_holder(thing) + case parsing.InstDef(): + instr = AbstractInstruction(self.instrs[thing.name].inst) + self.out.emit("") + with self.out.block(f"case {thing.name}:"): + instr.write(self.out, tier=TIER_TWO) + if instr.check_eval_breaker: + self.out.emit("CHECK_EVAL_BREAKER();") + self.out.emit("break;") + # elif instr.kind != "op": + # print(f"NOTE: {thing.name} is not a viable uop") + case parsing.Macro(): + pass + case parsing.Pseudo(): + pass + case _: + typing.assert_never(thing) + print( + f"Wrote some stuff to {abstract_interpreter_filename}", + file=sys.stderr, + ) + def write_overridden_instr_place_holder( self, place_holder: OverriddenInstructionPlaceHolder ) -> None: @@ -724,6 +767,8 @@ def main(): a.write_instructions(args.output, args.emit_line_directives) a.write_metadata(args.metadata, args.pymetadata) a.write_executor_instructions(args.executor_cases, args.emit_line_directives) + a.write_abstract_interpreter_instructions(args.abstract_interpreter_cases, + args.emit_line_directives) if __name__ == "__main__": diff --git a/Tools/cases_generator/instructions.py 
b/Tools/cases_generator/instructions.py index 6f42699d900b46..6184bea9850dc7 100644 --- a/Tools/cases_generator/instructions.py +++ b/Tools/cases_generator/instructions.py @@ -310,6 +310,37 @@ def write_body( StackEffectMapping = list[tuple[StackEffect, StackEffect]] +# Instruction used for abstract interpretation. +class AbstractInstruction(Instruction): + def __init__(self, inst: parsing.InstDef): + super().__init__(inst) + + def write(self, out: Formatter, tier: Tiers = TIER_ONE) -> None: + """Write one abstract instruction, sans prologue and epilogue.""" + # Write a static assertion that a family's cache size is correct + if family := self.family: + if self.name == family.name: + if cache_size := family.size: + out.emit( + f"static_assert({cache_size} == " + f'{self.cache_offset}, "incorrect cache size");' + ) + # Write net stack growth/shrinkage + out.stack_adjust( + [ieff for ieff in self.input_effects], + [oeff for oeff in self.output_effects], + ) + + def write_body( + self, + out: Formatter, + dedent: int, + active_caches: list[ActiveCacheEffect], + tier: Tiers = TIER_ONE, + ) -> None: + pass + + @dataclasses.dataclass class Component: instr: Instruction From 2aeea51c4e94e44767d87869723a05f1c5fd15fb Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Wed, 2 Aug 2023 09:55:27 +0000 Subject: [PATCH 02/48] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?= =?UTF-8?q?lurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2023-08-02-09-55-21.gh-issue-107557.P1z-in.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-08-02-09-55-21.gh-issue-107557.P1z-in.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-08-02-09-55-21.gh-issue-107557.P1z-in.rst b/Misc/NEWS.d/next/Core and Builtins/2023-08-02-09-55-21.gh-issue-107557.P1z-in.rst new file mode 100644 index 00000000000000..392f59c79e8de9 --- /dev/null +++ 
b/Misc/NEWS.d/next/Core and Builtins/2023-08-02-09-55-21.gh-issue-107557.P1z-in.rst @@ -0,0 +1 @@ +Generate the cases needed for the barebones tier 2 abstract interpreter for optimization passes in CPython. From 1a728ab6ce121e30d8f475c9499b2a84eb134ebf Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 2 Aug 2023 19:50:24 +0800 Subject: [PATCH 03/48] Copy Guido's input and output code, and fix build Co-Authored-By: Guido van Rossum --- Include/internal/pycore_optimizer.h | 4 +- Python/abstract_interp_cases.c.h | 412 ++++++++++++++++++++++++++ Tools/cases_generator/instructions.py | 38 +++ 3 files changed, 453 insertions(+), 1 deletion(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 3b5fa634e13972..06d05cf00babd3 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -8,7 +8,9 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif -extern int uop_analyze_and_optimize(struct _PyUOpInstruction *trace, int trace_len); +#include "pycore_uops.h" + +int uop_analyze_and_optimize(_PyUOpInstruction *trace, int trace_len); #ifdef __cplusplus } diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 4b05477da45e3a..2adf4deccea4bd 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -17,77 +17,104 @@ case LOAD_FAST_CHECK: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case LOAD_FAST: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case LOAD_FAST_AND_CLEAR: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case LOAD_FAST_LOAD_FAST: { STACK_GROW(2); + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; break; } case LOAD_CONST: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case STORE_FAST: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case STORE_FAST_LOAD_FAST: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; 
} case STORE_FAST_STORE_FAST: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } case POP_TOP: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case PUSH_NULL: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case INSTRUMENTED_END_FOR: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } case END_SEND: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case INSTRUMENTED_END_SEND: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case UNARY_NEGATIVE: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case UNARY_NOT: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case TO_BOOL: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } @@ -96,26 +123,38 @@ } case TO_BOOL_INT: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case TO_BOOL_LIST: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case TO_BOOL_NONE: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case TO_BOOL_STR: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case TO_BOOL_ALWAYS_TRUE: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case UNARY_INVERT: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } @@ -124,17 +163,26 @@ } case _BINARY_OP_MULTIPLY_INT: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case _BINARY_OP_ADD_INT: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case _BINARY_OP_SUBTRACT_INT: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } @@ -143,17 +191,26 @@ } case _BINARY_OP_MULTIPLY_FLOAT: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; 
STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case _BINARY_OP_ADD_FLOAT: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case _BINARY_OP_SUBTRACT_FLOAT: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } @@ -162,105 +219,155 @@ } case _BINARY_OP_ADD_UNICODE: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case _BINARY_OP_INPLACE_ADD_UNICODE: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } case BINARY_SUBSCR: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case BINARY_SLICE: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; + stack_pointer[-3] = NULL; STACK_SHRINK(2); + stack_pointer[-1] = NULL; break; } case STORE_SLICE: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; + stack_pointer[-3] = NULL; + stack_pointer[-4] = NULL; STACK_SHRINK(4); break; } case BINARY_SUBSCR_LIST_INT: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case BINARY_SUBSCR_TUPLE_INT: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case BINARY_SUBSCR_DICT: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case BINARY_SUBSCR_GETITEM: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case LIST_APPEND: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case SET_ADD: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case STORE_SUBSCR: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; + stack_pointer[-3] = NULL; STACK_SHRINK(3); break; } case STORE_SUBSCR_LIST_INT: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; + 
stack_pointer[-3] = NULL; STACK_SHRINK(3); break; } case STORE_SUBSCR_DICT: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; + stack_pointer[-3] = NULL; STACK_SHRINK(3); break; } case DELETE_SUBSCR: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } case CALL_INTRINSIC_1: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case CALL_INTRINSIC_2: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case RAISE_VARARGS: { + (stack_pointer - oparg) = (PyObject **)NULL; STACK_SHRINK(oparg); break; } case INTERPRETER_EXIT: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case RETURN_VALUE: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case INSTRUMENTED_RETURN_VALUE: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } @@ -274,65 +381,90 @@ } case GET_AITER: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case GET_ANEXT: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case GET_AWAITABLE: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case SEND: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case SEND_GEN: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case INSTRUMENTED_YIELD_VALUE: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case YIELD_VALUE: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case POP_EXCEPT: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case RERAISE: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case END_ASYNC_FOR: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } case CLEANUP_THROW: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; + stack_pointer[-3] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; break; } case LOAD_ASSERTION_ERROR: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case 
LOAD_BUILD_CLASS: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case STORE_NAME: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } @@ -342,45 +474,61 @@ } case UNPACK_SEQUENCE: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); STACK_GROW(oparg); + stack_pointer - oparg = (PyObject **)NULL; break; } case UNPACK_SEQUENCE_TWO_TUPLE: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); STACK_GROW(oparg); + stack_pointer - oparg = (PyObject **)NULL; break; } case UNPACK_SEQUENCE_TUPLE: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); STACK_GROW(oparg); + stack_pointer - oparg = (PyObject **)NULL; break; } case UNPACK_SEQUENCE_LIST: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); STACK_GROW(oparg); + stack_pointer - oparg = (PyObject **)NULL; break; } case UNPACK_EX: { + stack_pointer[-1] = NULL; STACK_GROW((oparg & 0xFF) + (oparg >> 8)); + stack_pointer - ((oparg >> 8)) = (PyObject **)NULL; + stack_pointer[-(1 + (oparg >> 8))] = NULL; + stack_pointer - (1 + (oparg >> 8) + (oparg & 0xFF)) = (PyObject **)NULL; break; } case STORE_ATTR: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } case DELETE_ATTR: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case STORE_GLOBAL: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } @@ -391,16 +539,21 @@ case _LOAD_LOCALS: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case _LOAD_FROM_DICT_OR_GLOBALS: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case LOAD_GLOBAL: { STACK_GROW(1); STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1] = NULL; + stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } @@ -415,12 +568,16 @@ case _LOAD_GLOBAL_MODULE: { STACK_GROW(1); STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1] = NULL; + stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } case _LOAD_GLOBAL_BUILTINS: { STACK_GROW(1); STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1] = NULL; + stack_pointer[-(1 + ((oparg & 1) ? 
1 : 0))] = NULL; break; } @@ -437,15 +594,19 @@ } case LOAD_FROM_DICT_OR_DEREF: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case LOAD_DEREF: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case STORE_DEREF: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } @@ -455,42 +616,54 @@ } case BUILD_STRING: { + (stack_pointer - oparg) = (PyObject **)NULL; STACK_SHRINK(oparg); STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case BUILD_TUPLE: { + (stack_pointer - oparg) = (PyObject **)NULL; STACK_SHRINK(oparg); STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case BUILD_LIST: { + (stack_pointer - oparg) = (PyObject **)NULL; STACK_SHRINK(oparg); STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case LIST_EXTEND: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case SET_UPDATE: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case BUILD_SET: { + (stack_pointer - oparg) = (PyObject **)NULL; STACK_SHRINK(oparg); STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case BUILD_MAP: { + (stack_pointer - oparg*2) = (PyObject **)NULL; STACK_SHRINK(oparg*2); STACK_GROW(1); + stack_pointer[-1] = NULL; break; } @@ -499,21 +672,28 @@ } case BUILD_CONST_KEY_MAP: { + stack_pointer[-1] = NULL; + (stack_pointer - (1 + oparg)) = (PyObject **)NULL; STACK_SHRINK(oparg); + stack_pointer[-1] = NULL; break; } case DICT_UPDATE: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case DICT_MERGE: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case MAP_ADD: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } @@ -525,24 +705,42 @@ } case LOAD_SUPER_ATTR: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; + stack_pointer[-3] = NULL; STACK_SHRINK(2); STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1] = NULL; + stack_pointer[-(1 + ((oparg & 1) ? 
1 : 0))] = NULL; break; } case LOAD_SUPER_ATTR_ATTR: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; + stack_pointer[-3] = NULL; STACK_SHRINK(2); STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1] = NULL; + stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } case LOAD_SUPER_ATTR_METHOD: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; + stack_pointer[-3] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; break; } case LOAD_ATTR: { + stack_pointer[-1] = NULL; STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1] = NULL; + stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } @@ -555,100 +753,155 @@ } case _LOAD_ATTR_INSTANCE_VALUE: { + stack_pointer[-1] = NULL; STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1] = NULL; + stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } case LOAD_ATTR_MODULE: { + stack_pointer[-1] = NULL; STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1] = NULL; + stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } case LOAD_ATTR_WITH_HINT: { + stack_pointer[-1] = NULL; STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1] = NULL; + stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } case LOAD_ATTR_SLOT: { + stack_pointer[-1] = NULL; STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1] = NULL; + stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } case LOAD_ATTR_CLASS: { + stack_pointer[-1] = NULL; STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1] = NULL; + stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } case LOAD_ATTR_PROPERTY: { + stack_pointer[-1] = NULL; STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1] = NULL; + stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } case LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN: { + stack_pointer[-1] = NULL; STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1] = NULL; + stack_pointer[-(1 + ((oparg & 1) ? 
1 : 0))] = NULL; break; } case STORE_ATTR_INSTANCE_VALUE: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } case STORE_ATTR_WITH_HINT: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } case STORE_ATTR_SLOT: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } case COMPARE_OP: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case COMPARE_OP_FLOAT: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case COMPARE_OP_INT: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case COMPARE_OP_STR: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case IS_OP: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case CONTAINS_OP: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case CHECK_EG_MATCH: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; break; } case CHECK_EXC_MATCH: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case IMPORT_NAME: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case IMPORT_FROM: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } @@ -665,16 +918,20 @@ } case POP_JUMP_IF_FALSE: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case POP_JUMP_IF_TRUE: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case IS_NONE: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } @@ -684,39 +941,52 @@ case GET_LEN: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case MATCH_CLASS: { + stack_pointer[-1] = NULL; + stack_pointer[-2] 
= NULL; + stack_pointer[-3] = NULL; STACK_SHRINK(2); + stack_pointer[-1] = NULL; break; } case MATCH_MAPPING: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case MATCH_SEQUENCE: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case MATCH_KEYS: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case GET_ITER: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case GET_YIELD_FROM_ITER: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case FOR_ITER: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } @@ -734,11 +1004,13 @@ case _IS_ITER_EXHAUSTED_LIST: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case _ITER_NEXT_LIST: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } @@ -752,11 +1024,13 @@ case _IS_ITER_EXHAUSTED_TUPLE: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case _ITER_NEXT_TUPLE: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } @@ -770,59 +1044,87 @@ case _IS_ITER_EXHAUSTED_RANGE: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case _ITER_NEXT_RANGE: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case FOR_ITER_GEN: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case BEFORE_ASYNC_WITH: { + stack_pointer[-1] = NULL; STACK_GROW(1); + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; break; } case BEFORE_WITH: { + stack_pointer[-1] = NULL; STACK_GROW(1); + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; break; } case WITH_EXCEPT_START: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case PUSH_EXC_INFO: { + stack_pointer[-1] = NULL; STACK_GROW(1); + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; break; } case LOAD_ATTR_METHOD_WITH_VALUES: { + stack_pointer[-1] = NULL; STACK_GROW(1); + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; break; } case LOAD_ATTR_METHOD_NO_DICT: { + stack_pointer[-1] = NULL; STACK_GROW(1); + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; break; } case LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: { + stack_pointer[-1] 
= NULL; + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case LOAD_ATTR_NONDESCRIPTOR_NO_DICT: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case LOAD_ATTR_METHOD_LAZY_DICT: { + stack_pointer[-1] = NULL; STACK_GROW(1); + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; break; } @@ -835,131 +1137,208 @@ } case CALL: { + (stack_pointer - oparg) = (PyObject **)NULL; + stack_pointer[-(1 + oparg)] = NULL; + stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); + stack_pointer[-1] = NULL; CHECK_EVAL_BREAKER(); break; } case CALL_BOUND_METHOD_EXACT_ARGS: { + (stack_pointer - oparg) = (PyObject **)NULL; + stack_pointer[-(1 + oparg)] = NULL; + stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case CALL_PY_EXACT_ARGS: { + (stack_pointer - oparg) = (PyObject **)NULL; + stack_pointer[-(1 + oparg)] = NULL; + stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case CALL_PY_WITH_DEFAULTS: { + (stack_pointer - oparg) = (PyObject **)NULL; + stack_pointer[-(1 + oparg)] = NULL; + stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case CALL_NO_KW_TYPE_1: { + (stack_pointer - oparg) = (PyObject **)NULL; + stack_pointer[-(1 + oparg)] = NULL; + stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case CALL_NO_KW_STR_1: { + (stack_pointer - oparg) = (PyObject **)NULL; + stack_pointer[-(1 + oparg)] = NULL; + stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); + stack_pointer[-1] = NULL; CHECK_EVAL_BREAKER(); break; } case CALL_NO_KW_TUPLE_1: { + (stack_pointer - oparg) = (PyObject **)NULL; + stack_pointer[-(1 + oparg)] = NULL; + stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); + stack_pointer[-1] = NULL; 
CHECK_EVAL_BREAKER(); break; } case CALL_NO_KW_ALLOC_AND_ENTER_INIT: { + (stack_pointer - oparg) = (PyObject **)NULL; + stack_pointer[-(1 + oparg)] = NULL; + stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case EXIT_INIT_CHECK: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case CALL_BUILTIN_CLASS: { + (stack_pointer - oparg) = (PyObject **)NULL; + stack_pointer[-(1 + oparg)] = NULL; + stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); + stack_pointer[-1] = NULL; CHECK_EVAL_BREAKER(); break; } case CALL_NO_KW_BUILTIN_O: { + (stack_pointer - oparg) = (PyObject **)NULL; + stack_pointer[-(1 + oparg)] = NULL; + stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); + stack_pointer[-1] = NULL; CHECK_EVAL_BREAKER(); break; } case CALL_NO_KW_BUILTIN_FAST: { + (stack_pointer - oparg) = (PyObject **)NULL; + stack_pointer[-(1 + oparg)] = NULL; + stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); + stack_pointer[-1] = NULL; CHECK_EVAL_BREAKER(); break; } case CALL_BUILTIN_FAST_WITH_KEYWORDS: { + (stack_pointer - oparg) = (PyObject **)NULL; + stack_pointer[-(1 + oparg)] = NULL; + stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); + stack_pointer[-1] = NULL; CHECK_EVAL_BREAKER(); break; } case CALL_NO_KW_LEN: { + (stack_pointer - oparg) = (PyObject **)NULL; + stack_pointer[-(1 + oparg)] = NULL; + stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case CALL_NO_KW_ISINSTANCE: { + (stack_pointer - oparg) = (PyObject **)NULL; + stack_pointer[-(1 + oparg)] = NULL; + stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case CALL_NO_KW_LIST_APPEND: { + (stack_pointer - oparg) = (PyObject **)NULL; + stack_pointer[-(1 + oparg)] = NULL; + stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); + 
stack_pointer[-1] = NULL; break; } case CALL_NO_KW_METHOD_DESCRIPTOR_O: { + (stack_pointer - oparg) = (PyObject **)NULL; + stack_pointer[-(1 + oparg)] = NULL; + stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); + stack_pointer[-1] = NULL; CHECK_EVAL_BREAKER(); break; } case CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS: { + (stack_pointer - oparg) = (PyObject **)NULL; + stack_pointer[-(1 + oparg)] = NULL; + stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); + stack_pointer[-1] = NULL; CHECK_EVAL_BREAKER(); break; } case CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS: { + (stack_pointer - oparg) = (PyObject **)NULL; + stack_pointer[-(1 + oparg)] = NULL; + stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); + stack_pointer[-1] = NULL; CHECK_EVAL_BREAKER(); break; } case CALL_NO_KW_METHOD_DESCRIPTOR_FAST: { + (stack_pointer - oparg) = (PyObject **)NULL; + stack_pointer[-(1 + oparg)] = NULL; + stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); + stack_pointer[-1] = NULL; CHECK_EVAL_BREAKER(); break; } @@ -969,18 +1348,28 @@ } case CALL_FUNCTION_EX: { + (oparg & 1) ? stack_pointer[-(((oparg & 1) ? 1 : 0))] : NULL = NULL; + stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; + stack_pointer[-(2 + ((oparg & 1) ? 1 : 0))] = NULL; + stack_pointer[-(3 + ((oparg & 1) ? 1 : 0))] = NULL; STACK_SHRINK(((oparg & 1) ? 1 : 0)); STACK_SHRINK(2); + stack_pointer[-1] = NULL; CHECK_EVAL_BREAKER(); break; } case MAKE_FUNCTION: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case SET_FUNCTION_ATTRIBUTE: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } @@ -989,35 +1378,56 @@ } case BUILD_SLICE: { + (oparg == 3) ? stack_pointer[-(((oparg == 3) ? 1 : 0))] : NULL = NULL; + stack_pointer[-(1 + ((oparg == 3) ? 1 : 0))] = NULL; + stack_pointer[-(2 + ((oparg == 3) ? 1 : 0))] = NULL; STACK_SHRINK(((oparg == 3) ? 
1 : 0)); STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case CONVERT_VALUE: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case FORMAT_SIMPLE: { + stack_pointer[-1] = NULL; + stack_pointer[-1] = NULL; break; } case FORMAT_WITH_SPEC: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case COPY: { STACK_GROW(1); + stack_pointer[-1] = NULL; break; } case BINARY_OP: { + stack_pointer[-1] = NULL; + stack_pointer[-2] = NULL; STACK_SHRINK(1); + stack_pointer[-1] = NULL; break; } case SWAP: { + stack_pointer[-1] = NULL; + (stack_pointer - (1 + (oparg-2))) = (PyObject **)NULL; + stack_pointer[-(2 + (oparg-2))] = NULL; + stack_pointer[-1] = NULL; + stack_pointer - (1 + (oparg-2)) = (PyObject **)NULL; + stack_pointer[-(2 + (oparg-2))] = NULL; break; } @@ -1062,11 +1472,13 @@ } case _POP_JUMP_IF_FALSE: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case _POP_JUMP_IF_TRUE: { + stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } diff --git a/Tools/cases_generator/instructions.py b/Tools/cases_generator/instructions.py index 6184bea9850dc7..f3f3093219b65a 100644 --- a/Tools/cases_generator/instructions.py +++ b/Tools/cases_generator/instructions.py @@ -325,12 +325,50 @@ def write(self, out: Formatter, tier: Tiers = TIER_ONE) -> None: f"static_assert({cache_size} == " f'{self.cache_offset}, "incorrect cache size");' ) + # NULL out inputs, unless it's the same as in the output, + # Write input stack effect variable declarations and initializations + ieffects = list(reversed(self.input_effects)) + for i, ieffect in enumerate(ieffects): + if ieffect.name in self.unmoved_names: + continue + isize = string_effect_size( + list_effect_size([ieff for ieff in ieffects[: i + 1]]) + ) + if ieffect.size: + src = StackEffect( + f"(stack_pointer - {maybe_parenthesize(isize)})", "PyObject **" + ) + elif ieffect.cond: + src = StackEffect( + f"({ieffect.cond}) ? 
stack_pointer[-{maybe_parenthesize(isize)}] : NULL", + "", + ) + else: + src = StackEffect(f"stack_pointer[-{maybe_parenthesize(isize)}]", "") + out.assign(src, parsing.StackEffect("NULL")) + # Write net stack growth/shrinkage out.stack_adjust( [ieff for ieff in self.input_effects], [oeff for oeff in self.output_effects], ) + # NULL out outputs, unless it's same as input. + oeffects = list(reversed(self.output_effects)) + for i, oeffect in enumerate(oeffects): + if oeffect.name in self.unmoved_names: + continue + osize = string_effect_size( + list_effect_size([oeff for oeff in oeffects[: i + 1]]) + ) + if oeffect.size: + dst = StackEffect( + f"stack_pointer - {maybe_parenthesize(osize)}", "PyObject **" + ) + else: + dst = StackEffect(f"stack_pointer[-{maybe_parenthesize(osize)}]", "") + out.assign(dst, parsing.StackEffect("NULL")) + def write_body( self, out: Formatter, From 17fccbca34ae7bc433e1ba1fff4b6e9d617c16d6 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 2 Aug 2023 19:54:50 +0800 Subject: [PATCH 04/48] fix separator --- Python/abstract_interp_cases.c.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 2adf4deccea4bd..ac8c3a367da49f 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -1,6 +1,6 @@ // This file is generated by Tools/cases_generator/generate_cases.py // from: -// Python\bytecodes.c +// Python/bytecodes.c // Do not edit! 
case NOP: { From a1da69db9529d988d6dc5ff6688105d9d235b6bd Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 2 Aug 2023 19:59:17 +0800 Subject: [PATCH 05/48] credit Jules --- Tools/cases_generator/instructions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/cases_generator/instructions.py b/Tools/cases_generator/instructions.py index f3f3093219b65a..197a79f539d5e7 100644 --- a/Tools/cases_generator/instructions.py +++ b/Tools/cases_generator/instructions.py @@ -326,7 +326,7 @@ def write(self, out: Formatter, tier: Tiers = TIER_ONE) -> None: f'{self.cache_offset}, "incorrect cache size");' ) # NULL out inputs, unless it's the same as in the output, - # Write input stack effect variable declarations and initializations + # Write input stack effect variable declarations and initializations. ieffects = list(reversed(self.input_effects)) for i, ieffect in enumerate(ieffects): if ieffect.name in self.unmoved_names: From b458e171cbe24a71cb721da0606b07ff3e3f8ba1 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 2 Aug 2023 20:00:18 +0800 Subject: [PATCH 06/48] add jules to co-authors Co-Authored-By: Jules <57632293+juliapoo@users.noreply.github.com> --- Python/optimizer_analysis.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index d489fb802fca33..13d2b8c48637b1 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -15,7 +15,8 @@ int uop_analyze_and_optimize( _PyUOpInstruction *trace, - int trace_len) + int trace_len +) { return trace_len; } From f81f8889d2cfa1160e106c3cdd0e592adb2d742f Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 2 Aug 2023 23:46:58 +0800 Subject: [PATCH 07/48] add pycore_optimizer.h to headers in makefile --- Makefile.pre.in | 1 + 1 file changed, 1 insertion(+) diff --git 
a/Makefile.pre.in b/Makefile.pre.in index a334aae9dec4fc..ce6e38bacc81ae 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1787,6 +1787,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_obmalloc_init.h \ $(srcdir)/Include/internal/pycore_opcode.h \ $(srcdir)/Include/internal/pycore_opcode_utils.h \ + $(srcdir)/Include/internal/pycore_optimizer.h \ $(srcdir)/Include/internal/pycore_pathconfig.h \ $(srcdir)/Include/internal/pycore_pyarena.h \ $(srcdir)/Include/internal/pycore_pyerrors.h \ From 0020320d0f5aa58f6aa2a36b8f025396f5b7ba55 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Thu, 3 Aug 2023 00:03:59 +0800 Subject: [PATCH 08/48] fix: remove whitespace --- Makefile.pre.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.pre.in b/Makefile.pre.in index ce6e38bacc81ae..94beadabce18cc 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1787,7 +1787,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_obmalloc_init.h \ $(srcdir)/Include/internal/pycore_opcode.h \ $(srcdir)/Include/internal/pycore_opcode_utils.h \ - $(srcdir)/Include/internal/pycore_optimizer.h \ + $(srcdir)/Include/internal/pycore_optimizer.h \ $(srcdir)/Include/internal/pycore_pathconfig.h \ $(srcdir)/Include/internal/pycore_pyarena.h \ $(srcdir)/Include/internal/pycore_pyerrors.h \ From 1f93072c596e48f11ca715ac54091e885bebbbf9 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Thu, 3 Aug 2023 01:09:26 +0800 Subject: [PATCH 09/48] fix make smelly --- Include/internal/pycore_optimizer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 06d05cf00babd3..ccbe7e52af289a 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -10,7 +10,7 @@ extern "C" { #include "pycore_uops.h" -int uop_analyze_and_optimize(_PyUOpInstruction *trace, int 
trace_len); +int _Py_uop_analyze_and_optimize(_PyUOpInstruction *trace, int trace_len); #ifdef __cplusplus } From dac63e348441a99ba2844a22abdc214aad3402f1 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Thu, 3 Aug 2023 01:11:34 +0800 Subject: [PATCH 10/48] fix: build --- Python/optimizer_analysis.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 13d2b8c48637b1..cb399b0beb9129 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -13,7 +13,7 @@ #include "pycore_optimizer.h" int -uop_analyze_and_optimize( +_Py_uop_analyze_and_optimize( _PyUOpInstruction *trace, int trace_len ) From e62e0153cd765bea9786407b32272dcb0e5dc234 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Thu, 3 Aug 2023 01:35:01 +0800 Subject: [PATCH 11/48] fix wrong symbol --- Python/optimizer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 79280bb18448ff..3f9a82cf1daa13 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -705,7 +705,7 @@ uop_optimize( return -1; } executor->base.execute = _PyUopExecute; - trace_length = uop_analyze_and_optimize(trace, trace_length); + trace_length = _Py_uop_analyze_and_optimize(trace, trace_length); memcpy(executor->trace, trace, trace_length * sizeof(_PyUOpInstruction)); if (trace_length < _Py_UOP_MAX_TRACE_LENGTH) { executor->trace[trace_length].opcode = 0; // Sentinel From a7f654cafe24b9dc65188e8514be5fd1c472ab36 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Thu, 3 Aug 2023 02:10:11 +0800 Subject: [PATCH 12/48] ignore static globals check for abstract interpreter --- Tools/c-analyzer/cpython/_parser.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Tools/c-analyzer/cpython/_parser.py b/Tools/c-analyzer/cpython/_parser.py index 
9bc7285e18b2fb..90334d0e79da80 100644 --- a/Tools/c-analyzer/cpython/_parser.py +++ b/Tools/c-analyzer/cpython/_parser.py @@ -84,6 +84,7 @@ def clean_lines(text): Python/frozen_modules/*.h Python/generated_cases.c.h Python/executor_cases.c.h +Python/abstract_interp_cases.c.h # not actually source Python/bytecodes.c From ec58145acf047f75ce9b34418327a97a158c262e Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 5 Aug 2023 01:23:56 +0800 Subject: [PATCH 13/48] merge Guido's changes --- Python/abstract_interp_cases.c.h | 308 +++----------------------- Tools/cases_generator/instructions.py | 43 +--- Tools/cases_generator/stacking.py | 36 +++ 3 files changed, 65 insertions(+), 322 deletions(-) diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index ac8c3a367da49f..8356ad15299f8e 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -35,8 +35,8 @@ case LOAD_FAST_LOAD_FAST: { STACK_GROW(2); - stack_pointer[-1] = NULL; stack_pointer[-2] = NULL; + stack_pointer[-1] = NULL; break; } @@ -47,26 +47,21 @@ } case STORE_FAST: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case STORE_FAST_LOAD_FAST: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } case STORE_FAST_STORE_FAST: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } case POP_TOP: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } @@ -78,42 +73,33 @@ } case INSTRUMENTED_END_FOR: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } case END_SEND: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case INSTRUMENTED_END_SEND: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case UNARY_NEGATIVE: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } case UNARY_NOT: { - stack_pointer[-1] = NULL; 
stack_pointer[-1] = NULL; break; } case TO_BOOL: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } @@ -123,37 +109,31 @@ } case TO_BOOL_INT: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } case TO_BOOL_LIST: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } case TO_BOOL_NONE: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } case TO_BOOL_STR: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } case TO_BOOL_ALWAYS_TRUE: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } case UNARY_INVERT: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } @@ -163,24 +143,18 @@ } case _BINARY_OP_MULTIPLY_INT: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case _BINARY_OP_ADD_INT: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case _BINARY_OP_SUBTRACT_INT: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; @@ -191,24 +165,18 @@ } case _BINARY_OP_MULTIPLY_FLOAT: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case _BINARY_OP_ADD_FLOAT: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case _BINARY_OP_SUBTRACT_FLOAT: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; @@ -219,155 +187,114 @@ } case _BINARY_OP_ADD_UNICODE: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case _BINARY_OP_INPLACE_ADD_UNICODE: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } case BINARY_SUBSCR: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case BINARY_SLICE: { - stack_pointer[-1] = NULL; - 
stack_pointer[-2] = NULL; - stack_pointer[-3] = NULL; STACK_SHRINK(2); stack_pointer[-1] = NULL; break; } case STORE_SLICE: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; - stack_pointer[-3] = NULL; - stack_pointer[-4] = NULL; STACK_SHRINK(4); break; } case BINARY_SUBSCR_LIST_INT: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case BINARY_SUBSCR_TUPLE_INT: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case BINARY_SUBSCR_DICT: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case BINARY_SUBSCR_GETITEM: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case LIST_APPEND: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case SET_ADD: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case STORE_SUBSCR: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; - stack_pointer[-3] = NULL; STACK_SHRINK(3); break; } case STORE_SUBSCR_LIST_INT: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; - stack_pointer[-3] = NULL; STACK_SHRINK(3); break; } case STORE_SUBSCR_DICT: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; - stack_pointer[-3] = NULL; STACK_SHRINK(3); break; } case DELETE_SUBSCR: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } case CALL_INTRINSIC_1: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } case CALL_INTRINSIC_2: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case RAISE_VARARGS: { - (stack_pointer - oparg) = (PyObject **)NULL; STACK_SHRINK(oparg); break; } case INTERPRETER_EXIT: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case RETURN_VALUE: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case INSTRUMENTED_RETURN_VALUE: { - stack_pointer[-1] 
= NULL; STACK_SHRINK(1); break; } @@ -381,7 +308,6 @@ } case GET_AITER: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } @@ -393,61 +319,49 @@ } case GET_AWAITABLE: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } case SEND: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } case SEND_GEN: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } case INSTRUMENTED_YIELD_VALUE: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } case YIELD_VALUE: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } case POP_EXCEPT: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case RERAISE: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case END_ASYNC_FOR: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } case CLEANUP_THROW: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; - stack_pointer[-3] = NULL; STACK_SHRINK(1); - stack_pointer[-1] = NULL; stack_pointer[-2] = NULL; + stack_pointer[-1] = NULL; break; } @@ -464,7 +378,6 @@ } case STORE_NAME: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } @@ -474,61 +387,46 @@ } case UNPACK_SEQUENCE: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); STACK_GROW(oparg); - stack_pointer - oparg = (PyObject **)NULL; break; } case UNPACK_SEQUENCE_TWO_TUPLE: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); STACK_GROW(oparg); - stack_pointer - oparg = (PyObject **)NULL; break; } case UNPACK_SEQUENCE_TUPLE: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); STACK_GROW(oparg); - stack_pointer - oparg = (PyObject **)NULL; break; } case UNPACK_SEQUENCE_LIST: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); STACK_GROW(oparg); - stack_pointer - oparg = (PyObject **)NULL; break; } case UNPACK_EX: { - stack_pointer[-1] = NULL; STACK_GROW((oparg & 0xFF) + (oparg >> 8)); - stack_pointer - ((oparg >> 8)) = (PyObject **)NULL; - stack_pointer[-(1 + (oparg >> 8))] = NULL; - stack_pointer - (1 + (oparg >> 8) + (oparg & 0xFF)) = 
(PyObject **)NULL; + stack_pointer[-1 - (oparg >> 8)] = NULL; break; } case STORE_ATTR: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } case DELETE_ATTR: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case STORE_GLOBAL: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } @@ -544,7 +442,6 @@ } case _LOAD_FROM_DICT_OR_GLOBALS: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } @@ -552,8 +449,8 @@ case LOAD_GLOBAL: { STACK_GROW(1); STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; stack_pointer[-1] = NULL; - stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } @@ -568,16 +465,16 @@ case _LOAD_GLOBAL_MODULE: { STACK_GROW(1); STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; stack_pointer[-1] = NULL; - stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } case _LOAD_GLOBAL_BUILTINS: { STACK_GROW(1); STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; stack_pointer[-1] = NULL; - stack_pointer[-(1 + ((oparg & 1) ? 
1 : 0))] = NULL; break; } @@ -594,7 +491,6 @@ } case LOAD_FROM_DICT_OR_DEREF: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } @@ -606,7 +502,6 @@ } case STORE_DEREF: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } @@ -616,7 +511,6 @@ } case BUILD_STRING: { - (stack_pointer - oparg) = (PyObject **)NULL; STACK_SHRINK(oparg); STACK_GROW(1); stack_pointer[-1] = NULL; @@ -624,7 +518,6 @@ } case BUILD_TUPLE: { - (stack_pointer - oparg) = (PyObject **)NULL; STACK_SHRINK(oparg); STACK_GROW(1); stack_pointer[-1] = NULL; @@ -632,7 +525,6 @@ } case BUILD_LIST: { - (stack_pointer - oparg) = (PyObject **)NULL; STACK_SHRINK(oparg); STACK_GROW(1); stack_pointer[-1] = NULL; @@ -640,19 +532,16 @@ } case LIST_EXTEND: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case SET_UPDATE: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case BUILD_SET: { - (stack_pointer - oparg) = (PyObject **)NULL; STACK_SHRINK(oparg); STACK_GROW(1); stack_pointer[-1] = NULL; @@ -660,7 +549,6 @@ } case BUILD_MAP: { - (stack_pointer - oparg*2) = (PyObject **)NULL; STACK_SHRINK(oparg*2); STACK_GROW(1); stack_pointer[-1] = NULL; @@ -672,28 +560,22 @@ } case BUILD_CONST_KEY_MAP: { - stack_pointer[-1] = NULL; - (stack_pointer - (1 + oparg)) = (PyObject **)NULL; STACK_SHRINK(oparg); stack_pointer[-1] = NULL; break; } case DICT_UPDATE: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case DICT_MERGE: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case MAP_ADD: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } @@ -701,46 +583,38 @@ case INSTRUMENTED_LOAD_SUPER_ATTR: { STACK_SHRINK(2); STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; + stack_pointer[-1] = NULL; break; } case LOAD_SUPER_ATTR: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; - stack_pointer[-3] = NULL; STACK_SHRINK(2); STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1 - (oparg & 1 ? 
1 : 0)] = NULL; stack_pointer[-1] = NULL; - stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } case LOAD_SUPER_ATTR_ATTR: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; - stack_pointer[-3] = NULL; STACK_SHRINK(2); STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; stack_pointer[-1] = NULL; - stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } case LOAD_SUPER_ATTR_METHOD: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; - stack_pointer[-3] = NULL; STACK_SHRINK(1); - stack_pointer[-1] = NULL; stack_pointer[-2] = NULL; + stack_pointer[-1] = NULL; break; } case LOAD_ATTR: { - stack_pointer[-1] = NULL; STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; stack_pointer[-1] = NULL; - stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } @@ -753,147 +627,117 @@ } case _LOAD_ATTR_INSTANCE_VALUE: { - stack_pointer[-1] = NULL; STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; stack_pointer[-1] = NULL; - stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } case LOAD_ATTR_MODULE: { - stack_pointer[-1] = NULL; STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; stack_pointer[-1] = NULL; - stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } case LOAD_ATTR_WITH_HINT: { - stack_pointer[-1] = NULL; STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; stack_pointer[-1] = NULL; - stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } case LOAD_ATTR_SLOT: { - stack_pointer[-1] = NULL; STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; stack_pointer[-1] = NULL; - stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } case LOAD_ATTR_CLASS: { - stack_pointer[-1] = NULL; STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; stack_pointer[-1] = NULL; - stack_pointer[-(1 + ((oparg & 1) ? 
1 : 0))] = NULL; break; } case LOAD_ATTR_PROPERTY: { - stack_pointer[-1] = NULL; STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; stack_pointer[-1] = NULL; - stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } case LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN: { - stack_pointer[-1] = NULL; STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; stack_pointer[-1] = NULL; - stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; break; } case STORE_ATTR_INSTANCE_VALUE: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } case STORE_ATTR_WITH_HINT: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } case STORE_ATTR_SLOT: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(2); break; } case COMPARE_OP: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case COMPARE_OP_FLOAT: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case COMPARE_OP_INT: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case COMPARE_OP_STR: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case IS_OP: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case CONTAINS_OP: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case CHECK_EG_MATCH: { - stack_pointer[-1] = NULL; stack_pointer[-2] = NULL; stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; break; } case CHECK_EXC_MATCH: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } case IMPORT_NAME: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; @@ -918,19 +762,16 @@ } case 
POP_JUMP_IF_FALSE: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case POP_JUMP_IF_TRUE: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case IS_NONE: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } @@ -946,9 +787,6 @@ } case MATCH_CLASS: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; - stack_pointer[-3] = NULL; STACK_SHRINK(2); stack_pointer[-1] = NULL; break; @@ -973,13 +811,11 @@ } case GET_ITER: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } case GET_YIELD_FROM_ITER: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } @@ -1061,18 +897,16 @@ } case BEFORE_ASYNC_WITH: { - stack_pointer[-1] = NULL; STACK_GROW(1); - stack_pointer[-1] = NULL; stack_pointer[-2] = NULL; + stack_pointer[-1] = NULL; break; } case BEFORE_WITH: { - stack_pointer[-1] = NULL; STACK_GROW(1); - stack_pointer[-1] = NULL; stack_pointer[-2] = NULL; + stack_pointer[-1] = NULL; break; } @@ -1083,48 +917,42 @@ } case PUSH_EXC_INFO: { - stack_pointer[-1] = NULL; STACK_GROW(1); - stack_pointer[-1] = NULL; stack_pointer[-2] = NULL; + stack_pointer[-1] = NULL; break; } case LOAD_ATTR_METHOD_WITH_VALUES: { - stack_pointer[-1] = NULL; STACK_GROW(1); - stack_pointer[-1] = NULL; stack_pointer[-2] = NULL; + stack_pointer[-1] = NULL; break; } case LOAD_ATTR_METHOD_NO_DICT: { - stack_pointer[-1] = NULL; STACK_GROW(1); - stack_pointer[-1] = NULL; stack_pointer[-2] = NULL; + stack_pointer[-1] = NULL; break; } case LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: { - stack_pointer[-1] = NULL; - stack_pointer[-1] = NULL; + stack_pointer[-1 - (0 ? 1 : 0)] = NULL; stack_pointer[-1] = NULL; break; } case LOAD_ATTR_NONDESCRIPTOR_NO_DICT: { - stack_pointer[-1] = NULL; - stack_pointer[-1] = NULL; + stack_pointer[-1 - (0 ? 
1 : 0)] = NULL; stack_pointer[-1] = NULL; break; } case LOAD_ATTR_METHOD_LAZY_DICT: { - stack_pointer[-1] = NULL; STACK_GROW(1); - stack_pointer[-1] = NULL; stack_pointer[-2] = NULL; + stack_pointer[-1] = NULL; break; } @@ -1137,9 +965,6 @@ } case CALL: { - (stack_pointer - oparg) = (PyObject **)NULL; - stack_pointer[-(1 + oparg)] = NULL; - stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1148,9 +973,6 @@ } case CALL_BOUND_METHOD_EXACT_ARGS: { - (stack_pointer - oparg) = (PyObject **)NULL; - stack_pointer[-(1 + oparg)] = NULL; - stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1158,9 +980,6 @@ } case CALL_PY_EXACT_ARGS: { - (stack_pointer - oparg) = (PyObject **)NULL; - stack_pointer[-(1 + oparg)] = NULL; - stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1168,9 +987,6 @@ } case CALL_PY_WITH_DEFAULTS: { - (stack_pointer - oparg) = (PyObject **)NULL; - stack_pointer[-(1 + oparg)] = NULL; - stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1178,9 +994,6 @@ } case CALL_NO_KW_TYPE_1: { - (stack_pointer - oparg) = (PyObject **)NULL; - stack_pointer[-(1 + oparg)] = NULL; - stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1188,9 +1001,6 @@ } case CALL_NO_KW_STR_1: { - (stack_pointer - oparg) = (PyObject **)NULL; - stack_pointer[-(1 + oparg)] = NULL; - stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1199,9 +1009,6 @@ } case CALL_NO_KW_TUPLE_1: { - (stack_pointer - oparg) = (PyObject **)NULL; - stack_pointer[-(1 + oparg)] = NULL; - stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1210,9 +1017,6 @@ } case CALL_NO_KW_ALLOC_AND_ENTER_INIT: { - (stack_pointer - oparg) = (PyObject **)NULL; - 
stack_pointer[-(1 + oparg)] = NULL; - stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1220,15 +1024,11 @@ } case EXIT_INIT_CHECK: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case CALL_BUILTIN_CLASS: { - (stack_pointer - oparg) = (PyObject **)NULL; - stack_pointer[-(1 + oparg)] = NULL; - stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1237,9 +1037,6 @@ } case CALL_NO_KW_BUILTIN_O: { - (stack_pointer - oparg) = (PyObject **)NULL; - stack_pointer[-(1 + oparg)] = NULL; - stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1248,9 +1045,6 @@ } case CALL_NO_KW_BUILTIN_FAST: { - (stack_pointer - oparg) = (PyObject **)NULL; - stack_pointer[-(1 + oparg)] = NULL; - stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1259,9 +1053,6 @@ } case CALL_BUILTIN_FAST_WITH_KEYWORDS: { - (stack_pointer - oparg) = (PyObject **)NULL; - stack_pointer[-(1 + oparg)] = NULL; - stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1270,9 +1061,6 @@ } case CALL_NO_KW_LEN: { - (stack_pointer - oparg) = (PyObject **)NULL; - stack_pointer[-(1 + oparg)] = NULL; - stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1280,9 +1068,6 @@ } case CALL_NO_KW_ISINSTANCE: { - (stack_pointer - oparg) = (PyObject **)NULL; - stack_pointer[-(1 + oparg)] = NULL; - stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1290,9 +1075,6 @@ } case CALL_NO_KW_LIST_APPEND: { - (stack_pointer - oparg) = (PyObject **)NULL; - stack_pointer[-(1 + oparg)] = NULL; - stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1300,9 +1082,6 @@ } case CALL_NO_KW_METHOD_DESCRIPTOR_O: { - (stack_pointer - oparg) = 
(PyObject **)NULL; - stack_pointer[-(1 + oparg)] = NULL; - stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1311,9 +1090,6 @@ } case CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS: { - (stack_pointer - oparg) = (PyObject **)NULL; - stack_pointer[-(1 + oparg)] = NULL; - stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1322,9 +1098,6 @@ } case CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS: { - (stack_pointer - oparg) = (PyObject **)NULL; - stack_pointer[-(1 + oparg)] = NULL; - stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1333,9 +1106,6 @@ } case CALL_NO_KW_METHOD_DESCRIPTOR_FAST: { - (stack_pointer - oparg) = (PyObject **)NULL; - stack_pointer[-(1 + oparg)] = NULL; - stack_pointer[-(2 + oparg)] = NULL; STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1348,10 +1118,6 @@ } case CALL_FUNCTION_EX: { - (oparg & 1) ? stack_pointer[-(((oparg & 1) ? 1 : 0))] : NULL = NULL; - stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = NULL; - stack_pointer[-(2 + ((oparg & 1) ? 1 : 0))] = NULL; - stack_pointer[-(3 + ((oparg & 1) ? 1 : 0))] = NULL; STACK_SHRINK(((oparg & 1) ? 1 : 0)); STACK_SHRINK(2); stack_pointer[-1] = NULL; @@ -1360,14 +1126,11 @@ } case MAKE_FUNCTION: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } case SET_FUNCTION_ATTRIBUTE: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; @@ -1378,9 +1141,6 @@ } case BUILD_SLICE: { - (oparg == 3) ? stack_pointer[-(((oparg == 3) ? 1 : 0))] : NULL = NULL; - stack_pointer[-(1 + ((oparg == 3) ? 1 : 0))] = NULL; - stack_pointer[-(2 + ((oparg == 3) ? 1 : 0))] = NULL; STACK_SHRINK(((oparg == 3) ? 
1 : 0)); STACK_SHRINK(1); stack_pointer[-1] = NULL; @@ -1388,20 +1148,16 @@ } case CONVERT_VALUE: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } case FORMAT_SIMPLE: { - stack_pointer[-1] = NULL; stack_pointer[-1] = NULL; break; } case FORMAT_WITH_SPEC: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; @@ -1414,20 +1170,14 @@ } case BINARY_OP: { - stack_pointer[-1] = NULL; - stack_pointer[-2] = NULL; STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } case SWAP: { + stack_pointer[-2 - (oparg-2)] = NULL; stack_pointer[-1] = NULL; - (stack_pointer - (1 + (oparg-2))) = (PyObject **)NULL; - stack_pointer[-(2 + (oparg-2))] = NULL; - stack_pointer[-1] = NULL; - stack_pointer - (1 + (oparg-2)) = (PyObject **)NULL; - stack_pointer[-(2 + (oparg-2))] = NULL; break; } @@ -1472,13 +1222,11 @@ } case _POP_JUMP_IF_FALSE: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } case _POP_JUMP_IF_TRUE: { - stack_pointer[-1] = NULL; STACK_SHRINK(1); break; } diff --git a/Tools/cases_generator/instructions.py b/Tools/cases_generator/instructions.py index c10dfd241684f5..a10e8f41ab67db 100644 --- a/Tools/cases_generator/instructions.py +++ b/Tools/cases_generator/instructions.py @@ -263,49 +263,8 @@ def write(self, out: Formatter, tier: Tiers = TIER_ONE) -> None: f"static_assert({cache_size} == " f'{self.cache_offset}, "incorrect cache size");' ) - # NULL out inputs, unless it's the same as in the output, - # Write input stack effect variable declarations and initializations. - ieffects = list(reversed(self.input_effects)) - for i, ieffect in enumerate(ieffects): - if ieffect.name in self.unmoved_names: - continue - isize = string_effect_size( - list_effect_size([ieff for ieff in ieffects[: i + 1]]) - ) - if ieffect.size: - src = StackEffect( - f"(stack_pointer - {maybe_parenthesize(isize)})", "PyObject **" - ) - elif ieffect.cond: - src = StackEffect( - f"({ieffect.cond}) ? 
stack_pointer[-{maybe_parenthesize(isize)}] : NULL", - "", - ) - else: - src = StackEffect(f"stack_pointer[-{maybe_parenthesize(isize)}]", "") - out.assign(src, parsing.StackEffect("NULL")) - # Write net stack growth/shrinkage - out.stack_adjust( - [ieff for ieff in self.input_effects], - [oeff for oeff in self.output_effects], - ) - - # NULL out outputs, unless it's same as input. - oeffects = list(reversed(self.output_effects)) - for i, oeffect in enumerate(oeffects): - if oeffect.name in self.unmoved_names: - continue - osize = string_effect_size( - list_effect_size([oeff for oeff in oeffects[: i + 1]]) - ) - if oeffect.size: - dst = StackEffect( - f"stack_pointer - {maybe_parenthesize(osize)}", "PyObject **" - ) - else: - dst = StackEffect(f"stack_pointer[-{maybe_parenthesize(osize)}]", "") - out.assign(dst, parsing.StackEffect("NULL")) + stacking.write_single_instr_for_abstract_interp(self, out) def write_body( self, diff --git a/Tools/cases_generator/stacking.py b/Tools/cases_generator/stacking.py index 23eca3037f896d..8ae08f70904305 100644 --- a/Tools/cases_generator/stacking.py +++ b/Tools/cases_generator/stacking.py @@ -398,3 +398,39 @@ def write_components( ), poke.effect, ) + + +def write_single_instr_for_abstract_interp( + instr: Instruction, out: Formatter +): + try: + _write_components_for_abstract_interp( + [Component(instr, instr.active_caches)], + out, + ) + except AssertionError as err: + raise AssertionError(f"Error writing abstract instruction {instr.name}") from err + + +def _write_components_for_abstract_interp( + parts: list[Component], + out: Formatter, +): + managers = get_managers(parts) + for mgr in managers: + if mgr is managers[-1]: + out.stack_adjust(mgr.final_offset.deep, mgr.final_offset.high) + # Use clone() since adjust_inverse() mutates final_offset + mgr.adjust_inverse(mgr.final_offset.clone()) + # NULL out the output stack effects + for poke in mgr.pokes: + if not poke.effect.size and poke.effect.name not in 
mgr.instr.unmoved_names: + out.assign( + StackEffect( + poke.as_variable(), + poke.effect.type, + poke.effect.cond, + poke.effect.size, + ), + StackEffect("NULL"), + ) From 429276733ff98850eee575c7faf5bcee0b69ae2c Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 5 Aug 2023 01:31:21 +0800 Subject: [PATCH 14/48] remove unused stuff --- Python/abstract_interp_cases.c.h | 505 +++++++----------------- Tools/cases_generator/generate_cases.py | 14 +- 2 files changed, 150 insertions(+), 369 deletions(-) diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 8356ad15299f8e..33e726cc78e17d 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -3,17 +3,13 @@ // Python/bytecodes.c // Do not edit! + case NOP: { break; } - case RESUME: { - break; - } - case INSTRUMENTED_RESUME: { - break; - } + case LOAD_FAST_CHECK: { STACK_GROW(1); @@ -21,24 +17,21 @@ break; } + case LOAD_FAST: { STACK_GROW(1); stack_pointer[-1] = NULL; break; } + case LOAD_FAST_AND_CLEAR: { STACK_GROW(1); stack_pointer[-1] = NULL; break; } - case LOAD_FAST_LOAD_FAST: { - STACK_GROW(2); - stack_pointer[-2] = NULL; - stack_pointer[-1] = NULL; - break; - } + case LOAD_CONST: { STACK_GROW(1); @@ -46,36 +39,28 @@ break; } + case STORE_FAST: { STACK_SHRINK(1); break; } - case STORE_FAST_LOAD_FAST: { - stack_pointer[-1] = NULL; - break; - } - case STORE_FAST_STORE_FAST: { - STACK_SHRINK(2); - break; - } + case POP_TOP: { STACK_SHRINK(1); break; } + case PUSH_NULL: { STACK_GROW(1); stack_pointer[-1] = NULL; break; } - case INSTRUMENTED_END_FOR: { - STACK_SHRINK(2); - break; - } + case END_SEND: { STACK_SHRINK(1); @@ -83,119 +68,131 @@ break; } - case INSTRUMENTED_END_SEND: { - STACK_SHRINK(1); - stack_pointer[-1] = NULL; - break; - } + case UNARY_NEGATIVE: { stack_pointer[-1] = NULL; break; } + case UNARY_NOT: { stack_pointer[-1] = NULL; break; } + case TO_BOOL: { stack_pointer[-1] = NULL; break; } + 
case TO_BOOL_BOOL: { break; } + case TO_BOOL_INT: { stack_pointer[-1] = NULL; break; } + case TO_BOOL_LIST: { stack_pointer[-1] = NULL; break; } + case TO_BOOL_NONE: { stack_pointer[-1] = NULL; break; } + case TO_BOOL_STR: { stack_pointer[-1] = NULL; break; } + case TO_BOOL_ALWAYS_TRUE: { stack_pointer[-1] = NULL; break; } + case UNARY_INVERT: { stack_pointer[-1] = NULL; break; } + case _GUARD_BOTH_INT: { break; } + case _BINARY_OP_MULTIPLY_INT: { STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } + case _BINARY_OP_ADD_INT: { STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } + case _BINARY_OP_SUBTRACT_INT: { STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } + case _GUARD_BOTH_FLOAT: { break; } + case _BINARY_OP_MULTIPLY_FLOAT: { STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } + case _BINARY_OP_ADD_FLOAT: { STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } + case _BINARY_OP_SUBTRACT_FLOAT: { STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } + case _GUARD_BOTH_UNICODE: { break; } + case _BINARY_OP_ADD_UNICODE: { STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } - case _BINARY_OP_INPLACE_ADD_UNICODE: { - STACK_SHRINK(2); - break; - } + case BINARY_SUBSCR: { STACK_SHRINK(1); @@ -203,167 +200,128 @@ break; } + case BINARY_SLICE: { STACK_SHRINK(2); stack_pointer[-1] = NULL; break; } + case STORE_SLICE: { STACK_SHRINK(4); break; } + case BINARY_SUBSCR_LIST_INT: { STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } + case BINARY_SUBSCR_TUPLE_INT: { STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } + case BINARY_SUBSCR_DICT: { STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } - case BINARY_SUBSCR_GETITEM: { - STACK_SHRINK(1); - stack_pointer[-1] = NULL; - break; - } + case LIST_APPEND: { STACK_SHRINK(1); break; } + case SET_ADD: { STACK_SHRINK(1); break; } + case STORE_SUBSCR: { STACK_SHRINK(3); break; } + case STORE_SUBSCR_LIST_INT: { STACK_SHRINK(3); break; } + case STORE_SUBSCR_DICT: { STACK_SHRINK(3); break; } + case DELETE_SUBSCR: { STACK_SHRINK(2); 
break; } + case CALL_INTRINSIC_1: { stack_pointer[-1] = NULL; break; } + case CALL_INTRINSIC_2: { STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } - case RAISE_VARARGS: { - STACK_SHRINK(oparg); - break; - } - case INTERPRETER_EXIT: { - STACK_SHRINK(1); - break; - } - case RETURN_VALUE: { - STACK_SHRINK(1); - break; - } - case INSTRUMENTED_RETURN_VALUE: { - STACK_SHRINK(1); - break; - } - case RETURN_CONST: { - break; - } - case INSTRUMENTED_RETURN_CONST: { - break; - } + case GET_AITER: { stack_pointer[-1] = NULL; break; } + case GET_ANEXT: { STACK_GROW(1); stack_pointer[-1] = NULL; break; } + case GET_AWAITABLE: { stack_pointer[-1] = NULL; break; } - case SEND: { - stack_pointer[-1] = NULL; - break; - } - case SEND_GEN: { - stack_pointer[-1] = NULL; - break; - } - case INSTRUMENTED_YIELD_VALUE: { - stack_pointer[-1] = NULL; - break; - } - case YIELD_VALUE: { - stack_pointer[-1] = NULL; - break; - } + case POP_EXCEPT: { STACK_SHRINK(1); break; } - case RERAISE: { - STACK_SHRINK(1); - break; - } - case END_ASYNC_FOR: { - STACK_SHRINK(2); - break; - } - case CLEANUP_THROW: { - STACK_SHRINK(1); - stack_pointer[-2] = NULL; - stack_pointer[-1] = NULL; - break; - } + case LOAD_ASSERTION_ERROR: { STACK_GROW(1); @@ -371,81 +329,96 @@ break; } + case LOAD_BUILD_CLASS: { STACK_GROW(1); stack_pointer[-1] = NULL; break; } + case STORE_NAME: { STACK_SHRINK(1); break; } + case DELETE_NAME: { break; } + case UNPACK_SEQUENCE: { STACK_SHRINK(1); STACK_GROW(oparg); break; } + case UNPACK_SEQUENCE_TWO_TUPLE: { STACK_SHRINK(1); STACK_GROW(oparg); break; } + case UNPACK_SEQUENCE_TUPLE: { STACK_SHRINK(1); STACK_GROW(oparg); break; } + case UNPACK_SEQUENCE_LIST: { STACK_SHRINK(1); STACK_GROW(oparg); break; } + case UNPACK_EX: { STACK_GROW((oparg & 0xFF) + (oparg >> 8)); stack_pointer[-1 - (oparg >> 8)] = NULL; break; } + case STORE_ATTR: { STACK_SHRINK(2); break; } + case DELETE_ATTR: { STACK_SHRINK(1); break; } + case STORE_GLOBAL: { STACK_SHRINK(1); break; } + case DELETE_GLOBAL: { 
break; } + case _LOAD_LOCALS: { STACK_GROW(1); stack_pointer[-1] = NULL; break; } + case _LOAD_FROM_DICT_OR_GLOBALS: { stack_pointer[-1] = NULL; break; } + case LOAD_GLOBAL: { STACK_GROW(1); STACK_GROW(((oparg & 1) ? 1 : 0)); @@ -454,14 +427,17 @@ break; } + case _GUARD_GLOBALS_VERSION: { break; } + case _GUARD_BUILTINS_VERSION: { break; } + case _LOAD_GLOBAL_MODULE: { STACK_GROW(1); STACK_GROW(((oparg & 1) ? 1 : 0)); @@ -470,6 +446,7 @@ break; } + case _LOAD_GLOBAL_BUILTINS: { STACK_GROW(1); STACK_GROW(((oparg & 1) ? 1 : 0)); @@ -478,38 +455,42 @@ break; } + case DELETE_FAST: { break; } - case MAKE_CELL: { - break; - } + case DELETE_DEREF: { break; } + case LOAD_FROM_DICT_OR_DEREF: { stack_pointer[-1] = NULL; break; } + case LOAD_DEREF: { STACK_GROW(1); stack_pointer[-1] = NULL; break; } + case STORE_DEREF: { STACK_SHRINK(1); break; } + case COPY_FREE_VARS: { break; } + case BUILD_STRING: { STACK_SHRINK(oparg); STACK_GROW(1); @@ -517,6 +498,7 @@ break; } + case BUILD_TUPLE: { STACK_SHRINK(oparg); STACK_GROW(1); @@ -524,6 +506,7 @@ break; } + case BUILD_LIST: { STACK_SHRINK(oparg); STACK_GROW(1); @@ -531,16 +514,19 @@ break; } + case LIST_EXTEND: { STACK_SHRINK(1); break; } + case SET_UPDATE: { STACK_SHRINK(1); break; } + case BUILD_SET: { STACK_SHRINK(oparg); STACK_GROW(1); @@ -548,6 +534,7 @@ break; } + case BUILD_MAP: { STACK_SHRINK(oparg*2); STACK_GROW(1); @@ -555,46 +542,38 @@ break; } + case SETUP_ANNOTATIONS: { break; } + case BUILD_CONST_KEY_MAP: { STACK_SHRINK(oparg); stack_pointer[-1] = NULL; break; } + case DICT_UPDATE: { STACK_SHRINK(1); break; } + case DICT_MERGE: { STACK_SHRINK(1); break; } + case MAP_ADD: { STACK_SHRINK(2); break; } - case INSTRUMENTED_LOAD_SUPER_ATTR: { - STACK_SHRINK(2); - STACK_GROW(((oparg & 1) ? 1 : 0)); - stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; - stack_pointer[-1] = NULL; - break; - } - case LOAD_SUPER_ATTR: { - STACK_SHRINK(2); - STACK_GROW(((oparg & 1) ? 1 : 0)); - stack_pointer[-1 - (oparg & 1 ? 
1 : 0)] = NULL; - stack_pointer[-1] = NULL; - break; - } + case LOAD_SUPER_ATTR_ATTR: { STACK_SHRINK(2); @@ -604,6 +583,7 @@ break; } + case LOAD_SUPER_ATTR_METHOD: { STACK_SHRINK(1); stack_pointer[-2] = NULL; @@ -611,6 +591,7 @@ break; } + case LOAD_ATTR: { STACK_GROW(((oparg & 1) ? 1 : 0)); stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; @@ -618,14 +599,17 @@ break; } + case _GUARD_TYPE_VERSION: { break; } + case _CHECK_MANAGED_OBJECT_HAS_VALUES: { break; } + case _LOAD_ATTR_INSTANCE_VALUE: { STACK_GROW(((oparg & 1) ? 1 : 0)); stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; @@ -633,62 +617,15 @@ break; } - case LOAD_ATTR_MODULE: { - STACK_GROW(((oparg & 1) ? 1 : 0)); - stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; - stack_pointer[-1] = NULL; - break; - } - case LOAD_ATTR_WITH_HINT: { - STACK_GROW(((oparg & 1) ? 1 : 0)); - stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; - stack_pointer[-1] = NULL; - break; - } - case LOAD_ATTR_SLOT: { - STACK_GROW(((oparg & 1) ? 1 : 0)); - stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; - stack_pointer[-1] = NULL; - break; - } - case LOAD_ATTR_CLASS: { - STACK_GROW(((oparg & 1) ? 1 : 0)); - stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; - stack_pointer[-1] = NULL; - break; - } - case LOAD_ATTR_PROPERTY: { - STACK_GROW(((oparg & 1) ? 1 : 0)); - stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; - stack_pointer[-1] = NULL; - break; - } - case LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN: { - STACK_GROW(((oparg & 1) ? 1 : 0)); - stack_pointer[-1 - (oparg & 1 ? 
1 : 0)] = NULL; - stack_pointer[-1] = NULL; - break; - } - case STORE_ATTR_INSTANCE_VALUE: { - STACK_SHRINK(2); - break; - } - case STORE_ATTR_WITH_HINT: { - STACK_SHRINK(2); - break; - } - case STORE_ATTR_SLOT: { - STACK_SHRINK(2); - break; - } + case COMPARE_OP: { STACK_SHRINK(1); @@ -696,89 +633,68 @@ break; } + case COMPARE_OP_FLOAT: { STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } + case COMPARE_OP_INT: { STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } + case COMPARE_OP_STR: { STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } + case IS_OP: { STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } + case CONTAINS_OP: { STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } + case CHECK_EG_MATCH: { stack_pointer[-2] = NULL; stack_pointer[-1] = NULL; break; } + case CHECK_EXC_MATCH: { stack_pointer[-1] = NULL; break; } - case IMPORT_NAME: { - STACK_SHRINK(1); - stack_pointer[-1] = NULL; - break; - } - case IMPORT_FROM: { - STACK_GROW(1); - stack_pointer[-1] = NULL; - break; - } - case JUMP_FORWARD: { - break; - } - case JUMP_BACKWARD: { - break; - } - case ENTER_EXECUTOR: { - break; - } - case POP_JUMP_IF_FALSE: { - STACK_SHRINK(1); - break; - } - case POP_JUMP_IF_TRUE: { - STACK_SHRINK(1); - break; - } + case IS_NONE: { stack_pointer[-1] = NULL; break; } - case JUMP_BACKWARD_NO_INTERRUPT: { - break; - } + case GET_LEN: { STACK_GROW(1); @@ -786,57 +702,54 @@ break; } + case MATCH_CLASS: { STACK_SHRINK(2); stack_pointer[-1] = NULL; break; } + case MATCH_MAPPING: { STACK_GROW(1); stack_pointer[-1] = NULL; break; } + case MATCH_SEQUENCE: { STACK_GROW(1); stack_pointer[-1] = NULL; break; } + case MATCH_KEYS: { STACK_GROW(1); stack_pointer[-1] = NULL; break; } + case GET_ITER: { stack_pointer[-1] = NULL; break; } + case GET_YIELD_FROM_ITER: { stack_pointer[-1] = NULL; break; } - case FOR_ITER: { - STACK_GROW(1); - stack_pointer[-1] = NULL; - break; - } - case INSTRUMENTED_FOR_ITER: { - break; - } + case _ITER_CHECK_LIST: { break; } - case _ITER_JUMP_LIST: { - 
break; - } + case _IS_ITER_EXHAUSTED_LIST: { STACK_GROW(1); @@ -844,19 +757,19 @@ break; } + case _ITER_NEXT_LIST: { STACK_GROW(1); stack_pointer[-1] = NULL; break; } + case _ITER_CHECK_TUPLE: { break; } - case _ITER_JUMP_TUPLE: { - break; - } + case _IS_ITER_EXHAUSTED_TUPLE: { STACK_GROW(1); @@ -864,19 +777,19 @@ break; } + case _ITER_NEXT_TUPLE: { STACK_GROW(1); stack_pointer[-1] = NULL; break; } + case _ITER_CHECK_RANGE: { break; } - case _ITER_JUMP_RANGE: { - break; - } + case _IS_ITER_EXHAUSTED_RANGE: { STACK_GROW(1); @@ -884,31 +797,16 @@ break; } + case _ITER_NEXT_RANGE: { STACK_GROW(1); stack_pointer[-1] = NULL; break; } - case FOR_ITER_GEN: { - STACK_GROW(1); - stack_pointer[-1] = NULL; - break; - } - case BEFORE_ASYNC_WITH: { - STACK_GROW(1); - stack_pointer[-2] = NULL; - stack_pointer[-1] = NULL; - break; - } - case BEFORE_WITH: { - STACK_GROW(1); - stack_pointer[-2] = NULL; - stack_pointer[-1] = NULL; - break; - } + case WITH_EXCEPT_START: { STACK_GROW(1); @@ -916,6 +814,7 @@ break; } + case PUSH_EXC_INFO: { STACK_GROW(1); stack_pointer[-2] = NULL; @@ -923,75 +822,17 @@ break; } - case LOAD_ATTR_METHOD_WITH_VALUES: { - STACK_GROW(1); - stack_pointer[-2] = NULL; - stack_pointer[-1] = NULL; - break; - } - case LOAD_ATTR_METHOD_NO_DICT: { - STACK_GROW(1); - stack_pointer[-2] = NULL; - stack_pointer[-1] = NULL; - break; - } - case LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: { - stack_pointer[-1 - (0 ? 1 : 0)] = NULL; - stack_pointer[-1] = NULL; - break; - } - case LOAD_ATTR_NONDESCRIPTOR_NO_DICT: { - stack_pointer[-1 - (0 ? 
1 : 0)] = NULL; - stack_pointer[-1] = NULL; - break; - } - case LOAD_ATTR_METHOD_LAZY_DICT: { - STACK_GROW(1); - stack_pointer[-2] = NULL; - stack_pointer[-1] = NULL; - break; - } - case KW_NAMES: { - break; - } - case INSTRUMENTED_CALL: { - break; - } - case CALL: { - STACK_SHRINK(oparg); - STACK_SHRINK(1); - stack_pointer[-1] = NULL; - CHECK_EVAL_BREAKER(); - break; - } - case CALL_BOUND_METHOD_EXACT_ARGS: { - STACK_SHRINK(oparg); - STACK_SHRINK(1); - stack_pointer[-1] = NULL; - break; - } - case CALL_PY_EXACT_ARGS: { - STACK_SHRINK(oparg); - STACK_SHRINK(1); - stack_pointer[-1] = NULL; - break; - } - case CALL_PY_WITH_DEFAULTS: { - STACK_SHRINK(oparg); - STACK_SHRINK(1); - stack_pointer[-1] = NULL; - break; - } + case CALL_NO_KW_TYPE_1: { STACK_SHRINK(oparg); @@ -1000,65 +841,47 @@ break; } + case CALL_NO_KW_STR_1: { STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; - CHECK_EVAL_BREAKER(); break; } + case CALL_NO_KW_TUPLE_1: { STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; - CHECK_EVAL_BREAKER(); break; } - case CALL_NO_KW_ALLOC_AND_ENTER_INIT: { - STACK_SHRINK(oparg); - STACK_SHRINK(1); - stack_pointer[-1] = NULL; - break; - } + case EXIT_INIT_CHECK: { STACK_SHRINK(1); break; } - case CALL_BUILTIN_CLASS: { - STACK_SHRINK(oparg); - STACK_SHRINK(1); - stack_pointer[-1] = NULL; - CHECK_EVAL_BREAKER(); - break; - } + case CALL_NO_KW_BUILTIN_O: { STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; - CHECK_EVAL_BREAKER(); break; } + case CALL_NO_KW_BUILTIN_FAST: { STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; - CHECK_EVAL_BREAKER(); break; } - case CALL_BUILTIN_FAST_WITH_KEYWORDS: { - STACK_SHRINK(oparg); - STACK_SHRINK(1); - stack_pointer[-1] = NULL; - CHECK_EVAL_BREAKER(); - break; - } + case CALL_NO_KW_LEN: { STACK_SHRINK(oparg); @@ -1067,6 +890,7 @@ break; } + case CALL_NO_KW_ISINSTANCE: { STACK_SHRINK(oparg); STACK_SHRINK(1); @@ -1074,71 +898,48 @@ break; } - case CALL_NO_KW_LIST_APPEND: { - 
STACK_SHRINK(oparg); - STACK_SHRINK(1); - stack_pointer[-1] = NULL; - break; - } + case CALL_NO_KW_METHOD_DESCRIPTOR_O: { STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; - CHECK_EVAL_BREAKER(); break; } - case CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS: { - STACK_SHRINK(oparg); - STACK_SHRINK(1); - stack_pointer[-1] = NULL; - CHECK_EVAL_BREAKER(); - break; - } + case CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS: { STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; - CHECK_EVAL_BREAKER(); break; } + case CALL_NO_KW_METHOD_DESCRIPTOR_FAST: { STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = NULL; - CHECK_EVAL_BREAKER(); break; } - case INSTRUMENTED_CALL_FUNCTION_EX: { - break; - } - case CALL_FUNCTION_EX: { - STACK_SHRINK(((oparg & 1) ? 1 : 0)); - STACK_SHRINK(2); - stack_pointer[-1] = NULL; - CHECK_EVAL_BREAKER(); - break; - } + case MAKE_FUNCTION: { stack_pointer[-1] = NULL; break; } + case SET_FUNCTION_ATTRIBUTE: { STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } - case RETURN_GENERATOR: { - break; - } + case BUILD_SLICE: { STACK_SHRINK(((oparg == 3) ? 
1 : 0)); @@ -1147,99 +948,79 @@ break; } + case CONVERT_VALUE: { stack_pointer[-1] = NULL; break; } + case FORMAT_SIMPLE: { stack_pointer[-1] = NULL; break; } + case FORMAT_WITH_SPEC: { STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } + case COPY: { STACK_GROW(1); stack_pointer[-1] = NULL; break; } + case BINARY_OP: { STACK_SHRINK(1); stack_pointer[-1] = NULL; break; } + case SWAP: { stack_pointer[-2 - (oparg-2)] = NULL; stack_pointer[-1] = NULL; break; } - case INSTRUMENTED_INSTRUCTION: { - break; - } - case INSTRUMENTED_JUMP_FORWARD: { - break; - } - case INSTRUMENTED_JUMP_BACKWARD: { - break; - } - case INSTRUMENTED_POP_JUMP_IF_TRUE: { - break; - } - case INSTRUMENTED_POP_JUMP_IF_FALSE: { - break; - } - case INSTRUMENTED_POP_JUMP_IF_NONE: { - break; - } - case INSTRUMENTED_POP_JUMP_IF_NOT_NONE: { - break; - } - case EXTENDED_ARG: { - break; - } - case CACHE: { - break; - } - case RESERVED: { - break; - } + case _POP_JUMP_IF_FALSE: { STACK_SHRINK(1); break; } + case _POP_JUMP_IF_TRUE: { STACK_SHRINK(1); break; } + case JUMP_TO_TOP: { - CHECK_EVAL_BREAKER(); break; } + case SAVE_IP: { break; } + case EXIT_TRACE: { break; } diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 832beedec05460..10351e7a768a56 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -629,13 +629,13 @@ def write_abstract_interpreter_instructions( case parsing.InstDef(): instr = AbstractInstruction(self.instrs[thing.name].inst) self.out.emit("") - with self.out.block(f"case {thing.name}:"): - instr.write(self.out, tier=TIER_TWO) - if instr.check_eval_breaker: - self.out.emit("CHECK_EVAL_BREAKER();") - self.out.emit("break;") - # elif instr.kind != "op": - # print(f"NOTE: {thing.name} is not a viable uop") + if instr.is_viable_uop(): + self.out.emit("") + with self.out.block(f"case {thing.name}:"): + instr.write(self.out, tier=TIER_TWO) + self.out.emit("break;") + # elif instr.kind != "op": + # 
print(f"NOTE: {thing.name} is not a viable uop") case parsing.Macro(): pass case parsing.Pseudo(): From fdcca9036ba949185c716a3dbfcf48c9fea8e533 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 5 Aug 2023 02:34:31 +0800 Subject: [PATCH 15/48] Turn on the abstract interpreter --- Include/cpython/optimizer.h | 2 +- Include/internal/pycore_optimizer.h | 4 +- Python/optimizer.c | 14 +- Python/optimizer_analysis.c | 174 +++++++++++++++++++++++- Tools/cases_generator/generate_cases.py | 14 +- Tools/cases_generator/stacking.py | 2 +- 6 files changed, 199 insertions(+), 11 deletions(-) diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h index 2260501bfd608e..5ceb57eb6f34cd 100644 --- a/Include/cpython/optimizer.h +++ b/Include/cpython/optimizer.h @@ -22,7 +22,7 @@ typedef struct _PyExecutorObject { typedef struct _PyOptimizerObject _PyOptimizerObject; /* Should return > 0 if a new executor is created. O if no executor is produced and < 0 if an error occurred. 
*/ -typedef int (*optimize_func)(_PyOptimizerObject* self, PyCodeObject *code, _Py_CODEUNIT *instr, _PyExecutorObject **); +typedef int (*optimize_func)(_PyOptimizerObject* self, PyCodeObject *code, _Py_CODEUNIT *instr, _PyExecutorObject **, int curr_stackentries); typedef struct _PyOptimizerObject { PyObject_HEAD diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index ccbe7e52af289a..2ae657c4e117ff 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -10,7 +10,9 @@ extern "C" { #include "pycore_uops.h" -int _Py_uop_analyze_and_optimize(_PyUOpInstruction *trace, int trace_len); +int _Py_uop_analyze_and_optimize(PyCodeObject *code, + _PyUOpInstruction *trace, int trace_len, int curr_stackentries); + #ifdef __cplusplus } diff --git a/Python/optimizer.c b/Python/optimizer.c index 3f9a82cf1daa13..4ca0959eb4a45d 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -104,7 +104,8 @@ error_optimize( _PyOptimizerObject* self, PyCodeObject *code, _Py_CODEUNIT *instr, - _PyExecutorObject **exec) + _PyExecutorObject **exec, + int Py_UNUSED(stack_entries)) { PyErr_Format(PyExc_SystemError, "Should never call error_optimize"); return -1; @@ -165,7 +166,7 @@ _PyOptimizer_BackEdge(_PyInterpreterFrame *frame, _Py_CODEUNIT *src, _Py_CODEUNI } _PyOptimizerObject *opt = interp->optimizer; _PyExecutorObject *executor = NULL; - int err = opt->optimize(opt, code, dest, &executor); + int err = opt->optimize(opt, code, dest, &executor, (int)(stack_pointer - _PyFrame_Stackbase(frame))); if (err <= 0) { assert(executor == NULL); if (err < 0) { @@ -255,7 +256,9 @@ counter_optimize( _PyOptimizerObject* self, PyCodeObject *code, _Py_CODEUNIT *instr, - _PyExecutorObject **exec_ptr) + _PyExecutorObject **exec_ptr, + int Py_UNUSED(curr_stackentries) +) { _PyCounterExecutorObject *executor = (_PyCounterExecutorObject *)_PyObject_New(&CounterExecutor_Type); if (executor == NULL) { @@ -691,7 +694,8 @@ uop_optimize( 
_PyOptimizerObject *self, PyCodeObject *code, _Py_CODEUNIT *instr, - _PyExecutorObject **exec_ptr) + _PyExecutorObject **exec_ptr, + int curr_stackentries) { _PyUOpInstruction trace[_Py_UOP_MAX_TRACE_LENGTH]; int trace_length = translate_bytecode_to_trace(code, instr, trace, _Py_UOP_MAX_TRACE_LENGTH); @@ -705,7 +709,7 @@ uop_optimize( return -1; } executor->base.execute = _PyUopExecute; - trace_length = _Py_uop_analyze_and_optimize(trace, trace_length); + trace_length = _Py_uop_analyze_and_optimize(code, trace, trace_length, curr_stackentries); memcpy(executor->trace, trace, trace_length * sizeof(_PyUOpInstruction)); if (trace_length < _Py_UOP_MAX_TRACE_LENGTH) { executor->trace[trace_length].opcode = 0; // Sentinel diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index cb399b0beb9129..4bc1295a72a40f 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -12,11 +12,183 @@ #include #include "pycore_optimizer.h" +// TYPENODE is a tagged pointer that uses the last 2 LSB as the tag +#define _Py_PARTITIONNODE_t uintptr_t + +// PARTITIONNODE Tags +typedef enum _Py_TypeNodeTags { + // Node is unused + TYPE_NULL = 0, + // TYPE_ROOT_POSITIVE can point to a root struct or be a NULL + TYPE_ROOT= 1, + // TYPE_REF points to a TYPE_ROOT or a TYPE_REF + TYPE_REF = 2, +} _Py_TypeNodeTags; + +typedef struct _Py_PartitionRootNode { + PyObject_HEAD + // For partial evaluation + uint8_t static_or_dyanmic; + // For types (TODO) +} _Py_PartitionRootNode; + +PyTypeObject _Py_PartitionRootNode_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + .tp_name = "uops abstract interpreter's root node", + .tp_basicsize = sizeof(_Py_PartitionRootNode), + .tp_dealloc = PyObject_Del, + .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION +}; + +static inline _Py_PARTITIONNODE_t +partitionnode_get_tag(_Py_PARTITIONNODE_t node) +{ + return node & 0b11; +} + +static inline _Py_PARTITIONNODE_t +partitionnode_clear_tag(_Py_PARTITIONNODE_t node) 
+{ + return node & (~(uintptr_t)(0b11)); +} + +static inline _Py_PARTITIONNODE_t +partitionnode_make_root(uint8_t static_or_dynamic) +{ + _Py_PartitionRootNode *root = PyObject_New(_Py_PartitionRootNode, &_Py_PartitionRootNode_Type); + if (root == NULL) { + return 0; + } + root->static_or_dyanmic = static_or_dynamic; + return (_Py_PARTITIONNODE_t)root; +} + +static inline _Py_PARTITIONNODE_t +partitionnode_make_ref(_Py_PARTITIONNODE_t node) +{ + return partitionnode_clear_tag(node) | TYPE_REF; +} + +static inline _Py_PARTITIONNODE_t +partitionnode_null() +{ + return 0; +} + + +// Tier 2 types meta interpreter +typedef struct _Py_UOpsAbstractInterpContext { + PyObject_HEAD + // points to one element after the abstract stack + _Py_PARTITIONNODE_t *stack_pointer; + int stack_len; + _Py_PARTITIONNODE_t *stack; + int locals_len; + _Py_PARTITIONNODE_t *locals; +} _Py_UOpsAbstractInterpContext; + +static void +abstractinterp_dealloc(PyObject *o) +{ + _Py_UOpsAbstractInterpContext *self = (_Py_UOpsAbstractInterpContext *)o; + PyMem_Free(self->stack); + PyMem_Free(self->locals); + // TODO traverse the nodes and decref all roots too. 
+ Py_TYPE(self)->tp_free((PyObject *)self); +} + +PyTypeObject _Py_UOpsAbstractInterpContext_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + .tp_name = "uops abstract interpreter's context", + .tp_basicsize = sizeof(_Py_UOpsAbstractInterpContext), + .tp_dealloc = abstractinterp_dealloc, + .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION +}; + +_Py_UOpsAbstractInterpContext * +_Py_UOpsAbstractInterpContext_New(int stack_len, int locals_len, int curr_stacklen) +{ + _Py_UOpsAbstractInterpContext *self = (_Py_UOpsAbstractInterpContext *)PyType_GenericAlloc( + (PyTypeObject *)&_Py_UOpsAbstractInterpContext_Type, 0); + if (self == NULL) { + return NULL; + } + + // Setup + self->stack_len = stack_len; + self->locals_len = locals_len; + + _Py_PARTITIONNODE_t *locals_with_stack = PyMem_New(_Py_PARTITIONNODE_t, locals_len + stack_len); + if (locals_with_stack == NULL) { + Py_DECREF(self); + return NULL; + } + + + for (int i = 0; i < locals_len + stack_len; i++) { + locals_with_stack[i] = partitionnode_null(); + } + + self->locals = locals_with_stack; + self->stack = locals_with_stack + locals_len; + self->stack_pointer = self->stack + curr_stacklen; + + return self; +} + int _Py_uop_analyze_and_optimize( + PyCodeObject *co, _PyUOpInstruction *trace, - int trace_len + int trace_len, + int curr_stacklen ) { +#define STACK_LEVEL() ((int)(stack_pointer - ctx->stack)) +#define STACK_SIZE() (co->co_stacksize) +#define BASIC_STACKADJ(n) (stack_pointer += n) + +#ifdef Py_DEBUG +#define STACK_GROW(n) do { \ + assert(n >= 0); \ + BASIC_STACKADJ(n); \ + assert(STACK_LEVEL() <= STACK_SIZE()); \ + } while (0) +#define STACK_SHRINK(n) do { \ + assert(n >= 0); \ + assert(STACK_LEVEL() >= n); \ + BASIC_STACKADJ(-(n)); \ + } while (0) +#else +#define STACK_GROW(n) BASIC_STACKADJ(n) +#define STACK_SHRINK(n) BASIC_STACKADJ(-(n)) +#endif + _PyUOpInstruction *temp_writebuffer = PyMem_New(_PyUOpInstruction, trace_len); + if (temp_writebuffer == NULL) { + return trace_len; + 
} + + _Py_UOpsAbstractInterpContext *ctx = _Py_UOpsAbstractInterpContext_New(co->co_stacksize, co->co_nlocals, curr_stacklen); + if (ctx == NULL) { + PyMem_Free(temp_writebuffer); + return trace_len; + } + + int oparg; + int opcode; + _Py_PARTITIONNODE_t *stack_pointer = ctx->stack_pointer; + for (int i = 0; i < trace_len; i++) { + oparg = trace[i].oparg; + opcode = trace[i].opcode; + switch (opcode) { +#include "abstract_interp_cases.c.h" + default: + fprintf(stderr, "Unknown opcode in abstract interpreter\n"); + Py_UNREACHABLE(); + } + ctx->stack_pointer = stack_pointer; + + } + assert(STACK_SIZE() >= 0); return trace_len; } diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 10351e7a768a56..636f8b0600d052 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -61,6 +61,17 @@ INSTR_FMT_PREFIX = "INSTR_FMT_" +# @TODO generate all these after updating the DSL +SPECIALLY_HANDLED_ABSTRACT_INSTR = { + # "LOAD_FAST", + # "LOAD_FAST_CHECK", + # "LOAD_FAST_AND_CLEAR", + # "LOAD_CONST", + # "STORE_FAST", + # "STORE_FAST_MAYBE_NULL", + # "COPY", +} + arg_parser = argparse.ArgumentParser( description="Generate the code for the interpreter switch.", formatter_class=argparse.ArgumentDefaultsHelpFormatter, @@ -628,8 +639,7 @@ def write_abstract_interpreter_instructions( self.write_overridden_instr_place_holder(thing) case parsing.InstDef(): instr = AbstractInstruction(self.instrs[thing.name].inst) - self.out.emit("") - if instr.is_viable_uop(): + if instr.is_viable_uop() and instr.name not in SPECIALLY_HANDLED_ABSTRACT_INSTR: self.out.emit("") with self.out.block(f"case {thing.name}:"): instr.write(self.out, tier=TIER_TWO) diff --git a/Tools/cases_generator/stacking.py b/Tools/cases_generator/stacking.py index 8ae08f70904305..8c8b5d09e4fcda 100644 --- a/Tools/cases_generator/stacking.py +++ b/Tools/cases_generator/stacking.py @@ -432,5 +432,5 @@ def 
_write_components_for_abstract_interp( poke.effect.cond, poke.effect.size, ), - StackEffect("NULL"), + StackEffect("partitionnode_null()"), ) From 7632ed1ffdfc0049ab48a4db444d6944fc98689f Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 6 Aug 2023 03:46:53 +0800 Subject: [PATCH 16/48] (leaky) data structures for constant propagation --- Python/optimizer_analysis.c | 316 +++++++++++++++++++++++- Tools/cases_generator/generate_cases.py | 14 +- Tools/cases_generator/stacking.py | 19 +- 3 files changed, 322 insertions(+), 27 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 4bc1295a72a40f..19543df8bcf8f4 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -29,18 +29,28 @@ typedef struct _Py_PartitionRootNode { PyObject_HEAD // For partial evaluation uint8_t static_or_dyanmic; + PyObject *const_val; // For types (TODO) } _Py_PartitionRootNode; +static void +partitionnode_dealloc(PyObject *o) +{ + _Py_PartitionRootNode *self = (_Py_PartitionRootNode *)o; + Py_CLEAR(self->const_val); + Py_TYPE(self)->tp_free(o); +} + PyTypeObject _Py_PartitionRootNode_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) .tp_name = "uops abstract interpreter's root node", .tp_basicsize = sizeof(_Py_PartitionRootNode), - .tp_dealloc = PyObject_Del, + .tp_dealloc = partitionnode_dealloc, + .tp_free = PyObject_Free, .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION }; -static inline _Py_PARTITIONNODE_t +static inline _Py_TypeNodeTags partitionnode_get_tag(_Py_PARTITIONNODE_t node) { return node & 0b11; @@ -52,14 +62,20 @@ partitionnode_clear_tag(_Py_PARTITIONNODE_t node) return node & (~(uintptr_t)(0b11)); } +// static_or_dynamic +// 0 - static +// 1 - dynamic +// If static, const_value must be set! 
static inline _Py_PARTITIONNODE_t -partitionnode_make_root(uint8_t static_or_dynamic) +partitionnode_make_root(uint8_t static_or_dynamic, PyObject *const_val) { _Py_PartitionRootNode *root = PyObject_New(_Py_PartitionRootNode, &_Py_PartitionRootNode_Type); if (root == NULL) { return 0; } root->static_or_dyanmic = static_or_dynamic; + root->const_val = Py_NewRef(const_val); + fprintf(stderr, "allocating ROOT\n"); return (_Py_PARTITIONNODE_t)root; } @@ -69,11 +85,8 @@ partitionnode_make_ref(_Py_PARTITIONNODE_t node) return partitionnode_clear_tag(node) | TYPE_REF; } -static inline _Py_PARTITIONNODE_t -partitionnode_null() -{ - return 0; -} + +static _Py_PARTITIONNODE_t PARTITIONNODE_NULLROOT = (_Py_PARTITIONNODE_t)_Py_NULL | TYPE_ROOT; // Tier 2 types meta interpreter @@ -91,9 +104,20 @@ static void abstractinterp_dealloc(PyObject *o) { _Py_UOpsAbstractInterpContext *self = (_Py_UOpsAbstractInterpContext *)o; - PyMem_Free(self->stack); + // Traverse all nodes and decref the root objects (if they are not NULL). + // Note: stack is after locals so this is safe + int total = self->locals_len + self->stack_len; + for (int i = 0; i < total; i++) { + _Py_PARTITIONNODE_t node = self->locals[i]; + if (partitionnode_get_tag(node) == TYPE_ROOT) { + if (node != PARTITIONNODE_NULLROOT) { + fprintf(stderr, "DEALLOCATING ROOT\n"); + } + Py_XDECREF(partitionnode_clear_tag(node)); + } + } PyMem_Free(self->locals); - // TODO traverse the nodes and decref all roots too. + // No need to free stack because it is allocated together with the locals. 
Py_TYPE(self)->tp_free((PyObject *)self); } @@ -102,6 +126,7 @@ PyTypeObject _Py_UOpsAbstractInterpContext_Type = { .tp_name = "uops abstract interpreter's context", .tp_basicsize = sizeof(_Py_UOpsAbstractInterpContext), .tp_dealloc = abstractinterp_dealloc, + .tp_free = PyObject_Free, .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION }; @@ -126,7 +151,7 @@ _Py_UOpsAbstractInterpContext_New(int stack_len, int locals_len, int curr_stackl for (int i = 0; i < locals_len + stack_len; i++) { - locals_with_stack[i] = partitionnode_null(); + locals_with_stack[i] = PARTITIONNODE_NULLROOT; } self->locals = locals_with_stack; @@ -136,6 +161,226 @@ _Py_UOpsAbstractInterpContext_New(int stack_len, int locals_len, int curr_stackl return self; } +static inline _Py_PARTITIONNODE_t * +partitionnode_get_rootptr(_Py_PARTITIONNODE_t *ref) +{ + _Py_TypeNodeTags tag = partitionnode_get_tag(*ref); + while (tag != TYPE_ROOT) { + ref = (_Py_PARTITIONNODE_t *)(partitionnode_clear_tag(*ref)); + tag = partitionnode_get_tag(*ref); + } + return ref; +} + +/** + * @brief Performs SET operation. dst tree becomes part of src tree + * + * If src_is_new is set, src is interpreted as a TYPE_ROOT + * not part of the type_context. Otherwise, it is interpreted as a pointer + * to a _Py_PARTITIONNODE_t. + * + * If src_is_new: + * Overwrites the root of the dst tree with the src node + * else: + * Makes the root of the dst tree a TYPE_REF to src + * +*/ +static void +partitionnode_set(_Py_PARTITIONNODE_t *src, _Py_PARTITIONNODE_t *dst, bool src_is_new) +{ + { + +#ifdef Py_DEBUG + // If `src_is_new` is set: + // - `src` doesn't belong inside the type context yet. 
+ // - `src` has to be a TYPE_ROOT + // - `src` is to be interpreted as a _Py_TYPENODE_t + if (src_is_new) { + assert(partitionnode_get_tag(*src) == TYPE_ROOT); + } +#endif + + _Py_TypeNodeTags tag = partitionnode_get_tag(*dst); + switch (tag) { + case TYPE_ROOT: { + _Py_PARTITIONNODE_t old_root = partitionnode_clear_tag(*dst); + if (!src_is_new) { + // Make dst a reference to src + *dst = partitionnode_make_ref(*src); + Py_XDECREF(old_root); + break; + } + // Make dst the src + *dst = *src; + Py_XDECREF(old_root); + break; + } + case TYPE_REF: { + _Py_PARTITIONNODE_t *rootptr = partitionnode_get_rootptr(dst); + _Py_PARTITIONNODE_t old_root = partitionnode_clear_tag(*rootptr); + if (!src_is_new) { + // Traverse up to the root of dst, make root a reference to src + *rootptr = partitionnode_make_ref(*src); + // Old root no longer used. + Py_XDECREF(old_root); + break; + } + // Make root of dst the src + *rootptr = *src; + // Old root no longer used. + Py_XDECREF(old_root); + break; + } + default: + Py_UNREACHABLE(); + } + } +} + + +/** + * @brief Performs OVERWRITE operation. dst node gets overwritten by src node + * + * If src_is_new is set, src is interpreted as a TYPE_ROOT + * not part of the ctx. Otherwise, it is interpreted as a pointer + * to a _Py_PARTITIONNODE_t. 
+ * + * If src_is_new: + * Removes dst node from its tree (+fixes all the references to dst) + * Overwrite the dst node with the src node + * else: + * Removes dst node from its tree (+fixes all the references to dst) + * Makes the root of the dst tree a TYPE_REF to src + * +*/ +static void +partitionnode_overwrite(_Py_UOpsAbstractInterpContext *ctx, + _Py_PARTITIONNODE_t *src, _Py_PARTITIONNODE_t *dst, bool src_is_new) +{ +#ifdef Py_DEBUG + if (src_is_new) { + assert(partitionnode_get_tag(*src) == TYPE_ROOT); + } +#endif + _Py_TypeNodeTags tag = partitionnode_get_tag(*dst); + switch (tag) { + case TYPE_ROOT: { + + _Py_PARTITIONNODE_t old_dst = *dst; + if (!src_is_new) { + // Make dst a reference to src + *dst = partitionnode_make_ref(*src); + } + else { + // Make dst the src + *dst = *src; + } + + // No longer need the old root. + Py_XDECREF(partitionnode_clear_tag(old_dst)); + + /* Pick one child of dst and make that the new root of the dst tree */ + + // Children of dst will have this form + _Py_PARTITIONNODE_t child_test = partitionnode_make_ref( + partitionnode_clear_tag(*dst)); + // Will be initialised to the first child we find (ptr to the new root) + _Py_PARTITIONNODE_t *new_root_ptr = NULL; + + // Search locals for children + int nlocals = ctx->locals_len; + for (int i = 0; i < nlocals; i++) { + _Py_PARTITIONNODE_t *node_ptr = &(ctx->locals[i]); + if (*node_ptr == child_test) { + if (new_root_ptr == NULL) { + // First child encountered! initialise root + new_root_ptr = node_ptr; + *node_ptr = *dst; + } + else { + // Not the first child encounted, point it to the new root + *node_ptr = partitionnode_make_ref(*new_root_ptr); + } + } + } + + // Search stack for children + int nstack = ctx->stack_len; + for (int i = 0; i < nstack; i++) { + _Py_PARTITIONNODE_t *node_ptr = &(ctx->stack[i]); + if (*node_ptr == child_test) { + if (new_root_ptr == NULL) { + // First child encountered! 
initialise root + new_root_ptr = node_ptr; + *node_ptr = *dst; + } + else { + // Not the first child encounted, point it to the new root + *node_ptr = partitionnode_make_ref(*new_root_ptr); + } + } + } + + break; + } + case TYPE_REF: { + + _Py_PARTITIONNODE_t old_dst = *dst; + // Make dst a reference to src + if (!src_is_new) { + // Make dst a reference to src + *dst = partitionnode_make_ref(*src); + } + else { + // Make dst the src + *dst = *src; + } + + /* Make all child of src be a reference to the parent of dst */ + + // Children of dst will have this form + _Py_PARTITIONNODE_t child_test = partitionnode_make_ref( + partitionnode_clear_tag(*dst)); + + // Search locals for children + int nlocals = ctx->locals_len; + for (int i = 0; i < nlocals; i++) { + _Py_PARTITIONNODE_t *node_ptr = &(ctx->locals[i]); + if (*node_ptr == child_test) { + // Is a child of dst. Point it to the parent of dst + *node_ptr = old_dst; + } + } + + // Search stack for children + int nstack = ctx->stack_len; + for (int i = 0; i < nstack; i++) { + _Py_PARTITIONNODE_t *node_ptr = &(ctx->stack[i]); + if (*node_ptr == child_test) { + // Is a child of dst. 
Point it to the parent of dst + *node_ptr = old_dst; + } + } + break; + } + default: + Py_UNREACHABLE(); + } +} + + +#ifndef Py_DEBUG +#define GETITEM(v, i) PyTuple_GET_ITEM((v), (i)) +#else +static inline PyObject * +GETITEM(PyObject *v, Py_ssize_t i) { + assert(PyTuple_Check(v)); + assert(i >= 0); + assert(i < PyTuple_GET_SIZE(v)); + return PyTuple_GET_ITEM(v, i); +} +#endif + int _Py_uop_analyze_and_optimize( PyCodeObject *co, @@ -163,6 +408,12 @@ _Py_uop_analyze_and_optimize( #define STACK_GROW(n) BASIC_STACKADJ(n) #define STACK_SHRINK(n) BASIC_STACKADJ(-(n)) #endif +#define PEEK(idx) (&(stack_pointer[-(idx)])) +#define GETLOCAL(idx) (&(locals[idx])) + +#define PARTITIONNODE_SET(src, dst, flag) partitionnode_set((src), (dst), (flag)) +#define PARTITIONNODE_OVERWRITE(src, dst, flag) partitionnode_overwrite(ctx, (src), (dst), (flag)) +#define MAKE_STATIC_ROOT(val) partitionnode_make_root(0, (val)) _PyUOpInstruction *temp_writebuffer = PyMem_New(_PyUOpInstruction, trace_len); if (temp_writebuffer == NULL) { return trace_len; @@ -177,11 +428,52 @@ _Py_uop_analyze_and_optimize( int oparg; int opcode; _Py_PARTITIONNODE_t *stack_pointer = ctx->stack_pointer; + _Py_PARTITIONNODE_t *locals = ctx->locals; for (int i = 0; i < trace_len; i++) { oparg = trace[i].oparg; opcode = trace[i].opcode; + /* + "LOAD_FAST", + "LOAD_FAST_CHECK", + "LOAD_FAST_AND_CLEAR", + "LOAD_CONST", + "STORE_FAST", + "STORE_FAST_MAYBE_NULL", + "COPY", + */ switch (opcode) { #include "abstract_interp_cases.c.h" + // @TODO convert these to autogenerated using DSL + case LOAD_FAST: + case LOAD_FAST_CHECK: + STACK_GROW(1); + PARTITIONNODE_OVERWRITE(GETLOCAL(oparg), PEEK(1), false); + break; + case LOAD_FAST_AND_CLEAR: { + STACK_GROW(1); + PARTITIONNODE_OVERWRITE(GETLOCAL(oparg), PEEK(1), false); + PARTITIONNODE_OVERWRITE(&PARTITIONNODE_NULLROOT, GETLOCAL(oparg), false); + break; + } + case LOAD_CONST: { + _Py_PARTITIONNODE_t value = MAKE_STATIC_ROOT(GETITEM(co->co_consts, oparg)); + STACK_GROW(1); + 
PARTITIONNODE_OVERWRITE(&value, PEEK(1), false); + break; + } + case STORE_FAST: + case STORE_FAST_MAYBE_NULL: { + _Py_PARTITIONNODE_t *value = PEEK(1); + PARTITIONNODE_OVERWRITE(value, GETLOCAL(oparg), false); + STACK_SHRINK(1); + break; + } + case COPY: { + _Py_PARTITIONNODE_t *bottom = PEEK(1 + (oparg - 1)); + STACK_GROW(1); + PARTITIONNODE_SET(bottom, PEEK(1), false); + break; + } default: fprintf(stderr, "Unknown opcode in abstract interpreter\n"); Py_UNREACHABLE(); @@ -190,5 +482,7 @@ _Py_uop_analyze_and_optimize( } assert(STACK_SIZE() >= 0); + Py_DECREF(ctx); + PyMem_Free(temp_writebuffer); return trace_len; } diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 636f8b0600d052..58da498f0d1d26 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -63,13 +63,13 @@ # @TODO generate all these after updating the DSL SPECIALLY_HANDLED_ABSTRACT_INSTR = { - # "LOAD_FAST", - # "LOAD_FAST_CHECK", - # "LOAD_FAST_AND_CLEAR", - # "LOAD_CONST", - # "STORE_FAST", - # "STORE_FAST_MAYBE_NULL", - # "COPY", + "LOAD_FAST", + "LOAD_FAST_CHECK", + "LOAD_FAST_AND_CLEAR", + "LOAD_CONST", + "STORE_FAST", + "STORE_FAST_MAYBE_NULL", + "COPY", } arg_parser = argparse.ArgumentParser( diff --git a/Tools/cases_generator/stacking.py b/Tools/cases_generator/stacking.py index 8c8b5d09e4fcda..eb54dd65a394d9 100644 --- a/Tools/cases_generator/stacking.py +++ b/Tools/cases_generator/stacking.py @@ -425,12 +425,13 @@ def _write_components_for_abstract_interp( # NULL out the output stack effects for poke in mgr.pokes: if not poke.effect.size and poke.effect.name not in mgr.instr.unmoved_names: - out.assign( - StackEffect( - poke.as_variable(), - poke.effect.type, - poke.effect.cond, - poke.effect.size, - ), - StackEffect("partitionnode_null()"), - ) + out.emit(f"PARTITIONNODE_OVERWRITE(&PARTITIONNODE_NULLROOT, PEEK(-({poke.offset.as_index()})), true);") + # out.assign( + # StackEffect( + # 
poke.as_variable(), + # poke.effect.type, + # poke.effect.cond, + # poke.effect.size, + # ), + # StackEffect("partitionnode_nullroot()"), + # ) From 0d0c4c45e5b4aa19da4bb06a59f7fe4d780c6e8a Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 6 Aug 2023 16:59:27 +0800 Subject: [PATCH 17/48] (with cycles) try to fix the type prop Co-Authored-By: Jules <57632293+juliapoo@users.noreply.github.com> --- Python/optimizer_analysis.c | 182 +++++++++++++++++++++++------- Tools/cases_generator/stacking.py | 2 +- 2 files changed, 144 insertions(+), 40 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 19543df8bcf8f4..378c321767dbdd 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -12,6 +12,8 @@ #include #include "pycore_optimizer.h" +#define PARTITION_DEBUG 1 + // TYPENODE is a tagged pointer that uses the last 2 LSB as the tag #define _Py_PARTITIONNODE_t uintptr_t @@ -66,7 +68,7 @@ partitionnode_clear_tag(_Py_PARTITIONNODE_t node) // 0 - static // 1 - dynamic // If static, const_value must be set! 
-static inline _Py_PARTITIONNODE_t +static _Py_PARTITIONNODE_t partitionnode_make_root(uint8_t static_or_dynamic, PyObject *const_val) { _Py_PartitionRootNode *root = PyObject_New(_Py_PartitionRootNode, &_Py_PartitionRootNode_Type); @@ -75,20 +77,18 @@ partitionnode_make_root(uint8_t static_or_dynamic, PyObject *const_val) } root->static_or_dyanmic = static_or_dynamic; root->const_val = Py_NewRef(const_val); - fprintf(stderr, "allocating ROOT\n"); return (_Py_PARTITIONNODE_t)root; } static inline _Py_PARTITIONNODE_t -partitionnode_make_ref(_Py_PARTITIONNODE_t node) +partitionnode_make_ref(_Py_PARTITIONNODE_t *node) { - return partitionnode_clear_tag(node) | TYPE_REF; + return partitionnode_clear_tag((_Py_PARTITIONNODE_t)node) | TYPE_REF; } static _Py_PARTITIONNODE_t PARTITIONNODE_NULLROOT = (_Py_PARTITIONNODE_t)_Py_NULL | TYPE_ROOT; - // Tier 2 types meta interpreter typedef struct _Py_UOpsAbstractInterpContext { PyObject_HEAD @@ -110,9 +110,6 @@ abstractinterp_dealloc(PyObject *o) for (int i = 0; i < total; i++) { _Py_PARTITIONNODE_t node = self->locals[i]; if (partitionnode_get_tag(node) == TYPE_ROOT) { - if (node != PARTITIONNODE_NULLROOT) { - fprintf(stderr, "DEALLOCATING ROOT\n"); - } Py_XDECREF(partitionnode_clear_tag(node)); } } @@ -161,7 +158,11 @@ _Py_UOpsAbstractInterpContext_New(int stack_len, int locals_len, int curr_stackl return self; } -static inline _Py_PARTITIONNODE_t * +#if PARTITION_DEBUG +static void print_ctx(_Py_UOpsAbstractInterpContext *ctx); +#endif + +static _Py_PARTITIONNODE_t * partitionnode_get_rootptr(_Py_PARTITIONNODE_t *ref) { _Py_TypeNodeTags tag = partitionnode_get_tag(*ref); @@ -204,31 +205,27 @@ partitionnode_set(_Py_PARTITIONNODE_t *src, _Py_PARTITIONNODE_t *dst, bool src_i switch (tag) { case TYPE_ROOT: { _Py_PARTITIONNODE_t old_root = partitionnode_clear_tag(*dst); + Py_XDECREF(old_root); if (!src_is_new) { // Make dst a reference to src - *dst = partitionnode_make_ref(*src); - Py_XDECREF(old_root); + *dst = 
partitionnode_make_ref(src); break; } // Make dst the src *dst = *src; - Py_XDECREF(old_root); break; } case TYPE_REF: { _Py_PARTITIONNODE_t *rootptr = partitionnode_get_rootptr(dst); _Py_PARTITIONNODE_t old_root = partitionnode_clear_tag(*rootptr); + Py_XDECREF(old_root); if (!src_is_new) { // Traverse up to the root of dst, make root a reference to src - *rootptr = partitionnode_make_ref(*src); - // Old root no longer used. - Py_XDECREF(old_root); + *rootptr = partitionnode_make_ref(src); break; } // Make root of dst the src *rootptr = *src; - // Old root no longer used. - Py_XDECREF(old_root); break; } default: @@ -259,7 +256,7 @@ partitionnode_overwrite(_Py_UOpsAbstractInterpContext *ctx, { #ifdef Py_DEBUG if (src_is_new) { - assert(partitionnode_get_tag(*src) == TYPE_ROOT); + assert(partitionnode_get_tag((_Py_PARTITIONNODE_t)src) == TYPE_ROOT); } #endif _Py_TypeNodeTags tag = partitionnode_get_tag(*dst); @@ -269,37 +266,40 @@ partitionnode_overwrite(_Py_UOpsAbstractInterpContext *ctx, _Py_PARTITIONNODE_t old_dst = *dst; if (!src_is_new) { // Make dst a reference to src - *dst = partitionnode_make_ref(*src); + *dst = partitionnode_make_ref(src); + assert(partitionnode_get_tag(*dst) == TYPE_REF); + assert(partitionnode_clear_tag(*dst) != (_Py_PARTITIONNODE_t)_Py_NULL); + fprintf(stderr, "START\n"); + print_ctx(ctx); } else { // Make dst the src - *dst = *src; + *dst = (_Py_PARTITIONNODE_t)src; } - // No longer need the old root. 
- Py_XDECREF(partitionnode_clear_tag(old_dst)); /* Pick one child of dst and make that the new root of the dst tree */ // Children of dst will have this form _Py_PARTITIONNODE_t child_test = partitionnode_make_ref( - partitionnode_clear_tag(*dst)); - // Will be initialised to the first child we find (ptr to the new root) - _Py_PARTITIONNODE_t *new_root_ptr = NULL; + (_Py_PARTITIONNODE_t *)partitionnode_clear_tag((_Py_PARTITIONNODE_t)dst)); + // Will be initialised to the first child we find + _Py_PARTITIONNODE_t *new_root = (_Py_PARTITIONNODE_t *)NULL; // Search locals for children int nlocals = ctx->locals_len; for (int i = 0; i < nlocals; i++) { _Py_PARTITIONNODE_t *node_ptr = &(ctx->locals[i]); if (*node_ptr == child_test) { - if (new_root_ptr == NULL) { + if (new_root == (_Py_PARTITIONNODE_t)NULL) { // First child encountered! initialise root - new_root_ptr = node_ptr; - *node_ptr = *dst; + new_root = node_ptr; + *node_ptr = old_dst; + Py_XINCREF(partitionnode_clear_tag(old_dst)); } else { // Not the first child encounted, point it to the new root - *node_ptr = partitionnode_make_ref(*new_root_ptr); + *node_ptr = partitionnode_make_ref(new_root); } } } @@ -309,18 +309,23 @@ partitionnode_overwrite(_Py_UOpsAbstractInterpContext *ctx, for (int i = 0; i < nstack; i++) { _Py_PARTITIONNODE_t *node_ptr = &(ctx->stack[i]); if (*node_ptr == child_test) { - if (new_root_ptr == NULL) { + if (new_root == (_Py_PARTITIONNODE_t)NULL) { // First child encountered! initialise root - new_root_ptr = node_ptr; - *node_ptr = *dst; + new_root = node_ptr; + *node_ptr = old_dst; + Py_XINCREF(partitionnode_clear_tag(old_dst)); } else { // Not the first child encounted, point it to the new root - *node_ptr = partitionnode_make_ref(*new_root_ptr); + *node_ptr = partitionnode_make_ref(new_root); } } } + // This ndoe is no longer referencing the old root. 
+ Py_XDECREF(partitionnode_clear_tag(old_dst)); + fprintf(stderr, "END\n"); + print_ctx(ctx); break; } case TYPE_REF: { @@ -329,18 +334,20 @@ partitionnode_overwrite(_Py_UOpsAbstractInterpContext *ctx, // Make dst a reference to src if (!src_is_new) { // Make dst a reference to src - *dst = partitionnode_make_ref(*src); + *dst = partitionnode_make_ref(src); + assert(partitionnode_get_tag(*dst) == TYPE_REF); + assert(partitionnode_clear_tag(*dst) != (_Py_PARTITIONNODE_t)_Py_NULL); } else { // Make dst the src - *dst = *src; + *dst = (_Py_PARTITIONNODE_t)src; } /* Make all child of src be a reference to the parent of dst */ // Children of dst will have this form _Py_PARTITIONNODE_t child_test = partitionnode_make_ref( - partitionnode_clear_tag(*dst)); + (_Py_PARTITIONNODE_t *)partitionnode_clear_tag(*dst)); // Search locals for children int nlocals = ctx->locals_len; @@ -368,6 +375,100 @@ partitionnode_overwrite(_Py_UOpsAbstractInterpContext *ctx, } } +#if PARTITION_DEBUG + +/** + * @brief Print the entries in the abstract interpreter context (along with locals). +*/ +static void +print_ctx(_Py_UOpsAbstractInterpContext *ctx) +{ + _Py_PARTITIONNODE_t *locals = ctx->locals; + _Py_PARTITIONNODE_t *stackptr = ctx->stack_pointer; + + int nstack_use = (int)(stackptr - ctx->stack); + int nstack = ctx->stack_len; + int nlocals = ctx->locals_len; + + bool is_local = false; + bool is_stack = false; + + int locals_offset = -1; + int stack_offset = -1; + int parent_idx = -1; + + fprintf(stderr, " Stack: %p: [", ctx->stack); + for (int i = 0; i < nstack; i++) { + _Py_PARTITIONNODE_t *node = &ctx->stack[i]; + _Py_PARTITIONNODE_t tag = partitionnode_get_tag(*node); + + _Py_PARTITIONNODE_t *root = partitionnode_get_rootptr(node); + + fprintf(stderr, "%s", i == nstack_use ? "." 
: " "); + + if (tag == TYPE_REF) { + _Py_PARTITIONNODE_t *parent = (_Py_PARTITIONNODE_t *)(partitionnode_clear_tag(*node)); + is_local = parent >= ctx->locals && parent < ctx->stack; + is_stack = parent >= ctx->stack && parent < (ctx->stack + nstack); + parent_idx = is_local + ? (int)(parent - ctx->locals) + : is_stack + ? (int)(parent - ctx->locals) + : -1; + } + + + _Py_PartitionRootNode *ptr = (_Py_PartitionRootNode *)partitionnode_clear_tag(*root); + fprintf(stderr, "%s", + ptr == NULL ? "?" : (ptr->static_or_dyanmic ? "dynamic" : "static")); + + if (tag == TYPE_REF) { + const char *wher = is_local + ? "locals" + : is_stack + ? "stack" + : "const"; + fprintf(stderr, "->%s[%d]", + wher, parent_idx); + } + } + fprintf(stderr, "]\n"); + + fprintf(stderr, " Locals %p: [", locals); + for (int i = 0; i < nlocals; i++) { + _Py_PARTITIONNODE_t *node = &ctx->locals[i]; + _Py_PARTITIONNODE_t tag = partitionnode_get_tag(*node); + + _Py_PARTITIONNODE_t *root = partitionnode_get_rootptr(node); + + if (tag == TYPE_REF) { + _Py_PARTITIONNODE_t *parent = (_Py_PARTITIONNODE_t *)(partitionnode_clear_tag(*node)); + is_local = parent >= ctx->locals && parent < ctx->stack; + is_stack = parent >= ctx->stack && parent < (ctx->stack + nstack); + parent_idx = is_local + ? (int)(parent - ctx->locals) + : is_stack + ? (int)(parent - ctx->locals) + : -1; + } + + _Py_PartitionRootNode *ptr = (_Py_PartitionRootNode *)partitionnode_clear_tag(*root); + fprintf(stderr, "%s", + ptr == NULL ? "?" : (ptr->static_or_dyanmic ? "dynamic" : "static")); + + if (tag == TYPE_REF) { + const char *wher = is_local + ? "locals" + : is_stack + ? 
"stack" + : "const"; + fprintf(stderr, "->%s[%d]", + wher, parent_idx); + } + } + fprintf(stderr, "]\n"); +} +#endif #ifndef Py_DEBUG #define GETITEM(v, i) PyTuple_GET_ITEM((v), (i)) @@ -398,6 +499,7 @@ _Py_uop_analyze_and_optimize( assert(n >= 0); \ BASIC_STACKADJ(n); \ assert(STACK_LEVEL() <= STACK_SIZE()); \ + ctx->stack_pointer = stack_pointer; \ } while (0) #define STACK_SHRINK(n) do { \ assert(n >= 0); \ @@ -458,7 +560,10 @@ _Py_uop_analyze_and_optimize( case LOAD_CONST: { _Py_PARTITIONNODE_t value = MAKE_STATIC_ROOT(GETITEM(co->co_consts, oparg)); STACK_GROW(1); - PARTITIONNODE_OVERWRITE(&value, PEEK(1), false); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)value, PEEK(1), false); +#if PARTITION_DEBUG + print_ctx(ctx); +#endif break; } case STORE_FAST: @@ -478,8 +583,7 @@ _Py_uop_analyze_and_optimize( fprintf(stderr, "Unknown opcode in abstract interpreter\n"); Py_UNREACHABLE(); } - ctx->stack_pointer = stack_pointer; - + //print_ctx(ctx); } assert(STACK_SIZE() >= 0); Py_DECREF(ctx); diff --git a/Tools/cases_generator/stacking.py b/Tools/cases_generator/stacking.py index eb54dd65a394d9..d2b178ea908757 100644 --- a/Tools/cases_generator/stacking.py +++ b/Tools/cases_generator/stacking.py @@ -425,7 +425,7 @@ def _write_components_for_abstract_interp( # NULL out the output stack effects for poke in mgr.pokes: if not poke.effect.size and poke.effect.name not in mgr.instr.unmoved_names: - out.emit(f"PARTITIONNODE_OVERWRITE(&PARTITIONNODE_NULLROOT, PEEK(-({poke.offset.as_index()})), true);") + out.emit(f"PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-({poke.offset.as_index()})), true);") # out.assign( # StackEffect( # poke.as_variable(), From 4c8953e437646efbaf59b5590e9489b83e3fb466 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 6 Aug 2023 17:19:35 +0800 Subject: [PATCH 18/48] fix: cycles Co-Authored-By: Jules <57632293+juliapoo@users.noreply.github.com> --- 
Python/optimizer_analysis.c | 40 +++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 378c321767dbdd..01723afe7f55af 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -173,6 +173,35 @@ partitionnode_get_rootptr(_Py_PARTITIONNODE_t *ref) return ref; } +/** + * @brief Checks if two nodes are in the same partition. +*/ +static bool +partitionnode_is_same_partition(_Py_PARTITIONNODE_t *x, _Py_PARTITIONNODE_t *y) +{ + _Py_PARTITIONNODE_t *x_rootref = x; + _Py_PARTITIONNODE_t *y_rootref = y; + uintptr_t x_tag = partitionnode_get_tag(*x); + uintptr_t y_tag = partitionnode_get_tag(*y); + switch (y_tag) { + case TYPE_REF: + y_rootref = partitionnode_get_rootptr(y); + case TYPE_ROOT: + break; + default: + Py_UNREACHABLE(); + } + switch (x_tag) { + case TYPE_REF: + x_rootref = partitionnode_get_rootptr(x); + case TYPE_ROOT: + break; + default: + Py_UNREACHABLE(); + } + return x_rootref == y_rootref; +} + /** * @brief Performs SET operation. 
dst tree becomes part of src tree * @@ -201,6 +230,11 @@ partitionnode_set(_Py_PARTITIONNODE_t *src, _Py_PARTITIONNODE_t *dst, bool src_i } #endif + // This prevents cycles from forming + if (!src_is_new && partitionnode_is_same_partition(src, dst)) { + return; + } + _Py_TypeNodeTags tag = partitionnode_get_tag(*dst); switch (tag) { case TYPE_ROOT: { @@ -259,6 +293,12 @@ partitionnode_overwrite(_Py_UOpsAbstractInterpContext *ctx, assert(partitionnode_get_tag((_Py_PARTITIONNODE_t)src) == TYPE_ROOT); } #endif + + // This prevents cycles from forming + if (!src_is_new && partitionnode_is_same_partition(src, dst)) { + return; + } + _Py_TypeNodeTags tag = partitionnode_get_tag(*dst); switch (tag) { case TYPE_ROOT: { From 3bd36fa49ea5f3493e589a56af850a3c1aa4115e Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 6 Aug 2023 17:21:46 +0800 Subject: [PATCH 19/48] cleanup Co-Authored-By: Jules <57632293+juliapoo@users.noreply.github.com> --- Python/optimizer_analysis.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 01723afe7f55af..461f4f010d9b48 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -309,8 +309,6 @@ partitionnode_overwrite(_Py_UOpsAbstractInterpContext *ctx, *dst = partitionnode_make_ref(src); assert(partitionnode_get_tag(*dst) == TYPE_REF); assert(partitionnode_clear_tag(*dst) != (_Py_PARTITIONNODE_t)_Py_NULL); - fprintf(stderr, "START\n"); - print_ctx(ctx); } else { // Make dst the src @@ -364,8 +362,6 @@ partitionnode_overwrite(_Py_UOpsAbstractInterpContext *ctx, // This ndoe is no longer referencing the old root. 
Py_XDECREF(partitionnode_clear_tag(old_dst)); - fprintf(stderr, "END\n"); - print_ctx(ctx); break; } case TYPE_REF: { @@ -575,13 +571,14 @@ _Py_uop_analyze_and_optimize( oparg = trace[i].oparg; opcode = trace[i].opcode; /* + * The following are special cased: "LOAD_FAST", - "LOAD_FAST_CHECK", - "LOAD_FAST_AND_CLEAR", - "LOAD_CONST", - "STORE_FAST", - "STORE_FAST_MAYBE_NULL", - "COPY", + "LOAD_FAST_CHECK", + "LOAD_FAST_AND_CLEAR", + "LOAD_CONST", + "STORE_FAST", + "STORE_FAST_MAYBE_NULL", + "COPY", */ switch (opcode) { #include "abstract_interp_cases.c.h" @@ -623,7 +620,9 @@ _Py_uop_analyze_and_optimize( fprintf(stderr, "Unknown opcode in abstract interpreter\n"); Py_UNREACHABLE(); } - //print_ctx(ctx); +#if PARTITION_DEBUG + print_ctx(ctx); +#endif } assert(STACK_SIZE() >= 0); Py_DECREF(ctx); From 229097fa7b427799aec8623eb3a3f5658bf090ef Mon Sep 17 00:00:00 2001 From: Jules <57632293+JuliaPoo@users.noreply.github.com> Date: Mon, 7 Aug 2023 23:34:21 +0800 Subject: [PATCH 20/48] Fix+Refactor: Handling of root nodes in special-cased type prop (#40) * Fix+Refactor: Handling of root nodes in special-cased type prop * Style: Removed trailing space --- Python/optimizer_analysis.c | 62 ++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 36 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 461f4f010d9b48..1f3425f15fe009 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -77,7 +77,7 @@ partitionnode_make_root(uint8_t static_or_dynamic, PyObject *const_val) } root->static_or_dyanmic = static_or_dynamic; root->const_val = Py_NewRef(const_val); - return (_Py_PARTITIONNODE_t)root; + return (_Py_PARTITIONNODE_t)root | TYPE_ROOT; } static inline _Py_PARTITIONNODE_t @@ -179,27 +179,7 @@ partitionnode_get_rootptr(_Py_PARTITIONNODE_t *ref) static bool partitionnode_is_same_partition(_Py_PARTITIONNODE_t *x, _Py_PARTITIONNODE_t *y) { - _Py_PARTITIONNODE_t *x_rootref = x; - _Py_PARTITIONNODE_t 
*y_rootref = y; - uintptr_t x_tag = partitionnode_get_tag(*x); - uintptr_t y_tag = partitionnode_get_tag(*y); - switch (y_tag) { - case TYPE_REF: - y_rootref = partitionnode_get_rootptr(y); - case TYPE_ROOT: - break; - default: - Py_UNREACHABLE(); - } - switch (x_tag) { - case TYPE_REF: - x_rootref = partitionnode_get_rootptr(x); - case TYPE_ROOT: - break; - default: - Py_UNREACHABLE(); - } - return x_rootref == y_rootref; + return partitionnode_get_rootptr(x) == partitionnode_get_rootptr(y); } /** @@ -307,7 +287,6 @@ partitionnode_overwrite(_Py_UOpsAbstractInterpContext *ctx, if (!src_is_new) { // Make dst a reference to src *dst = partitionnode_make_ref(src); - assert(partitionnode_get_tag(*dst) == TYPE_REF); assert(partitionnode_clear_tag(*dst) != (_Py_PARTITIONNODE_t)_Py_NULL); } else { @@ -444,19 +423,21 @@ print_ctx(_Py_UOpsAbstractInterpContext *ctx) if (tag == TYPE_REF) { _Py_PARTITIONNODE_t *parent = (_Py_PARTITIONNODE_t *)(partitionnode_clear_tag(*node)); - is_local = parent >= ctx->locals && parent < ctx->stack; - is_stack = parent >= ctx->stack && parent < (ctx->stack + nstack); + int local_index = (int)(parent - ctx->locals); + int stack_index = (int)(parent - ctx->stack); + is_local = local_index >= 0 && local_index < ctx->locals_len; + is_stack = stack_index >= 0 && stack_index < nstack; parent_idx = is_local - ? (int)(parent - ctx->locals) + ? local_index : is_stack - ? (int)(parent - ctx->locals) + ? stack_index : -1; } _Py_PartitionRootNode *ptr = (_Py_PartitionRootNode *)partitionnode_clear_tag(*root); fprintf(stderr, "%s", - ptr == NULL ? "?" : (ptr->static_or_dyanmic ? "dynamic" : "static")); + ptr == NULL ? "?" : (ptr->static_or_dyanmic == 0 ? 
"static" : "dynamic")); if (tag == TYPE_REF) { const char *wher = is_local @@ -479,18 +460,20 @@ print_ctx(_Py_UOpsAbstractInterpContext *ctx) if (tag == TYPE_REF) { _Py_PARTITIONNODE_t *parent = (_Py_PARTITIONNODE_t *)(partitionnode_clear_tag(*node)); - is_local = parent >= ctx->locals && parent < ctx->stack; - is_stack = parent >= ctx->stack && parent < (ctx->stack + nstack); + int local_index = (int)(parent - ctx->locals); + int stack_index = (int)(parent - ctx->stack); + is_local = local_index >= 0 && local_index < ctx->locals_len; + is_stack = stack_index >= 0 && stack_index < nstack; parent_idx = is_local - ? (int)(parent - ctx->locals) + ? local_index : is_stack - ? (int)(parent - ctx->locals) + ? stack_index : -1; } _Py_PartitionRootNode *ptr = (_Py_PartitionRootNode *)partitionnode_clear_tag(*root); fprintf(stderr, "%s", - ptr == NULL ? "?" : (ptr->static_or_dyanmic ? "dynamic" : "static")); + ptr == NULL ? "?" : (ptr->static_or_dyanmic == 0 ? "static" : "dynamic")); if (tag == TYPE_REF) { const char *wher = is_local @@ -570,6 +553,13 @@ _Py_uop_analyze_and_optimize( for (int i = 0; i < trace_len; i++) { oparg = trace[i].oparg; opcode = trace[i].opcode; +#ifdef PARTITION_DEBUG +#ifdef Py_DEBUG + fprintf(stderr, " [-] Type propagating across: %s{%d} : %d\n", + (opcode >= 300 ? 
_PyOpcode_uop_name : _PyOpcode_OpName)[opcode], + opcode, oparg); +#endif +#endif /* * The following are special cased: "LOAD_FAST", @@ -591,13 +581,13 @@ _Py_uop_analyze_and_optimize( case LOAD_FAST_AND_CLEAR: { STACK_GROW(1); PARTITIONNODE_OVERWRITE(GETLOCAL(oparg), PEEK(1), false); - PARTITIONNODE_OVERWRITE(&PARTITIONNODE_NULLROOT, GETLOCAL(oparg), false); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, GETLOCAL(oparg), true); break; } case LOAD_CONST: { - _Py_PARTITIONNODE_t value = MAKE_STATIC_ROOT(GETITEM(co->co_consts, oparg)); + _Py_PARTITIONNODE_t* value = (_Py_PARTITIONNODE_t *)MAKE_STATIC_ROOT(GETITEM(co->co_consts, oparg)); STACK_GROW(1); - PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)value, PEEK(1), false); + PARTITIONNODE_OVERWRITE(value, PEEK(1), true); #if PARTITION_DEBUG print_ctx(ctx); #endif From ca0fab79b2cb84b09fc7f4e9bf9aaca6516c6423 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 8 Aug 2023 17:45:57 +0800 Subject: [PATCH 21/48] partially partially evaluate --- Python/abstract_interp_cases.c.h | 473 +++++------------------- Python/bytecodes.c | 9 + Python/executor_cases.c.h | 10 + Python/optimizer_analysis.c | 327 ++++++++++++++-- Tools/cases_generator/generate_cases.py | 8 +- 5 files changed, 433 insertions(+), 394 deletions(-) diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 33e726cc78e17d..6e8448d3dc4412 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -3,1024 +3,753 @@ // Python/bytecodes.c // Do not edit! 
- case NOP: { break; } - - - - case LOAD_FAST_CHECK: { - STACK_GROW(1); - stack_pointer[-1] = NULL; - break; - } - - - case LOAD_FAST: { - STACK_GROW(1); - stack_pointer[-1] = NULL; - break; - } - - - case LOAD_FAST_AND_CLEAR: { - STACK_GROW(1); - stack_pointer[-1] = NULL; - break; - } - - - - case LOAD_CONST: { - STACK_GROW(1); - stack_pointer[-1] = NULL; - break; - } - - - case STORE_FAST: { - STACK_SHRINK(1); - break; - } - - - - case POP_TOP: { STACK_SHRINK(1); break; } - case PUSH_NULL: { STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - - case END_SEND: { STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - - case UNARY_NEGATIVE: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case UNARY_NOT: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case TO_BOOL: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case TO_BOOL_BOOL: { break; } - case TO_BOOL_INT: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case TO_BOOL_LIST: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case TO_BOOL_NONE: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case TO_BOOL_STR: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case TO_BOOL_ALWAYS_TRUE: { - stack_pointer[-1] = NULL; + 
PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case UNARY_INVERT: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case _GUARD_BOTH_INT: { break; } - - case _BINARY_OP_MULTIPLY_INT: { - STACK_SHRINK(1); - stack_pointer[-1] = NULL; - break; - } - - - case _BINARY_OP_ADD_INT: { - STACK_SHRINK(1); - stack_pointer[-1] = NULL; - break; - } - - - case _BINARY_OP_SUBTRACT_INT: { - STACK_SHRINK(1); - stack_pointer[-1] = NULL; - break; - } - - case _GUARD_BOTH_FLOAT: { break; } - case _BINARY_OP_MULTIPLY_FLOAT: { STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case _BINARY_OP_ADD_FLOAT: { STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case _BINARY_OP_SUBTRACT_FLOAT: { STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case _GUARD_BOTH_UNICODE: { break; } - case _BINARY_OP_ADD_UNICODE: { STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - - case BINARY_SUBSCR: { STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case BINARY_SLICE: { STACK_SHRINK(2); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case STORE_SLICE: { STACK_SHRINK(4); break; } - case BINARY_SUBSCR_LIST_INT: { STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case BINARY_SUBSCR_TUPLE_INT: { STACK_SHRINK(1); 
- stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case BINARY_SUBSCR_DICT: { STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - - case LIST_APPEND: { STACK_SHRINK(1); break; } - case SET_ADD: { STACK_SHRINK(1); break; } - case STORE_SUBSCR: { STACK_SHRINK(3); break; } - case STORE_SUBSCR_LIST_INT: { STACK_SHRINK(3); break; } - case STORE_SUBSCR_DICT: { STACK_SHRINK(3); break; } - case DELETE_SUBSCR: { STACK_SHRINK(2); break; } - case CALL_INTRINSIC_1: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case CALL_INTRINSIC_2: { STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - - - - - - - case GET_AITER: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case GET_ANEXT: { STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case GET_AWAITABLE: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - - - - - case POP_EXCEPT: { STACK_SHRINK(1); break; } - - - - case LOAD_ASSERTION_ERROR: { STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case LOAD_BUILD_CLASS: { STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case STORE_NAME: { STACK_SHRINK(1); break; } - case DELETE_NAME: { break; } - case UNPACK_SEQUENCE: { STACK_SHRINK(1); STACK_GROW(oparg); break; } - case 
UNPACK_SEQUENCE_TWO_TUPLE: { STACK_SHRINK(1); STACK_GROW(oparg); break; } - case UNPACK_SEQUENCE_TUPLE: { STACK_SHRINK(1); STACK_GROW(oparg); break; } - case UNPACK_SEQUENCE_LIST: { STACK_SHRINK(1); STACK_GROW(oparg); break; } - case UNPACK_EX: { STACK_GROW((oparg & 0xFF) + (oparg >> 8)); - stack_pointer[-1 - (oparg >> 8)] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - (oparg >> 8))), true); break; } - case STORE_ATTR: { STACK_SHRINK(2); break; } - case DELETE_ATTR: { STACK_SHRINK(1); break; } - case STORE_GLOBAL: { STACK_SHRINK(1); break; } - case DELETE_GLOBAL: { break; } - case _LOAD_LOCALS: { STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case _LOAD_FROM_DICT_OR_GLOBALS: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case LOAD_GLOBAL: { STACK_GROW(1); STACK_GROW(((oparg & 1) ? 1 : 0)); - stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - (oparg & 1 ? 1 : 0))), true); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case _GUARD_GLOBALS_VERSION: { break; } - case _GUARD_BUILTINS_VERSION: { break; } - case _LOAD_GLOBAL_MODULE: { STACK_GROW(1); STACK_GROW(((oparg & 1) ? 1 : 0)); - stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - (oparg & 1 ? 1 : 0))), true); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case _LOAD_GLOBAL_BUILTINS: { STACK_GROW(1); STACK_GROW(((oparg & 1) ? 1 : 0)); - stack_pointer[-1 - (oparg & 1 ? 
1 : 0)] = NULL; - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - (oparg & 1 ? 1 : 0))), true); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case DELETE_FAST: { break; } - - case DELETE_DEREF: { break; } - case LOAD_FROM_DICT_OR_DEREF: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case LOAD_DEREF: { STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case STORE_DEREF: { STACK_SHRINK(1); break; } - case COPY_FREE_VARS: { break; } - case BUILD_STRING: { STACK_SHRINK(oparg); STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case BUILD_TUPLE: { STACK_SHRINK(oparg); STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case BUILD_LIST: { STACK_SHRINK(oparg); STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case LIST_EXTEND: { STACK_SHRINK(1); break; } - case SET_UPDATE: { STACK_SHRINK(1); break; } - case BUILD_SET: { STACK_SHRINK(oparg); STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case BUILD_MAP: { STACK_SHRINK(oparg*2); STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case SETUP_ANNOTATIONS: { break; } - case BUILD_CONST_KEY_MAP: { STACK_SHRINK(oparg); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case 
DICT_UPDATE: { STACK_SHRINK(1); break; } - case DICT_MERGE: { STACK_SHRINK(1); break; } - case MAP_ADD: { STACK_SHRINK(2); break; } - - - case LOAD_SUPER_ATTR_ATTR: { STACK_SHRINK(2); STACK_GROW(((oparg & 1) ? 1 : 0)); - stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - (oparg & 1 ? 1 : 0))), true); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case LOAD_SUPER_ATTR_METHOD: { STACK_SHRINK(1); - stack_pointer[-2] = NULL; - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-2)), true); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case LOAD_ATTR: { STACK_GROW(((oparg & 1) ? 1 : 0)); - stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - (oparg & 1 ? 1 : 0))), true); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case _GUARD_TYPE_VERSION: { break; } - case _CHECK_MANAGED_OBJECT_HAS_VALUES: { break; } - case _LOAD_ATTR_INSTANCE_VALUE: { STACK_GROW(((oparg & 1) ? 1 : 0)); - stack_pointer[-1 - (oparg & 1 ? 1 : 0)] = NULL; - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - (oparg & 1 ? 
1 : 0))), true); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - - - - - - - - - - case COMPARE_OP: { STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case COMPARE_OP_FLOAT: { STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case COMPARE_OP_INT: { STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case COMPARE_OP_STR: { STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case IS_OP: { STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case CONTAINS_OP: { STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case CHECK_EG_MATCH: { - stack_pointer[-2] = NULL; - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-2)), true); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case CHECK_EXC_MATCH: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - - - - - - - - case IS_NONE: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - - case GET_LEN: { STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case MATCH_CLASS: { STACK_SHRINK(2); - stack_pointer[-1] = NULL; + 
PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case MATCH_MAPPING: { STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case MATCH_SEQUENCE: { STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case MATCH_KEYS: { STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case GET_ITER: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case GET_YIELD_FROM_ITER: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - - - case _ITER_CHECK_LIST: { break; } - - case _IS_ITER_EXHAUSTED_LIST: { STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case _ITER_NEXT_LIST: { STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case _ITER_CHECK_TUPLE: { break; } - - case _IS_ITER_EXHAUSTED_TUPLE: { STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case _ITER_NEXT_TUPLE: { STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case _ITER_CHECK_RANGE: { break; } - - case _IS_ITER_EXHAUSTED_RANGE: { STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case _ITER_NEXT_RANGE: { STACK_GROW(1); - stack_pointer[-1] = NULL; + 
PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - - - - case WITH_EXCEPT_START: { STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case PUSH_EXC_INFO: { STACK_GROW(1); - stack_pointer[-2] = NULL; - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-2)), true); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - - - - - - - - - - - - case CALL_NO_KW_TYPE_1: { STACK_SHRINK(oparg); STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case CALL_NO_KW_STR_1: { STACK_SHRINK(oparg); STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case CALL_NO_KW_TUPLE_1: { STACK_SHRINK(oparg); STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - - case EXIT_INIT_CHECK: { STACK_SHRINK(1); break; } - - case CALL_NO_KW_BUILTIN_O: { STACK_SHRINK(oparg); STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case CALL_NO_KW_BUILTIN_FAST: { STACK_SHRINK(oparg); STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - - case CALL_NO_KW_LEN: { STACK_SHRINK(oparg); STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case CALL_NO_KW_ISINSTANCE: { STACK_SHRINK(oparg); STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t 
*)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - - case CALL_NO_KW_METHOD_DESCRIPTOR_O: { STACK_SHRINK(oparg); STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - - case CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS: { STACK_SHRINK(oparg); STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case CALL_NO_KW_METHOD_DESCRIPTOR_FAST: { STACK_SHRINK(oparg); STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - - - case MAKE_FUNCTION: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case SET_FUNCTION_ATTRIBUTE: { STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - - case BUILD_SLICE: { STACK_SHRINK(((oparg == 3) ? 
1 : 0)); STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case CONVERT_VALUE: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case FORMAT_SIMPLE: { - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case FORMAT_WITH_SPEC: { STACK_SHRINK(1); - stack_pointer[-1] = NULL; - break; - } - - - case COPY: { - STACK_GROW(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case BINARY_OP: { STACK_SHRINK(1); - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - case SWAP: { - stack_pointer[-2 - (oparg-2)] = NULL; - stack_pointer[-1] = NULL; + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-2 - (oparg-2))), true); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; } - - - - - - - - - - - case _POP_JUMP_IF_FALSE: { STACK_SHRINK(1); break; } - case _POP_JUMP_IF_TRUE: { STACK_SHRINK(1); break; } - case JUMP_TO_TOP: { break; } - case SAVE_IP: { break; } - case EXIT_TRACE: { break; } + + case INSERT: { + break; + } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 90e26d3c86b380..1d9f36248fa65e 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3755,6 +3755,15 @@ dummy_func( return frame; } + op(INSERT, (--)) { + // Inserts TOS at position specified by oparg + PyObject *tos = TOP(); + for (int i = 1; i < oparg + 1; i++) { + stack_pointer[i] = stack_pointer[i - 1]; + } + POKE(oparg, tos); + } + // END BYTECODES // diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 9363b4955087db..a66a3a1d715648 100644 --- a/Python/executor_cases.c.h +++ 
b/Python/executor_cases.c.h @@ -2717,3 +2717,13 @@ return frame; break; } + + case INSERT: { + // Inserts TOS at position specified by oparg + PyObject *tos = TOP(); + for (int i = 1; i < oparg + 1; i++) { + stack_pointer[i] = stack_pointer[i - 1]; + } + POKE(oparg, tos); + break; + } diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 1f3425f15fe009..5134d5a0baaf3f 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -6,6 +6,7 @@ #include "pycore_opcode_utils.h" #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_uops.h" +#include "pycore_long.h" #include "cpython/optimizer.h" #include #include @@ -30,7 +31,9 @@ typedef enum _Py_TypeNodeTags { typedef struct _Py_PartitionRootNode { PyObject_HEAD // For partial evaluation - uint8_t static_or_dyanmic; + // 0 - static + // 1 - dynamic + uint8_t static_or_dynamic; PyObject *const_val; // For types (TODO) } _Py_PartitionRootNode; @@ -68,14 +71,14 @@ partitionnode_clear_tag(_Py_PARTITIONNODE_t node) // 0 - static // 1 - dynamic // If static, const_value must be set! -static _Py_PARTITIONNODE_t +static inline _Py_PARTITIONNODE_t partitionnode_make_root(uint8_t static_or_dynamic, PyObject *const_val) { _Py_PartitionRootNode *root = PyObject_New(_Py_PartitionRootNode, &_Py_PartitionRootNode_Type); if (root == NULL) { return 0; } - root->static_or_dyanmic = static_or_dynamic; + root->static_or_dynamic = static_or_dynamic; root->const_val = Py_NewRef(const_val); return (_Py_PARTITIONNODE_t)root | TYPE_ROOT; } @@ -92,12 +95,18 @@ static _Py_PARTITIONNODE_t PARTITIONNODE_NULLROOT = (_Py_PARTITIONNODE_t)_Py_NUL // Tier 2 types meta interpreter typedef struct _Py_UOpsAbstractInterpContext { PyObject_HEAD + // The following are abstract stack and locals. 
// points to one element after the abstract stack _Py_PARTITIONNODE_t *stack_pointer; int stack_len; _Py_PARTITIONNODE_t *stack; int locals_len; _Py_PARTITIONNODE_t *locals; + // The following represent the real (emitted instructions) stack and locals. + // points to one element after the abstract stack + _Py_PARTITIONNODE_t *real_stack_pointer; + _Py_PARTITIONNODE_t *real_stack; + _Py_PARTITIONNODE_t *real_locals; } _Py_UOpsAbstractInterpContext; static void @@ -140,14 +149,15 @@ _Py_UOpsAbstractInterpContext_New(int stack_len, int locals_len, int curr_stackl self->stack_len = stack_len; self->locals_len = locals_len; - _Py_PARTITIONNODE_t *locals_with_stack = PyMem_New(_Py_PARTITIONNODE_t, locals_len + stack_len); + // Double the size needed because we also need a representation for the real stack and locals. + _Py_PARTITIONNODE_t *locals_with_stack = PyMem_New(_Py_PARTITIONNODE_t, (locals_len + stack_len) * 2); if (locals_with_stack == NULL) { Py_DECREF(self); return NULL; } - for (int i = 0; i < locals_len + stack_len; i++) { + for (int i = 0; i < (locals_len + stack_len) * 2; i++) { locals_with_stack[i] = PARTITIONNODE_NULLROOT; } @@ -155,6 +165,9 @@ _Py_UOpsAbstractInterpContext_New(int stack_len, int locals_len, int curr_stackl self->stack = locals_with_stack + locals_len; self->stack_pointer = self->stack + curr_stacklen; + self->real_locals = self->locals + locals_len + stack_len; + self->real_stack = self->stack + locals_len + stack_len; + self->real_stack_pointer = self->stack_pointer + locals_len + stack_len; return self; } @@ -162,7 +175,7 @@ _Py_UOpsAbstractInterpContext_New(int stack_len, int locals_len, int curr_stackl static void print_ctx(_Py_UOpsAbstractInterpContext *ctx); #endif -static _Py_PARTITIONNODE_t * +static inline _Py_PARTITIONNODE_t * partitionnode_get_rootptr(_Py_PARTITIONNODE_t *ref) { _Py_TypeNodeTags tag = partitionnode_get_tag(*ref); @@ -489,15 +502,53 @@ print_ctx(_Py_UOpsAbstractInterpContext *ctx) } #endif +static bool 
+partitionnode_is_static(_Py_PARTITIONNODE_t *node) +{ + _Py_PARTITIONNODE_t *root = partitionnode_get_rootptr(node); + _Py_PartitionRootNode *root_obj = (_Py_PartitionRootNode *)partitionnode_clear_tag((_Py_PARTITIONNODE_t)root); + if (root_obj == _Py_NULL) { + return false; + } + return !root_obj->static_or_dynamic; +} + +// MUST BE GUARDED BY partitionnode_is_static BEFORE CALLING THIS +static inline PyObject * +get_const(_Py_PARTITIONNODE_t *node) +{ + assert(partitionnode_is_static(node)); + _Py_PARTITIONNODE_t *root = partitionnode_get_rootptr(node); + _Py_PartitionRootNode *root_obj = (_Py_PartitionRootNode * )partitionnode_clear_tag((_Py_PARTITIONNODE_t)root); + return root_obj->const_val; +} + +// Hardcoded for now, @TODO autogenerate these from the DSL. +static inline bool +op_is_pure(int opcode) +{ + switch (opcode) { + case LOAD_CONST: + case _BINARY_OP_MULTIPLY_INT: + case _BINARY_OP_ADD_INT: + case _BINARY_OP_SUBTRACT_INT: + case SAVE_IP: + return true; + default: + return false; + } +} + + #ifndef Py_DEBUG -#define GETITEM(v, i) PyTuple_GET_ITEM((v), (i)) +#define GETITEM(v, i) PyList_GET_ITEM((v), (i)) #else static inline PyObject * GETITEM(PyObject *v, Py_ssize_t i) { - assert(PyTuple_Check(v)); + assert(PyList_CheckExact(v)); assert(i >= 0); - assert(i < PyTuple_GET_SIZE(v)); - return PyTuple_GET_ITEM(v, i); + assert(i < PyList_GET_SIZE(v)); + return PyList_GET_ITEM(v, i); } #endif @@ -509,7 +560,7 @@ _Py_uop_analyze_and_optimize( int curr_stacklen ) { -#define STACK_LEVEL() ((int)(stack_pointer - ctx->stack)) +#define STACK_LEVEL() ((int)(stack_pointer - stack)) #define STACK_SIZE() (co->co_stacksize) #define BASIC_STACKADJ(n) (stack_pointer += n) @@ -518,7 +569,6 @@ _Py_uop_analyze_and_optimize( assert(n >= 0); \ BASIC_STACKADJ(n); \ assert(STACK_LEVEL() <= STACK_SIZE()); \ - ctx->stack_pointer = stack_pointer; \ } while (0) #define STACK_SHRINK(n) do { \ assert(n >= 0); \ @@ -540,17 +590,33 @@ _Py_uop_analyze_and_optimize( return trace_len; } + 
int buffer_trace_len = 0; + _Py_UOpsAbstractInterpContext *ctx = _Py_UOpsAbstractInterpContext_New(co->co_stacksize, co->co_nlocals, curr_stacklen); if (ctx == NULL) { PyMem_Free(temp_writebuffer); return trace_len; } + PyObject *co_const_copy = PyList_New(PyTuple_Size(co->co_consts)); + if (co_const_copy == NULL) { + goto abstract_error; + } + // Copy over the co_const tuple + for (int x = 0; x < PyTuple_GET_SIZE(co->co_consts); x++) { + PyList_SET_ITEM(co_const_copy, x, Py_NewRef(PyTuple_GET_ITEM(co->co_consts, x))); + } + int oparg; int opcode; - _Py_PARTITIONNODE_t *stack_pointer = ctx->stack_pointer; - _Py_PARTITIONNODE_t *locals = ctx->locals; + _Py_PARTITIONNODE_t *stack_pointer; + _Py_PARTITIONNODE_t *locals; + _Py_PARTITIONNODE_t *stack; + for (int i = 0; i < trace_len; i++) { + stack_pointer = ctx->stack_pointer; + stack = ctx->stack; + locals = ctx->locals; oparg = trace[i].oparg; opcode = trace[i].opcode; #ifdef PARTITION_DEBUG @@ -560,15 +626,69 @@ _Py_uop_analyze_and_optimize( opcode, oparg); #endif #endif + + // Partial evaluation - the partition nodes already gave us the static-dynamic variable split. + // For partial evaluation, we simply need to follow these rules: + // 1. Operations on dynamic variables need to be emitted. + // If an operand was previously partially evaluated and not yet emitted, then emit the residual with a LOAD_CONST. + // 2. Operations on static variables are a no-op as the abstract interpreter already analyzed their results. + + bool should_emit = false; + // For all stack inputs, are their variables static? + int num_inputs = _PyOpcode_num_popped(opcode, oparg, false); + int num_dynamic_operands = 0; + assert(num_inputs >= 0); + for (int x = num_inputs + 1; x > 0; x--) { + if (!partitionnode_is_static(PEEK(x))) { + should_emit = true; + num_dynamic_operands++; + } + } + int num_static_operands = num_inputs - num_dynamic_operands; + + // We need to also check if this operation is "pure". 
That it can accept + // constant nodes, output constant nodes, and does not cause any side effects. + should_emit = should_emit || !op_is_pure(opcode); + + + if (should_emit) { + if (num_static_operands > 0) { + for (int x = num_inputs + 1; x > 0; x--) { + // Re-materialise all virtual (partially-evaluated) constants + if (partitionnode_is_static(PEEK(x))) { + PyObject *const_val = get_const(PEEK(x)); + _PyUOpInstruction load_const; + load_const.opcode = LOAD_CONST; + load_const.oparg = (int)PyList_GET_SIZE(co_const_copy); + if (PyList_Append(co_const_copy, const_val) < 0) { + goto abstract_error; + } + + temp_writebuffer[buffer_trace_len] = load_const; + buffer_trace_len++; + + // INSERT to the correct position in the stack + int offset_from_target = num_dynamic_operands - x - 1; + assert(offset_from_target >= 0); + if (offset_from_target) { + _PyUOpInstruction insert; + insert.opcode = INSERT; + insert.oparg = offset_from_target; + + temp_writebuffer[buffer_trace_len] = insert; + buffer_trace_len++; + } + num_dynamic_operands++; + } + + } + } + temp_writebuffer[buffer_trace_len] = trace[i]; + buffer_trace_len++; + } /* * The following are special cased: - "LOAD_FAST", - "LOAD_FAST_CHECK", - "LOAD_FAST_AND_CLEAR", - "LOAD_CONST", - "STORE_FAST", - "STORE_FAST_MAYBE_NULL", - "COPY", + * @TODO: shift these to the DSL */ switch (opcode) { #include "abstract_interp_cases.c.h" @@ -606,6 +726,72 @@ _Py_uop_analyze_and_optimize( PARTITIONNODE_SET(bottom, PEEK(1), false); break; } + + // Arithmetic operations + + case _BINARY_OP_MULTIPLY_INT: { + if (!should_emit) { + PyObject *right; + PyObject *left; + PyObject *res; + right = get_const(&stack_pointer[-1]); + left = get_const(&stack_pointer[-2]); + STAT_INC(BINARY_OP, hit); + res = _PyLong_Multiply((PyLongObject *)left, (PyLongObject *)right); + if (res == NULL) goto abstract_error; + STACK_SHRINK(1); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)MAKE_STATIC_ROOT(res), PEEK(-(-1)), true); + break; + } + else { + 
STACK_SHRINK(1); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); + break; + } + + } + + case _BINARY_OP_ADD_INT: { + if (!should_emit) { + PyObject *right; + PyObject *left; + PyObject *res; + right = get_const(&stack_pointer[-1]); + left = get_const(&stack_pointer[-2]); + STAT_INC(BINARY_OP, hit); + res = _PyLong_Add((PyLongObject *)left, (PyLongObject *)right); + if (res == NULL) goto abstract_error; + STACK_SHRINK(1); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)MAKE_STATIC_ROOT(res), PEEK(-(-1)), true); + break; + } + else { + STACK_SHRINK(1); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); + break; + } + } + + case _BINARY_OP_SUBTRACT_INT: { + if (!should_emit) { + PyObject *right; + PyObject *left; + PyObject *res; + right = get_const(&stack_pointer[-1]); + left = get_const(&stack_pointer[-2]); + STAT_INC(BINARY_OP, hit); + res = _PyLong_Subtract((PyLongObject *)left, (PyLongObject *)right); + if (res == NULL) goto abstract_error; + STACK_SHRINK(1); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)MAKE_STATIC_ROOT(res), PEEK(-(-1)), true); + break; + } + else { + STACK_SHRINK(1); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); + break; + } + } default: fprintf(stderr, "Unknown opcode in abstract interpreter\n"); Py_UNREACHABLE(); @@ -613,9 +799,108 @@ _Py_uop_analyze_and_optimize( #if PARTITION_DEBUG print_ctx(ctx); #endif + ctx->stack_pointer = stack_pointer; + if (opcode == EXIT_TRACE) { + break; + } +// if (should_emit) { +// +// // Emit instruction +// temp_writebuffer[buffer_trace_len] = trace[i]; +// buffer_trace_len++; +// +// // Update the real abstract interpreter +// stack_pointer = ctx->real_stack_pointer; +// locals = ctx->real_locals; +// stack = ctx->real_stack; +// +// /* +// * The following are special cased: +// * @TODO: shift these to the DSL +// */ +// switch (opcode) { +//#include 
"abstract_interp_cases.c.h" +// // @TODO convert these to autogenerated using DSL +// case LOAD_FAST: +// case LOAD_FAST_CHECK: +// STACK_GROW(1); +// PARTITIONNODE_OVERWRITE(GETLOCAL(oparg), PEEK(1), false); +// break; +// case LOAD_FAST_AND_CLEAR: { +// STACK_GROW(1); +// PARTITIONNODE_OVERWRITE(GETLOCAL(oparg), PEEK(1), false); +// PARTITIONNODE_OVERWRITE(&PARTITIONNODE_NULLROOT, GETLOCAL(oparg), false); +// break; +// } +// case LOAD_CONST: { +// _Py_PARTITIONNODE_t value = MAKE_STATIC_ROOT(GETITEM(co->co_consts, oparg)); +// STACK_GROW(1); +// PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)value, PEEK(1), false); +// break; +// } +// case STORE_FAST: +// case STORE_FAST_MAYBE_NULL: { +// _Py_PARTITIONNODE_t *value = PEEK(1); +// PARTITIONNODE_OVERWRITE(value, GETLOCAL(oparg), false); +// STACK_SHRINK(1); +// break; +// } +// case COPY: { +// _Py_PARTITIONNODE_t *bottom = PEEK(1 + (oparg - 1)); +// STACK_GROW(1); +// PARTITIONNODE_SET(bottom, PEEK(1), false); +// break; +// } +// +// case _BINARY_OP_MULTIPLY_INT: { +// STACK_SHRINK(1); +// PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); +// break; +// +// } +// +// case _BINARY_OP_ADD_INT: { +// STACK_SHRINK(1); +// PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); +// break; +// } +// +// case _BINARY_OP_SUBTRACT_INT: { +// STACK_SHRINK(1); +// PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); +// break; +// } +// default: +// fprintf(stderr, "Unknown opcode in abstract interpreter\n"); +// Py_UNREACHABLE(); +// } +// +// ctx->real_stack_pointer = stack_pointer; +// } } assert(STACK_SIZE() >= 0); + assert(buffer_trace_len <= trace_len); Py_DECREF(ctx); + + PyObject *co_const_final = PyTuple_New(PyList_Size(co_const_copy)); + if (co_const_final == NULL) { + goto abstract_error; + } + // Copy over the co_const tuple + for (int x = 0; x < PyList_GET_SIZE(co_const_copy); x++) { + 
PyTuple_SET_ITEM(co_const_final, x, Py_NewRef(PyList_GET_ITEM(co_const_copy, x))); + } + + Py_SETREF(co->co_consts, co_const_final); + Py_XDECREF(co_const_copy); + memcpy(trace, temp_writebuffer, buffer_trace_len * sizeof(_PyUOpInstruction)); PyMem_Free(temp_writebuffer); + return buffer_trace_len; + +abstract_error: + Py_XDECREF(co_const_copy); + Py_DECREF(ctx); + assert(PyErr_Occurred()); + PyErr_Clear(); return trace_len; } diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 58da498f0d1d26..ca7f67ca1f6627 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -70,6 +70,12 @@ "STORE_FAST", "STORE_FAST_MAYBE_NULL", "COPY", + + # Arithmetic + "_BINARY_OP_MULTIPLY_INT", + "_BINARY_OP_ADD_INT", + "_BINARY_OP_SUBTRACT_INT", + } arg_parser = argparse.ArgumentParser( @@ -129,7 +135,7 @@ def effect_str(effects: list[StackEffect]) -> str: pushed: str | None match thing: case parsing.InstDef(): - if thing.kind != "op": + if thing.kind != "op" or (thing.kind != "inst" and self.instrs[thing.name].is_viable_uop()): instr = self.instrs[thing.name] popped = effect_str(instr.input_effects) pushed = effect_str(instr.output_effects) From 68c684febb7ce7b09e57145767025580bcdb9f49 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 8 Aug 2023 18:01:58 +0800 Subject: [PATCH 22/48] rename vars --- Python/optimizer_analysis.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 5134d5a0baaf3f..724f1de5e49491 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -450,7 +450,7 @@ print_ctx(_Py_UOpsAbstractInterpContext *ctx) _Py_PartitionRootNode *ptr = (_Py_PartitionRootNode *)partitionnode_clear_tag(*root); fprintf(stderr, "%s", - ptr == NULL ? "?" : (ptr->static_or_dyanmic == 0 ? "static" : "dynamic")); + ptr == NULL ? "?" 
: (ptr->static_or_dynamic == 0 ? "static" : "dynamic")); if (tag == TYPE_REF) { const char *wher = is_local @@ -486,7 +486,7 @@ print_ctx(_Py_UOpsAbstractInterpContext *ctx) _Py_PartitionRootNode *ptr = (_Py_PartitionRootNode *)partitionnode_clear_tag(*root); fprintf(stderr, "%s", - ptr == NULL ? "?" : (ptr->static_or_dyanmic == 0 ? "static" : "dynamic")); + ptr == NULL ? "?" : (ptr->static_or_dynamic == 0 ? "static" : "dynamic")); if (tag == TYPE_REF) { const char *wher = is_local From 46c577755be2835c1b3ab22273636c72d0380458 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 8 Aug 2023 18:42:10 +0800 Subject: [PATCH 23/48] fixx off by one --- Python/optimizer_analysis.c | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 724f1de5e49491..7a469e10bc0917 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -532,13 +532,26 @@ op_is_pure(int opcode) case _BINARY_OP_MULTIPLY_INT: case _BINARY_OP_ADD_INT: case _BINARY_OP_SUBTRACT_INT: - case SAVE_IP: - return true; default: return false; } } +static int +remove_duplicate_save_ips(_PyUOpInstruction *trace, int trace_len) +{ + return trace_len; +} + +/** + * Fixes all side exits due to jumps. This MUST be called as the last + * pass over the trace. Otherwise jumps will point to invalid ends. 
+*/ +static int +fix_jump_side_exits(_PyUOpInstruction *trace, int trace_len) +{ + return trace_len; +} #ifndef Py_DEBUG #define GETITEM(v, i) PyList_GET_ITEM((v), (i)) @@ -638,7 +651,7 @@ _Py_uop_analyze_and_optimize( int num_inputs = _PyOpcode_num_popped(opcode, oparg, false); int num_dynamic_operands = 0; assert(num_inputs >= 0); - for (int x = num_inputs + 1; x > 0; x--) { + for (int x = num_inputs; x > 0; x--) { if (!partitionnode_is_static(PEEK(x))) { should_emit = true; num_dynamic_operands++; @@ -646,6 +659,7 @@ _Py_uop_analyze_and_optimize( } int num_static_operands = num_inputs - num_dynamic_operands; + assert(num_static_operands >= 0); // We need to also check if this operation is "pure". That it can accept // constant nodes, output constant nodes, and does not cause any side effects. should_emit = should_emit || !op_is_pure(opcode); @@ -653,7 +667,7 @@ _Py_uop_analyze_and_optimize( if (should_emit) { if (num_static_operands > 0) { - for (int x = num_inputs + 1; x > 0; x--) { + for (int x = num_inputs; x > 0; x--) { // Re-materialise all virtual (partially-evaluated) constants if (partitionnode_is_static(PEEK(x))) { PyObject *const_val = get_const(PEEK(x)); @@ -705,7 +719,7 @@ _Py_uop_analyze_and_optimize( break; } case LOAD_CONST: { - _Py_PARTITIONNODE_t* value = (_Py_PARTITIONNODE_t *)MAKE_STATIC_ROOT(GETITEM(co->co_consts, oparg)); + _Py_PARTITIONNODE_t* value = (_Py_PARTITIONNODE_t *)MAKE_STATIC_ROOT(GETITEM(co_const_copy, oparg)); STACK_GROW(1); PARTITIONNODE_OVERWRITE(value, PEEK(1), true); #if PARTITION_DEBUG @@ -800,9 +814,6 @@ _Py_uop_analyze_and_optimize( print_ctx(ctx); #endif ctx->stack_pointer = stack_pointer; - if (opcode == EXIT_TRACE) { - break; - } // if (should_emit) { // // // Emit instruction @@ -880,6 +891,15 @@ _Py_uop_analyze_and_optimize( } assert(STACK_SIZE() >= 0); assert(buffer_trace_len <= trace_len); + + buffer_trace_len = remove_duplicate_save_ips(temp_writebuffer, buffer_trace_len); + buffer_trace_len = 
fix_jump_side_exits(temp_writebuffer, buffer_trace_len); + +#if PARTITION_DEBUG + if (buffer_trace_len < trace_len) { + fprintf(stderr, "Shortened trace by %d instructions\n", trace_len - buffer_trace_len); + } +#endif Py_DECREF(ctx); PyObject *co_const_final = PyTuple_New(PyList_Size(co_const_copy)); @@ -891,6 +911,7 @@ _Py_uop_analyze_and_optimize( PyTuple_SET_ITEM(co_const_final, x, Py_NewRef(PyList_GET_ITEM(co_const_copy, x))); } + Py_SETREF(co->co_consts, co_const_final); Py_XDECREF(co_const_copy); memcpy(trace, temp_writebuffer, buffer_trace_len * sizeof(_PyUOpInstruction)); From b839ee4e610f44c2d650360b85c1e7d1fc3da6d8 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 9 Aug 2023 23:31:33 +0800 Subject: [PATCH 24/48] partial eval working for real this time --- Include/internal/pycore_opcode_metadata.h | 142 ++++++++++++++++++++++ Include/internal/pycore_uops.h | 2 +- Lib/test/test_capi/test_misc.py | 25 ++++ Python/optimizer_analysis.c | 108 +++++++++++++--- 4 files changed, 258 insertions(+), 19 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 1cab6c984f3ace..b0285751a7ad80 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -55,6 +55,7 @@ #define _POP_JUMP_IF_FALSE 331 #define _POP_JUMP_IF_TRUE 332 #define JUMP_TO_TOP 333 +#define INSERT 334 #ifndef NEED_OPCODE_METADATA extern int _PyOpcode_num_popped(int opcode, int oparg, bool jump); @@ -120,18 +121,38 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 1; case UNARY_INVERT: return 1; + case _GUARD_BOTH_INT: + return 2; + case _BINARY_OP_MULTIPLY_INT: + return 2; + case _BINARY_OP_ADD_INT: + return 2; + case _BINARY_OP_SUBTRACT_INT: + return 2; case BINARY_OP_MULTIPLY_INT: return 2; case BINARY_OP_ADD_INT: return 2; case BINARY_OP_SUBTRACT_INT: return 2; + case _GUARD_BOTH_FLOAT: + return 2; + case 
_BINARY_OP_MULTIPLY_FLOAT: + return 2; + case _BINARY_OP_ADD_FLOAT: + return 2; + case _BINARY_OP_SUBTRACT_FLOAT: + return 2; case BINARY_OP_MULTIPLY_FLOAT: return 2; case BINARY_OP_ADD_FLOAT: return 2; case BINARY_OP_SUBTRACT_FLOAT: return 2; + case _GUARD_BOTH_UNICODE: + return 2; + case _BINARY_OP_ADD_UNICODE: + return 2; case BINARY_OP_ADD_UNICODE: return 2; case BINARY_OP_INPLACE_ADD_UNICODE: @@ -226,14 +247,26 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 1; case DELETE_GLOBAL: return 0; + case _LOAD_LOCALS: + return 0; case LOAD_LOCALS: return 0; + case _LOAD_FROM_DICT_OR_GLOBALS: + return 1; case LOAD_NAME: return 0; case LOAD_FROM_DICT_OR_GLOBALS: return 1; case LOAD_GLOBAL: return 0; + case _GUARD_GLOBALS_VERSION: + return 0; + case _GUARD_BUILTINS_VERSION: + return 0; + case _LOAD_GLOBAL_MODULE: + return 0; + case _LOAD_GLOBAL_BUILTINS: + return 0; case LOAD_GLOBAL_MODULE: return 0; case LOAD_GLOBAL_BUILTIN: @@ -294,6 +327,12 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 1; case LOAD_METHOD: return 1; + case _GUARD_TYPE_VERSION: + return 1; + case _CHECK_MANAGED_OBJECT_HAS_VALUES: + return 1; + case _LOAD_ATTR_INSTANCE_VALUE: + return 1; case LOAD_ATTR_INSTANCE_VALUE: return 1; case LOAD_ATTR_MODULE: @@ -348,6 +387,8 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 1; case POP_JUMP_IF_TRUE: return 1; + case IS_NONE: + return 1; case POP_JUMP_IF_NONE: return 1; case POP_JUMP_IF_NOT_NONE: @@ -372,10 +413,28 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 1; case INSTRUMENTED_FOR_ITER: return 0; + case _ITER_CHECK_LIST: + return 1; + case _IS_ITER_EXHAUSTED_LIST: + return 1; + case _ITER_NEXT_LIST: + return 1; case FOR_ITER_LIST: return 1; + case _ITER_CHECK_TUPLE: + return 1; + case _IS_ITER_EXHAUSTED_TUPLE: + return 1; + case _ITER_NEXT_TUPLE: + return 1; case FOR_ITER_TUPLE: return 1; + case _ITER_CHECK_RANGE: + return 1; + case _IS_ITER_EXHAUSTED_RANGE: + return 1; + case 
_ITER_NEXT_RANGE: + return 1; case FOR_ITER_RANGE: return 1; case FOR_ITER_GEN: @@ -494,6 +553,18 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 0; case RESERVED: return 0; + case _POP_JUMP_IF_FALSE: + return 1; + case _POP_JUMP_IF_TRUE: + return 1; + case JUMP_TO_TOP: + return 0; + case SAVE_IP: + return 0; + case EXIT_TRACE: + return 0; + case INSERT: + return 0; default: return -1; } @@ -564,18 +635,38 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 1; case UNARY_INVERT: return 1; + case _GUARD_BOTH_INT: + return 2; + case _BINARY_OP_MULTIPLY_INT: + return 1; + case _BINARY_OP_ADD_INT: + return 1; + case _BINARY_OP_SUBTRACT_INT: + return 1; case BINARY_OP_MULTIPLY_INT: return 1; case BINARY_OP_ADD_INT: return 1; case BINARY_OP_SUBTRACT_INT: return 1; + case _GUARD_BOTH_FLOAT: + return 2; + case _BINARY_OP_MULTIPLY_FLOAT: + return 1; + case _BINARY_OP_ADD_FLOAT: + return 1; + case _BINARY_OP_SUBTRACT_FLOAT: + return 1; case BINARY_OP_MULTIPLY_FLOAT: return 1; case BINARY_OP_ADD_FLOAT: return 1; case BINARY_OP_SUBTRACT_FLOAT: return 1; + case _GUARD_BOTH_UNICODE: + return 2; + case _BINARY_OP_ADD_UNICODE: + return 1; case BINARY_OP_ADD_UNICODE: return 1; case BINARY_OP_INPLACE_ADD_UNICODE: @@ -670,14 +761,26 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 0; case DELETE_GLOBAL: return 0; + case _LOAD_LOCALS: + return 1; case LOAD_LOCALS: return 1; + case _LOAD_FROM_DICT_OR_GLOBALS: + return 1; case LOAD_NAME: return 1; case LOAD_FROM_DICT_OR_GLOBALS: return 1; case LOAD_GLOBAL: return ((oparg & 1) ? 1 : 0) + 1; + case _GUARD_GLOBALS_VERSION: + return 0; + case _GUARD_BUILTINS_VERSION: + return 0; + case _LOAD_GLOBAL_MODULE: + return ((oparg & 1) ? 1 : 0) + 1; + case _LOAD_GLOBAL_BUILTINS: + return ((oparg & 1) ? 1 : 0) + 1; case LOAD_GLOBAL_MODULE: return (oparg & 1 ? 1 : 0) + 1; case LOAD_GLOBAL_BUILTIN: @@ -738,6 +841,12 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return ((oparg & 1) ? 
1 : 0) + 1; case LOAD_METHOD: return ((oparg & 1) ? 1 : 0) + 1; + case _GUARD_TYPE_VERSION: + return 1; + case _CHECK_MANAGED_OBJECT_HAS_VALUES: + return 1; + case _LOAD_ATTR_INSTANCE_VALUE: + return ((oparg & 1) ? 1 : 0) + 1; case LOAD_ATTR_INSTANCE_VALUE: return (oparg & 1 ? 1 : 0) + 1; case LOAD_ATTR_MODULE: @@ -792,6 +901,8 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 0; case POP_JUMP_IF_TRUE: return 0; + case IS_NONE: + return 1; case POP_JUMP_IF_NONE: return 0; case POP_JUMP_IF_NOT_NONE: @@ -816,10 +927,28 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 2; case INSTRUMENTED_FOR_ITER: return 0; + case _ITER_CHECK_LIST: + return 1; + case _IS_ITER_EXHAUSTED_LIST: + return 2; + case _ITER_NEXT_LIST: + return 2; case FOR_ITER_LIST: return 2; + case _ITER_CHECK_TUPLE: + return 1; + case _IS_ITER_EXHAUSTED_TUPLE: + return 2; + case _ITER_NEXT_TUPLE: + return 2; case FOR_ITER_TUPLE: return 2; + case _ITER_CHECK_RANGE: + return 1; + case _IS_ITER_EXHAUSTED_RANGE: + return 2; + case _ITER_NEXT_RANGE: + return 2; case FOR_ITER_RANGE: return 2; case FOR_ITER_GEN: @@ -938,6 +1067,18 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 0; case RESERVED: return 0; + case _POP_JUMP_IF_FALSE: + return 0; + case _POP_JUMP_IF_TRUE: + return 0; + case JUMP_TO_TOP: + return 0; + case SAVE_IP: + return 0; + case EXIT_TRACE: + return 0; + case INSERT: + return 0; default: return -1; } @@ -1386,5 +1527,6 @@ const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE] = { [_POP_JUMP_IF_FALSE] = "_POP_JUMP_IF_FALSE", [_POP_JUMP_IF_TRUE] = "_POP_JUMP_IF_TRUE", [JUMP_TO_TOP] = "JUMP_TO_TOP", + [INSERT] = "INSERT", }; #endif // NEED_OPCODE_METADATA diff --git a/Include/internal/pycore_uops.h b/Include/internal/pycore_uops.h index 57a5970353b360..59451bc7826698 100644 --- a/Include/internal/pycore_uops.h +++ b/Include/internal/pycore_uops.h @@ -8,7 +8,7 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif 
-#define _Py_UOP_MAX_TRACE_LENGTH 32 +#define _Py_UOP_MAX_TRACE_LENGTH 256 typedef struct { uint32_t opcode; diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index e7cdd4be002a14..26b0501d067a20 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -2707,5 +2707,30 @@ def testfunc(it): with self.assertRaises(StopIteration): next(it) + +class TestUopsOptimization(unittest.TestCase): + + def test_int_constant_propagation(self): + def testfunc(loops): + num = 0 + while num < loops: + x = 0 + y = 1 + z = 2 + a = x + y + z + x + y + z + x + y + z + num += 1 + return a + + opt = _testinternalcapi.get_uop_optimizer() + res = None + with temporary_optimizer(opt): + res = testfunc(3) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + self.assertEqual(res, 9) + binop_count = [opname == "_BINARY_OP_ADD_INT" for opname, _, _ in ex] + self.assertEqual(binop_count, 1) + if __name__ == "__main__": unittest.main() diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 7a469e10bc0917..a189555fbd7d21 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -102,6 +102,10 @@ typedef struct _Py_UOpsAbstractInterpContext { _Py_PARTITIONNODE_t *stack; int locals_len; _Py_PARTITIONNODE_t *locals; + + // Indicates whether the stack entry is real or virtualised. + // true - virtual false - real + bool *stack_virtual_or_real; // The following represent the real (emitted instructions) stack and locals. 
// points to one element after the abstract stack _Py_PARTITIONNODE_t *real_stack_pointer; @@ -156,6 +160,12 @@ _Py_UOpsAbstractInterpContext_New(int stack_len, int locals_len, int curr_stackl return NULL; } + bool *virtual_or_real = PyMem_New(bool, stack_len); + if (virtual_or_real == NULL) { + Py_DECREF(self); + PyMem_Free(locals_with_stack); + return NULL; + } for (int i = 0; i < (locals_len + stack_len) * 2; i++) { locals_with_stack[i] = PARTITIONNODE_NULLROOT; @@ -165,6 +175,8 @@ _Py_UOpsAbstractInterpContext_New(int stack_len, int locals_len, int curr_stackl self->stack = locals_with_stack + locals_len; self->stack_pointer = self->stack + curr_stacklen; + self->stack_virtual_or_real = virtual_or_real; + self->real_locals = self->locals + locals_len + stack_len; self->real_stack = self->stack + locals_len + stack_len; self->real_stack_pointer = self->stack_pointer + locals_len + stack_len; @@ -175,7 +187,7 @@ _Py_UOpsAbstractInterpContext_New(int stack_len, int locals_len, int curr_stackl static void print_ctx(_Py_UOpsAbstractInterpContext *ctx); #endif -static inline _Py_PARTITIONNODE_t * +static _Py_PARTITIONNODE_t * partitionnode_get_rootptr(_Py_PARTITIONNODE_t *ref) { _Py_TypeNodeTags tag = partitionnode_get_tag(*ref); @@ -461,6 +473,7 @@ print_ctx(_Py_UOpsAbstractInterpContext *ctx) fprintf(stderr, "->%s[%d]", wher, parent_idx); } + fprintf(stderr, " | "); } fprintf(stderr, "]\n"); @@ -497,6 +510,7 @@ print_ctx(_Py_UOpsAbstractInterpContext *ctx) fprintf(stderr, "->%s[%d]", wher, parent_idx); } + fprintf(stderr, " | "); } fprintf(stderr, "]\n"); } @@ -506,11 +520,11 @@ static bool partitionnode_is_static(_Py_PARTITIONNODE_t *node) { _Py_PARTITIONNODE_t *root = partitionnode_get_rootptr(node); - _Py_PartitionRootNode *root_obj = (_Py_PartitionRootNode *)partitionnode_clear_tag((_Py_PARTITIONNODE_t)root); + _Py_PartitionRootNode *root_obj = (_Py_PartitionRootNode *)partitionnode_clear_tag(*root); if (root_obj == _Py_NULL) { return false; } - return 
!root_obj->static_or_dynamic; + return root_obj->static_or_dynamic == 0; } // MUST BE GUARDED BY partitionnode_is_static BEFORE CALLING THIS @@ -519,28 +533,54 @@ get_const(_Py_PARTITIONNODE_t *node) { assert(partitionnode_is_static(node)); _Py_PARTITIONNODE_t *root = partitionnode_get_rootptr(node); - _Py_PartitionRootNode *root_obj = (_Py_PartitionRootNode * )partitionnode_clear_tag((_Py_PARTITIONNODE_t)root); + _Py_PartitionRootNode *root_obj = (_Py_PartitionRootNode * )partitionnode_clear_tag(*root); return root_obj->const_val; } // Hardcoded for now, @TODO autogenerate these from the DSL. static inline bool -op_is_pure(int opcode) +op_is_pure(int opcode, int oparg, _Py_PARTITIONNODE_t *locals) { switch (opcode) { case LOAD_CONST: case _BINARY_OP_MULTIPLY_INT: case _BINARY_OP_ADD_INT: case _BINARY_OP_SUBTRACT_INT: + case _GUARD_BOTH_INT: + return true; + case LOAD_FAST: + return partitionnode_is_static(&locals[oparg]) && get_const(&locals[oparg]) != _Py_NULL; default: return false; } } +// Remove contiguous SAVE_IPs, leaving only the last one before a non-SAVE_IP instruction. 
static int remove_duplicate_save_ips(_PyUOpInstruction *trace, int trace_len) { - return trace_len; + _PyUOpInstruction *temp_trace = PyMem_New(_PyUOpInstruction, trace_len); + if (temp_trace == NULL) { + return trace_len; + } + int temp_trace_len = 0; + + _PyUOpInstruction curr; + for (int i = 0; i < trace_len; i++) { + curr = trace[i]; + if (i < trace_len && curr.opcode == SAVE_IP && trace[i+1].opcode == SAVE_IP) { + continue; + } + temp_trace[temp_trace_len] = curr; + temp_trace_len++; + } + memcpy(trace, temp_trace, temp_trace_len * sizeof(_PyUOpInstruction)); + PyMem_Free(temp_trace); + +#if PARTITION_DEBUG + fprintf(stderr, "Removed %d SAVE_IPs\n", trace_len - temp_trace_len); +#endif + return temp_trace_len; } /** @@ -550,6 +590,15 @@ remove_duplicate_save_ips(_PyUOpInstruction *trace, int trace_len) static int fix_jump_side_exits(_PyUOpInstruction *trace, int trace_len) { + for (int i = 0; i < trace_len; i++) { + int oparg = trace[i].oparg; + int opcode = trace[i].opcode; + switch (opcode) { + case _POP_JUMP_IF_TRUE: + case _POP_JUMP_IF_FALSE: + trace[i].oparg = trace_len - 2; + } + } return trace_len; } @@ -625,6 +674,7 @@ _Py_uop_analyze_and_optimize( _Py_PARTITIONNODE_t *stack_pointer; _Py_PARTITIONNODE_t *locals; _Py_PARTITIONNODE_t *stack; + bool *stack_virtual_or_real = ctx->stack_virtual_or_real; for (int i = 0; i < trace_len; i++) { stack_pointer = ctx->stack_pointer; @@ -646,30 +696,40 @@ _Py_uop_analyze_and_optimize( // If an operand was previously partially evaluated and not yet emitted, then emit the residual with a LOAD_CONST. // 2. Operations on static variables are a no-op as the abstract interpreter already analyzed their results. - bool should_emit = false; // For all stack inputs, are their variables static? int num_inputs = _PyOpcode_num_popped(opcode, oparg, false); int num_dynamic_operands = 0; + + // We need to also check if this operation is "pure". 
That it can accept + // constant nodes, output constant nodes, and does not cause any side effects. + bool should_emit = !op_is_pure(opcode, oparg, locals); + + int virtual_objects = 0; assert(num_inputs >= 0); for (int x = num_inputs; x > 0; x--) { if (!partitionnode_is_static(PEEK(x))) { should_emit = true; num_dynamic_operands++; } + if (stack_virtual_or_real[STACK_LEVEL() - num_inputs]) { + virtual_objects++; + } } + int num_static_operands = num_inputs - num_dynamic_operands; assert(num_static_operands >= 0); - // We need to also check if this operation is "pure". That it can accept - // constant nodes, output constant nodes, and does not cause any side effects. - should_emit = should_emit || !op_is_pure(opcode); if (should_emit) { if (num_static_operands > 0) { + int real_stack_size = num_dynamic_operands; + int virtual_stack_size = (int)(ctx->stack_pointer - ctx->stack); + assert(virtual_stack_size >= real_stack_size); for (int x = num_inputs; x > 0; x--) { // Re-materialise all virtual (partially-evaluated) constants - if (partitionnode_is_static(PEEK(x))) { + if (partitionnode_is_static(PEEK(x)) && stack_virtual_or_real[STACK_LEVEL() - x]) { + stack_virtual_or_real[STACK_LEVEL() - x] = false; PyObject *const_val = get_const(PEEK(x)); _PyUOpInstruction load_const; load_const.opcode = LOAD_CONST; @@ -678,17 +738,23 @@ _Py_uop_analyze_and_optimize( goto abstract_error; } +#if PARTITION_DEBUG + fprintf(stderr, "Emitting LOAD_CONST\n"); +#endif temp_writebuffer[buffer_trace_len] = load_const; buffer_trace_len++; + // INSERT to the correct position in the stack - int offset_from_target = num_dynamic_operands - x - 1; - assert(offset_from_target >= 0); - if (offset_from_target) { + int offset_from_target = x - num_dynamic_operands - 1; + if (offset_from_target > 0) { _PyUOpInstruction insert; insert.opcode = INSERT; - insert.oparg = offset_from_target; + insert.oparg = -offset_from_target; +#if PARTITION_DEBUG + fprintf(stderr, "Emitting INSERT %d\n", 
offset_from_target); +#endif temp_writebuffer[buffer_trace_len] = insert; buffer_trace_len++; } @@ -697,6 +763,9 @@ _Py_uop_analyze_and_optimize( } } +#if PARTITION_DEBUG + fprintf(stderr, "Emitting %s\n", (opcode >= 300 ? _PyOpcode_uop_name : _PyOpcode_OpName)[opcode]); +#endif temp_writebuffer[buffer_trace_len] = trace[i]; buffer_trace_len++; } @@ -722,9 +791,6 @@ _Py_uop_analyze_and_optimize( _Py_PARTITIONNODE_t* value = (_Py_PARTITIONNODE_t *)MAKE_STATIC_ROOT(GETITEM(co_const_copy, oparg)); STACK_GROW(1); PARTITIONNODE_OVERWRITE(value, PEEK(1), true); -#if PARTITION_DEBUG - print_ctx(ctx); -#endif break; } case STORE_FAST: @@ -814,6 +880,12 @@ _Py_uop_analyze_and_optimize( print_ctx(ctx); #endif ctx->stack_pointer = stack_pointer; + + // Mark all stack outputs as virtual or real + int stack_outputs = _PyOpcode_num_pushed(opcode, oparg, false); + for (int y = stack_outputs; y > 0; y--) { + stack_virtual_or_real[STACK_LEVEL() - y] = !should_emit; + } // if (should_emit) { // // // Emit instruction From 6ecf3d253de44bb70833c2b274aac0968d36909f Mon Sep 17 00:00:00 2001 From: Jules <57632293+JuliaPoo@users.noreply.github.com> Date: Thu, 10 Aug 2023 02:16:31 +0800 Subject: [PATCH 25/48] Fix: Inconsistent `AbstractInterpContext` used in `PARTITIONNODE_OVERWRITE` and mis-port of `PARTITIONNODE_OVERWRITE` (#41) * Fix: Inconsistent AbstractInterpContext used in PARTITIONNODE_OVERWRITE and typo in PARTITIONNODE_OVERWRITE * Style: Removed whitespace --- Python/optimizer_analysis.c | 60 ++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 28 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index a189555fbd7d21..f810b160527f3f 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -387,7 +387,7 @@ partitionnode_overwrite(_Py_UOpsAbstractInterpContext *ctx, // Children of dst will have this form _Py_PARTITIONNODE_t child_test = partitionnode_make_ref( - (_Py_PARTITIONNODE_t 
*)partitionnode_clear_tag(*dst)); + (_Py_PARTITIONNODE_t *)partitionnode_clear_tag((_Py_PARTITIONNODE_t)dst)); // Search locals for children int nlocals = ctx->locals_len; @@ -461,8 +461,11 @@ print_ctx(_Py_UOpsAbstractInterpContext *ctx) _Py_PartitionRootNode *ptr = (_Py_PartitionRootNode *)partitionnode_clear_tag(*root); - fprintf(stderr, "%s", + fprintf(stderr, "%s:", ptr == NULL ? "?" : (ptr->static_or_dynamic == 0 ? "static" : "dynamic")); + if (ptr != NULL && ptr->static_or_dynamic == 0) { + PyObject_Print(ptr->const_val, stderr, 0); + } if (tag == TYPE_REF) { const char *wher = is_local @@ -498,8 +501,11 @@ print_ctx(_Py_UOpsAbstractInterpContext *ctx) } _Py_PartitionRootNode *ptr = (_Py_PartitionRootNode *)partitionnode_clear_tag(*root); - fprintf(stderr, "%s", + fprintf(stderr, "%s:", ptr == NULL ? "?" : (ptr->static_or_dynamic == 0 ? "static" : "dynamic")); + if (ptr != NULL && ptr->static_or_dynamic == 0) { + PyObject_Print(ptr->const_val, stderr, 0); + } if (tag == TYPE_REF) { const char *wher = is_local @@ -622,9 +628,9 @@ _Py_uop_analyze_and_optimize( int curr_stacklen ) { -#define STACK_LEVEL() ((int)(stack_pointer - stack)) +#define STACK_LEVEL() ((int)(*stack_pointer - stack)) #define STACK_SIZE() (co->co_stacksize) -#define BASIC_STACKADJ(n) (stack_pointer += n) +#define BASIC_STACKADJ(n) (*stack_pointer += n) #ifdef Py_DEBUG #define STACK_GROW(n) do { \ @@ -641,7 +647,7 @@ _Py_uop_analyze_and_optimize( #define STACK_GROW(n) BASIC_STACKADJ(n) #define STACK_SHRINK(n) BASIC_STACKADJ(-(n)) #endif -#define PEEK(idx) (&(stack_pointer[-(idx)])) +#define PEEK(idx) (&((*stack_pointer)[-(idx)])) #define GETLOCAL(idx) (&(locals[idx])) #define PARTITIONNODE_SET(src, dst, flag) partitionnode_set((src), (dst), (flag)) @@ -671,24 +677,14 @@ _Py_uop_analyze_and_optimize( int oparg; int opcode; - _Py_PARTITIONNODE_t *stack_pointer; - _Py_PARTITIONNODE_t *locals; - _Py_PARTITIONNODE_t *stack; bool *stack_virtual_or_real = ctx->stack_virtual_or_real; + 
_Py_PARTITIONNODE_t **stack_pointer = &ctx->stack_pointer; + _Py_PARTITIONNODE_t *stack = ctx->stack; + _Py_PARTITIONNODE_t *locals = ctx->locals; for (int i = 0; i < trace_len; i++) { - stack_pointer = ctx->stack_pointer; - stack = ctx->stack; - locals = ctx->locals; oparg = trace[i].oparg; opcode = trace[i].opcode; -#ifdef PARTITION_DEBUG -#ifdef Py_DEBUG - fprintf(stderr, " [-] Type propagating across: %s{%d} : %d\n", - (opcode >= 300 ? _PyOpcode_uop_name : _PyOpcode_OpName)[opcode], - opcode, oparg); -#endif -#endif // Partial evaluation - the partition nodes already gave us the static-dynamic variable split. // For partial evaluation, we simply need to follow these rules: @@ -773,6 +769,14 @@ _Py_uop_analyze_and_optimize( * The following are special cased: * @TODO: shift these to the DSL */ + +#ifdef PARTITION_DEBUG +#ifdef Py_DEBUG + fprintf(stderr, " [-] Type propagating across: %s{%d} : %d\n", + (opcode >= 300 ? _PyOpcode_uop_name : _PyOpcode_OpName)[opcode], + opcode, oparg); +#endif +#endif switch (opcode) { #include "abstract_interp_cases.c.h" // @TODO convert these to autogenerated using DSL @@ -803,7 +807,7 @@ _Py_uop_analyze_and_optimize( case COPY: { _Py_PARTITIONNODE_t *bottom = PEEK(1 + (oparg - 1)); STACK_GROW(1); - PARTITIONNODE_SET(bottom, PEEK(1), false); + PARTITIONNODE_OVERWRITE(bottom, PEEK(1), false); break; } @@ -814,8 +818,8 @@ _Py_uop_analyze_and_optimize( PyObject *right; PyObject *left; PyObject *res; - right = get_const(&stack_pointer[-1]); - left = get_const(&stack_pointer[-2]); + right = get_const(PEEK(1)); + left = get_const(PEEK(2)); STAT_INC(BINARY_OP, hit); res = _PyLong_Multiply((PyLongObject *)left, (PyLongObject *)right); if (res == NULL) goto abstract_error; @@ -836,8 +840,8 @@ _Py_uop_analyze_and_optimize( PyObject *right; PyObject *left; PyObject *res; - right = get_const(&stack_pointer[-1]); - left = get_const(&stack_pointer[-2]); + right = get_const(PEEK(1)); + left = get_const(PEEK(2)); STAT_INC(BINARY_OP, hit); res = 
_PyLong_Add((PyLongObject *)left, (PyLongObject *)right); if (res == NULL) goto abstract_error; @@ -857,8 +861,8 @@ _Py_uop_analyze_and_optimize( PyObject *right; PyObject *left; PyObject *res; - right = get_const(&stack_pointer[-1]); - left = get_const(&stack_pointer[-2]); + right = get_const(PEEK(1)); + left = get_const(PEEK(2)); STAT_INC(BINARY_OP, hit); res = _PyLong_Subtract((PyLongObject *)left, (PyLongObject *)right); if (res == NULL) goto abstract_error; @@ -876,10 +880,10 @@ _Py_uop_analyze_and_optimize( fprintf(stderr, "Unknown opcode in abstract interpreter\n"); Py_UNREACHABLE(); } + #if PARTITION_DEBUG print_ctx(ctx); #endif - ctx->stack_pointer = stack_pointer; // Mark all stack outputs as virtual or real int stack_outputs = _PyOpcode_num_pushed(opcode, oparg, false); @@ -983,7 +987,7 @@ _Py_uop_analyze_and_optimize( PyTuple_SET_ITEM(co_const_final, x, Py_NewRef(PyList_GET_ITEM(co_const_copy, x))); } - + Py_SETREF(co->co_consts, co_const_final); Py_XDECREF(co_const_copy); memcpy(trace, temp_writebuffer, buffer_trace_len * sizeof(_PyUOpInstruction)); From b6eeb25d11717c46c87516b58a10538a3af4bc95 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Thu, 10 Aug 2023 02:52:28 +0800 Subject: [PATCH 26/48] fix test, refactor, bugfix --- Lib/test/test_capi/test_misc.py | 4 +- Python/optimizer_analysis.c | 220 ++++++++++---------------------- 2 files changed, 69 insertions(+), 155 deletions(-) diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index 26b0501d067a20..e4ecb8128eea1b 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -2729,8 +2729,8 @@ def testfunc(loops): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) self.assertEqual(res, 9) - binop_count = [opname == "_BINARY_OP_ADD_INT" for opname, _, _ in ex] - self.assertEqual(binop_count, 1) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + 
self.assertEqual(len(binop_count), 1) if __name__ == "__main__": unittest.main() diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index f810b160527f3f..0baf527f767dd9 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -15,6 +15,9 @@ #define PARTITION_DEBUG 1 +#define STATIC 0 +#define DYNAMIC 1 + // TYPENODE is a tagged pointer that uses the last 2 LSB as the tag #define _Py_PARTITIONNODE_t uintptr_t @@ -417,6 +420,57 @@ partitionnode_overwrite(_Py_UOpsAbstractInterpContext *ctx, #if PARTITION_DEBUG +void +print_ctx_node(_Py_UOpsAbstractInterpContext *ctx, int i, bool is_printing_stack, int nstack_use, int nstack) +{ + bool is_local = false; + bool is_stack = false; + + int locals_offset = -1; + int stack_offset = -1; + int parent_idx = -1; + + _Py_PARTITIONNODE_t *node = is_printing_stack ? &ctx->stack[i] : &ctx->locals[i]; + _Py_PARTITIONNODE_t tag = partitionnode_get_tag(*node); + + _Py_PARTITIONNODE_t *root = partitionnode_get_rootptr(node); + + if (is_printing_stack) { + fprintf(stderr, "%s", i == nstack_use - 1 ? "." : " "); + } + + if (tag == TYPE_REF) { + _Py_PARTITIONNODE_t *parent = (_Py_PARTITIONNODE_t *)(partitionnode_clear_tag(*node)); + int local_index = (int)(parent - ctx->locals); + int stack_index = (int)(parent - ctx->stack); + is_local = local_index >= 0 && local_index < ctx->locals_len; + is_stack = stack_index >= 0 && stack_index < nstack; + parent_idx = is_local + ? local_index + : is_stack + ? stack_index + : -1; + } + + + _Py_PartitionRootNode *ptr = (_Py_PartitionRootNode *)partitionnode_clear_tag(*root); + fprintf(stderr, "%s:", + ptr == NULL ? "?" : (ptr->static_or_dynamic == STATIC ? "static" : "dynamic")); + if (ptr != NULL && ptr->static_or_dynamic == STATIC) { + PyObject_Print(ptr->const_val, stderr, 0); + } + + if (tag == TYPE_REF) { + const char *wher = is_local + ? "locals" + : is_stack + ? 
"stack" + : "const"; + fprintf(stderr, "->%s[%d]", + wher, parent_idx); + } +} + /** * @brief Print the entries in the abstract interpreter context (along with locals). */ @@ -430,92 +484,16 @@ print_ctx(_Py_UOpsAbstractInterpContext *ctx) int nstack = ctx->stack_len; int nlocals = ctx->locals_len; - bool is_local = false; - bool is_stack = false; - - int locals_offset = -1; - int stack_offset = -1; - int parent_idx = -1; - fprintf(stderr, " Stack: %p: [", ctx->stack); for (int i = 0; i < nstack; i++) { - _Py_PARTITIONNODE_t *node = &ctx->stack[i]; - _Py_PARTITIONNODE_t tag = partitionnode_get_tag(*node); - - _Py_PARTITIONNODE_t *root = partitionnode_get_rootptr(node); - - fprintf(stderr, "%s", i == nstack_use ? "." : " "); - - if (tag == TYPE_REF) { - _Py_PARTITIONNODE_t *parent = (_Py_PARTITIONNODE_t *)(partitionnode_clear_tag(*node)); - int local_index = (int)(parent - ctx->locals); - int stack_index = (int)(parent - ctx->stack); - is_local = local_index >= 0 && local_index < ctx->locals_len; - is_stack = stack_index >= 0 && stack_index < nstack; - parent_idx = is_local - ? local_index - : is_stack - ? stack_index - : -1; - } - - - _Py_PartitionRootNode *ptr = (_Py_PartitionRootNode *)partitionnode_clear_tag(*root); - fprintf(stderr, "%s:", - ptr == NULL ? "?" : (ptr->static_or_dynamic == 0 ? "static" : "dynamic")); - if (ptr != NULL && ptr->static_or_dynamic == 0) { - PyObject_Print(ptr->const_val, stderr, 0); - } - - if (tag == TYPE_REF) { - const char *wher = is_local - ? "locals" - : is_stack - ? 
"stack" - : "const"; - fprintf(stderr, "->%s[%d]", - wher, parent_idx); - } + print_ctx_node(ctx, i, true, nstack_use, nstack); fprintf(stderr, " | "); } fprintf(stderr, "]\n"); fprintf(stderr, " Locals %p: [", locals); for (int i = 0; i < nlocals; i++) { - _Py_PARTITIONNODE_t *node = &ctx->locals[i]; - _Py_PARTITIONNODE_t tag = partitionnode_get_tag(*node); - - _Py_PARTITIONNODE_t *root = partitionnode_get_rootptr(node); - - if (tag == TYPE_REF) { - _Py_PARTITIONNODE_t *parent = (_Py_PARTITIONNODE_t *)(partitionnode_clear_tag(*node)); - int local_index = (int)(parent - ctx->locals); - int stack_index = (int)(parent - ctx->stack); - is_local = local_index >= 0 && local_index < ctx->locals_len; - is_stack = stack_index >= 0 && stack_index < nstack; - parent_idx = is_local - ? local_index - : is_stack - ? stack_index - : -1; - } - - _Py_PartitionRootNode *ptr = (_Py_PartitionRootNode *)partitionnode_clear_tag(*root); - fprintf(stderr, "%s:", - ptr == NULL ? "?" : (ptr->static_or_dynamic == 0 ? "static" : "dynamic")); - if (ptr != NULL && ptr->static_or_dynamic == 0) { - PyObject_Print(ptr->const_val, stderr, 0); - } - - if (tag == TYPE_REF) { - const char *wher = is_local - ? "locals" - : is_stack - ? 
"stack" - : "const"; - fprintf(stderr, "->%s[%d]", - wher, parent_idx); - } + print_ctx_node(ctx, i, false, nstack_use, nstack); fprintf(stderr, " | "); } fprintf(stderr, "]\n"); @@ -530,7 +508,7 @@ partitionnode_is_static(_Py_PARTITIONNODE_t *node) if (root_obj == _Py_NULL) { return false; } - return root_obj->static_or_dynamic == 0; + return root_obj->static_or_dynamic == STATIC; } // MUST BE GUARDED BY partitionnode_is_static BEFORE CALLING THIS @@ -754,6 +732,16 @@ _Py_uop_analyze_and_optimize( temp_writebuffer[buffer_trace_len] = insert; buffer_trace_len++; } +#if PARTITION_DEBUG + fprintf(stderr, "Emitting SAVE_IP\n"); +#endif + // Use the next SAVE_IP + int temp = i; + for (; trace[temp].opcode != SAVE_IP && temp < trace_len; temp++) { + } + assert(trace[temp].opcode == SAVE_IP); + temp_writebuffer[buffer_trace_len] = trace[temp]; + buffer_trace_len++; num_dynamic_operands++; } @@ -890,80 +878,6 @@ _Py_uop_analyze_and_optimize( for (int y = stack_outputs; y > 0; y--) { stack_virtual_or_real[STACK_LEVEL() - y] = !should_emit; } -// if (should_emit) { -// -// // Emit instruction -// temp_writebuffer[buffer_trace_len] = trace[i]; -// buffer_trace_len++; -// -// // Update the real abstract interpreter -// stack_pointer = ctx->real_stack_pointer; -// locals = ctx->real_locals; -// stack = ctx->real_stack; -// -// /* -// * The following are special cased: -// * @TODO: shift these to the DSL -// */ -// switch (opcode) { -//#include "abstract_interp_cases.c.h" -// // @TODO convert these to autogenerated using DSL -// case LOAD_FAST: -// case LOAD_FAST_CHECK: -// STACK_GROW(1); -// PARTITIONNODE_OVERWRITE(GETLOCAL(oparg), PEEK(1), false); -// break; -// case LOAD_FAST_AND_CLEAR: { -// STACK_GROW(1); -// PARTITIONNODE_OVERWRITE(GETLOCAL(oparg), PEEK(1), false); -// PARTITIONNODE_OVERWRITE(&PARTITIONNODE_NULLROOT, GETLOCAL(oparg), false); -// break; -// } -// case LOAD_CONST: { -// _Py_PARTITIONNODE_t value = MAKE_STATIC_ROOT(GETITEM(co->co_consts, oparg)); -// 
STACK_GROW(1); -// PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)value, PEEK(1), false); -// break; -// } -// case STORE_FAST: -// case STORE_FAST_MAYBE_NULL: { -// _Py_PARTITIONNODE_t *value = PEEK(1); -// PARTITIONNODE_OVERWRITE(value, GETLOCAL(oparg), false); -// STACK_SHRINK(1); -// break; -// } -// case COPY: { -// _Py_PARTITIONNODE_t *bottom = PEEK(1 + (oparg - 1)); -// STACK_GROW(1); -// PARTITIONNODE_SET(bottom, PEEK(1), false); -// break; -// } -// -// case _BINARY_OP_MULTIPLY_INT: { -// STACK_SHRINK(1); -// PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); -// break; -// -// } -// -// case _BINARY_OP_ADD_INT: { -// STACK_SHRINK(1); -// PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); -// break; -// } -// -// case _BINARY_OP_SUBTRACT_INT: { -// STACK_SHRINK(1); -// PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); -// break; -// } -// default: -// fprintf(stderr, "Unknown opcode in abstract interpreter\n"); -// Py_UNREACHABLE(); -// } -// -// ctx->real_stack_pointer = stack_pointer; -// } } assert(STACK_SIZE() >= 0); assert(buffer_trace_len <= trace_len); From d5cceb999a88f0695c54d35b533fc7de7f84b1a8 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Thu, 10 Aug 2023 21:20:34 +0800 Subject: [PATCH 27/48] re-compute jump offsets and targets --- Include/internal/pycore_uops.h | 4 +- Python/optimizer_analysis.c | 149 ++++++++++++++++++++++++++++++--- 2 files changed, 141 insertions(+), 12 deletions(-) diff --git a/Include/internal/pycore_uops.h b/Include/internal/pycore_uops.h index 59451bc7826698..d351f3353a5c6f 100644 --- a/Include/internal/pycore_uops.h +++ b/Include/internal/pycore_uops.h @@ -11,8 +11,8 @@ extern "C" { #define _Py_UOP_MAX_TRACE_LENGTH 256 typedef struct { - uint32_t opcode; - uint32_t oparg; + int32_t opcode; + int32_t oparg; uint64_t operand; // A cache entry } 
_PyUOpInstruction; diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 0baf527f767dd9..2c16b12f62b80f 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -539,6 +539,72 @@ op_is_pure(int opcode, int oparg, _Py_PARTITIONNODE_t *locals) } } +static inline bool +op_is_jump(int opcode) +{ + return (opcode == _POP_JUMP_IF_FALSE || opcode == _POP_JUMP_IF_TRUE); +} + + +// Number the jump targets and the jump instructions with a unique (negative) ID. +// This replaces the instruction's opcode in the trace with their negative IDs. +// Aids relocation later when we need to recompute jumps after optimization passes. +static _PyUOpInstruction * +number_jumps_and_targets(_PyUOpInstruction *trace, int trace_len, int *max_id) +{ + int jump_and_target_count = 0; + int jump_and_target_id = -1; + for (int i = 0; i < trace_len; i++) { + if (op_is_jump(trace[i].opcode)) { + // 1 for the jump, 1 for its target + jump_and_target_count += 2; + } + } + + // +1 because 1-based indexing not zero based + _PyUOpInstruction *jump_id_to_instruction = PyMem_New(_PyUOpInstruction, jump_and_target_count + 1); + if (jump_id_to_instruction == NULL) { + return NULL; + } + + + for (int i = 0; i < trace_len; i++) { + if (op_is_jump(trace[i].opcode)) { + int target = trace[i].oparg; + int target_id = jump_and_target_id; + + // 1 for the jump target + assert(jump_and_target_id < 0); + // Negative opcode! + assert(trace[target].opcode > 0); + // Already assigned a jump ID + if (trace[target].opcode < 0) { + target_id = trace[target].opcode; + } + else { + // Else, assign a new jump ID. 
+ jump_id_to_instruction[-target_id] = trace[target]; + trace[target].opcode = target_id; + jump_and_target_id--; + fprintf(stderr, "op %d oparg %d\n", jump_id_to_instruction[-target_id].opcode, jump_id_to_instruction[-target_id].oparg); + } + + // 1 for the jump + assert(jump_and_target_id < 0); + jump_id_to_instruction[-jump_and_target_id] = trace[i]; + // Negative opcode! + assert(trace[i].opcode >= 0); + trace[i].opcode = jump_and_target_id; + jump_and_target_id--; + // Point the jump to the target ID. + trace[i].oparg = target_id; + + } + } + *max_id = jump_and_target_id; + return jump_id_to_instruction; +} + // Remove contiguous SAVE_IPs, leaving only the last one before a non-SAVE_IP instruction. static int remove_duplicate_save_ips(_PyUOpInstruction *trace, int trace_len) @@ -570,20 +636,51 @@ remove_duplicate_save_ips(_PyUOpInstruction *trace, int trace_len) /** * Fixes all side exits due to jumps. This MUST be called as the last * pass over the trace. Otherwise jumps will point to invalid ends. + * + * Runtime complexity of O(n*k), where n is trace length and k is number of jump + * instructions. Since k is usually quite low, this is nearly linear. */ -static int -fix_jump_side_exits(_PyUOpInstruction *trace, int trace_len) +static void +fix_jump_side_exits(_PyUOpInstruction *trace, int trace_len, + _PyUOpInstruction *jump_id_to_instruction, int max_jump_id) { for (int i = 0; i < trace_len; i++) { int oparg = trace[i].oparg; int opcode = trace[i].opcode; - switch (opcode) { - case _POP_JUMP_IF_TRUE: - case _POP_JUMP_IF_FALSE: - trace[i].oparg = trace_len - 2; + // Indicates it's a jump target or jump instruction + if (opcode < 0 && opcode > max_jump_id) { + opcode = -opcode; + int real_oparg = jump_id_to_instruction[opcode].oparg; + int real_opcode = jump_id_to_instruction[opcode].opcode; + if (op_is_jump(real_opcode)) { + trace[i].opcode = real_opcode; + + // Search for our target ID. 
+ int target_id = oparg; + for (int x = 0; x < trace_len; x++) { + if (trace[x].opcode == target_id) { + trace[i].oparg = x; + break; + } + } + + assert(trace[i].oparg >= 0); + } + } + } + + // Final pass to swap out all the jump target IDs with their actual targets. + for (int i = 0; i < trace_len; i++) { + int oparg = trace[i].oparg; + int opcode = trace[i].opcode; + // Indicates it's a jump target or jump instruction + if (opcode < 0 && opcode > max_jump_id) { + int real_oparg = jump_id_to_instruction[-opcode].oparg; + int real_opcode = jump_id_to_instruction[-opcode].opcode; + trace[i].oparg = real_oparg; + trace[i].opcode = real_opcode; } } - return trace_len; } #ifndef Py_DEBUG @@ -631,6 +728,9 @@ _Py_uop_analyze_and_optimize( #define PARTITIONNODE_SET(src, dst, flag) partitionnode_set((src), (dst), (flag)) #define PARTITIONNODE_OVERWRITE(src, dst, flag) partitionnode_overwrite(ctx, (src), (dst), (flag)) #define MAKE_STATIC_ROOT(val) partitionnode_make_root(0, (val)) + PyObject *co_const_copy = NULL; + _PyUOpInstruction *jump_id_to_instruction = NULL; + _PyUOpInstruction *temp_writebuffer = PyMem_New(_PyUOpInstruction, trace_len); if (temp_writebuffer == NULL) { return trace_len; @@ -644,7 +744,14 @@ _Py_uop_analyze_and_optimize( return trace_len; } - PyObject *co_const_copy = PyList_New(PyTuple_Size(co->co_consts)); + int max_jump_id = 0; + jump_id_to_instruction = number_jumps_and_targets(trace, trace_len, &max_jump_id); + if (jump_id_to_instruction == NULL) { + goto abstract_error; + } + + + co_const_copy = PyList_New(PyTuple_Size(co->co_consts)); if (co_const_copy == NULL) { goto abstract_error; } @@ -664,6 +771,15 @@ _Py_uop_analyze_and_optimize( oparg = trace[i].oparg; opcode = trace[i].opcode; + // Is a special jump/target ID, decode that + if (opcode < 0 && opcode > max_jump_id) { +#if PARTITION_DEBUG + fprintf(stderr, "Special jump target/ID %d\n", opcode); +#endif + oparg = jump_id_to_instruction[-opcode].oparg; + opcode = 
jump_id_to_instruction[-opcode].opcode; + } + // Partial evaluation - the partition nodes already gave us the static-dynamic variable split. // For partial evaluation, we simply need to follow these rules: // 1. Operations on dynamic variables need to be emitted. @@ -878,12 +994,22 @@ _Py_uop_analyze_and_optimize( for (int y = stack_outputs; y > 0; y--) { stack_virtual_or_real[STACK_LEVEL() - y] = !should_emit; } + + if (opcode == EXIT_TRACE) { + // Copy the rest of the stubs over, then end. + for (; i < trace_len; i++) { + temp_writebuffer[buffer_trace_len] = trace[i]; + buffer_trace_len++; + } + break; + } } assert(STACK_SIZE() >= 0); - assert(buffer_trace_len <= trace_len); buffer_trace_len = remove_duplicate_save_ips(temp_writebuffer, buffer_trace_len); - buffer_trace_len = fix_jump_side_exits(temp_writebuffer, buffer_trace_len); + fix_jump_side_exits(temp_writebuffer, buffer_trace_len, jump_id_to_instruction, max_jump_id); + + assert(buffer_trace_len <= trace_len); #if PARTITION_DEBUG if (buffer_trace_len < trace_len) { @@ -906,11 +1032,14 @@ _Py_uop_analyze_and_optimize( Py_XDECREF(co_const_copy); memcpy(trace, temp_writebuffer, buffer_trace_len * sizeof(_PyUOpInstruction)); PyMem_Free(temp_writebuffer); + PyMem_Free(jump_id_to_instruction); return buffer_trace_len; abstract_error: Py_XDECREF(co_const_copy); Py_DECREF(ctx); + PyMem_Free(temp_writebuffer); + PyMem_Free(jump_id_to_instruction); assert(PyErr_Occurred()); PyErr_Clear(); return trace_len; From 8c0d65fab8e3c8f79e11e8853d3e997f17f3ac54 Mon Sep 17 00:00:00 2001 From: Jules <57632293+JuliaPoo@users.noreply.github.com> Date: Fri, 11 Aug 2023 00:28:36 +0800 Subject: [PATCH 28/48] Fix+Refactor: Extra EXIT_TRACE emitted (#42) * Fix+Refactor: Extra EXIT_TRACE emitted * Style: Removed whitespace --- Python/optimizer_analysis.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 
2c16b12f62b80f..7a66300a384d32 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -848,14 +848,15 @@ _Py_uop_analyze_and_optimize( temp_writebuffer[buffer_trace_len] = insert; buffer_trace_len++; } -#if PARTITION_DEBUG - fprintf(stderr, "Emitting SAVE_IP\n"); -#endif + // Use the next SAVE_IP int temp = i; - for (; trace[temp].opcode != SAVE_IP && temp < trace_len; temp++) { - } + for (; trace[temp].opcode != SAVE_IP && temp < trace_len; temp++); assert(trace[temp].opcode == SAVE_IP); + +#if PARTITION_DEBUG + fprintf(stderr, "Emitting SAVE_IP\n"); +#endif temp_writebuffer[buffer_trace_len] = trace[temp]; buffer_trace_len++; num_dynamic_operands++; @@ -876,9 +877,10 @@ _Py_uop_analyze_and_optimize( #ifdef PARTITION_DEBUG #ifdef Py_DEBUG - fprintf(stderr, " [-] Type propagating across: %s{%d} : %d\n", + fprintf(stderr, " [-] Type propagating across: %s{%d} : %d. {reader: %d, writer: %d}\n", (opcode >= 300 ? _PyOpcode_uop_name : _PyOpcode_OpName)[opcode], - opcode, oparg); + opcode, oparg, + i, buffer_trace_len); #endif #endif switch (opcode) { @@ -997,7 +999,14 @@ _Py_uop_analyze_and_optimize( if (opcode == EXIT_TRACE) { // Copy the rest of the stubs over, then end. +#if PARTITION_DEBUG + fprintf(stderr, "Exit trace encountered, emitting the rest of the stubs\n"); +#endif + i++; // We've already emitted an EXIT_TRACE for (; i < trace_len; i++) { +#if PARTITION_DEBUG + fprintf(stderr, "Emitting %s\n", (opcode >= 300 ? 
_PyOpcode_uop_name : _PyOpcode_OpName)[opcode]); +#endif temp_writebuffer[buffer_trace_len] = trace[i]; buffer_trace_len++; } @@ -1005,10 +1014,8 @@ _Py_uop_analyze_and_optimize( } } assert(STACK_SIZE() >= 0); - buffer_trace_len = remove_duplicate_save_ips(temp_writebuffer, buffer_trace_len); fix_jump_side_exits(temp_writebuffer, buffer_trace_len, jump_id_to_instruction, max_jump_id); - assert(buffer_trace_len <= trace_len); #if PARTITION_DEBUG From 95db909ae151949710bbe5b06ddc7c0a51491c4a Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 11 Aug 2023 01:28:22 +0800 Subject: [PATCH 29/48] fix: overallocate buffer and virtual/real stack offset calculation --- Python/optimizer_analysis.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 7a66300a384d32..09b05fc3591ddf 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -18,6 +18,8 @@ #define STATIC 0 #define DYNAMIC 1 +#define OVERALLOCATE_FACTOR 2 + // TYPENODE is a tagged pointer that uses the last 2 LSB as the tag #define _Py_PARTITIONNODE_t uintptr_t @@ -731,7 +733,7 @@ _Py_uop_analyze_and_optimize( PyObject *co_const_copy = NULL; _PyUOpInstruction *jump_id_to_instruction = NULL; - _PyUOpInstruction *temp_writebuffer = PyMem_New(_PyUOpInstruction, trace_len); + _PyUOpInstruction *temp_writebuffer = PyMem_New(_PyUOpInstruction, trace_len * OVERALLOCATE_FACTOR); if (temp_writebuffer == NULL) { return trace_len; } @@ -836,11 +838,13 @@ _Py_uop_analyze_and_optimize( // INSERT to the correct position in the stack - int offset_from_target = x - num_dynamic_operands - 1; + int target_entry = virtual_stack_size - x; + + int offset_from_target = real_stack_size - target_entry; if (offset_from_target > 0) { _PyUOpInstruction insert; insert.opcode = INSERT; - insert.oparg = -offset_from_target; + insert.oparg = offset_from_target; #if PARTITION_DEBUG 
fprintf(stderr, "Emitting INSERT %d\n", offset_from_target); From 1e05ef8aaed758e6913a23dddd6bb8420f6191b3 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 11 Aug 2023 01:29:25 +0800 Subject: [PATCH 30/48] more bugfix --- Python/optimizer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 4e3ec2953cb17b..ff306b6e5b2482 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -698,12 +698,12 @@ uop_optimize( return trace_length; } OBJECT_STAT_INC(optimization_traces_created); + trace_length = _Py_uop_analyze_and_optimize(code, trace, trace_length, curr_stackentries); _PyUOpExecutorObject *executor = PyObject_NewVar(_PyUOpExecutorObject, &UOpExecutor_Type, trace_length); if (executor == NULL) { return -1; } executor->base.execute = _PyUopExecute; - trace_length = _Py_uop_analyze_and_optimize(code, trace, trace_length, curr_stackentries); memcpy(executor->trace, trace, trace_length * sizeof(_PyUOpInstruction)); *exec_ptr = (_PyExecutorObject *)executor; return 1; From 4d7abc7422a6a214ff8386bd7015e4d804be9d08 Mon Sep 17 00:00:00 2001 From: Jules <57632293+JuliaPoo@users.noreply.github.com> Date: Fri, 11 Aug 2023 15:11:27 +0800 Subject: [PATCH 31/48] Perf+Cleanup: Removed temporary allocation in `remove_duplicate_save_ips` (#43) * Cleanup: Removed warnings from ubuntu build * Perf: Removed temporary allocation in `remove_duplicate_save_ips` --- Python/optimizer_analysis.c | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 09b05fc3591ddf..44d8b603991dc2 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -338,7 +338,7 @@ partitionnode_overwrite(_Py_UOpsAbstractInterpContext *ctx, for (int i = 0; i < nlocals; i++) { _Py_PARTITIONNODE_t *node_ptr = &(ctx->locals[i]); if (*node_ptr == child_test) { - if (new_root == 
(_Py_PARTITIONNODE_t)NULL) { + if (new_root == NULL) { // First child encountered! initialise root new_root = node_ptr; *node_ptr = old_dst; @@ -356,7 +356,7 @@ partitionnode_overwrite(_Py_UOpsAbstractInterpContext *ctx, for (int i = 0; i < nstack; i++) { _Py_PARTITIONNODE_t *node_ptr = &(ctx->stack[i]); if (*node_ptr == child_test) { - if (new_root == (_Py_PARTITIONNODE_t)NULL) { + if (new_root == NULL) { // First child encountered! initialise root new_root = node_ptr; *node_ptr = old_dst; @@ -428,8 +428,6 @@ print_ctx_node(_Py_UOpsAbstractInterpContext *ctx, int i, bool is_printing_stack bool is_local = false; bool is_stack = false; - int locals_offset = -1; - int stack_offset = -1; int parent_idx = -1; _Py_PARTITIONNODE_t *node = is_printing_stack ? &ctx->stack[i] : &ctx->locals[i]; @@ -611,11 +609,9 @@ number_jumps_and_targets(_PyUOpInstruction *trace, int trace_len, int *max_id) static int remove_duplicate_save_ips(_PyUOpInstruction *trace, int trace_len) { - _PyUOpInstruction *temp_trace = PyMem_New(_PyUOpInstruction, trace_len); - if (temp_trace == NULL) { - return trace_len; - } - int temp_trace_len = 0; + // Don't have to allocate a temporary trace array + // because the writer is guaranteed to be behind the reader. 
+ int new_temp_len = 0; _PyUOpInstruction curr; for (int i = 0; i < trace_len; i++) { @@ -623,16 +619,14 @@ remove_duplicate_save_ips(_PyUOpInstruction *trace, int trace_len) if (i < trace_len && curr.opcode == SAVE_IP && trace[i+1].opcode == SAVE_IP) { continue; } - temp_trace[temp_trace_len] = curr; - temp_trace_len++; + trace[new_temp_len] = curr; + new_temp_len++; } - memcpy(trace, temp_trace, temp_trace_len * sizeof(_PyUOpInstruction)); - PyMem_Free(temp_trace); #if PARTITION_DEBUG - fprintf(stderr, "Removed %d SAVE_IPs\n", trace_len - temp_trace_len); + fprintf(stderr, "Removed %d SAVE_IPs\n", trace_len - new_temp_len); #endif - return temp_trace_len; + return new_temp_len; } /** @@ -652,7 +646,6 @@ fix_jump_side_exits(_PyUOpInstruction *trace, int trace_len, // Indicates it's a jump target or jump instruction if (opcode < 0 && opcode > max_jump_id) { opcode = -opcode; - int real_oparg = jump_id_to_instruction[opcode].oparg; int real_opcode = jump_id_to_instruction[opcode].opcode; if (op_is_jump(real_opcode)) { trace[i].opcode = real_opcode; @@ -673,7 +666,6 @@ fix_jump_side_exits(_PyUOpInstruction *trace, int trace_len, // Final pass to swap out all the jump target IDs with their actual targets. 
for (int i = 0; i < trace_len; i++) { - int oparg = trace[i].oparg; int opcode = trace[i].opcode; // Indicates it's a jump target or jump instruction if (opcode < 0 && opcode > max_jump_id) { From 3d76f9a66666a956073e2c109ca9f7db2a6b46a6 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 11 Aug 2023 23:47:34 +0800 Subject: [PATCH 32/48] clean up code --- Python/abstract_interp_cases.c.h | 21 +++-- Python/optimizer_analysis.c | 122 ++++++++++++++++++------------ Tools/cases_generator/stacking.py | 12 +-- 3 files changed, 89 insertions(+), 66 deletions(-) diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 6e8448d3dc4412..3aa267d37f28a2 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -132,6 +132,12 @@ break; } + case BINARY_SUBSCR_STR_INT: { + STACK_SHRINK(1); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); + break; + } + case BINARY_SUBSCR_TUPLE_INT: { STACK_SHRINK(1); PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); @@ -291,7 +297,7 @@ STACK_GROW(1); STACK_GROW(((oparg & 1) ? 1 : 0)); PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - (oparg & 1 ? 1 : 0))), true); - PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-(oparg & 1 ? 1 : 0))), true); break; } @@ -307,7 +313,7 @@ STACK_GROW(1); STACK_GROW(((oparg & 1) ? 1 : 0)); PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - (oparg & 1 ? 1 : 0))), true); - PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-(oparg & 1 ? 1 : 0))), true); break; } @@ -315,7 +321,7 @@ STACK_GROW(1); STACK_GROW(((oparg & 1) ? 
1 : 0)); PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - (oparg & 1 ? 1 : 0))), true); - PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-(oparg & 1 ? 1 : 0))), true); break; } @@ -419,9 +425,8 @@ case LOAD_SUPER_ATTR_ATTR: { STACK_SHRINK(2); - STACK_GROW(((oparg & 1) ? 1 : 0)); - PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - (oparg & 1 ? 1 : 0))), true); - PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - (0 ? 1 : 0))), true); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-(0 ? 1 : 0))), true); break; } @@ -435,7 +440,7 @@ case LOAD_ATTR: { STACK_GROW(((oparg & 1) ? 1 : 0)); PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - (oparg & 1 ? 1 : 0))), true); - PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-(oparg & 1 ? 1 : 0))), true); break; } @@ -450,7 +455,7 @@ case _LOAD_ATTR_INSTANCE_VALUE: { STACK_GROW(((oparg & 1) ? 1 : 0)); PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - (oparg & 1 ? 1 : 0))), true); - PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-(oparg & 1 ? 1 : 0))), true); break; } diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 09b05fc3591ddf..6c2421efbc9764 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -20,6 +20,13 @@ #define OVERALLOCATE_FACTOR 2 +#ifdef Py_DEBUG +#define DPRINTF(level, ...) 
\ + if (lltrace >= (level)) { printf(__VA_ARGS__); } +#else +#define DPRINTF(level, ...) +#endif + // TYPENODE is a tagged pointer that uses the last 2 LSB as the tag #define _Py_PARTITIONNODE_t uintptr_t @@ -420,11 +427,17 @@ partitionnode_overwrite(_Py_UOpsAbstractInterpContext *ctx, } } -#if PARTITION_DEBUG +#ifdef Py_DEBUG void print_ctx_node(_Py_UOpsAbstractInterpContext *ctx, int i, bool is_printing_stack, int nstack_use, int nstack) { + char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG"); + int lltrace = 0; + if (uop_debug != NULL && *uop_debug >= '0') { + lltrace = *uop_debug - '0'; // TODO: Parse an int and all that + } + bool is_local = false; bool is_stack = false; @@ -438,7 +451,7 @@ print_ctx_node(_Py_UOpsAbstractInterpContext *ctx, int i, bool is_printing_stack _Py_PARTITIONNODE_t *root = partitionnode_get_rootptr(node); if (is_printing_stack) { - fprintf(stderr, "%s", i == nstack_use - 1 ? "." : " "); + DPRINTF(3, "%s", i == nstack_use - 1 ? "." : " "); } if (tag == TYPE_REF) { @@ -456,10 +469,10 @@ print_ctx_node(_Py_UOpsAbstractInterpContext *ctx, int i, bool is_printing_stack _Py_PartitionRootNode *ptr = (_Py_PartitionRootNode *)partitionnode_clear_tag(*root); - fprintf(stderr, "%s:", + DPRINTF(3, "%s:", ptr == NULL ? "?" : (ptr->static_or_dynamic == STATIC ? "static" : "dynamic")); - if (ptr != NULL && ptr->static_or_dynamic == STATIC) { - PyObject_Print(ptr->const_val, stderr, 0); + if (lltrace >= 4 && ptr != NULL && ptr->static_or_dynamic == STATIC) { + PyObject_Print(ptr->const_val, stdout, 0); } if (tag == TYPE_REF) { @@ -468,8 +481,7 @@ print_ctx_node(_Py_UOpsAbstractInterpContext *ctx, int i, bool is_printing_stack : is_stack ? 
"stack" : "const"; - fprintf(stderr, "->%s[%d]", - wher, parent_idx); + DPRINTF(3, "->%s[%d]", wher, parent_idx); } } @@ -479,6 +491,12 @@ print_ctx_node(_Py_UOpsAbstractInterpContext *ctx, int i, bool is_printing_stack static void print_ctx(_Py_UOpsAbstractInterpContext *ctx) { + char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG"); + int lltrace = 0; + if (uop_debug != NULL && *uop_debug >= '0') { + lltrace = *uop_debug - '0'; // TODO: Parse an int and all that + } + _Py_PARTITIONNODE_t *locals = ctx->locals; _Py_PARTITIONNODE_t *stackptr = ctx->stack_pointer; @@ -486,19 +504,19 @@ print_ctx(_Py_UOpsAbstractInterpContext *ctx) int nstack = ctx->stack_len; int nlocals = ctx->locals_len; - fprintf(stderr, " Stack: %p: [", ctx->stack); + DPRINTF(3, " Stack: %p: [", ctx->stack); for (int i = 0; i < nstack; i++) { print_ctx_node(ctx, i, true, nstack_use, nstack); - fprintf(stderr, " | "); + DPRINTF(3, " | "); } - fprintf(stderr, "]\n"); + DPRINTF(3, "]\n"); - fprintf(stderr, " Locals %p: [", locals); + DPRINTF(3, " Locals %p: [", locals); for (int i = 0; i < nlocals; i++) { print_ctx_node(ctx, i, false, nstack_use, nstack); - fprintf(stderr, " | "); + DPRINTF(3, " | "); } - fprintf(stderr, "]\n"); + DPRINTF(3, "]\n"); } #endif @@ -588,7 +606,6 @@ number_jumps_and_targets(_PyUOpInstruction *trace, int trace_len, int *max_id) jump_id_to_instruction[-target_id] = trace[target]; trace[target].opcode = target_id; jump_and_target_id--; - fprintf(stderr, "op %d oparg %d\n", jump_id_to_instruction[-target_id].opcode, jump_id_to_instruction[-target_id].oparg); } // 1 for the jump @@ -611,6 +628,14 @@ number_jumps_and_targets(_PyUOpInstruction *trace, int trace_len, int *max_id) static int remove_duplicate_save_ips(_PyUOpInstruction *trace, int trace_len) { +#ifdef Py_DEBUG + char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG"); + int lltrace = 0; + if (uop_debug != NULL && *uop_debug >= '0') { + lltrace = *uop_debug - '0'; // TODO: Parse an int and all that + } +#endif + _PyUOpInstruction 
*temp_trace = PyMem_New(_PyUOpInstruction, trace_len); if (temp_trace == NULL) { return trace_len; @@ -629,9 +654,7 @@ remove_duplicate_save_ips(_PyUOpInstruction *trace, int trace_len) memcpy(trace, temp_trace, temp_trace_len * sizeof(_PyUOpInstruction)); PyMem_Free(temp_trace); -#if PARTITION_DEBUG - fprintf(stderr, "Removed %d SAVE_IPs\n", trace_len - temp_trace_len); -#endif + DPRINTF(3, "Removed %d SAVE_IPs\n", trace_len - temp_trace_len); return temp_trace_len; } @@ -730,6 +753,14 @@ _Py_uop_analyze_and_optimize( #define PARTITIONNODE_SET(src, dst, flag) partitionnode_set((src), (dst), (flag)) #define PARTITIONNODE_OVERWRITE(src, dst, flag) partitionnode_overwrite(ctx, (src), (dst), (flag)) #define MAKE_STATIC_ROOT(val) partitionnode_make_root(0, (val)) +#ifdef Py_DEBUG + char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG"); + int lltrace = 0; + if (uop_debug != NULL && *uop_debug >= '0') { + lltrace = *uop_debug - '0'; // TODO: Parse an int and all that + } +#endif + PyObject *co_const_copy = NULL; _PyUOpInstruction *jump_id_to_instruction = NULL; @@ -740,7 +771,8 @@ _Py_uop_analyze_and_optimize( int buffer_trace_len = 0; - _Py_UOpsAbstractInterpContext *ctx = _Py_UOpsAbstractInterpContext_New(co->co_stacksize, co->co_nlocals, curr_stacklen); + _Py_UOpsAbstractInterpContext *ctx = _Py_UOpsAbstractInterpContext_New( + co->co_stacksize, co->co_nlocals, curr_stacklen); if (ctx == NULL) { PyMem_Free(temp_writebuffer); return trace_len; @@ -775,9 +807,7 @@ _Py_uop_analyze_and_optimize( // Is a special jump/target ID, decode that if (opcode < 0 && opcode > max_jump_id) { -#if PARTITION_DEBUG - fprintf(stderr, "Special jump target/ID %d\n", opcode); -#endif + DPRINTF(2, "Special jump target/ID %d\n", opcode); oparg = jump_id_to_instruction[-opcode].oparg; opcode = jump_id_to_instruction[-opcode].opcode; } @@ -830,9 +860,8 @@ _Py_uop_analyze_and_optimize( goto abstract_error; } -#if PARTITION_DEBUG - fprintf(stderr, "Emitting LOAD_CONST\n"); -#endif + DPRINTF(2, "Emitting 
LOAD_CONST\n"); + temp_writebuffer[buffer_trace_len] = load_const; buffer_trace_len++; @@ -846,9 +875,8 @@ _Py_uop_analyze_and_optimize( insert.opcode = INSERT; insert.oparg = offset_from_target; -#if PARTITION_DEBUG - fprintf(stderr, "Emitting INSERT %d\n", offset_from_target); -#endif + DPRINTF(2, "Emitting INSERT %d\n", offset_from_target); + temp_writebuffer[buffer_trace_len] = insert; buffer_trace_len++; } @@ -858,9 +886,8 @@ _Py_uop_analyze_and_optimize( for (; trace[temp].opcode != SAVE_IP && temp < trace_len; temp++); assert(trace[temp].opcode == SAVE_IP); -#if PARTITION_DEBUG - fprintf(stderr, "Emitting SAVE_IP\n"); -#endif + DPRINTF(2, "Emitting SAVE_IP\n"); + temp_writebuffer[buffer_trace_len] = trace[temp]; buffer_trace_len++; num_dynamic_operands++; @@ -868,9 +895,9 @@ _Py_uop_analyze_and_optimize( } } -#if PARTITION_DEBUG - fprintf(stderr, "Emitting %s\n", (opcode >= 300 ? _PyOpcode_uop_name : _PyOpcode_OpName)[opcode]); -#endif + + DPRINTF(2, "Emitting %s\n", (opcode >= 300 ? _PyOpcode_uop_name : _PyOpcode_OpName)[opcode]); + temp_writebuffer[buffer_trace_len] = trace[i]; buffer_trace_len++; } @@ -879,14 +906,12 @@ _Py_uop_analyze_and_optimize( * @TODO: shift these to the DSL */ -#ifdef PARTITION_DEBUG -#ifdef Py_DEBUG - fprintf(stderr, " [-] Type propagating across: %s{%d} : %d. {reader: %d, writer: %d}\n", + + DPRINTF(2, " [-] Type propagating across: %s{%d} : %d. {reader: %d, writer: %d}\n", (opcode >= 300 ? 
_PyOpcode_uop_name : _PyOpcode_OpName)[opcode], opcode, oparg, i, buffer_trace_len); -#endif -#endif + switch (opcode) { #include "abstract_interp_cases.c.h" // @TODO convert these to autogenerated using DSL @@ -987,11 +1012,11 @@ _Py_uop_analyze_and_optimize( } } default: - fprintf(stderr, "Unknown opcode in abstract interpreter\n"); + DPRINTF(1, "Unknown opcode in abstract interpreter\n"); Py_UNREACHABLE(); } -#if PARTITION_DEBUG +#ifdef Py_DEBUG print_ctx(ctx); #endif @@ -1003,14 +1028,14 @@ _Py_uop_analyze_and_optimize( if (opcode == EXIT_TRACE) { // Copy the rest of the stubs over, then end. -#if PARTITION_DEBUG - fprintf(stderr, "Exit trace encountered, emitting the rest of the stubs\n"); -#endif + + DPRINTF(2, "Exit trace encountered, emitting the rest of the stubs\n"); + i++; // We've already emitted an EXIT_TRACE for (; i < trace_len; i++) { -#if PARTITION_DEBUG - fprintf(stderr, "Emitting %s\n", (opcode >= 300 ? _PyOpcode_uop_name : _PyOpcode_OpName)[opcode]); -#endif + + DPRINTF(2, "Emitting %s\n", (opcode >= 300 ? 
_PyOpcode_uop_name : _PyOpcode_OpName)[opcode]); + temp_writebuffer[buffer_trace_len] = trace[i]; buffer_trace_len++; } @@ -1022,11 +1047,12 @@ _Py_uop_analyze_and_optimize( fix_jump_side_exits(temp_writebuffer, buffer_trace_len, jump_id_to_instruction, max_jump_id); assert(buffer_trace_len <= trace_len); -#if PARTITION_DEBUG +#ifdef Py_DEBUG if (buffer_trace_len < trace_len) { - fprintf(stderr, "Shortened trace by %d instructions\n", trace_len - buffer_trace_len); + DPRINTF(2, "Shortened trace by %d instructions\n", trace_len - buffer_trace_len); } #endif + Py_DECREF(ctx); PyObject *co_const_final = PyTuple_New(PyList_Size(co_const_copy)); diff --git a/Tools/cases_generator/stacking.py b/Tools/cases_generator/stacking.py index 50294039af8f48..5277d72b53ee36 100644 --- a/Tools/cases_generator/stacking.py +++ b/Tools/cases_generator/stacking.py @@ -418,13 +418,5 @@ def _write_components_for_abstract_interp( # NULL out the output stack effects for poke in mgr.pokes: if not poke.effect.size and poke.effect.name not in mgr.instr.unmoved_names: - out.emit(f"PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-({poke.offset.as_index()})), true);") - # out.assign( - # StackEffect( - # poke.as_variable(), - # poke.effect.type, - # poke.effect.cond, - # poke.effect.size, - # ), - # StackEffect("partitionnode_nullroot()"), - # ) + out.emit(f"PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)" + f"PARTITIONNODE_NULLROOT, PEEK(-({poke.offset.as_index()})), true);") From 9a5a3f7a968b34fd3168d944a427de38e41cf78c Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 12 Aug 2023 00:40:59 +0800 Subject: [PATCH 33/48] make static --- Python/optimizer_analysis.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index db53cb3930344d..7c6fac7e17d975 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -429,7 +429,7 @@ 
partitionnode_overwrite(_Py_UOpsAbstractInterpContext *ctx, #ifdef Py_DEBUG -void +static void print_ctx_node(_Py_UOpsAbstractInterpContext *ctx, int i, bool is_printing_stack, int nstack_use, int nstack) { char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG"); From df490d0c0e9cc33e43ea215f024a283069c209ca Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 12 Aug 2023 02:27:39 +0800 Subject: [PATCH 34/48] make types static --- Python/optimizer_analysis.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 7c6fac7e17d975..3081fce43907f4 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -58,7 +58,7 @@ partitionnode_dealloc(PyObject *o) Py_TYPE(self)->tp_free(o); } -PyTypeObject _Py_PartitionRootNode_Type = { +static PyTypeObject _Py_PartitionRootNode_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) .tp_name = "uops abstract interpreter's root node", .tp_basicsize = sizeof(_Py_PartitionRootNode), @@ -143,7 +143,7 @@ abstractinterp_dealloc(PyObject *o) Py_TYPE(self)->tp_free((PyObject *)self); } -PyTypeObject _Py_UOpsAbstractInterpContext_Type = { +static PyTypeObject _Py_UOpsAbstractInterpContext_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) .tp_name = "uops abstract interpreter's context", .tp_basicsize = sizeof(_Py_UOpsAbstractInterpContext), From 1e4fc94beaf4f6a133345077dc13d99f88539b99 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 12 Aug 2023 03:23:02 +0800 Subject: [PATCH 35/48] make const and ignore in c analyzer --- Python/optimizer_analysis.c | 2 +- Tools/c-analyzer/cpython/ignored.tsv | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 3081fce43907f4..fbbf1864853a04 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -102,7 +102,7 @@ 
partitionnode_make_ref(_Py_PARTITIONNODE_t *node) } -static _Py_PARTITIONNODE_t PARTITIONNODE_NULLROOT = (_Py_PARTITIONNODE_t)_Py_NULL | TYPE_ROOT; +static const _Py_PARTITIONNODE_t PARTITIONNODE_NULLROOT = (_Py_PARTITIONNODE_t)_Py_NULL | TYPE_ROOT; // Tier 2 types meta interpreter typedef struct _Py_UOpsAbstractInterpContext { diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index 66815c72ffbc63..bad1156e6607fd 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -715,3 +715,5 @@ Modules/expat/xmlrole.c - error - ## other Modules/_io/_iomodule.c - _PyIO_Module - Modules/_sqlite/module.c - _sqlite3module - +Python/optimizer_analysis.c - _Py_PartitionRootNode_Type +Python/optimizer_analysis.c - _Py_UOpsAbstractInterpContext_Type From 6de77a7de45fc491bc91f539cc7370da684b6b24 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 12 Aug 2023 03:37:23 +0800 Subject: [PATCH 36/48] fix c-analyzer ignored list --- Tools/c-analyzer/cpython/ignored.tsv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index bad1156e6607fd..706fb1062770b1 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -715,5 +715,5 @@ Modules/expat/xmlrole.c - error - ## other Modules/_io/_iomodule.c - _PyIO_Module - Modules/_sqlite/module.c - _sqlite3module - -Python/optimizer_analysis.c - _Py_PartitionRootNode_Type -Python/optimizer_analysis.c - _Py_UOpsAbstractInterpContext_Type +Python/optimizer_analysis.c - _Py_PartitionRootNode_Type - +Python/optimizer_analysis.c - _Py_UOpsAbstractInterpContext_Type - From a11fc80374b2c788141b05f20d097fc7d845f607 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Mon, 14 Aug 2023 04:47:27 +0800 Subject: [PATCH 37/48] more cleanup --- 
Python/optimizer_analysis.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index fbbf1864853a04..142d9f519022fe 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -118,11 +118,6 @@ typedef struct _Py_UOpsAbstractInterpContext { // Indicates whether the stack entry is real or virtualised. // true - virtual false - real bool *stack_virtual_or_real; - // The following represent the real (emitted instructions) stack and locals. - // points to one element after the abstract stack - _Py_PARTITIONNODE_t *real_stack_pointer; - _Py_PARTITIONNODE_t *real_stack; - _Py_PARTITIONNODE_t *real_locals; } _Py_UOpsAbstractInterpContext; static void @@ -165,8 +160,7 @@ _Py_UOpsAbstractInterpContext_New(int stack_len, int locals_len, int curr_stackl self->stack_len = stack_len; self->locals_len = locals_len; - // Double the size needed because we also need a representation for the real stack and locals. 
- _Py_PARTITIONNODE_t *locals_with_stack = PyMem_New(_Py_PARTITIONNODE_t, (locals_len + stack_len) * 2); + _Py_PARTITIONNODE_t *locals_with_stack = PyMem_New(_Py_PARTITIONNODE_t, (locals_len + stack_len)); if (locals_with_stack == NULL) { Py_DECREF(self); return NULL; @@ -183,15 +177,16 @@ _Py_UOpsAbstractInterpContext_New(int stack_len, int locals_len, int curr_stackl locals_with_stack[i] = PARTITIONNODE_NULLROOT; } + for (int i = 0; i < stack_len; i++) { + virtual_or_real[i] = false; + } + self->locals = locals_with_stack; self->stack = locals_with_stack + locals_len; self->stack_pointer = self->stack + curr_stacklen; self->stack_virtual_or_real = virtual_or_real; - self->real_locals = self->locals + locals_len + stack_len; - self->real_stack = self->stack + locals_len + stack_len; - self->real_stack_pointer = self->stack_pointer + locals_len + stack_len; return self; } @@ -778,7 +773,7 @@ _Py_uop_analyze_and_optimize( goto abstract_error; } - + // We will be adding more constants due to partial evaluation. co_const_copy = PyList_New(PyTuple_Size(co->co_consts)); if (co_const_copy == NULL) { goto abstract_error; From 56c62eb4c515900f55a76e9c6093263010227a75 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Mon, 14 Aug 2023 05:02:51 +0800 Subject: [PATCH 38/48] regen files --- Python/abstract_interp_cases.c.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 3aa267d37f28a2..ed78ef148faf40 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -425,8 +425,8 @@ case LOAD_SUPER_ATTR_ATTR: { STACK_SHRINK(2); - PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - (0 ? 1 : 0))), true); - PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-(0 ? 
1 : 0))), true); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(0)), true); break; } From 3c08ebeed2dc7ac6f8751faed8a1aa778376f2c7 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Mon, 14 Aug 2023 19:56:04 +0800 Subject: [PATCH 39/48] address review --- Include/internal/pycore_uops.h | 2 +- Lib/test/test_capi/test_misc.py | 1 + Makefile.pre.in | 2 ++ Python/abstract_interp_cases.c.h | 1 + Python/bytecodes.c | 3 +-- Python/executor_cases.c.h | 7 ++++++- Python/optimizer_analysis.c | 2 +- Tools/cases_generator/generate_cases.py | 9 +++------ Tools/cases_generator/instructions.py | 9 --------- 9 files changed, 16 insertions(+), 20 deletions(-) diff --git a/Include/internal/pycore_uops.h b/Include/internal/pycore_uops.h index d351f3353a5c6f..30b87e43a3f5d5 100644 --- a/Include/internal/pycore_uops.h +++ b/Include/internal/pycore_uops.h @@ -8,7 +8,7 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif -#define _Py_UOP_MAX_TRACE_LENGTH 256 +#define _Py_UOP_MAX_TRACE_LENGTH 64 typedef struct { int32_t opcode; diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index 5f0de8b47115a7..836bebf3208db1 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -2643,5 +2643,6 @@ def testfunc(loops): binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] self.assertEqual(len(binop_count), 1) + if __name__ == "__main__": unittest.main() diff --git a/Makefile.pre.in b/Makefile.pre.in index f931020ee49ca3..1c5912105f9941 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1553,10 +1553,12 @@ regen-cases: -m $(srcdir)/Include/internal/pycore_opcode_metadata.h.new \ -e $(srcdir)/Python/executor_cases.c.h.new \ -p $(srcdir)/Lib/_opcode_metadata.py.new \ + -a $(srcdir)/Python/abstract_interp_cases.c.h.new \ 
$(srcdir)/Python/bytecodes.c $(UPDATE_FILE) $(srcdir)/Python/generated_cases.c.h $(srcdir)/Python/generated_cases.c.h.new $(UPDATE_FILE) $(srcdir)/Include/internal/pycore_opcode_metadata.h $(srcdir)/Include/internal/pycore_opcode_metadata.h.new $(UPDATE_FILE) $(srcdir)/Python/executor_cases.c.h $(srcdir)/Python/executor_cases.c.h.new + $(UPDATE_FILE) $(srcdir)/Python/abstract_interp_cases.c.h $(srcdir)/Python/abstract_interp_cases.c.h.new $(UPDATE_FILE) $(srcdir)/Lib/_opcode_metadata.py $(srcdir)/Lib/_opcode_metadata.py.new Python/compile.o: $(srcdir)/Include/internal/pycore_opcode_metadata.h diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index ed78ef148faf40..6bfcf534646b1e 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -756,5 +756,6 @@ } case INSERT: { + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - oparg)), true); break; } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 7566fcdf64c908..12c5948af7a8e7 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3743,13 +3743,12 @@ dummy_func( return frame; } - op(INSERT, (--)) { + op(INSERT, (stuff[oparg], top -- top, stuff[oparg])) { // Inserts TOS at position specified by oparg PyObject *tos = TOP(); for (int i = 1; i < oparg + 1; i++) { stack_pointer[i] = stack_pointer[i - 1]; } - POKE(oparg, tos); } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 03b0f659c2cf41..0e9b001b422eb6 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2735,11 +2735,16 @@ } case INSERT: { + PyObject *top; + PyObject **stuff; + top = stack_pointer[-1]; + stuff = stack_pointer - 1 - oparg; + stuff = stack_pointer - oparg; // Inserts TOS at position specified by oparg PyObject *tos = TOP(); for (int i = 1; i < oparg + 1; i++) { stack_pointer[i] = stack_pointer[i - 1]; } - POKE(oparg, tos); + stack_pointer[-1 - oparg] = top; break; } diff --git 
a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 142d9f519022fe..5cc9312ca1165d 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -173,7 +173,7 @@ _Py_UOpsAbstractInterpContext_New(int stack_len, int locals_len, int curr_stackl return NULL; } - for (int i = 0; i < (locals_len + stack_len) * 2; i++) { + for (int i = 0; i < (locals_len + stack_len); i++) { locals_with_stack[i] = PARTITIONNODE_NULLROOT; } diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index ca7f67ca1f6627..3b0e21f245b386 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -61,7 +61,7 @@ INSTR_FMT_PREFIX = "INSTR_FMT_" -# @TODO generate all these after updating the DSL +# TODO: generate all these after updating the DSL SPECIALLY_HANDLED_ABSTRACT_INSTR = { "LOAD_FAST", "LOAD_FAST_CHECK", @@ -135,7 +135,7 @@ def effect_str(effects: list[StackEffect]) -> str: pushed: str | None match thing: case parsing.InstDef(): - if thing.kind != "op" or (thing.kind != "inst" and self.instrs[thing.name].is_viable_uop()): + if thing.kind == "instr" or self.instrs[thing.name].is_viable_uop(): instr = self.instrs[thing.name] popped = effect_str(instr.input_effects) pushed = effect_str(instr.output_effects) @@ -641,8 +641,7 @@ def write_abstract_interpreter_instructions( for thing in self.everything: match thing: case OverriddenInstructionPlaceHolder(): - # TODO: Is this helpful? 
- self.write_overridden_instr_place_holder(thing) + pass case parsing.InstDef(): instr = AbstractInstruction(self.instrs[thing.name].inst) if instr.is_viable_uop() and instr.name not in SPECIALLY_HANDLED_ABSTRACT_INSTR: @@ -650,8 +649,6 @@ def write_abstract_interpreter_instructions( with self.out.block(f"case {thing.name}:"): instr.write(self.out, tier=TIER_TWO) self.out.emit("break;") - # elif instr.kind != "op": - # print(f"NOTE: {thing.name} is not a viable uop") case parsing.Macro(): pass case parsing.Pseudo(): diff --git a/Tools/cases_generator/instructions.py b/Tools/cases_generator/instructions.py index a10e8f41ab67db..a505df08fa265b 100644 --- a/Tools/cases_generator/instructions.py +++ b/Tools/cases_generator/instructions.py @@ -255,15 +255,6 @@ def __init__(self, inst: parsing.InstDef): def write(self, out: Formatter, tier: Tiers = TIER_ONE) -> None: """Write one abstract instruction, sans prologue and epilogue.""" - # Write a static assertion that a family's cache size is correct - if family := self.family: - if self.name == family.name: - if cache_size := family.size: - out.emit( - f"static_assert({cache_size} == " - f'{self.cache_offset}, "incorrect cache size");' - ) - stacking.write_single_instr_for_abstract_interp(self, out) def write_body( From d5f16be5d5fe7b6d2294b103a7fcdef59fc667c0 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Mon, 14 Aug 2023 19:56:36 +0800 Subject: [PATCH 40/48] regen --- Include/internal/pycore_opcode_metadata.h | 304 +--------------------- 1 file changed, 2 insertions(+), 302 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 86df531b161105..5c02dff63b0a3a 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -65,10 +65,6 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { switch(opcode) { case NOP: return 0; - case RESUME: - return 0; - case 
INSTRUMENTED_RESUME: - return 0; case LOAD_CLOSURE: return 0; case LOAD_FAST_CHECK: @@ -77,30 +73,20 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 0; case LOAD_FAST_AND_CLEAR: return 0; - case LOAD_FAST_LOAD_FAST: - return 0; case LOAD_CONST: return 0; case STORE_FAST: return 1; case STORE_FAST_MAYBE_NULL: return 1; - case STORE_FAST_LOAD_FAST: - return 1; - case STORE_FAST_STORE_FAST: - return 2; case POP_TOP: return 1; case PUSH_NULL: return 0; case END_FOR: return 2; - case INSTRUMENTED_END_FOR: - return 2; case END_SEND: return 2; - case INSTRUMENTED_END_SEND: - return 2; case UNARY_NEGATIVE: return 1; case UNARY_NOT: @@ -171,8 +157,6 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 2; case BINARY_SUBSCR_DICT: return 2; - case BINARY_SUBSCR_GETITEM: - return 2; case LIST_APPEND: return (oparg-1) + 2; case SET_ADD: @@ -189,40 +173,14 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 1; case CALL_INTRINSIC_2: return 2; - case RAISE_VARARGS: - return oparg; - case INTERPRETER_EXIT: - return 1; - case RETURN_VALUE: - return 1; - case INSTRUMENTED_RETURN_VALUE: - return 1; - case RETURN_CONST: - return 0; - case INSTRUMENTED_RETURN_CONST: - return 0; case GET_AITER: return 1; case GET_ANEXT: return 1; case GET_AWAITABLE: return 1; - case SEND: - return 2; - case SEND_GEN: - return 2; - case INSTRUMENTED_YIELD_VALUE: - return 1; - case YIELD_VALUE: - return 1; case POP_EXCEPT: return 1; - case RERAISE: - return oparg + 1; - case END_ASYNC_FOR: - return 2; - case CLEANUP_THROW: - return 3; case LOAD_ASSERTION_ERROR: return 0; case LOAD_BUILD_CLASS: @@ -275,8 +233,6 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 0; case DELETE_FAST: return 0; - case MAKE_CELL: - return 0; case DELETE_DEREF: return 0; case LOAD_FROM_DICT_OR_DEREF: @@ -311,10 +267,6 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return (oparg - 1) + 5; case MAP_ADD: return (oparg - 1) + 3; - case 
INSTRUMENTED_LOAD_SUPER_ATTR: - return 3; - case LOAD_SUPER_ATTR: - return 3; case LOAD_SUPER_METHOD: return 3; case LOAD_ZERO_SUPER_METHOD: @@ -337,24 +289,6 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 1; case LOAD_ATTR_INSTANCE_VALUE: return 1; - case LOAD_ATTR_MODULE: - return 1; - case LOAD_ATTR_WITH_HINT: - return 1; - case LOAD_ATTR_SLOT: - return 1; - case LOAD_ATTR_CLASS: - return 1; - case LOAD_ATTR_PROPERTY: - return 1; - case LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN: - return 1; - case STORE_ATTR_INSTANCE_VALUE: - return 2; - case STORE_ATTR_WITH_HINT: - return 2; - case STORE_ATTR_SLOT: - return 2; case COMPARE_OP: return 2; case COMPARE_OP_FLOAT: @@ -371,32 +305,16 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 2; case CHECK_EXC_MATCH: return 2; - case IMPORT_NAME: - return 2; - case IMPORT_FROM: - return 1; - case JUMP_FORWARD: - return 0; - case JUMP_BACKWARD: - return 0; case JUMP: return 0; case JUMP_NO_INTERRUPT: return 0; - case ENTER_EXECUTOR: - return 0; - case POP_JUMP_IF_FALSE: - return 1; - case POP_JUMP_IF_TRUE: - return 1; case IS_NONE: return 1; case POP_JUMP_IF_NONE: return 1; case POP_JUMP_IF_NOT_NONE: return 1; - case JUMP_BACKWARD_NO_INTERRUPT: - return 0; case GET_LEN: return 1; case MATCH_CLASS: @@ -411,10 +329,6 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 1; case GET_YIELD_FROM_ITER: return 1; - case FOR_ITER: - return 1; - case INSTRUMENTED_FOR_ITER: - return 0; case _ITER_CHECK_LIST: return 1; case _IS_ITER_EXHAUSTED_LIST: @@ -439,12 +353,6 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 1; case FOR_ITER_RANGE: return 1; - case FOR_ITER_GEN: - return 1; - case BEFORE_ASYNC_WITH: - return 1; - case BEFORE_WITH: - return 1; case WITH_EXCEPT_START: return 4; case SETUP_FINALLY: @@ -457,70 +365,32 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 0; case PUSH_EXC_INFO: return 1; - case LOAD_ATTR_METHOD_WITH_VALUES: - return 1; - case 
LOAD_ATTR_METHOD_NO_DICT: - return 1; - case LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: - return 1; - case LOAD_ATTR_NONDESCRIPTOR_NO_DICT: - return 1; - case LOAD_ATTR_METHOD_LAZY_DICT: - return 1; - case KW_NAMES: - return 0; - case INSTRUMENTED_CALL: - return 0; - case CALL: - return oparg + 2; - case CALL_BOUND_METHOD_EXACT_ARGS: - return oparg + 2; - case CALL_PY_EXACT_ARGS: - return oparg + 2; - case CALL_PY_WITH_DEFAULTS: - return oparg + 2; case CALL_NO_KW_TYPE_1: return oparg + 2; case CALL_NO_KW_STR_1: return oparg + 2; case CALL_NO_KW_TUPLE_1: return oparg + 2; - case CALL_NO_KW_ALLOC_AND_ENTER_INIT: - return oparg + 2; case EXIT_INIT_CHECK: return 1; - case CALL_BUILTIN_CLASS: - return oparg + 2; case CALL_NO_KW_BUILTIN_O: return oparg + 2; case CALL_NO_KW_BUILTIN_FAST: return oparg + 2; - case CALL_BUILTIN_FAST_WITH_KEYWORDS: - return oparg + 2; case CALL_NO_KW_LEN: return oparg + 2; case CALL_NO_KW_ISINSTANCE: return oparg + 2; - case CALL_NO_KW_LIST_APPEND: - return oparg + 2; case CALL_NO_KW_METHOD_DESCRIPTOR_O: return oparg + 2; - case CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS: - return oparg + 2; case CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS: return oparg + 2; case CALL_NO_KW_METHOD_DESCRIPTOR_FAST: return oparg + 2; - case INSTRUMENTED_CALL_FUNCTION_EX: - return 0; - case CALL_FUNCTION_EX: - return ((oparg & 1) ? 1 : 0) + 3; case MAKE_FUNCTION: return 1; case SET_FUNCTION_ATTRIBUTE: return 2; - case RETURN_GENERATOR: - return 0; case BUILD_SLICE: return ((oparg == 3) ? 
1 : 0) + 2; case CONVERT_VALUE: @@ -535,26 +405,6 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 2; case SWAP: return (oparg-2) + 2; - case INSTRUMENTED_INSTRUCTION: - return 0; - case INSTRUMENTED_JUMP_FORWARD: - return 0; - case INSTRUMENTED_JUMP_BACKWARD: - return 0; - case INSTRUMENTED_POP_JUMP_IF_TRUE: - return 0; - case INSTRUMENTED_POP_JUMP_IF_FALSE: - return 0; - case INSTRUMENTED_POP_JUMP_IF_NONE: - return 0; - case INSTRUMENTED_POP_JUMP_IF_NOT_NONE: - return 0; - case EXTENDED_ARG: - return 0; - case CACHE: - return 0; - case RESERVED: - return 0; case _POP_JUMP_IF_FALSE: return 1; case _POP_JUMP_IF_TRUE: @@ -566,7 +416,7 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { case EXIT_TRACE: return 0; case INSERT: - return 0; + return oparg + 1; default: return -1; } @@ -581,10 +431,6 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { switch(opcode) { case NOP: return 0; - case RESUME: - return 0; - case INSTRUMENTED_RESUME: - return 0; case LOAD_CLOSURE: return 1; case LOAD_FAST_CHECK: @@ -593,30 +439,20 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 1; case LOAD_FAST_AND_CLEAR: return 1; - case LOAD_FAST_LOAD_FAST: - return 2; case LOAD_CONST: return 1; case STORE_FAST: return 0; case STORE_FAST_MAYBE_NULL: return 0; - case STORE_FAST_LOAD_FAST: - return 1; - case STORE_FAST_STORE_FAST: - return 0; case POP_TOP: return 0; case PUSH_NULL: return 1; case END_FOR: return 0; - case INSTRUMENTED_END_FOR: - return 0; case END_SEND: return 1; - case INSTRUMENTED_END_SEND: - return 1; case UNARY_NEGATIVE: return 1; case UNARY_NOT: @@ -687,8 +523,6 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 1; case BINARY_SUBSCR_DICT: return 1; - case BINARY_SUBSCR_GETITEM: - return 1; case LIST_APPEND: return (oparg-1) + 1; case SET_ADD: @@ -705,40 +539,14 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 1; case CALL_INTRINSIC_2: return 1; - case RAISE_VARARGS: - return 0; - case 
INTERPRETER_EXIT: - return 0; - case RETURN_VALUE: - return 0; - case INSTRUMENTED_RETURN_VALUE: - return 0; - case RETURN_CONST: - return 0; - case INSTRUMENTED_RETURN_CONST: - return 0; case GET_AITER: return 1; case GET_ANEXT: return 2; case GET_AWAITABLE: return 1; - case SEND: - return 2; - case SEND_GEN: - return 2; - case INSTRUMENTED_YIELD_VALUE: - return 1; - case YIELD_VALUE: - return 1; case POP_EXCEPT: return 0; - case RERAISE: - return oparg; - case END_ASYNC_FOR: - return 0; - case CLEANUP_THROW: - return 2; case LOAD_ASSERTION_ERROR: return 1; case LOAD_BUILD_CLASS: @@ -791,8 +599,6 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return (oparg & 1 ? 1 : 0) + 1; case DELETE_FAST: return 0; - case MAKE_CELL: - return 0; case DELETE_DEREF: return 0; case LOAD_FROM_DICT_OR_DEREF: @@ -827,10 +633,6 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return (oparg - 1) + 4; case MAP_ADD: return (oparg - 1) + 1; - case INSTRUMENTED_LOAD_SUPER_ATTR: - return ((oparg & 1) ? 1 : 0) + 1; - case LOAD_SUPER_ATTR: - return ((oparg & 1) ? 1 : 0) + 1; case LOAD_SUPER_METHOD: return ((oparg & 1) ? 1 : 0) + 1; case LOAD_ZERO_SUPER_METHOD: @@ -853,24 +655,6 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return ((oparg & 1) ? 1 : 0) + 1; case LOAD_ATTR_INSTANCE_VALUE: return (oparg & 1 ? 1 : 0) + 1; - case LOAD_ATTR_MODULE: - return ((oparg & 1) ? 1 : 0) + 1; - case LOAD_ATTR_WITH_HINT: - return ((oparg & 1) ? 1 : 0) + 1; - case LOAD_ATTR_SLOT: - return ((oparg & 1) ? 1 : 0) + 1; - case LOAD_ATTR_CLASS: - return ((oparg & 1) ? 
1 : 0) + 1; - case LOAD_ATTR_PROPERTY: - return 1; - case LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN: - return 1; - case STORE_ATTR_INSTANCE_VALUE: - return 0; - case STORE_ATTR_WITH_HINT: - return 0; - case STORE_ATTR_SLOT: - return 0; case COMPARE_OP: return 1; case COMPARE_OP_FLOAT: @@ -887,32 +671,16 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 2; case CHECK_EXC_MATCH: return 2; - case IMPORT_NAME: - return 1; - case IMPORT_FROM: - return 2; - case JUMP_FORWARD: - return 0; - case JUMP_BACKWARD: - return 0; case JUMP: return 0; case JUMP_NO_INTERRUPT: return 0; - case ENTER_EXECUTOR: - return 0; - case POP_JUMP_IF_FALSE: - return 0; - case POP_JUMP_IF_TRUE: - return 0; case IS_NONE: return 1; case POP_JUMP_IF_NONE: return 0; case POP_JUMP_IF_NOT_NONE: return 0; - case JUMP_BACKWARD_NO_INTERRUPT: - return 0; case GET_LEN: return 2; case MATCH_CLASS: @@ -927,10 +695,6 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 1; case GET_YIELD_FROM_ITER: return 1; - case FOR_ITER: - return 2; - case INSTRUMENTED_FOR_ITER: - return 0; case _ITER_CHECK_LIST: return 1; case _IS_ITER_EXHAUSTED_LIST: @@ -955,12 +719,6 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 2; case FOR_ITER_RANGE: return 2; - case FOR_ITER_GEN: - return 2; - case BEFORE_ASYNC_WITH: - return 2; - case BEFORE_WITH: - return 2; case WITH_EXCEPT_START: return 5; case SETUP_FINALLY: @@ -973,70 +731,32 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 0; case PUSH_EXC_INFO: return 2; - case LOAD_ATTR_METHOD_WITH_VALUES: - return 2; - case LOAD_ATTR_METHOD_NO_DICT: - return 2; - case LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: - return 1; - case LOAD_ATTR_NONDESCRIPTOR_NO_DICT: - return 1; - case LOAD_ATTR_METHOD_LAZY_DICT: - return 2; - case KW_NAMES: - return 0; - case INSTRUMENTED_CALL: - return 0; - case CALL: - return 1; - case CALL_BOUND_METHOD_EXACT_ARGS: - return 1; - case CALL_PY_EXACT_ARGS: - return 1; - case CALL_PY_WITH_DEFAULTS: - return 
1; case CALL_NO_KW_TYPE_1: return 1; case CALL_NO_KW_STR_1: return 1; case CALL_NO_KW_TUPLE_1: return 1; - case CALL_NO_KW_ALLOC_AND_ENTER_INIT: - return 1; case EXIT_INIT_CHECK: return 0; - case CALL_BUILTIN_CLASS: - return 1; case CALL_NO_KW_BUILTIN_O: return 1; case CALL_NO_KW_BUILTIN_FAST: return 1; - case CALL_BUILTIN_FAST_WITH_KEYWORDS: - return 1; case CALL_NO_KW_LEN: return 1; case CALL_NO_KW_ISINSTANCE: return 1; - case CALL_NO_KW_LIST_APPEND: - return 1; case CALL_NO_KW_METHOD_DESCRIPTOR_O: return 1; - case CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS: - return 1; case CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS: return 1; case CALL_NO_KW_METHOD_DESCRIPTOR_FAST: return 1; - case INSTRUMENTED_CALL_FUNCTION_EX: - return 0; - case CALL_FUNCTION_EX: - return 1; case MAKE_FUNCTION: return 1; case SET_FUNCTION_ATTRIBUTE: return 1; - case RETURN_GENERATOR: - return 0; case BUILD_SLICE: return 1; case CONVERT_VALUE: @@ -1051,26 +771,6 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 1; case SWAP: return (oparg-2) + 2; - case INSTRUMENTED_INSTRUCTION: - return 0; - case INSTRUMENTED_JUMP_FORWARD: - return 0; - case INSTRUMENTED_JUMP_BACKWARD: - return 0; - case INSTRUMENTED_POP_JUMP_IF_TRUE: - return 0; - case INSTRUMENTED_POP_JUMP_IF_FALSE: - return 0; - case INSTRUMENTED_POP_JUMP_IF_NONE: - return 0; - case INSTRUMENTED_POP_JUMP_IF_NOT_NONE: - return 0; - case EXTENDED_ARG: - return 0; - case CACHE: - return 0; - case RESERVED: - return 0; case _POP_JUMP_IF_FALSE: return 0; case _POP_JUMP_IF_TRUE: @@ -1082,7 +782,7 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { case EXIT_TRACE: return 0; case INSERT: - return 0; + return oparg + 1; default: return -1; } From 1e61c49bed895c6ab2db18d96ef060a60b1f46fd Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Mon, 14 Aug 2023 20:05:07 +0800 Subject: [PATCH 41/48] and env var to block tests --- Lib/test/test_capi/test_misc.py | 2 +- Python/optimizer.c | 5 
++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index 836bebf3208db1..6fbfa80a39036c 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -2618,7 +2618,7 @@ def testfunc(it): with self.assertRaises(StopIteration): next(it) - +@unittest.skipIf(os.getenv("PYTHONUOPSOPTIMIZE") is None, "UOps optimization isn't enabled") class TestUopsOptimization(unittest.TestCase): def test_int_constant_propagation(self): diff --git a/Python/optimizer.c b/Python/optimizer.c index 3aaafd33113481..ff07926082e7da 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -698,7 +698,10 @@ uop_optimize( return trace_length; } OBJECT_STAT_INC(optimization_traces_created); - trace_length = _Py_uop_analyze_and_optimize(code, trace, trace_length, curr_stackentries); + char *uop_optimize = Py_GETENV("PYTHONUOPSOPTIMIZE"); + if (uop_optimize != NULL && *uop_optimize >= '0') { + trace_length = _Py_uop_analyze_and_optimize(code, trace, trace_length, curr_stackentries); + } _PyUOpExecutorObject *executor = PyObject_NewVar(_PyUOpExecutorObject, &UOpExecutor_Type, trace_length); if (executor == NULL) { return -1; From 6c24b493c879784936845b4a42611599840a2fdf Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Mon, 14 Aug 2023 20:14:30 +0800 Subject: [PATCH 42/48] regen again --- Include/internal/pycore_opcode_metadata.h | 300 ++++++++++++++++++++++ Python/abstract_interp_cases.c.h | 2 +- Python/executor_cases.c.h | 2 +- Tools/cases_generator/generate_cases.py | 2 +- 4 files changed, 303 insertions(+), 3 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 5c02dff63b0a3a..01e2adc3502cbb 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -65,6 +65,10 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { 
switch(opcode) { case NOP: return 0; + case RESUME: + return 0; + case INSTRUMENTED_RESUME: + return 0; case LOAD_CLOSURE: return 0; case LOAD_FAST_CHECK: @@ -73,20 +77,30 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 0; case LOAD_FAST_AND_CLEAR: return 0; + case LOAD_FAST_LOAD_FAST: + return 0; case LOAD_CONST: return 0; case STORE_FAST: return 1; case STORE_FAST_MAYBE_NULL: return 1; + case STORE_FAST_LOAD_FAST: + return 1; + case STORE_FAST_STORE_FAST: + return 2; case POP_TOP: return 1; case PUSH_NULL: return 0; case END_FOR: return 2; + case INSTRUMENTED_END_FOR: + return 2; case END_SEND: return 2; + case INSTRUMENTED_END_SEND: + return 2; case UNARY_NEGATIVE: return 1; case UNARY_NOT: @@ -157,6 +171,8 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 2; case BINARY_SUBSCR_DICT: return 2; + case BINARY_SUBSCR_GETITEM: + return 2; case LIST_APPEND: return (oparg-1) + 2; case SET_ADD: @@ -173,14 +189,40 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 1; case CALL_INTRINSIC_2: return 2; + case RAISE_VARARGS: + return oparg; + case INTERPRETER_EXIT: + return 1; + case RETURN_VALUE: + return 1; + case INSTRUMENTED_RETURN_VALUE: + return 1; + case RETURN_CONST: + return 0; + case INSTRUMENTED_RETURN_CONST: + return 0; case GET_AITER: return 1; case GET_ANEXT: return 1; case GET_AWAITABLE: return 1; + case SEND: + return 2; + case SEND_GEN: + return 2; + case INSTRUMENTED_YIELD_VALUE: + return 1; + case YIELD_VALUE: + return 1; case POP_EXCEPT: return 1; + case RERAISE: + return oparg + 1; + case END_ASYNC_FOR: + return 2; + case CLEANUP_THROW: + return 3; case LOAD_ASSERTION_ERROR: return 0; case LOAD_BUILD_CLASS: @@ -233,6 +275,8 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 0; case DELETE_FAST: return 0; + case MAKE_CELL: + return 0; case DELETE_DEREF: return 0; case LOAD_FROM_DICT_OR_DEREF: @@ -267,6 +311,10 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return (oparg - 1) 
+ 5; case MAP_ADD: return (oparg - 1) + 3; + case INSTRUMENTED_LOAD_SUPER_ATTR: + return 3; + case LOAD_SUPER_ATTR: + return 3; case LOAD_SUPER_METHOD: return 3; case LOAD_ZERO_SUPER_METHOD: @@ -289,6 +337,24 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 1; case LOAD_ATTR_INSTANCE_VALUE: return 1; + case LOAD_ATTR_MODULE: + return 1; + case LOAD_ATTR_WITH_HINT: + return 1; + case LOAD_ATTR_SLOT: + return 1; + case LOAD_ATTR_CLASS: + return 1; + case LOAD_ATTR_PROPERTY: + return 1; + case LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN: + return 1; + case STORE_ATTR_INSTANCE_VALUE: + return 2; + case STORE_ATTR_WITH_HINT: + return 2; + case STORE_ATTR_SLOT: + return 2; case COMPARE_OP: return 2; case COMPARE_OP_FLOAT: @@ -305,16 +371,32 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 2; case CHECK_EXC_MATCH: return 2; + case IMPORT_NAME: + return 2; + case IMPORT_FROM: + return 1; + case JUMP_FORWARD: + return 0; + case JUMP_BACKWARD: + return 0; case JUMP: return 0; case JUMP_NO_INTERRUPT: return 0; + case ENTER_EXECUTOR: + return 0; + case POP_JUMP_IF_FALSE: + return 1; + case POP_JUMP_IF_TRUE: + return 1; case IS_NONE: return 1; case POP_JUMP_IF_NONE: return 1; case POP_JUMP_IF_NOT_NONE: return 1; + case JUMP_BACKWARD_NO_INTERRUPT: + return 0; case GET_LEN: return 1; case MATCH_CLASS: @@ -329,6 +411,10 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 1; case GET_YIELD_FROM_ITER: return 1; + case FOR_ITER: + return 1; + case INSTRUMENTED_FOR_ITER: + return 0; case _ITER_CHECK_LIST: return 1; case _IS_ITER_EXHAUSTED_LIST: @@ -353,6 +439,12 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 1; case FOR_ITER_RANGE: return 1; + case FOR_ITER_GEN: + return 1; + case BEFORE_ASYNC_WITH: + return 1; + case BEFORE_WITH: + return 1; case WITH_EXCEPT_START: return 4; case SETUP_FINALLY: @@ -365,32 +457,70 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 0; case PUSH_EXC_INFO: return 1; + case 
LOAD_ATTR_METHOD_WITH_VALUES: + return 1; + case LOAD_ATTR_METHOD_NO_DICT: + return 1; + case LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: + return 1; + case LOAD_ATTR_NONDESCRIPTOR_NO_DICT: + return 1; + case LOAD_ATTR_METHOD_LAZY_DICT: + return 1; + case KW_NAMES: + return 0; + case INSTRUMENTED_CALL: + return 0; + case CALL: + return oparg + 2; + case CALL_BOUND_METHOD_EXACT_ARGS: + return oparg + 2; + case CALL_PY_EXACT_ARGS: + return oparg + 2; + case CALL_PY_WITH_DEFAULTS: + return oparg + 2; case CALL_NO_KW_TYPE_1: return oparg + 2; case CALL_NO_KW_STR_1: return oparg + 2; case CALL_NO_KW_TUPLE_1: return oparg + 2; + case CALL_NO_KW_ALLOC_AND_ENTER_INIT: + return oparg + 2; case EXIT_INIT_CHECK: return 1; + case CALL_BUILTIN_CLASS: + return oparg + 2; case CALL_NO_KW_BUILTIN_O: return oparg + 2; case CALL_NO_KW_BUILTIN_FAST: return oparg + 2; + case CALL_BUILTIN_FAST_WITH_KEYWORDS: + return oparg + 2; case CALL_NO_KW_LEN: return oparg + 2; case CALL_NO_KW_ISINSTANCE: return oparg + 2; + case CALL_NO_KW_LIST_APPEND: + return oparg + 2; case CALL_NO_KW_METHOD_DESCRIPTOR_O: return oparg + 2; + case CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS: + return oparg + 2; case CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS: return oparg + 2; case CALL_NO_KW_METHOD_DESCRIPTOR_FAST: return oparg + 2; + case INSTRUMENTED_CALL_FUNCTION_EX: + return 0; + case CALL_FUNCTION_EX: + return ((oparg & 1) ? 1 : 0) + 3; case MAKE_FUNCTION: return 1; case SET_FUNCTION_ATTRIBUTE: return 2; + case RETURN_GENERATOR: + return 0; case BUILD_SLICE: return ((oparg == 3) ? 
1 : 0) + 2; case CONVERT_VALUE: @@ -405,6 +535,26 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 2; case SWAP: return (oparg-2) + 2; + case INSTRUMENTED_INSTRUCTION: + return 0; + case INSTRUMENTED_JUMP_FORWARD: + return 0; + case INSTRUMENTED_JUMP_BACKWARD: + return 0; + case INSTRUMENTED_POP_JUMP_IF_TRUE: + return 0; + case INSTRUMENTED_POP_JUMP_IF_FALSE: + return 0; + case INSTRUMENTED_POP_JUMP_IF_NONE: + return 0; + case INSTRUMENTED_POP_JUMP_IF_NOT_NONE: + return 0; + case EXTENDED_ARG: + return 0; + case CACHE: + return 0; + case RESERVED: + return 0; case _POP_JUMP_IF_FALSE: return 1; case _POP_JUMP_IF_TRUE: @@ -431,6 +581,10 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { switch(opcode) { case NOP: return 0; + case RESUME: + return 0; + case INSTRUMENTED_RESUME: + return 0; case LOAD_CLOSURE: return 1; case LOAD_FAST_CHECK: @@ -439,20 +593,30 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 1; case LOAD_FAST_AND_CLEAR: return 1; + case LOAD_FAST_LOAD_FAST: + return 2; case LOAD_CONST: return 1; case STORE_FAST: return 0; case STORE_FAST_MAYBE_NULL: return 0; + case STORE_FAST_LOAD_FAST: + return 1; + case STORE_FAST_STORE_FAST: + return 0; case POP_TOP: return 0; case PUSH_NULL: return 1; case END_FOR: return 0; + case INSTRUMENTED_END_FOR: + return 0; case END_SEND: return 1; + case INSTRUMENTED_END_SEND: + return 1; case UNARY_NEGATIVE: return 1; case UNARY_NOT: @@ -523,6 +687,8 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 1; case BINARY_SUBSCR_DICT: return 1; + case BINARY_SUBSCR_GETITEM: + return 1; case LIST_APPEND: return (oparg-1) + 1; case SET_ADD: @@ -539,14 +705,40 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 1; case CALL_INTRINSIC_2: return 1; + case RAISE_VARARGS: + return 0; + case INTERPRETER_EXIT: + return 0; + case RETURN_VALUE: + return 0; + case INSTRUMENTED_RETURN_VALUE: + return 0; + case RETURN_CONST: + return 0; + case 
INSTRUMENTED_RETURN_CONST: + return 0; case GET_AITER: return 1; case GET_ANEXT: return 2; case GET_AWAITABLE: return 1; + case SEND: + return 2; + case SEND_GEN: + return 2; + case INSTRUMENTED_YIELD_VALUE: + return 1; + case YIELD_VALUE: + return 1; case POP_EXCEPT: return 0; + case RERAISE: + return oparg; + case END_ASYNC_FOR: + return 0; + case CLEANUP_THROW: + return 2; case LOAD_ASSERTION_ERROR: return 1; case LOAD_BUILD_CLASS: @@ -599,6 +791,8 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return (oparg & 1 ? 1 : 0) + 1; case DELETE_FAST: return 0; + case MAKE_CELL: + return 0; case DELETE_DEREF: return 0; case LOAD_FROM_DICT_OR_DEREF: @@ -633,6 +827,10 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return (oparg - 1) + 4; case MAP_ADD: return (oparg - 1) + 1; + case INSTRUMENTED_LOAD_SUPER_ATTR: + return ((oparg & 1) ? 1 : 0) + 1; + case LOAD_SUPER_ATTR: + return ((oparg & 1) ? 1 : 0) + 1; case LOAD_SUPER_METHOD: return ((oparg & 1) ? 1 : 0) + 1; case LOAD_ZERO_SUPER_METHOD: @@ -655,6 +853,24 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return ((oparg & 1) ? 1 : 0) + 1; case LOAD_ATTR_INSTANCE_VALUE: return (oparg & 1 ? 1 : 0) + 1; + case LOAD_ATTR_MODULE: + return ((oparg & 1) ? 1 : 0) + 1; + case LOAD_ATTR_WITH_HINT: + return ((oparg & 1) ? 1 : 0) + 1; + case LOAD_ATTR_SLOT: + return ((oparg & 1) ? 1 : 0) + 1; + case LOAD_ATTR_CLASS: + return ((oparg & 1) ? 
1 : 0) + 1; + case LOAD_ATTR_PROPERTY: + return 1; + case LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN: + return 1; + case STORE_ATTR_INSTANCE_VALUE: + return 0; + case STORE_ATTR_WITH_HINT: + return 0; + case STORE_ATTR_SLOT: + return 0; case COMPARE_OP: return 1; case COMPARE_OP_FLOAT: @@ -671,16 +887,32 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 2; case CHECK_EXC_MATCH: return 2; + case IMPORT_NAME: + return 1; + case IMPORT_FROM: + return 2; + case JUMP_FORWARD: + return 0; + case JUMP_BACKWARD: + return 0; case JUMP: return 0; case JUMP_NO_INTERRUPT: return 0; + case ENTER_EXECUTOR: + return 0; + case POP_JUMP_IF_FALSE: + return 0; + case POP_JUMP_IF_TRUE: + return 0; case IS_NONE: return 1; case POP_JUMP_IF_NONE: return 0; case POP_JUMP_IF_NOT_NONE: return 0; + case JUMP_BACKWARD_NO_INTERRUPT: + return 0; case GET_LEN: return 2; case MATCH_CLASS: @@ -695,6 +927,10 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 1; case GET_YIELD_FROM_ITER: return 1; + case FOR_ITER: + return 2; + case INSTRUMENTED_FOR_ITER: + return 0; case _ITER_CHECK_LIST: return 1; case _IS_ITER_EXHAUSTED_LIST: @@ -719,6 +955,12 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 2; case FOR_ITER_RANGE: return 2; + case FOR_ITER_GEN: + return 2; + case BEFORE_ASYNC_WITH: + return 2; + case BEFORE_WITH: + return 2; case WITH_EXCEPT_START: return 5; case SETUP_FINALLY: @@ -731,32 +973,70 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 0; case PUSH_EXC_INFO: return 2; + case LOAD_ATTR_METHOD_WITH_VALUES: + return 2; + case LOAD_ATTR_METHOD_NO_DICT: + return 2; + case LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: + return 1; + case LOAD_ATTR_NONDESCRIPTOR_NO_DICT: + return 1; + case LOAD_ATTR_METHOD_LAZY_DICT: + return 2; + case KW_NAMES: + return 0; + case INSTRUMENTED_CALL: + return 0; + case CALL: + return 1; + case CALL_BOUND_METHOD_EXACT_ARGS: + return 1; + case CALL_PY_EXACT_ARGS: + return 1; + case CALL_PY_WITH_DEFAULTS: + return 
1; case CALL_NO_KW_TYPE_1: return 1; case CALL_NO_KW_STR_1: return 1; case CALL_NO_KW_TUPLE_1: return 1; + case CALL_NO_KW_ALLOC_AND_ENTER_INIT: + return 1; case EXIT_INIT_CHECK: return 0; + case CALL_BUILTIN_CLASS: + return 1; case CALL_NO_KW_BUILTIN_O: return 1; case CALL_NO_KW_BUILTIN_FAST: return 1; + case CALL_BUILTIN_FAST_WITH_KEYWORDS: + return 1; case CALL_NO_KW_LEN: return 1; case CALL_NO_KW_ISINSTANCE: return 1; + case CALL_NO_KW_LIST_APPEND: + return 1; case CALL_NO_KW_METHOD_DESCRIPTOR_O: return 1; + case CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS: + return 1; case CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS: return 1; case CALL_NO_KW_METHOD_DESCRIPTOR_FAST: return 1; + case INSTRUMENTED_CALL_FUNCTION_EX: + return 0; + case CALL_FUNCTION_EX: + return 1; case MAKE_FUNCTION: return 1; case SET_FUNCTION_ATTRIBUTE: return 1; + case RETURN_GENERATOR: + return 0; case BUILD_SLICE: return 1; case CONVERT_VALUE: @@ -771,6 +1051,26 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 1; case SWAP: return (oparg-2) + 2; + case INSTRUMENTED_INSTRUCTION: + return 0; + case INSTRUMENTED_JUMP_FORWARD: + return 0; + case INSTRUMENTED_JUMP_BACKWARD: + return 0; + case INSTRUMENTED_POP_JUMP_IF_TRUE: + return 0; + case INSTRUMENTED_POP_JUMP_IF_FALSE: + return 0; + case INSTRUMENTED_POP_JUMP_IF_NONE: + return 0; + case INSTRUMENTED_POP_JUMP_IF_NOT_NONE: + return 0; + case EXTENDED_ARG: + return 0; + case CACHE: + return 0; + case RESERVED: + return 0; case _POP_JUMP_IF_FALSE: return 0; case _POP_JUMP_IF_TRUE: diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 6bfcf534646b1e..bd8ea4b6854edd 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -1,6 +1,6 @@ // This file is generated by Tools/cases_generator/generate_cases.py // from: -// Python/bytecodes.c +// Python\bytecodes.c // Do not edit! 
case NOP: { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 0e9b001b422eb6..d802c1a7c02c8c 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1,6 +1,6 @@ // This file is generated by Tools/cases_generator/generate_cases.py // from: -// Python/bytecodes.c +// Python\bytecodes.c // Do not edit! case NOP: { diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 3b0e21f245b386..ccd89c568624d2 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -135,7 +135,7 @@ def effect_str(effects: list[StackEffect]) -> str: pushed: str | None match thing: case parsing.InstDef(): - if thing.kind == "instr" or self.instrs[thing.name].is_viable_uop(): + if thing.kind != "op" or self.instrs[thing.name].is_viable_uop(): instr = self.instrs[thing.name] popped = effect_str(instr.input_effects) pushed = effect_str(instr.output_effects) From 2be404d4ea449b458eb126f221c32e81f72cfbd8 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Mon, 14 Aug 2023 20:31:51 +0800 Subject: [PATCH 43/48] fix generated files --- Python/abstract_interp_cases.c.h | 2 +- Python/executor_cases.c.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index bd8ea4b6854edd..6bfcf534646b1e 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -1,6 +1,6 @@ // This file is generated by Tools/cases_generator/generate_cases.py // from: -// Python\bytecodes.c +// Python/bytecodes.c // Do not edit! 
case NOP: { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index d802c1a7c02c8c..0e9b001b422eb6 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1,6 +1,6 @@ // This file is generated by Tools/cases_generator/generate_cases.py // from: -// Python\bytecodes.c +// Python/bytecodes.c // Do not edit! case NOP: { From 29e255d388bcc15efb89fa3bb14425daa9c296d7 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 15 Aug 2023 12:39:24 +0800 Subject: [PATCH 44/48] Address review --- Python/executor_cases.c.h | 12 ++++++------ Python/optimizer.c | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 0e9b001b422eb6..ec988a7b8e19cc 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2736,14 +2736,14 @@ case INSERT: { PyObject *top; - PyObject **stuff; + PyObject **stuff1; + PyObject **stuff2; top = stack_pointer[-1]; - stuff = stack_pointer - 1 - oparg; - stuff = stack_pointer - oparg; - // Inserts TOS at position specified by oparg - PyObject *tos = TOP(); + stuff1 = stack_pointer - 1 - oparg; + stuff2 = stack_pointer - oparg; + // Inserts TOS at position specified by oparg; for (int i = 1; i < oparg + 1; i++) { - stack_pointer[i] = stack_pointer[i - 1]; + stack_pointer[-i] = stack_pointer[-(i - 1)]; } stack_pointer[-1 - oparg] = top; break; diff --git a/Python/optimizer.c b/Python/optimizer.c index ff07926082e7da..d3ac2424038ef9 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -699,7 +699,7 @@ uop_optimize( } OBJECT_STAT_INC(optimization_traces_created); char *uop_optimize = Py_GETENV("PYTHONUOPSOPTIMIZE"); - if (uop_optimize != NULL && *uop_optimize >= '0') { + if (uop_optimize != NULL && *uop_optimize > '0') { trace_length = _Py_uop_analyze_and_optimize(code, trace, trace_length, curr_stackentries); } _PyUOpExecutorObject *executor = PyObject_NewVar(_PyUOpExecutorObject, 
&UOpExecutor_Type, trace_length); From 3c441176a5eb8cbe4d161c037b70d1ccfe22f1c6 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 15 Aug 2023 13:30:12 +0800 Subject: [PATCH 45/48] fix up INSERT --- Python/bytecodes.c | 7 +++---- Python/executor_cases.c.h | 4 ---- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 12c5948af7a8e7..9004472e88f2b7 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3743,11 +3743,10 @@ dummy_func( return frame; } - op(INSERT, (stuff[oparg], top -- top, stuff[oparg])) { - // Inserts TOS at position specified by oparg - PyObject *tos = TOP(); + op(INSERT, (unused[oparg], top -- top, unused[oparg])) { + // Inserts TOS at position specified by oparg; for (int i = 1; i < oparg + 1; i++) { - stack_pointer[i] = stack_pointer[i - 1]; + stack_pointer[-i] = stack_pointer[-(i - 1)]; } } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index ec988a7b8e19cc..5caf6a52ede352 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2736,11 +2736,7 @@ case INSERT: { PyObject *top; - PyObject **stuff1; - PyObject **stuff2; top = stack_pointer[-1]; - stuff1 = stack_pointer - 1 - oparg; - stuff2 = stack_pointer - oparg; // Inserts TOS at position specified by oparg; for (int i = 1; i < oparg + 1; i++) { stack_pointer[-i] = stack_pointer[-(i - 1)]; From b758b470b44166567f77d5d60d284ecc3d2c115e Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 15 Aug 2023 13:34:43 +0800 Subject: [PATCH 46/48] remove experimental parts --- Lib/test/test_capi/test_misc.py | 25 - Python/optimizer_analysis.c | 1046 ------------------------------- 2 files changed, 1071 deletions(-) diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index 6fbfa80a39036c..c81212202d9ef2 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ 
-2618,31 +2618,6 @@ def testfunc(it): with self.assertRaises(StopIteration): next(it) -@unittest.skipIf(os.getenv("PYTHONUOPSOPTIMIZE") is None, "UOps optimization isn't enabled") -class TestUopsOptimization(unittest.TestCase): - - def test_int_constant_propagation(self): - def testfunc(loops): - num = 0 - while num < loops: - x = 0 - y = 1 - z = 2 - a = x + y + z + x + y + z + x + y + z - num += 1 - return a - - opt = _testinternalcapi.get_uop_optimizer() - res = None - with temporary_optimizer(opt): - res = testfunc(3) - - ex = get_first_executor(testfunc) - self.assertIsNotNone(ex) - self.assertEqual(res, 9) - binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - self.assertEqual(len(binop_count), 1) - if __name__ == "__main__": unittest.main() diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 5cc9312ca1165d..e48e018052c712 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -13,701 +13,6 @@ #include #include "pycore_optimizer.h" -#define PARTITION_DEBUG 1 - -#define STATIC 0 -#define DYNAMIC 1 - -#define OVERALLOCATE_FACTOR 2 - -#ifdef Py_DEBUG -#define DPRINTF(level, ...) \ - if (lltrace >= (level)) { printf(__VA_ARGS__); } -#else -#define DPRINTF(level, ...) 
-#endif - -// TYPENODE is a tagged pointer that uses the last 2 LSB as the tag -#define _Py_PARTITIONNODE_t uintptr_t - -// PARTITIONNODE Tags -typedef enum _Py_TypeNodeTags { - // Node is unused - TYPE_NULL = 0, - // TYPE_ROOT_POSITIVE can point to a root struct or be a NULL - TYPE_ROOT= 1, - // TYPE_REF points to a TYPE_ROOT or a TYPE_REF - TYPE_REF = 2, -} _Py_TypeNodeTags; - -typedef struct _Py_PartitionRootNode { - PyObject_HEAD - // For partial evaluation - // 0 - static - // 1 - dynamic - uint8_t static_or_dynamic; - PyObject *const_val; - // For types (TODO) -} _Py_PartitionRootNode; - -static void -partitionnode_dealloc(PyObject *o) -{ - _Py_PartitionRootNode *self = (_Py_PartitionRootNode *)o; - Py_CLEAR(self->const_val); - Py_TYPE(self)->tp_free(o); -} - -static PyTypeObject _Py_PartitionRootNode_Type = { - PyVarObject_HEAD_INIT(&PyType_Type, 0) - .tp_name = "uops abstract interpreter's root node", - .tp_basicsize = sizeof(_Py_PartitionRootNode), - .tp_dealloc = partitionnode_dealloc, - .tp_free = PyObject_Free, - .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION -}; - -static inline _Py_TypeNodeTags -partitionnode_get_tag(_Py_PARTITIONNODE_t node) -{ - return node & 0b11; -} - -static inline _Py_PARTITIONNODE_t -partitionnode_clear_tag(_Py_PARTITIONNODE_t node) -{ - return node & (~(uintptr_t)(0b11)); -} - -// static_or_dynamic -// 0 - static -// 1 - dynamic -// If static, const_value must be set! 
-static inline _Py_PARTITIONNODE_t -partitionnode_make_root(uint8_t static_or_dynamic, PyObject *const_val) -{ - _Py_PartitionRootNode *root = PyObject_New(_Py_PartitionRootNode, &_Py_PartitionRootNode_Type); - if (root == NULL) { - return 0; - } - root->static_or_dynamic = static_or_dynamic; - root->const_val = Py_NewRef(const_val); - return (_Py_PARTITIONNODE_t)root | TYPE_ROOT; -} - -static inline _Py_PARTITIONNODE_t -partitionnode_make_ref(_Py_PARTITIONNODE_t *node) -{ - return partitionnode_clear_tag((_Py_PARTITIONNODE_t)node) | TYPE_REF; -} - - -static const _Py_PARTITIONNODE_t PARTITIONNODE_NULLROOT = (_Py_PARTITIONNODE_t)_Py_NULL | TYPE_ROOT; - -// Tier 2 types meta interpreter -typedef struct _Py_UOpsAbstractInterpContext { - PyObject_HEAD - // The following are abstract stack and locals. - // points to one element after the abstract stack - _Py_PARTITIONNODE_t *stack_pointer; - int stack_len; - _Py_PARTITIONNODE_t *stack; - int locals_len; - _Py_PARTITIONNODE_t *locals; - - // Indicates whether the stack entry is real or virtualised. - // true - virtual false - real - bool *stack_virtual_or_real; -} _Py_UOpsAbstractInterpContext; - -static void -abstractinterp_dealloc(PyObject *o) -{ - _Py_UOpsAbstractInterpContext *self = (_Py_UOpsAbstractInterpContext *)o; - // Traverse all nodes and decref the root objects (if they are not NULL). - // Note: stack is after locals so this is safe - int total = self->locals_len + self->stack_len; - for (int i = 0; i < total; i++) { - _Py_PARTITIONNODE_t node = self->locals[i]; - if (partitionnode_get_tag(node) == TYPE_ROOT) { - Py_XDECREF(partitionnode_clear_tag(node)); - } - } - PyMem_Free(self->locals); - // No need to free stack because it is allocated together with the locals. 
- Py_TYPE(self)->tp_free((PyObject *)self); -} - -static PyTypeObject _Py_UOpsAbstractInterpContext_Type = { - PyVarObject_HEAD_INIT(&PyType_Type, 0) - .tp_name = "uops abstract interpreter's context", - .tp_basicsize = sizeof(_Py_UOpsAbstractInterpContext), - .tp_dealloc = abstractinterp_dealloc, - .tp_free = PyObject_Free, - .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION -}; - -_Py_UOpsAbstractInterpContext * -_Py_UOpsAbstractInterpContext_New(int stack_len, int locals_len, int curr_stacklen) -{ - _Py_UOpsAbstractInterpContext *self = (_Py_UOpsAbstractInterpContext *)PyType_GenericAlloc( - (PyTypeObject *)&_Py_UOpsAbstractInterpContext_Type, 0); - if (self == NULL) { - return NULL; - } - - // Setup - self->stack_len = stack_len; - self->locals_len = locals_len; - - _Py_PARTITIONNODE_t *locals_with_stack = PyMem_New(_Py_PARTITIONNODE_t, (locals_len + stack_len)); - if (locals_with_stack == NULL) { - Py_DECREF(self); - return NULL; - } - - bool *virtual_or_real = PyMem_New(bool, stack_len); - if (virtual_or_real == NULL) { - Py_DECREF(self); - PyMem_Free(locals_with_stack); - return NULL; - } - - for (int i = 0; i < (locals_len + stack_len); i++) { - locals_with_stack[i] = PARTITIONNODE_NULLROOT; - } - - for (int i = 0; i < stack_len; i++) { - virtual_or_real[i] = false; - } - - self->locals = locals_with_stack; - self->stack = locals_with_stack + locals_len; - self->stack_pointer = self->stack + curr_stacklen; - - self->stack_virtual_or_real = virtual_or_real; - - return self; -} - -#if PARTITION_DEBUG -static void print_ctx(_Py_UOpsAbstractInterpContext *ctx); -#endif - -static _Py_PARTITIONNODE_t * -partitionnode_get_rootptr(_Py_PARTITIONNODE_t *ref) -{ - _Py_TypeNodeTags tag = partitionnode_get_tag(*ref); - while (tag != TYPE_ROOT) { - ref = (_Py_PARTITIONNODE_t *)(partitionnode_clear_tag(*ref)); - tag = partitionnode_get_tag(*ref); - } - return ref; -} - -/** - * @brief Checks if two nodes are in the same partition. 
-*/ -static bool -partitionnode_is_same_partition(_Py_PARTITIONNODE_t *x, _Py_PARTITIONNODE_t *y) -{ - return partitionnode_get_rootptr(x) == partitionnode_get_rootptr(y); -} - -/** - * @brief Performs SET operation. dst tree becomes part of src tree - * - * If src_is_new is set, src is interpreted as a TYPE_ROOT - * not part of the type_context. Otherwise, it is interpreted as a pointer - * to a _Py_PARTITIONNODE_t. - * - * If src_is_new: - * Overwrites the root of the dst tree with the src node - * else: - * Makes the root of the dst tree a TYPE_REF to src - * -*/ -static void -partitionnode_set(_Py_PARTITIONNODE_t *src, _Py_PARTITIONNODE_t *dst, bool src_is_new) -{ - { - -#ifdef Py_DEBUG - // If `src_is_new` is set: - // - `src` doesn't belong inside the type context yet. - // - `src` has to be a TYPE_ROOT - // - `src` is to be interpreted as a _Py_TYPENODE_t - if (src_is_new) { - assert(partitionnode_get_tag(*src) == TYPE_ROOT); - } -#endif - - // This prevents cycles from forming - if (!src_is_new && partitionnode_is_same_partition(src, dst)) { - return; - } - - _Py_TypeNodeTags tag = partitionnode_get_tag(*dst); - switch (tag) { - case TYPE_ROOT: { - _Py_PARTITIONNODE_t old_root = partitionnode_clear_tag(*dst); - Py_XDECREF(old_root); - if (!src_is_new) { - // Make dst a reference to src - *dst = partitionnode_make_ref(src); - break; - } - // Make dst the src - *dst = *src; - break; - } - case TYPE_REF: { - _Py_PARTITIONNODE_t *rootptr = partitionnode_get_rootptr(dst); - _Py_PARTITIONNODE_t old_root = partitionnode_clear_tag(*rootptr); - Py_XDECREF(old_root); - if (!src_is_new) { - // Traverse up to the root of dst, make root a reference to src - *rootptr = partitionnode_make_ref(src); - break; - } - // Make root of dst the src - *rootptr = *src; - break; - } - default: - Py_UNREACHABLE(); - } - } -} - - -/** - * @brief Performs OVERWRITE operation. 
dst node gets overwritten by src node - * - * If src_is_new is set, src is interpreted as a TYPE_ROOT - * not part of the ctx. Otherwise, it is interpreted as a pointer - * to a _Py_PARTITIONNODE_t. - * - * If src_is_new: - * Removes dst node from its tree (+fixes all the references to dst) - * Overwrite the dst node with the src node - * else: - * Removes dst node from its tree (+fixes all the references to dst) - * Makes the root of the dst tree a TYPE_REF to src - * -*/ -static void -partitionnode_overwrite(_Py_UOpsAbstractInterpContext *ctx, - _Py_PARTITIONNODE_t *src, _Py_PARTITIONNODE_t *dst, bool src_is_new) -{ -#ifdef Py_DEBUG - if (src_is_new) { - assert(partitionnode_get_tag((_Py_PARTITIONNODE_t)src) == TYPE_ROOT); - } -#endif - - // This prevents cycles from forming - if (!src_is_new && partitionnode_is_same_partition(src, dst)) { - return; - } - - _Py_TypeNodeTags tag = partitionnode_get_tag(*dst); - switch (tag) { - case TYPE_ROOT: { - - _Py_PARTITIONNODE_t old_dst = *dst; - if (!src_is_new) { - // Make dst a reference to src - *dst = partitionnode_make_ref(src); - assert(partitionnode_clear_tag(*dst) != (_Py_PARTITIONNODE_t)_Py_NULL); - } - else { - // Make dst the src - *dst = (_Py_PARTITIONNODE_t)src; - } - - - /* Pick one child of dst and make that the new root of the dst tree */ - - // Children of dst will have this form - _Py_PARTITIONNODE_t child_test = partitionnode_make_ref( - (_Py_PARTITIONNODE_t *)partitionnode_clear_tag((_Py_PARTITIONNODE_t)dst)); - // Will be initialised to the first child we find - _Py_PARTITIONNODE_t *new_root = (_Py_PARTITIONNODE_t *)NULL; - - // Search locals for children - int nlocals = ctx->locals_len; - for (int i = 0; i < nlocals; i++) { - _Py_PARTITIONNODE_t *node_ptr = &(ctx->locals[i]); - if (*node_ptr == child_test) { - if (new_root == NULL) { - // First child encountered! 
initialise root - new_root = node_ptr; - *node_ptr = old_dst; - Py_XINCREF(partitionnode_clear_tag(old_dst)); - } - else { - // Not the first child encounted, point it to the new root - *node_ptr = partitionnode_make_ref(new_root); - } - } - } - - // Search stack for children - int nstack = ctx->stack_len; - for (int i = 0; i < nstack; i++) { - _Py_PARTITIONNODE_t *node_ptr = &(ctx->stack[i]); - if (*node_ptr == child_test) { - if (new_root == NULL) { - // First child encountered! initialise root - new_root = node_ptr; - *node_ptr = old_dst; - Py_XINCREF(partitionnode_clear_tag(old_dst)); - } - else { - // Not the first child encounted, point it to the new root - *node_ptr = partitionnode_make_ref(new_root); - } - } - } - - // This ndoe is no longer referencing the old root. - Py_XDECREF(partitionnode_clear_tag(old_dst)); - break; - } - case TYPE_REF: { - - _Py_PARTITIONNODE_t old_dst = *dst; - // Make dst a reference to src - if (!src_is_new) { - // Make dst a reference to src - *dst = partitionnode_make_ref(src); - assert(partitionnode_get_tag(*dst) == TYPE_REF); - assert(partitionnode_clear_tag(*dst) != (_Py_PARTITIONNODE_t)_Py_NULL); - } - else { - // Make dst the src - *dst = (_Py_PARTITIONNODE_t)src; - } - - /* Make all child of src be a reference to the parent of dst */ - - // Children of dst will have this form - _Py_PARTITIONNODE_t child_test = partitionnode_make_ref( - (_Py_PARTITIONNODE_t *)partitionnode_clear_tag((_Py_PARTITIONNODE_t)dst)); - - // Search locals for children - int nlocals = ctx->locals_len; - for (int i = 0; i < nlocals; i++) { - _Py_PARTITIONNODE_t *node_ptr = &(ctx->locals[i]); - if (*node_ptr == child_test) { - // Is a child of dst. Point it to the parent of dst - *node_ptr = old_dst; - } - } - - // Search stack for children - int nstack = ctx->stack_len; - for (int i = 0; i < nstack; i++) { - _Py_PARTITIONNODE_t *node_ptr = &(ctx->stack[i]); - if (*node_ptr == child_test) { - // Is a child of dst. 
Point it to the parent of dst - *node_ptr = old_dst; - } - } - break; - } - default: - Py_UNREACHABLE(); - } -} - -#ifdef Py_DEBUG - -static void -print_ctx_node(_Py_UOpsAbstractInterpContext *ctx, int i, bool is_printing_stack, int nstack_use, int nstack) -{ - char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG"); - int lltrace = 0; - if (uop_debug != NULL && *uop_debug >= '0') { - lltrace = *uop_debug - '0'; // TODO: Parse an int and all that - } - - bool is_local = false; - bool is_stack = false; - - int parent_idx = -1; - - _Py_PARTITIONNODE_t *node = is_printing_stack ? &ctx->stack[i] : &ctx->locals[i]; - _Py_PARTITIONNODE_t tag = partitionnode_get_tag(*node); - - _Py_PARTITIONNODE_t *root = partitionnode_get_rootptr(node); - - if (is_printing_stack) { - DPRINTF(3, "%s", i == nstack_use - 1 ? "." : " "); - } - - if (tag == TYPE_REF) { - _Py_PARTITIONNODE_t *parent = (_Py_PARTITIONNODE_t *)(partitionnode_clear_tag(*node)); - int local_index = (int)(parent - ctx->locals); - int stack_index = (int)(parent - ctx->stack); - is_local = local_index >= 0 && local_index < ctx->locals_len; - is_stack = stack_index >= 0 && stack_index < nstack; - parent_idx = is_local - ? local_index - : is_stack - ? stack_index - : -1; - } - - - _Py_PartitionRootNode *ptr = (_Py_PartitionRootNode *)partitionnode_clear_tag(*root); - DPRINTF(3, "%s:", - ptr == NULL ? "?" : (ptr->static_or_dynamic == STATIC ? "static" : "dynamic")); - if (lltrace >= 4 && ptr != NULL && ptr->static_or_dynamic == STATIC) { - PyObject_Print(ptr->const_val, stdout, 0); - } - - if (tag == TYPE_REF) { - const char *wher = is_local - ? "locals" - : is_stack - ? "stack" - : "const"; - DPRINTF(3, "->%s[%d]", wher, parent_idx); - } -} - -/** - * @brief Print the entries in the abstract interpreter context (along with locals). 
-*/ -static void -print_ctx(_Py_UOpsAbstractInterpContext *ctx) -{ - char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG"); - int lltrace = 0; - if (uop_debug != NULL && *uop_debug >= '0') { - lltrace = *uop_debug - '0'; // TODO: Parse an int and all that - } - - _Py_PARTITIONNODE_t *locals = ctx->locals; - _Py_PARTITIONNODE_t *stackptr = ctx->stack_pointer; - - int nstack_use = (int)(stackptr - ctx->stack); - int nstack = ctx->stack_len; - int nlocals = ctx->locals_len; - - DPRINTF(3, " Stack: %p: [", ctx->stack); - for (int i = 0; i < nstack; i++) { - print_ctx_node(ctx, i, true, nstack_use, nstack); - DPRINTF(3, " | "); - } - DPRINTF(3, "]\n"); - - DPRINTF(3, " Locals %p: [", locals); - for (int i = 0; i < nlocals; i++) { - print_ctx_node(ctx, i, false, nstack_use, nstack); - DPRINTF(3, " | "); - } - DPRINTF(3, "]\n"); -} -#endif - -static bool -partitionnode_is_static(_Py_PARTITIONNODE_t *node) -{ - _Py_PARTITIONNODE_t *root = partitionnode_get_rootptr(node); - _Py_PartitionRootNode *root_obj = (_Py_PartitionRootNode *)partitionnode_clear_tag(*root); - if (root_obj == _Py_NULL) { - return false; - } - return root_obj->static_or_dynamic == STATIC; -} - -// MUST BE GUARDED BY partitionnode_is_static BEFORE CALLING THIS -static inline PyObject * -get_const(_Py_PARTITIONNODE_t *node) -{ - assert(partitionnode_is_static(node)); - _Py_PARTITIONNODE_t *root = partitionnode_get_rootptr(node); - _Py_PartitionRootNode *root_obj = (_Py_PartitionRootNode * )partitionnode_clear_tag(*root); - return root_obj->const_val; -} - -// Hardcoded for now, @TODO autogenerate these from the DSL. 
-static inline bool -op_is_pure(int opcode, int oparg, _Py_PARTITIONNODE_t *locals) -{ - switch (opcode) { - case LOAD_CONST: - case _BINARY_OP_MULTIPLY_INT: - case _BINARY_OP_ADD_INT: - case _BINARY_OP_SUBTRACT_INT: - case _GUARD_BOTH_INT: - return true; - case LOAD_FAST: - return partitionnode_is_static(&locals[oparg]) && get_const(&locals[oparg]) != _Py_NULL; - default: - return false; - } -} - -static inline bool -op_is_jump(int opcode) -{ - return (opcode == _POP_JUMP_IF_FALSE || opcode == _POP_JUMP_IF_TRUE); -} - - -// Number the jump targets and the jump instructions with a unique (negative) ID. -// This replaces the instruction's opcode in the trace with their negative IDs. -// Aids relocation later when we need to recompute jumps after optimization passes. -static _PyUOpInstruction * -number_jumps_and_targets(_PyUOpInstruction *trace, int trace_len, int *max_id) -{ - int jump_and_target_count = 0; - int jump_and_target_id = -1; - for (int i = 0; i < trace_len; i++) { - if (op_is_jump(trace[i].opcode)) { - // 1 for the jump, 1 for its target - jump_and_target_count += 2; - } - } - - // +1 because 1-based indexing not zero based - _PyUOpInstruction *jump_id_to_instruction = PyMem_New(_PyUOpInstruction, jump_and_target_count + 1); - if (jump_id_to_instruction == NULL) { - return NULL; - } - - - for (int i = 0; i < trace_len; i++) { - if (op_is_jump(trace[i].opcode)) { - int target = trace[i].oparg; - int target_id = jump_and_target_id; - - // 1 for the jump target - assert(jump_and_target_id < 0); - // Negative opcode! - assert(trace[target].opcode > 0); - // Already assigned a jump ID - if (trace[target].opcode < 0) { - target_id = trace[target].opcode; - } - else { - // Else, assign a new jump ID. - jump_id_to_instruction[-target_id] = trace[target]; - trace[target].opcode = target_id; - jump_and_target_id--; - } - - // 1 for the jump - assert(jump_and_target_id < 0); - jump_id_to_instruction[-jump_and_target_id] = trace[i]; - // Negative opcode! 
- assert(trace[i].opcode >= 0); - trace[i].opcode = jump_and_target_id; - jump_and_target_id--; - // Point the jump to the target ID. - trace[i].oparg = target_id; - - } - } - *max_id = jump_and_target_id; - return jump_id_to_instruction; -} - -// Remove contiguous SAVE_IPs, leaving only the last one before a non-SAVE_IP instruction. -static int -remove_duplicate_save_ips(_PyUOpInstruction *trace, int trace_len) -{ -#ifdef Py_DEBUG - char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG"); - int lltrace = 0; - if (uop_debug != NULL && *uop_debug >= '0') { - lltrace = *uop_debug - '0'; // TODO: Parse an int and all that - } -#endif - - // Don't have to allocate a temporary trace array - // because the writer is guaranteed to be behind the reader. - int new_temp_len = 0; - - _PyUOpInstruction curr; - for (int i = 0; i < trace_len; i++) { - curr = trace[i]; - if (i < trace_len && curr.opcode == SAVE_IP && trace[i+1].opcode == SAVE_IP) { - continue; - } - trace[new_temp_len] = curr; - new_temp_len++; - } - - - DPRINTF(2, "Removed %d SAVE_IPs\n", trace_len - new_temp_len); - - return new_temp_len; -} - -/** - * Fixes all side exits due to jumps. This MUST be called as the last - * pass over the trace. Otherwise jumps will point to invalid ends. - * - * Runtime complexity of O(n*k), where n is trace length and k is number of jump - * instructions. Since k is usually quite low, this is nearly linear. -*/ -static void -fix_jump_side_exits(_PyUOpInstruction *trace, int trace_len, - _PyUOpInstruction *jump_id_to_instruction, int max_jump_id) -{ - for (int i = 0; i < trace_len; i++) { - int oparg = trace[i].oparg; - int opcode = trace[i].opcode; - // Indicates it's a jump target or jump instruction - if (opcode < 0 && opcode > max_jump_id) { - opcode = -opcode; - int real_opcode = jump_id_to_instruction[opcode].opcode; - if (op_is_jump(real_opcode)) { - trace[i].opcode = real_opcode; - - // Search for our target ID. 
- int target_id = oparg; - for (int x = 0; x < trace_len; x++) { - if (trace[x].opcode == target_id) { - trace[i].oparg = x; - break; - } - } - - assert(trace[i].oparg >= 0); - } - } - } - - // Final pass to swap out all the jump target IDs with their actual targets. - for (int i = 0; i < trace_len; i++) { - int opcode = trace[i].opcode; - // Indicates it's a jump target or jump instruction - if (opcode < 0 && opcode > max_jump_id) { - int real_oparg = jump_id_to_instruction[-opcode].oparg; - int real_opcode = jump_id_to_instruction[-opcode].opcode; - trace[i].oparg = real_oparg; - trace[i].opcode = real_opcode; - } - } -} - -#ifndef Py_DEBUG -#define GETITEM(v, i) PyList_GET_ITEM((v), (i)) -#else -static inline PyObject * -GETITEM(PyObject *v, Py_ssize_t i) { - assert(PyList_CheckExact(v)); - assert(i >= 0); - assert(i < PyList_GET_SIZE(v)); - return PyList_GET_ITEM(v, i); -} -#endif int _Py_uop_analyze_and_optimize( @@ -717,356 +22,5 @@ _Py_uop_analyze_and_optimize( int curr_stacklen ) { -#define STACK_LEVEL() ((int)(*stack_pointer - stack)) -#define STACK_SIZE() (co->co_stacksize) -#define BASIC_STACKADJ(n) (*stack_pointer += n) - -#ifdef Py_DEBUG -#define STACK_GROW(n) do { \ - assert(n >= 0); \ - BASIC_STACKADJ(n); \ - assert(STACK_LEVEL() <= STACK_SIZE()); \ - } while (0) -#define STACK_SHRINK(n) do { \ - assert(n >= 0); \ - assert(STACK_LEVEL() >= n); \ - BASIC_STACKADJ(-(n)); \ - } while (0) -#else -#define STACK_GROW(n) BASIC_STACKADJ(n) -#define STACK_SHRINK(n) BASIC_STACKADJ(-(n)) -#endif -#define PEEK(idx) (&((*stack_pointer)[-(idx)])) -#define GETLOCAL(idx) (&(locals[idx])) - -#define PARTITIONNODE_SET(src, dst, flag) partitionnode_set((src), (dst), (flag)) -#define PARTITIONNODE_OVERWRITE(src, dst, flag) partitionnode_overwrite(ctx, (src), (dst), (flag)) -#define MAKE_STATIC_ROOT(val) partitionnode_make_root(0, (val)) -#ifdef Py_DEBUG - char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG"); - int lltrace = 0; - if (uop_debug != NULL && *uop_debug >= '0') { - 
lltrace = *uop_debug - '0'; // TODO: Parse an int and all that - } -#endif - - PyObject *co_const_copy = NULL; - _PyUOpInstruction *jump_id_to_instruction = NULL; - - _PyUOpInstruction *temp_writebuffer = PyMem_New(_PyUOpInstruction, trace_len * OVERALLOCATE_FACTOR); - if (temp_writebuffer == NULL) { - return trace_len; - } - - int buffer_trace_len = 0; - - _Py_UOpsAbstractInterpContext *ctx = _Py_UOpsAbstractInterpContext_New( - co->co_stacksize, co->co_nlocals, curr_stacklen); - if (ctx == NULL) { - PyMem_Free(temp_writebuffer); - return trace_len; - } - - int max_jump_id = 0; - jump_id_to_instruction = number_jumps_and_targets(trace, trace_len, &max_jump_id); - if (jump_id_to_instruction == NULL) { - goto abstract_error; - } - - // We will be adding more constants due to partial evaluation. - co_const_copy = PyList_New(PyTuple_Size(co->co_consts)); - if (co_const_copy == NULL) { - goto abstract_error; - } - // Copy over the co_const tuple - for (int x = 0; x < PyTuple_GET_SIZE(co->co_consts); x++) { - PyList_SET_ITEM(co_const_copy, x, Py_NewRef(PyTuple_GET_ITEM(co->co_consts, x))); - } - - int oparg; - int opcode; - bool *stack_virtual_or_real = ctx->stack_virtual_or_real; - - _Py_PARTITIONNODE_t **stack_pointer = &ctx->stack_pointer; - _Py_PARTITIONNODE_t *stack = ctx->stack; - _Py_PARTITIONNODE_t *locals = ctx->locals; - for (int i = 0; i < trace_len; i++) { - oparg = trace[i].oparg; - opcode = trace[i].opcode; - - // Is a special jump/target ID, decode that - if (opcode < 0 && opcode > max_jump_id) { - DPRINTF(2, "Special jump target/ID %d\n", opcode); - oparg = jump_id_to_instruction[-opcode].oparg; - opcode = jump_id_to_instruction[-opcode].opcode; - } - - // Partial evaluation - the partition nodes already gave us the static-dynamic variable split. - // For partial evaluation, we simply need to follow these rules: - // 1. Operations on dynamic variables need to be emitted. 
- // If an operand was previously partially evaluated and not yet emitted, then emit the residual with a LOAD_CONST. - // 2. Operations on static variables are a no-op as the abstract interpreter already analyzed their results. - - // For all stack inputs, are their variables static? - int num_inputs = _PyOpcode_num_popped(opcode, oparg, false); - int num_dynamic_operands = 0; - - // We need to also check if this operation is "pure". That it can accept - // constant nodes, output constant nodes, and does not cause any side effects. - bool should_emit = !op_is_pure(opcode, oparg, locals); - - int virtual_objects = 0; - assert(num_inputs >= 0); - for (int x = num_inputs; x > 0; x--) { - if (!partitionnode_is_static(PEEK(x))) { - should_emit = true; - num_dynamic_operands++; - } - if (stack_virtual_or_real[STACK_LEVEL() - num_inputs]) { - virtual_objects++; - } - } - - int num_static_operands = num_inputs - num_dynamic_operands; - - assert(num_static_operands >= 0); - - - if (should_emit) { - if (num_static_operands > 0) { - int real_stack_size = num_dynamic_operands; - int virtual_stack_size = (int)(ctx->stack_pointer - ctx->stack); - assert(virtual_stack_size >= real_stack_size); - for (int x = num_inputs; x > 0; x--) { - // Re-materialise all virtual (partially-evaluated) constants - if (partitionnode_is_static(PEEK(x)) && stack_virtual_or_real[STACK_LEVEL() - x]) { - stack_virtual_or_real[STACK_LEVEL() - x] = false; - PyObject *const_val = get_const(PEEK(x)); - _PyUOpInstruction load_const; - load_const.opcode = LOAD_CONST; - load_const.oparg = (int)PyList_GET_SIZE(co_const_copy); - if (PyList_Append(co_const_copy, const_val) < 0) { - goto abstract_error; - } - - DPRINTF(2, "Emitting LOAD_CONST\n"); - - temp_writebuffer[buffer_trace_len] = load_const; - buffer_trace_len++; - - - // INSERT to the correct position in the stack - int target_entry = virtual_stack_size - x; - - int offset_from_target = real_stack_size - target_entry; - if (offset_from_target > 0) { - 
_PyUOpInstruction insert; - insert.opcode = INSERT; - insert.oparg = offset_from_target; - - DPRINTF(2, "Emitting INSERT %d\n", offset_from_target); - - temp_writebuffer[buffer_trace_len] = insert; - buffer_trace_len++; - } - - // Use the next SAVE_IP - int temp = i; - for (; trace[temp].opcode != SAVE_IP && temp < trace_len; temp++); - assert(trace[temp].opcode == SAVE_IP); - - DPRINTF(2, "Emitting SAVE_IP\n"); - - temp_writebuffer[buffer_trace_len] = trace[temp]; - buffer_trace_len++; - num_dynamic_operands++; - } - - } - } - - DPRINTF(2, "Emitting %s\n", (opcode >= 300 ? _PyOpcode_uop_name : _PyOpcode_OpName)[opcode]); - - temp_writebuffer[buffer_trace_len] = trace[i]; - buffer_trace_len++; - } - /* - * The following are special cased: - * @TODO: shift these to the DSL - */ - - - DPRINTF(2, " [-] Type propagating across: %s{%d} : %d. {reader: %d, writer: %d}\n", - (opcode >= 300 ? _PyOpcode_uop_name : _PyOpcode_OpName)[opcode], - opcode, oparg, - i, buffer_trace_len); - - switch (opcode) { -#include "abstract_interp_cases.c.h" - // @TODO convert these to autogenerated using DSL - case LOAD_FAST: - case LOAD_FAST_CHECK: - STACK_GROW(1); - PARTITIONNODE_OVERWRITE(GETLOCAL(oparg), PEEK(1), false); - break; - case LOAD_FAST_AND_CLEAR: { - STACK_GROW(1); - PARTITIONNODE_OVERWRITE(GETLOCAL(oparg), PEEK(1), false); - PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, GETLOCAL(oparg), true); - break; - } - case LOAD_CONST: { - _Py_PARTITIONNODE_t* value = (_Py_PARTITIONNODE_t *)MAKE_STATIC_ROOT(GETITEM(co_const_copy, oparg)); - STACK_GROW(1); - PARTITIONNODE_OVERWRITE(value, PEEK(1), true); - break; - } - case STORE_FAST: - case STORE_FAST_MAYBE_NULL: { - _Py_PARTITIONNODE_t *value = PEEK(1); - PARTITIONNODE_OVERWRITE(value, GETLOCAL(oparg), false); - STACK_SHRINK(1); - break; - } - case COPY: { - _Py_PARTITIONNODE_t *bottom = PEEK(1 + (oparg - 1)); - STACK_GROW(1); - PARTITIONNODE_OVERWRITE(bottom, PEEK(1), false); - break; - } - - // Arithmetic 
operations - - case _BINARY_OP_MULTIPLY_INT: { - if (!should_emit) { - PyObject *right; - PyObject *left; - PyObject *res; - right = get_const(PEEK(1)); - left = get_const(PEEK(2)); - STAT_INC(BINARY_OP, hit); - res = _PyLong_Multiply((PyLongObject *)left, (PyLongObject *)right); - if (res == NULL) goto abstract_error; - STACK_SHRINK(1); - PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)MAKE_STATIC_ROOT(res), PEEK(-(-1)), true); - break; - } - else { - STACK_SHRINK(1); - PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); - break; - } - - } - - case _BINARY_OP_ADD_INT: { - if (!should_emit) { - PyObject *right; - PyObject *left; - PyObject *res; - right = get_const(PEEK(1)); - left = get_const(PEEK(2)); - STAT_INC(BINARY_OP, hit); - res = _PyLong_Add((PyLongObject *)left, (PyLongObject *)right); - if (res == NULL) goto abstract_error; - STACK_SHRINK(1); - PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)MAKE_STATIC_ROOT(res), PEEK(-(-1)), true); - break; - } - else { - STACK_SHRINK(1); - PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); - break; - } - } - - case _BINARY_OP_SUBTRACT_INT: { - if (!should_emit) { - PyObject *right; - PyObject *left; - PyObject *res; - right = get_const(PEEK(1)); - left = get_const(PEEK(2)); - STAT_INC(BINARY_OP, hit); - res = _PyLong_Subtract((PyLongObject *)left, (PyLongObject *)right); - if (res == NULL) goto abstract_error; - STACK_SHRINK(1); - PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)MAKE_STATIC_ROOT(res), PEEK(-(-1)), true); - break; - } - else { - STACK_SHRINK(1); - PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); - break; - } - } - default: - DPRINTF(1, "Unknown opcode in abstract interpreter\n"); - Py_UNREACHABLE(); - } - -#ifdef Py_DEBUG - print_ctx(ctx); -#endif - - // Mark all stack outputs as virtual or real - int stack_outputs = _PyOpcode_num_pushed(opcode, oparg, false); - for (int y = 
stack_outputs; y > 0; y--) { - stack_virtual_or_real[STACK_LEVEL() - y] = !should_emit; - } - - if (opcode == EXIT_TRACE) { - // Copy the rest of the stubs over, then end. - - DPRINTF(2, "Exit trace encountered, emitting the rest of the stubs\n"); - - i++; // We've already emitted an EXIT_TRACE - for (; i < trace_len; i++) { - - DPRINTF(2, "Emitting %s\n", (opcode >= 300 ? _PyOpcode_uop_name : _PyOpcode_OpName)[opcode]); - - temp_writebuffer[buffer_trace_len] = trace[i]; - buffer_trace_len++; - } - break; - } - } - assert(STACK_SIZE() >= 0); - buffer_trace_len = remove_duplicate_save_ips(temp_writebuffer, buffer_trace_len); - fix_jump_side_exits(temp_writebuffer, buffer_trace_len, jump_id_to_instruction, max_jump_id); - assert(buffer_trace_len <= trace_len); - -#ifdef Py_DEBUG - if (buffer_trace_len < trace_len) { - DPRINTF(2, "Shortened trace by %d instructions\n", trace_len - buffer_trace_len); - } -#endif - - Py_DECREF(ctx); - - PyObject *co_const_final = PyTuple_New(PyList_Size(co_const_copy)); - if (co_const_final == NULL) { - goto abstract_error; - } - // Copy over the co_const tuple - for (int x = 0; x < PyList_GET_SIZE(co_const_copy); x++) { - PyTuple_SET_ITEM(co_const_final, x, Py_NewRef(PyList_GET_ITEM(co_const_copy, x))); - } - - - Py_SETREF(co->co_consts, co_const_final); - Py_XDECREF(co_const_copy); - memcpy(trace, temp_writebuffer, buffer_trace_len * sizeof(_PyUOpInstruction)); - PyMem_Free(temp_writebuffer); - PyMem_Free(jump_id_to_instruction); - return buffer_trace_len; - -abstract_error: - Py_XDECREF(co_const_copy); - Py_DECREF(ctx); - PyMem_Free(temp_writebuffer); - PyMem_Free(jump_id_to_instruction); - assert(PyErr_Occurred()); - PyErr_Clear(); return trace_len; } From 80c7f1826d6c051d3a0648ef945e4ac65a7e9b98 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 15 Aug 2023 17:31:07 +0800 Subject: [PATCH 47/48] revert more changes --- Include/internal/pycore_uops.h | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_uops.h b/Include/internal/pycore_uops.h index 30b87e43a3f5d5..254eeca2361bea 100644 --- a/Include/internal/pycore_uops.h +++ b/Include/internal/pycore_uops.h @@ -11,8 +11,8 @@ extern "C" { #define _Py_UOP_MAX_TRACE_LENGTH 64 typedef struct { - int32_t opcode; - int32_t oparg; + uint32_t opcode; + uint32_t oparg; uint64_t operand; // A cache entry } _PyUOpInstruction; From 6a2b204ef97f60cd4d095ab6cbd8c12d2dbc6bdf Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 16 Aug 2023 01:23:07 +0800 Subject: [PATCH 48/48] use memmove --- Python/bytecodes.c | 4 +--- Python/executor_cases.c.h | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 9004472e88f2b7..e9a5cf59e7d689 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3745,9 +3745,7 @@ dummy_func( op(INSERT, (unused[oparg], top -- top, unused[oparg])) { // Inserts TOS at position specified by oparg; - for (int i = 1; i < oparg + 1; i++) { - stack_pointer[-i] = stack_pointer[-(i - 1)]; - } + memmove(&stack_pointer[-1 - oparg], &stack_pointer[-oparg], oparg * sizeof(stack_pointer[0])); } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 5caf6a52ede352..85d27777423abd 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2738,9 +2738,7 @@ PyObject *top; top = stack_pointer[-1]; // Inserts TOS at position specified by oparg; - for (int i = 1; i < oparg + 1; i++) { - stack_pointer[-i] = stack_pointer[-(i - 1)]; - } + memmove(&stack_pointer[-1 - oparg], &stack_pointer[-oparg], oparg * sizeof(stack_pointer[0])); stack_pointer[-1 - oparg] = top; break; } pFad - Phonifier reborn

pFad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy